mirror of https://github.com/PCSX2/pcsx2.git
Mostly code cleanups, Xbyak 2.99, VEX conversion for the software renderer (3-5% faster), and a GSState::Move fix for the Dark Cloud 2 invention crash.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4287 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
e2d36a53a4
commit
ca7abd983a
|
@ -193,6 +193,7 @@ static const int __pagesize = PCSX2_PAGESIZE;
|
|||
|
||||
# define __aligned(alig) __declspec(align(alig))
|
||||
# define __aligned16 __declspec(align(16))
|
||||
# define __aligned32 __declspec(align(32))
|
||||
# define __pagealigned __declspec(align(PCSX2_PAGESIZE))
|
||||
|
||||
// Deprecated; use __align instead.
|
||||
|
|
|
@ -153,7 +153,7 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
|||
{
|
||||
GSDevice* dev = NULL;
|
||||
|
||||
if( renderer == -1 )
|
||||
if(renderer == -1)
|
||||
{
|
||||
renderer = theApp.GetConfig("renderer", 0);
|
||||
}
|
||||
|
@ -167,6 +167,7 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
|||
// GSopen call then they'll get corrupted graphics, but that's not my problem.
|
||||
|
||||
delete s_gs;
|
||||
|
||||
s_gs = NULL;
|
||||
}
|
||||
|
||||
|
@ -178,20 +179,25 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
|||
case 12: case 13: new GSDeviceNull(); break;
|
||||
}
|
||||
|
||||
if( !dev ) return -1;
|
||||
if(!dev) return -1;
|
||||
|
||||
if( !s_gs )
|
||||
if(!s_gs)
|
||||
{
|
||||
switch(renderer)
|
||||
{
|
||||
default:
|
||||
case 0: s_gs = new GSRendererDX9(); break;
|
||||
case 3: s_gs = new GSRendererDX11(); break;
|
||||
case 0:
|
||||
s_gs = new GSRendererDX9();
|
||||
break;
|
||||
case 3:
|
||||
s_gs = new GSRendererDX11();
|
||||
break;
|
||||
case 2: case 5: case 8: case 11: case 13:
|
||||
s_gs = new GSRendererNull(); break;
|
||||
|
||||
s_gs = new GSRendererNull();
|
||||
break;
|
||||
case 1: case 4: case 7: case 10: case 12:
|
||||
s_gs = new GSRendererSW(); break;
|
||||
s_gs = new GSRendererSW();
|
||||
break;
|
||||
}
|
||||
|
||||
s_renderer = renderer;
|
||||
|
@ -519,72 +525,6 @@ EXPORT_C GSsetFrameLimit(int limit)
|
|||
|
||||
#ifdef _WINDOWS
|
||||
|
||||
// Returns false if the window's been closed or an invalid packet was encountered.
|
||||
static __forceinline bool LoopDatPacket_Thingamajig(HWND hWnd, uint8 (&regs)[0x2000], vector<uint8>& buff, FILE* fp, long start)
|
||||
{
|
||||
switch(fgetc(fp))
|
||||
{
|
||||
case EOF:
|
||||
fseek(fp, start, 0);
|
||||
return !!IsWindowVisible(hWnd);
|
||||
|
||||
case 0:
|
||||
{
|
||||
uint32 index = fgetc(fp);
|
||||
uint32 size;
|
||||
|
||||
fread(&size, 4, 1, fp);
|
||||
|
||||
switch(index)
|
||||
{
|
||||
case 0:
|
||||
{
|
||||
if(buff.size() < 0x4000) buff.resize(0x4000);
|
||||
uint32 addr = 0x4000 - size;
|
||||
fread(&buff[0] + addr, size, 1, fp);
|
||||
GSgifTransfer1(&buff[0], addr);
|
||||
}
|
||||
break;
|
||||
|
||||
case 1:
|
||||
if(buff.size() < size) buff.resize(size);
|
||||
fread(&buff[0], size, 1, fp);
|
||||
GSgifTransfer2(&buff[0], size / 16);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
if(buff.size() < size) buff.resize(size);
|
||||
fread(&buff[0], size, 1, fp);
|
||||
GSgifTransfer3(&buff[0], size / 16);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 1:
|
||||
GSvsync(fgetc(fp));
|
||||
return !!IsWindowVisible(hWnd);
|
||||
|
||||
case 2:
|
||||
{
|
||||
uint32 size;
|
||||
fread(&size, 4, 1, fp);
|
||||
if(buff.size() < size) buff.resize(size);
|
||||
GSreadFIFO2(&buff[0], size / 16);
|
||||
}
|
||||
break;
|
||||
|
||||
case 3:
|
||||
fread(regs, 0x2000, 1, fp);
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// lpszCmdLine:
|
||||
// First parameter is the renderer.
|
||||
// Second parameter is the gs file to load and run.
|
||||
|
@ -634,7 +574,73 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
|
|||
|
||||
GSvsync(1);
|
||||
|
||||
while( LoopDatPacket_Thingamajig(hWnd, regs, buff, fp, start) ) ;
|
||||
bool exit = false;
|
||||
|
||||
while(!exit)
|
||||
{
|
||||
uint32 index;
|
||||
uint32 size;
|
||||
uint32 addr;
|
||||
|
||||
int pos;
|
||||
|
||||
switch(fgetc(fp))
|
||||
{
|
||||
case EOF:
|
||||
fseek(fp, start, 0);
|
||||
exit = !IsWindowVisible(hWnd);
|
||||
break;
|
||||
|
||||
case 0:
|
||||
index = fgetc(fp);
|
||||
fread(&size, 4, 1, fp);
|
||||
|
||||
switch(index)
|
||||
{
|
||||
case 0:
|
||||
if(buff.size() < 0x4000) buff.resize(0x4000);
|
||||
addr = 0x4000 - size;
|
||||
fread(buff.data() + addr, size, 1, fp);
|
||||
GSgifTransfer1(buff.data(), addr);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
if(buff.size() < size) buff.resize(size);
|
||||
fread(buff.data(), size, 1, fp);
|
||||
GSgifTransfer2(buff.data(), size / 16);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
if(buff.size() < size) buff.resize(size);
|
||||
fread(buff.data(), size, 1, fp);
|
||||
GSgifTransfer3(buff.data(), size / 16);
|
||||
break;
|
||||
|
||||
case 3:
|
||||
if(buff.size() < size) buff.resize(size);
|
||||
fread(buff.data(), size, 1, fp);
|
||||
GSgifTransfer(buff.data(), size / 16);
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case 1:
|
||||
GSvsync(fgetc(fp));
|
||||
exit = !IsWindowVisible(hWnd);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
fread(&size, 4, 1, fp);
|
||||
if(buff.size() < size) buff.resize(size);
|
||||
GSreadFIFO2(&buff[0], size / 16);
|
||||
break;
|
||||
|
||||
case 3:
|
||||
fread(regs, 0x2000, 1, fp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
GSclose();
|
||||
GSshutdown();
|
||||
|
@ -672,7 +678,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
|
|||
{PSM_PSMZ16S, "16ZS"},
|
||||
};
|
||||
|
||||
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16);
|
||||
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);
|
||||
|
||||
for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i;
|
||||
|
||||
|
@ -809,7 +815,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
|
|||
{
|
||||
GSLocalMemory mem;
|
||||
|
||||
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16);
|
||||
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);
|
||||
|
||||
for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i;
|
||||
|
||||
|
|
|
@ -77,6 +77,7 @@ enum GIF_REG
|
|||
GIF_REG_CLAMP_1 = 0x08,
|
||||
GIF_REG_CLAMP_2 = 0x09,
|
||||
GIF_REG_FOG = 0x0a,
|
||||
GIF_REG_INVALID = 0x0b,
|
||||
GIF_REG_XYZF3 = 0x0c,
|
||||
GIF_REG_XYZ3 = 0x0d,
|
||||
GIF_REG_A_D = 0x0e,
|
||||
|
@ -1077,7 +1078,7 @@ REG128_SET(GIFPackedReg)
|
|||
GIFPackedNOP NOP;
|
||||
REG_SET_END
|
||||
|
||||
__aligned16 struct GIFPath
|
||||
__aligned32 struct GIFPath
|
||||
{
|
||||
GIFTag tag;
|
||||
uint32 reg;
|
||||
|
@ -1107,9 +1108,12 @@ __aligned16 struct GIFPath
|
|||
if((++reg & 0xf) == nreg)
|
||||
{
|
||||
reg = 0;
|
||||
|
||||
if(--nloop == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1201,7 +1201,7 @@ public:
|
|||
|
||||
#else
|
||||
/*
|
||||
__aligned16 uint32 block[8 * 8];
|
||||
__aligned32 uint32 block[8 * 8];
|
||||
|
||||
UnpackBlock4HL(src, srcpitch, block);
|
||||
|
||||
|
@ -1316,7 +1316,7 @@ public:
|
|||
|
||||
#else
|
||||
/*
|
||||
__aligned16 uint32 block[8 * 8];
|
||||
__aligned32 uint32 block[8 * 8];
|
||||
|
||||
UnpackBlock4HH(src, srcpitch, block);
|
||||
|
||||
|
@ -1467,7 +1467,7 @@ public:
|
|||
|
||||
#else
|
||||
|
||||
__aligned16 uint8 block[16 * 16];
|
||||
__aligned32 uint8 block[16 * 16];
|
||||
|
||||
ReadBlock8<true>(src, (uint8*)block, sizeof(block) / 16);
|
||||
|
||||
|
@ -1542,7 +1542,7 @@ public:
|
|||
|
||||
#else
|
||||
|
||||
__aligned16 uint8 block[(32 / 2) * 16];
|
||||
__aligned32 uint8 block[(32 / 2) * 16];
|
||||
|
||||
ReadBlock4<true>(src, (uint8*)block, sizeof(block) / 16);
|
||||
|
||||
|
@ -1583,7 +1583,7 @@ public:
|
|||
|
||||
#else
|
||||
|
||||
__aligned16 uint32 block[8 * 8];
|
||||
__aligned32 uint32 block[8 * 8];
|
||||
|
||||
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
|
@ -1624,7 +1624,7 @@ public:
|
|||
|
||||
#else
|
||||
|
||||
__aligned16 uint32 block[8 * 8];
|
||||
__aligned32 uint32 block[8 * 8];
|
||||
|
||||
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
|
@ -1665,7 +1665,7 @@ public:
|
|||
|
||||
#else
|
||||
|
||||
__aligned16 uint32 block[8 * 8];
|
||||
__aligned32 uint32 block[8 * 8];
|
||||
|
||||
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
|
|
|
@ -68,7 +68,8 @@ void GSCaptureDlg::OnInit()
|
|||
|
||||
ComboBoxAppend(IDC_CODECS, "Uncompressed", 0, true);
|
||||
|
||||
CoInitialize(0);
|
||||
CoInitialize(0); // this is obviously wrong here, each thread should call this on start, and where is CoUninitialize?
|
||||
|
||||
BeginEnumSysDev(CLSID_VideoCompressorCategory, moniker)
|
||||
{
|
||||
Codec c;
|
||||
|
@ -195,6 +196,7 @@ bool GSCaptureDlg::OnCommand(HWND hWnd, UINT id, UINT code)
|
|||
if (ris != 2)
|
||||
{
|
||||
wstring s = wstring(c.DisplayName.m_str);
|
||||
|
||||
theApp.SetConfig("CaptureVideoCodecDisplayName", string(s.begin(), s.end()).c_str());
|
||||
}
|
||||
else
|
||||
|
|
|
@ -126,7 +126,7 @@ void GSClut::Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
|||
|
||||
void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ASSERT(TEX0.CSA == 0);
|
||||
|
||||
|
@ -135,7 +135,7 @@ void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TE
|
|||
|
||||
void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ASSERT(TEX0.CSA < 16);
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
|
||||
class GSLocalMemory;
|
||||
|
||||
__aligned16 class GSClut : public GSAlignedClass<16>
|
||||
__aligned32 class GSClut : public GSAlignedClass<32>
|
||||
{
|
||||
GSLocalMemory* m_mem;
|
||||
|
||||
|
@ -37,7 +37,7 @@ __aligned16 class GSClut : public GSAlignedClass<16>
|
|||
uint32* m_buff32;
|
||||
uint64* m_buff64;
|
||||
|
||||
__aligned16 struct WriteState
|
||||
__aligned32 struct WriteState
|
||||
{
|
||||
GIFRegTEX0 TEX0;
|
||||
GIFRegTEXCLUT TEXCLUT;
|
||||
|
@ -45,7 +45,7 @@ __aligned16 class GSClut : public GSAlignedClass<16>
|
|||
bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
||||
} m_write;
|
||||
|
||||
__aligned16 struct ReadState
|
||||
__aligned32 struct ReadState
|
||||
{
|
||||
GIFRegTEX0 TEX0;
|
||||
GIFRegTEXA TEXA;
|
||||
|
|
|
@ -145,8 +145,11 @@ void GSDevice::Recycle(GSTexture* t)
|
|||
if(t)
|
||||
{
|
||||
t->last_frame_used = m_frame;
|
||||
|
||||
m_pool.push_front(t);
|
||||
|
||||
//printf("%d\n",m_pool.size());
|
||||
|
||||
while(m_pool.size() > 300)
|
||||
{
|
||||
delete m_pool.back();
|
||||
|
@ -159,9 +162,11 @@ void GSDevice::Recycle(GSTexture* t)
|
|||
void GSDevice::AgePool()
|
||||
{
|
||||
m_frame++;
|
||||
while (m_pool.size() > 20 && m_frame - m_pool.back()->last_frame_used > 10)
|
||||
|
||||
while(m_pool.size() > 20 && m_frame - m_pool.back()->last_frame_used > 10)
|
||||
{
|
||||
delete m_pool.back();
|
||||
|
||||
m_pool.pop_back();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ struct InterlaceConstantBuffer
|
|||
|
||||
#pragma pack(pop)
|
||||
|
||||
class GSDevice : public GSAlignedClass<16>
|
||||
class GSDevice : public GSAlignedClass<32>
|
||||
{
|
||||
list<GSTexture*> m_pool;
|
||||
|
||||
|
@ -66,7 +66,7 @@ protected:
|
|||
struct {size_t stride, start, count, limit;} m_vertices;
|
||||
uint32 m_msaa;
|
||||
DXGI_SAMPLE_DESC m_msaa_desc;
|
||||
unsigned m_frame; // for ageing the pool
|
||||
unsigned int m_frame; // for ageing the pool
|
||||
|
||||
virtual GSTexture* Create(int type, int w, int h, bool msaa, int format) = 0;
|
||||
|
||||
|
|
|
@ -229,8 +229,10 @@ bool GSDevice11::Create(GSWnd* wnd)
|
|||
}
|
||||
}
|
||||
|
||||
if (m_msaa_desc.Count == 1)
|
||||
if(m_msaa_desc.Count == 1)
|
||||
{
|
||||
m_msaa = 0;
|
||||
}
|
||||
|
||||
// convert
|
||||
|
||||
|
@ -378,7 +380,7 @@ bool GSDevice11::Create(GSWnd* wnd)
|
|||
|
||||
if(m_wnd->IsManaged())
|
||||
{
|
||||
SetExclusive( !theApp.GetConfig("windowed", 1) );
|
||||
SetExclusive(!theApp.GetConfig("windowed", 1));
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -392,11 +394,14 @@ bool GSDevice11::Reset(int w, int h)
|
|||
if(m_swapchain)
|
||||
{
|
||||
DXGI_SWAP_CHAIN_DESC scd;
|
||||
|
||||
memset(&scd, 0, sizeof(scd));
|
||||
|
||||
m_swapchain->GetDesc(&scd);
|
||||
m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0);
|
||||
|
||||
CComPtr<ID3D11Texture2D> backbuffer;
|
||||
|
||||
if(FAILED(m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer)))
|
||||
{
|
||||
return false;
|
||||
|
@ -422,9 +427,12 @@ void GSDevice11::SetExclusive(bool isExcl)
|
|||
m_swapchain->ResizeTarget(&desc);
|
||||
*/
|
||||
|
||||
HRESULT hr = m_swapchain->SetFullscreenState( isExcl, NULL );
|
||||
HRESULT hr = m_swapchain->SetFullscreenState(isExcl, NULL);
|
||||
|
||||
if(hr == DXGI_ERROR_NOT_CURRENTLY_AVAILABLE)
|
||||
{
|
||||
fprintf(stderr, "(GSdx10) SetExclusive(%s) failed; request unavailable.", isExcl ? "true" : "false");
|
||||
}
|
||||
}
|
||||
|
||||
void GSDevice11::Flip()
|
||||
|
@ -885,10 +893,13 @@ void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
|
|||
void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
|
||||
{
|
||||
ID3D11ShaderResourceView* srv = NULL;
|
||||
if (sr) srv = *(GSTexture11*)sr;
|
||||
|
||||
if (m_state.ps_srv[i] != srv) {
|
||||
if(sr) srv = *(GSTexture11*)sr;
|
||||
|
||||
if(m_state.ps_srv[i] != srv)
|
||||
{
|
||||
m_state.ps_srv[i] = srv;
|
||||
|
||||
m_srv_changed = true;
|
||||
}
|
||||
}
|
||||
|
@ -914,13 +925,17 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
|
|||
m_ctx->PSSetShader(ps, NULL, 0);
|
||||
}
|
||||
|
||||
if (m_srv_changed) {
|
||||
if (m_srv_changed)
|
||||
{
|
||||
m_ctx->PSSetShaderResources(0, 3, m_state.ps_srv);
|
||||
|
||||
m_srv_changed = false;
|
||||
}
|
||||
|
||||
if (m_ss_changed) {
|
||||
if(m_ss_changed)
|
||||
{
|
||||
m_ctx->PSSetSamplers(0, 3, m_state.ps_ss);
|
||||
|
||||
m_ss_changed = false;
|
||||
}
|
||||
|
||||
|
@ -982,8 +997,8 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
|
|||
|
||||
vp.TopLeftX = 0;
|
||||
vp.TopLeftY = 0;
|
||||
vp.Width = (FLOAT)rt->GetWidth();
|
||||
vp.Height = (FLOAT)rt->GetHeight();
|
||||
vp.Width = (float)rt->GetWidth();
|
||||
vp.Height = (float)rt->GetHeight();
|
||||
vp.MinDepth = 0.0f;
|
||||
vp.MaxDepth = 1.0f;
|
||||
|
||||
|
|
|
@ -31,7 +31,6 @@ GSDevice9::GSDevice9()
|
|||
|
||||
memset(&m_pp, 0, sizeof(m_pp));
|
||||
memset(&m_d3dcaps, 0, sizeof(m_d3dcaps));
|
||||
|
||||
memset(&m_state, 0, sizeof(m_state));
|
||||
|
||||
m_state.bf = 0xffffffff;
|
||||
|
@ -39,81 +38,109 @@ GSDevice9::GSDevice9()
|
|||
|
||||
// frees the shader constant copies and the objects stored in the state containers

GSDevice9::~GSDevice9()
{
	// vs_cb / ps_cb are allocated with _aligned_malloc in VSSetShader / PSSetShader

	if(m_state.vs_cb != NULL)
	{
		_aligned_free(m_state.vs_cb);
	}

	if(m_state.ps_cb != NULL)
	{
		_aligned_free(m_state.ps_cb);
	}

	// delete_second is applied to every element of each container
	// (presumably deletes the mapped value - confirm against its definition)

	for_each(m_om_bs.begin(), m_om_bs.end(), delete_second());
	for_each(m_om_dss.begin(), m_om_dss.end(), delete_second());
	for_each(m_ps_ss.begin(), m_ps_ss.end(), delete_second());
	for_each(m_mskfix.begin(), m_mskfix.end(), delete_second());
}
|
||||
|
||||
// if supported and null != msaa_desc, msaa_desc will contain requested Count and Quality
|
||||
|
||||
static bool IsMsaaSupported(IDirect3D9* d3d, D3DFORMAT depth_format, uint msaaCount, DXGI_SAMPLE_DESC* msaa_desc = NULL)
|
||||
{
|
||||
if(msaaCount > 16) return false;
|
||||
|
||||
//if supported and null!=msaa_desc, msaa_desc will contain requested Count and Quality
|
||||
static bool IsMsaaSupported(CComPtr<IDirect3D9>& d3d, D3DFORMAT depth_format, uint msaaCount, OUT DXGI_SAMPLE_DESC* msaa_desc=NULL){
|
||||
D3DCAPS9 d3dcaps;
|
||||
|
||||
if (msaaCount>16) return false;
|
||||
|
||||
memset(&d3dcaps, 0, sizeof(d3dcaps));
|
||||
|
||||
d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &d3dcaps);
|
||||
|
||||
DWORD quality[2] = {0, 0};
|
||||
|
||||
if(SUCCEEDED(d3d->CheckDeviceMultiSampleType(d3dcaps.AdapterOrdinal, d3dcaps.DeviceType, D3DFMT_A8R8G8B8, TRUE, (D3DMULTISAMPLE_TYPE)msaaCount, &quality[0])) && quality[0] >0
|
||||
&& SUCCEEDED(d3d->CheckDeviceMultiSampleType(d3dcaps.AdapterOrdinal, d3dcaps.DeviceType, depth_format, TRUE, (D3DMULTISAMPLE_TYPE)msaaCount, &quality[1])) && quality[1] >0
|
||||
){
|
||||
if (msaa_desc){
|
||||
if(SUCCEEDED(d3d->CheckDeviceMultiSampleType(d3dcaps.AdapterOrdinal, d3dcaps.DeviceType, D3DFMT_A8R8G8B8, TRUE, (D3DMULTISAMPLE_TYPE)msaaCount, &quality[0])) && quality[0] > 0
|
||||
&& SUCCEEDED(d3d->CheckDeviceMultiSampleType(d3dcaps.AdapterOrdinal, d3dcaps.DeviceType, depth_format, TRUE, (D3DMULTISAMPLE_TYPE)msaaCount, &quality[1])) && quality[1] > 0)
|
||||
{
|
||||
if(msaa_desc)
|
||||
{
|
||||
msaa_desc->Count = msaaCount;
|
||||
msaa_desc->Quality = std::min<DWORD>(quality[0] - 1, quality[1] - 1);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// returns true when the default adapter's HAL device accepts the given format as a
// depth-stencil surface on an X8R8G8B8 adapter format, and that format can be paired
// with an X8R8G8B8 render target

static bool TestDepthFormat(IDirect3D9* d3d, D3DFORMAT format)
{
	// the second query is only issued when the first one succeeds

	return SUCCEEDED(d3d->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, format))
		&& SUCCEEDED(d3d->CheckDepthStencilMatch(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, D3DFMT_X8R8G8B8, D3DFMT_X8R8G8B8, format));
}
|
||||
|
||||
// picks the most preferred depth format that passes TestDepthFormat and, when
// msaaCount is non-zero, also passes IsMsaaSupported (which fills msaa_desc on
// success). an msaaCount of 1 is treated as 0 (no msaa).
// returns D3DFMT_UNKNOWN when d3d is null or no candidate qualifies

static D3DFORMAT BestD3dFormat(IDirect3D9* d3d, int msaaCount = 0, DXGI_SAMPLE_DESC* msaa_desc = NULL)
{
	// In descending order of preference

	static const D3DFORMAT fmts[] =
	{
		D3DFMT_D32,
		D3DFMT_D32F_LOCKABLE,
		D3DFMT_D24S8
	};

	// element count, not sizeof(fmts): the byte size would run the loop past the end of the array

	static const int count = (int)(sizeof(fmts) / sizeof(fmts[0]));

	if(!d3d) return D3DFMT_UNKNOWN;

	if(1 == msaaCount) msaaCount = 0;

	for(int i = 0; i < count; i++)
	{
		if(TestDepthFormat(d3d, fmts[i]) && (!msaaCount || IsMsaaSupported(d3d, fmts[i], msaaCount, msaa_desc)))
		{
			return fmts[i];
		}
	}

	return D3DFMT_UNKNOWN;
}
|
||||
|
||||
//return: 32, 24, or 0 if not supported. if 1==msaa, considered as msaa=0
|
||||
uint GSDevice9::GetMaxDepth(uint msaa=0){
|
||||
// return: 32, 24, or 0 if not supported. if 1==msaa, considered as msaa=0
|
||||
|
||||
uint GSDevice9::GetMaxDepth(uint msaa = 0)
|
||||
{
|
||||
CComPtr<IDirect3D9> d3d;
|
||||
|
||||
d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION));
|
||||
|
||||
D3DFORMAT f=BestD3dFormat(d3d, msaa);
|
||||
switch (f){
|
||||
case D3DFMT_D32: case D3DFMT_D32F_LOCKABLE: return 32;
|
||||
case D3DFMT_D24S8: return 24;
|
||||
switch(BestD3dFormat(d3d, msaa))
|
||||
{
|
||||
case D3DFMT_D32:
|
||||
case D3DFMT_D32F_LOCKABLE:
|
||||
return 32;
|
||||
case D3DFMT_D24S8:
|
||||
return 24;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void GSDevice9::ForceValidMsaaConfig(){
|
||||
if (0==GetMaxDepth(theApp.GetConfig("msaa", 0)))
|
||||
theApp.SetConfig("msaa", 0);//replace invalid msaa value in ini file with 0.
|
||||
void GSDevice9::ForceValidMsaaConfig()
|
||||
{
|
||||
if(0 == GetMaxDepth(theApp.GetConfig("msaa", 0)))
|
||||
{
|
||||
theApp.SetConfig("msaa", 0); // replace invalid msaa value in ini file with 0.
|
||||
}
|
||||
};
|
||||
|
||||
bool GSDevice9::Create(GSWnd* wnd)
|
||||
|
@ -128,17 +155,26 @@ bool GSDevice9::Create(GSWnd* wnd)
|
|||
m_d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION));
|
||||
|
||||
if(!m_d3d) return false;
|
||||
ForceValidMsaaConfig();
|
||||
//Get best format/depth for msaa. Assumption is that if the resulting depth is 24 instead of possible 32,
|
||||
// the user was already warned when she selected it. (Lower res z buffer without warning is unacceptable).
|
||||
m_depth_format=BestD3dFormat(m_d3d, m_msaa, &m_msaa_desc);
|
||||
if (D3DFMT_UNKNOWN == m_depth_format){
|
||||
//can't find a format with requested msaa, try without.
|
||||
m_depth_format = BestD3dFormat(m_d3d, 0);
|
||||
if (D3DFMT_UNKNOWN == m_depth_format)
|
||||
return false;
|
||||
|
||||
m_msaa=0;
|
||||
ForceValidMsaaConfig();
|
||||
|
||||
// Get best format/depth for msaa. Assumption is that if the resulting depth is 24 instead of possible 32,
|
||||
// the user was already warned when she selected it. (Lower res z buffer without warning is unacceptable).
|
||||
|
||||
m_depth_format = BestD3dFormat(m_d3d, m_msaa, &m_msaa_desc);
|
||||
|
||||
if(D3DFMT_UNKNOWN == m_depth_format)
|
||||
{
|
||||
// can't find a format with requested msaa, try without.
|
||||
|
||||
m_depth_format = BestD3dFormat(m_d3d, 0);
|
||||
|
||||
if(D3DFMT_UNKNOWN == m_depth_format)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
m_msaa = 0;
|
||||
}
|
||||
|
||||
memset(&m_d3dcaps, 0, sizeof(m_d3dcaps));
|
||||
|
@ -180,7 +216,6 @@ bool GSDevice9::Create(GSWnd* wnd)
|
|||
return false;
|
||||
}
|
||||
|
||||
|
||||
if(!Reset(1, 1))
|
||||
{
|
||||
return false;
|
||||
|
@ -274,7 +309,8 @@ bool GSDevice9::Create(GSWnd* wnd)
|
|||
|
||||
void GSDevice9::SetVsync(bool enable)
|
||||
{
|
||||
if( m_vsync == enable ) return;
|
||||
if(m_vsync == enable) return;
|
||||
|
||||
__super::SetVsync(enable);
|
||||
|
||||
// Clever trick: Delete the backbuffer, so that the next Present will fail and
|
||||
|
@ -282,6 +318,7 @@ void GSDevice9::SetVsync(bool enable)
|
|||
// vsync settings. :)
|
||||
|
||||
delete m_backbuffer;
|
||||
|
||||
m_backbuffer = NULL;
|
||||
}
|
||||
|
||||
|
@ -293,6 +330,7 @@ bool GSDevice9::Reset(int w, int h)
|
|||
HRESULT hr;
|
||||
|
||||
int mode = (!m_wnd->IsManaged() || theApp.GetConfig("windowed", 1)) ? Windowed : Fullscreen;
|
||||
|
||||
if(mode == DontCare)
|
||||
{
|
||||
mode = m_pp.Windowed ? Windowed : Fullscreen;
|
||||
|
@ -707,11 +745,11 @@ void GSDevice9::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, c
|
|||
|
||||
IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
|
||||
IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP);
|
||||
IASetInputLayout(m_convert.il);
|
||||
|
||||
// vs
|
||||
|
||||
VSSetShader(m_convert.vs, NULL, 0);
|
||||
IASetInputLayout(m_convert.il);
|
||||
|
||||
// ps
|
||||
|
||||
|
@ -904,7 +942,7 @@ void GSDevice9::VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int
|
|||
{
|
||||
if(m_state.vs_cb) _aligned_free(m_state.vs_cb);
|
||||
|
||||
m_state.vs_cb = (float*)_aligned_malloc(size, 16);
|
||||
m_state.vs_cb = (float*)_aligned_malloc(size, 32);
|
||||
}
|
||||
|
||||
m_state.vs_cb_len = vs_cb_len;
|
||||
|
@ -926,10 +964,13 @@ void GSDevice9::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
|
|||
void GSDevice9::PSSetShaderResource(int i, GSTexture* sr)
|
||||
{
|
||||
IDirect3DTexture9* srv = NULL;
|
||||
if (sr) srv = *(GSTexture9*)sr;
|
||||
|
||||
if (m_state.ps_srvs[i] != srv) {
|
||||
if(sr) srv = *(GSTexture9*)sr;
|
||||
|
||||
if(m_state.ps_srvs[i] != srv)
|
||||
{
|
||||
m_state.ps_srvs[i] = srv;
|
||||
|
||||
m_dev->SetTexture(i, srv);
|
||||
}
|
||||
}
|
||||
|
@ -953,7 +994,7 @@ void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int p
|
|||
{
|
||||
if(m_state.ps_cb) _aligned_free(m_state.ps_cb);
|
||||
|
||||
m_state.ps_cb = (float*)_aligned_malloc(size, 16);
|
||||
m_state.ps_cb = (float*)_aligned_malloc(size, 32);
|
||||
}
|
||||
|
||||
m_state.ps_cb_len = ps_cb_len;
|
||||
|
|
|
@ -30,7 +30,7 @@ class GSDeviceDX : public GSDevice
|
|||
public:
|
||||
#pragma pack(push, 1)
|
||||
|
||||
__aligned16 struct VSConstantBuffer
|
||||
__aligned32 struct VSConstantBuffer
|
||||
{
|
||||
GSVector4 VertexScale;
|
||||
GSVector4 VertexOffset;
|
||||
|
@ -86,7 +86,7 @@ public:
|
|||
VSSelector() : key(0) {}
|
||||
};
|
||||
|
||||
__aligned16 struct PSConstantBuffer
|
||||
__aligned32 struct PSConstantBuffer
|
||||
{
|
||||
GSVector4 FogColor_AREF;
|
||||
GSVector4 HalfTexel;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -67,10 +67,10 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
|
|||
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
|
||||
void clamp16(const Xmm& a, const Xmm& temp);
|
||||
void alltrue();
|
||||
void blend8(const Xmm& a, const Xmm& b);
|
||||
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
|
||||
void blend8r(const Xmm& b, const Xmm& a);
|
||||
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
|
||||
void blend8(const Xmm& a, const Xmm& b);
|
||||
void blend8r(const Xmm& b, const Xmm& a);
|
||||
|
||||
public:
|
||||
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize);
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
__aligned16 class GSDrawingContext
|
||||
__aligned32 class GSDrawingContext
|
||||
{
|
||||
public:
|
||||
GIFRegXYOFFSET XYOFFSET;
|
||||
|
@ -43,7 +43,7 @@ public:
|
|||
GIFRegFRAME FRAME;
|
||||
GIFRegZBUF ZBUF;
|
||||
|
||||
__aligned16 struct
|
||||
__aligned32 struct
|
||||
{
|
||||
GSVector4i dx10;
|
||||
GSVector4 dx9;
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
__aligned16 class GSDrawingEnvironment
|
||||
__aligned32 class GSDrawingEnvironment
|
||||
{
|
||||
public:
|
||||
GIFRegPRIM PRIM;
|
||||
|
|
|
@ -56,14 +56,14 @@ uint32 GSLocalMemory::pageOffset16SZ[32][64][64];
|
|||
uint32 GSLocalMemory::pageOffset8[32][64][128];
|
||||
uint32 GSLocalMemory::pageOffset4[32][128][128];
|
||||
|
||||
int GSLocalMemory::rowOffset32[2048];
|
||||
int GSLocalMemory::rowOffset32Z[2048];
|
||||
int GSLocalMemory::rowOffset16[2048];
|
||||
int GSLocalMemory::rowOffset16S[2048];
|
||||
int GSLocalMemory::rowOffset16Z[2048];
|
||||
int GSLocalMemory::rowOffset16SZ[2048];
|
||||
int GSLocalMemory::rowOffset8[2][2048];
|
||||
int GSLocalMemory::rowOffset4[2][2048];
|
||||
int GSLocalMemory::rowOffset32[4096];
|
||||
int GSLocalMemory::rowOffset32Z[4096];
|
||||
int GSLocalMemory::rowOffset16[4096];
|
||||
int GSLocalMemory::rowOffset16S[4096];
|
||||
int GSLocalMemory::rowOffset16Z[4096];
|
||||
int GSLocalMemory::rowOffset16SZ[4096];
|
||||
int GSLocalMemory::rowOffset8[2][4096];
|
||||
int GSLocalMemory::rowOffset4[2][4096];
|
||||
|
||||
short GSLocalMemory::blockOffset32[256];
|
||||
short GSLocalMemory::blockOffset32Z[256];
|
||||
|
@ -116,44 +116,44 @@ GSLocalMemory::GSLocalMemory()
|
|||
|
||||
for(int x = 0; x < countof(rowOffset32); x++)
|
||||
{
|
||||
rowOffset32[x] = (int)PixelAddress32(x, 0, 0, 32) - (int)PixelAddress32(0, 0, 0, 32);
|
||||
rowOffset32[x] = (int)PixelAddress32(x & 0x7ff, 0, 0, 32) - (int)PixelAddress32(0, 0, 0, 32);
|
||||
}
|
||||
|
||||
for(int x = 0; x < countof(rowOffset32Z); x++)
|
||||
{
|
||||
rowOffset32Z[x] = (int)PixelAddress32Z(x, 0, 0, 32) - (int)PixelAddress32Z(0, 0, 0, 32);
|
||||
rowOffset32Z[x] = (int)PixelAddress32Z(x & 0x7ff, 0, 0, 32) - (int)PixelAddress32Z(0, 0, 0, 32);
|
||||
}
|
||||
|
||||
for(int x = 0; x < countof(rowOffset16); x++)
|
||||
{
|
||||
rowOffset16[x] = (int)PixelAddress16(x, 0, 0, 32) - (int)PixelAddress16(0, 0, 0, 32);
|
||||
rowOffset16[x] = (int)PixelAddress16(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16(0, 0, 0, 32);
|
||||
}
|
||||
|
||||
for(int x = 0; x < countof(rowOffset16S); x++)
|
||||
{
|
||||
rowOffset16S[x] = (int)PixelAddress16S(x, 0, 0, 32) - (int)PixelAddress16S(0, 0, 0, 32);
|
||||
rowOffset16S[x] = (int)PixelAddress16S(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16S(0, 0, 0, 32);
|
||||
}
|
||||
|
||||
for(int x = 0; x < countof(rowOffset16Z); x++)
|
||||
{
|
||||
rowOffset16Z[x] = (int)PixelAddress16Z(x, 0, 0, 32) - (int)PixelAddress16Z(0, 0, 0, 32);
|
||||
rowOffset16Z[x] = (int)PixelAddress16Z(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16Z(0, 0, 0, 32);
|
||||
}
|
||||
|
||||
for(int x = 0; x < countof(rowOffset16SZ); x++)
|
||||
{
|
||||
rowOffset16SZ[x] = (int)PixelAddress16SZ(x, 0, 0, 32) - (int)PixelAddress16SZ(0, 0, 0, 32);
|
||||
rowOffset16SZ[x] = (int)PixelAddress16SZ(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16SZ(0, 0, 0, 32);
|
||||
}
|
||||
|
||||
for(int x = 0; x < countof(rowOffset8[0]); x++)
|
||||
{
|
||||
rowOffset8[0][x] = (int)PixelAddress8(x, 0, 0, 32) - (int)PixelAddress8(0, 0, 0, 32);
|
||||
rowOffset8[1][x] = (int)PixelAddress8(x, 2, 0, 32) - (int)PixelAddress8(0, 2, 0, 32);
|
||||
rowOffset8[0][x] = (int)PixelAddress8(x & 0x7ff, 0, 0, 32) - (int)PixelAddress8(0, 0, 0, 32);
|
||||
rowOffset8[1][x] = (int)PixelAddress8(x & 0x7ff, 2, 0, 32) - (int)PixelAddress8(0, 2, 0, 32);
|
||||
}
|
||||
|
||||
for(int x = 0; x < countof(rowOffset4[0]); x++)
|
||||
{
|
||||
rowOffset4[0][x] = (int)PixelAddress4(x, 0, 0, 32) - (int)PixelAddress4(0, 0, 0, 32);
|
||||
rowOffset4[1][x] = (int)PixelAddress4(x, 2, 0, 32) - (int)PixelAddress4(0, 2, 0, 32);
|
||||
rowOffset4[0][x] = (int)PixelAddress4(x & 0x7ff, 0, 0, 32) - (int)PixelAddress4(0, 0, 0, 32);
|
||||
rowOffset4[1][x] = (int)PixelAddress4(x & 0x7ff, 2, 0, 32) - (int)PixelAddress4(0, 2, 0, 32);
|
||||
}
|
||||
|
||||
for(int x = 0; x < countof(blockOffset32); x++)
|
||||
|
@ -459,7 +459,7 @@ GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
|||
return i->second;
|
||||
}
|
||||
|
||||
GSOffset* o = (GSOffset*)_aligned_malloc(sizeof(GSOffset), 16);
|
||||
GSOffset* o = (GSOffset*)_aligned_malloc(sizeof(GSOffset), 32);
|
||||
|
||||
o->hash = hash;
|
||||
|
||||
|
@ -474,9 +474,9 @@ GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
|||
|
||||
pixelAddress pa = m_psm[psm].pa;
|
||||
|
||||
for(int i = 0; i < 2048; i++)
|
||||
for(int i = 0; i < 4096; i++)
|
||||
{
|
||||
o->pixel.row[i] = (int)pa(0, i, bp, bw);
|
||||
o->pixel.row[i] = (int)pa(0, i & 0x7ff, bp, bw);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 8; i++)
|
||||
|
@ -513,7 +513,7 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
|
|||
return i->second;
|
||||
}
|
||||
|
||||
GSPixelOffset4* o = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 16);
|
||||
GSPixelOffset4* o = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 32);
|
||||
|
||||
o->hash = hash;
|
||||
|
||||
|
@ -628,7 +628,7 @@ void GSLocalMemory::WriteImageLeftRight(int l, int r, int y, int h, const uint8*
|
|||
template<int psm, int bsx, int bsy, int trbpp>
|
||||
void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
||||
{
|
||||
__aligned16 uint8 buff[64]; // merge buffer for one column
|
||||
__aligned32 uint8 buff[64]; // merge buffer for one column
|
||||
|
||||
uint32 bp = BITBLTBUF.DBP;
|
||||
uint32 bw = BITBLTBUF.DBW;
|
||||
|
@ -1438,7 +1438,7 @@ void GSLocalMemory::ReadTexture24(const GSOffset* RESTRICT o, const GSVector4i&
|
|||
|
||||
void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
__aligned16 uint16 block[16 * 8];
|
||||
__aligned32 uint16 block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
|
@ -1451,7 +1451,7 @@ void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i&
|
|||
|
||||
void GSLocalMemory::ReadTexture16S(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
__aligned16 uint16 block[16 * 8];
|
||||
__aligned32 uint16 block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
|
@ -1548,7 +1548,7 @@ void GSLocalMemory::ReadTexture24Z(const GSOffset* RESTRICT o, const GSVector4i&
|
|||
|
||||
void GSLocalMemory::ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
__aligned16 uint16 block[16 * 8];
|
||||
__aligned32 uint16 block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
|
@ -1561,7 +1561,7 @@ void GSLocalMemory::ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i&
|
|||
|
||||
void GSLocalMemory::ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
__aligned16 uint16 block[16 * 8];
|
||||
__aligned32 uint16 block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
|
@ -1576,14 +1576,14 @@ void GSLocalMemory::ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i
|
|||
|
||||
void GSLocalMemory::ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadBlock32<true>(BlockPtr(bp), dst, dstpitch);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
|
@ -1597,7 +1597,7 @@ void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, cons
|
|||
|
||||
void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
__aligned16 uint16 block[16 * 8];
|
||||
__aligned32 uint16 block[16 * 8];
|
||||
|
||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
|
@ -1606,7 +1606,7 @@ void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, cons
|
|||
|
||||
void GSLocalMemory::ReadTextureBlock16S(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
__aligned16 uint16 block[16 * 8];
|
||||
__aligned32 uint16 block[16 * 8];
|
||||
|
||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
|
@ -1615,49 +1615,49 @@ void GSLocalMemory::ReadTextureBlock16S(uint32 bp, uint8* dst, int dstpitch, con
|
|||
|
||||
void GSLocalMemory::ReadTextureBlock8(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadAndExpandBlock8_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock4(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadAndExpandBlock4_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock8H(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadAndExpandBlock8H_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock4HL(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadAndExpandBlock4HL_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadAndExpandBlock4HH_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock32Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadBlock32<true>(BlockPtr(bp), dst, dstpitch);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock24Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
|
@ -1671,7 +1671,7 @@ void GSLocalMemory::ReadTextureBlock24Z(uint32 bp, uint8* dst, int dstpitch, con
|
|||
|
||||
void GSLocalMemory::ReadTextureBlock16Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
__aligned16 uint16 block[16 * 8];
|
||||
__aligned32 uint16 block[16 * 8];
|
||||
|
||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
|
@ -1680,7 +1680,7 @@ void GSLocalMemory::ReadTextureBlock16Z(uint32 bp, uint8* dst, int dstpitch, con
|
|||
|
||||
void GSLocalMemory::ReadTextureBlock16SZ(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
__aligned16 uint16 block[16 * 8];
|
||||
__aligned32 uint16 block[16 * 8];
|
||||
|
||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
|
@ -1823,28 +1823,28 @@ void GSLocalMemory::ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, cons
|
|||
|
||||
void GSLocalMemory::ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadBlock4P(BlockPtr(bp), dst, dstpitch);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock8HP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadBlock8HP(BlockPtr(bp), dst, dstpitch);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock4HLP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadBlock4HLP(BlockPtr(bp), dst, dstpitch);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock4HHP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(16);
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadBlock4HHP(BlockPtr(bp), dst, dstpitch);
|
||||
}
|
||||
|
@ -1855,7 +1855,7 @@ HRESULT GSLocalMemory::SaveBMP(const string& fn, uint32 bp, uint32 bw, uint32 ps
|
|||
{
|
||||
int pitch = w * 4;
|
||||
int size = pitch * h;
|
||||
void* bits = ::_aligned_malloc(size, 16);
|
||||
void* bits = _aligned_malloc(size, 32);
|
||||
|
||||
GIFRegTEX0 TEX0;
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ struct GSOffset
|
|||
|
||||
struct
|
||||
{
|
||||
int row[2048]; // yn (n = 0 1 2 ...)
|
||||
int row[4096]; // yn (n = 0 1 2 ...) NOTE: this wraps around above 2048, only transfers should address the upper half (dark cloud 2 inventing)
|
||||
int* col[8]; // rowOffset*
|
||||
} pixel;
|
||||
|
||||
|
@ -116,14 +116,14 @@ protected:
|
|||
static uint32 pageOffset8[32][64][128];
|
||||
static uint32 pageOffset4[32][128][128];
|
||||
|
||||
static int rowOffset32[2048];
|
||||
static int rowOffset32Z[2048];
|
||||
static int rowOffset16[2048];
|
||||
static int rowOffset16S[2048];
|
||||
static int rowOffset16Z[2048];
|
||||
static int rowOffset16SZ[2048];
|
||||
static int rowOffset8[2][2048];
|
||||
static int rowOffset4[2][2048];
|
||||
static int rowOffset32[4096];
|
||||
static int rowOffset32Z[4096];
|
||||
static int rowOffset16[4096];
|
||||
static int rowOffset16S[4096];
|
||||
static int rowOffset16Z[4096];
|
||||
static int rowOffset16SZ[4096];
|
||||
static int rowOffset8[2][4096];
|
||||
static int rowOffset4[2][4096];
|
||||
|
||||
static short blockOffset32[256];
|
||||
static short blockOffset32Z[256];
|
||||
|
|
|
@ -29,18 +29,20 @@
|
|||
// Using a spinning finish on the main (MTGS) thread is apparently a big win still, over trying
|
||||
// to wait out all the pending m_finished semaphores. It leaves one spinwait in the rasterizer,
|
||||
// but that's still worlds better than 2-6 spinning threads like before.
|
||||
#define UseSpinningFinish 1
|
||||
|
||||
#define UseSpinningFinish
|
||||
|
||||
// Set this to 1 to remove a lot of non-const div/modulus ops from the rasterization process.
|
||||
// Might likely be a measurable speedup but limits threading to 1, 2, 4, and 8 threads.
|
||||
// note by rama: Speedup is around 5% on average.
|
||||
#define UseConstThreadCount 0
|
||||
|
||||
#if UseConstThreadCount
|
||||
// #define UseConstThreadCount
|
||||
|
||||
#ifdef UseConstThreadCount
|
||||
// ThreadsConst - const number of threads. User-configured threads (in GSdx panel) must match
|
||||
// this value if UseConstThreadCount is enabled. [yeah, it's hacky for now]
|
||||
static const int ThreadsConst = 2;
|
||||
static const int ThreadMaskConst = ThreadsConst-1;
|
||||
static const int ThreadMaskConst = ThreadsConst - 1;
|
||||
#endif
|
||||
|
||||
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads)
|
||||
|
@ -57,11 +59,15 @@ GSRasterizer::~GSRasterizer()
|
|||
|
||||
__forceinline bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
|
||||
{
|
||||
#if UseConstThreadCount
|
||||
return (ThreadMaskConst==0) || ((scanline & ThreadMaskConst) == m_id);
|
||||
#else
|
||||
#ifdef UseConstThreadCount
|
||||
|
||||
return ThreadMaskConst == 0 || (scanline & ThreadMaskConst) == m_id;
|
||||
|
||||
#else
|
||||
|
||||
return (scanline % m_threads) == m_id;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||
|
@ -871,7 +877,7 @@ void GSRasterizerMT::ThreadProc()
|
|||
{
|
||||
// _mm_setcsr(MXCSR);
|
||||
|
||||
while( true )
|
||||
while(true)
|
||||
{
|
||||
sem_wait(&m_semaphore);
|
||||
|
||||
|
@ -879,10 +885,15 @@ void GSRasterizerMT::ThreadProc()
|
|||
|
||||
__super::Draw(m_data);
|
||||
|
||||
if( UseSpinningFinish )
|
||||
_interlockedbittestandreset( &m_sync, m_id );
|
||||
else
|
||||
#ifdef UseSpinningFinish
|
||||
|
||||
_interlockedbittestandreset(&m_sync, m_id);
|
||||
|
||||
#else
|
||||
|
||||
sem_post(&m_finished);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
sem_post(&m_stopped);
|
||||
|
@ -917,33 +928,36 @@ void GSRasterizerList::Draw(const GSRasterizerData* data)
|
|||
|
||||
m_sync = m_syncstart;
|
||||
|
||||
for(unsigned i=1; i<size(); ++i)
|
||||
for(size_t i = 1; i < size(); i++)
|
||||
{
|
||||
(*this)[i]->Draw(data);
|
||||
}
|
||||
|
||||
(*this)[0]->Draw(data);
|
||||
|
||||
if( UseSpinningFinish )
|
||||
{
|
||||
#ifdef UseSpinningFinish
|
||||
|
||||
while(m_sync) _mm_pause();
|
||||
}
|
||||
else
|
||||
|
||||
#else
|
||||
|
||||
for(size_t i = 1; i < size(); i++)
|
||||
{
|
||||
for(unsigned i=1; i<size(); ++i )
|
||||
sem_wait(&m_finished);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
m_stats.ticks = __rdtsc() - start;
|
||||
|
||||
for(unsigned i=0; i<size(); ++i)
|
||||
for(size_t i = 0; i < size(); i++)
|
||||
{
|
||||
GSRasterizerStats s;
|
||||
|
||||
(*this)[i]->GetStats(s);
|
||||
|
||||
m_stats.pixels += s.pixels;
|
||||
m_stats.prims = max(m_stats.prims, s.prims);
|
||||
m_stats.prims = std::max<int>(m_stats.prims, s.prims);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
#include "pthread.h"
|
||||
#include "semaphore.h"
|
||||
|
||||
__aligned16 class GSRasterizerData
|
||||
__aligned32 class GSRasterizerData
|
||||
{
|
||||
public:
|
||||
GSVector4i scissor;
|
||||
|
@ -50,7 +50,7 @@ public:
|
|||
virtual void PrintStats() = 0;
|
||||
};
|
||||
|
||||
class IDrawScanline : public GSAlignedClass<16>
|
||||
class IDrawScanline : public GSAlignedClass<32>
|
||||
{
|
||||
public:
|
||||
typedef void (__fastcall *DrawScanlineStaticPtr)(int right, int left, int top, const GSVertexSW& v);
|
||||
|
@ -153,9 +153,11 @@ public:
|
|||
push_back(new GSRasterizer(new DS(parent, 0), 0, threads));
|
||||
|
||||
m_syncstart = 0;
|
||||
|
||||
for(int i = 1; i < threads; i++)
|
||||
{
|
||||
push_back(new GSRasterizerMT(new DS(parent, i), i, threads, m_finished, m_sync));
|
||||
|
||||
_interlockedbittestandset(&m_syncstart, i);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
GSRenderer::GSRenderer()
|
||||
: GSState()
|
||||
, m_tex_buff( (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 16) )
|
||||
, m_tex_buff((uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32))
|
||||
, m_vt(this)
|
||||
, m_dev(NULL)
|
||||
, m_shader(0)
|
||||
|
@ -61,9 +61,10 @@ GSRenderer::~GSRenderer()
|
|||
m_dev->Reset(1, 1, GSDevice::Windowed);
|
||||
}*/
|
||||
|
||||
_aligned_free( m_tex_buff );
|
||||
_aligned_free(m_tex_buff);
|
||||
|
||||
delete m_dev;
|
||||
|
||||
DeleteCriticalSection(&m_pGSsetTitle_Crit);
|
||||
}
|
||||
|
||||
|
@ -220,13 +221,6 @@ bool GSRenderer::Merge(int field)
|
|||
r.bottom = r.top + y;
|
||||
}
|
||||
|
||||
// Breaks the blur filter, and actually makes games blurry again.
|
||||
// This might have to do with earlier changes to device size detection.
|
||||
/*if(blurdetected && i == 1)
|
||||
{
|
||||
r += GSVector4i(0, 1).xyxy();
|
||||
}*/
|
||||
|
||||
GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy();
|
||||
|
||||
src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy();
|
||||
|
@ -380,8 +374,8 @@ void GSRenderer::VSync(int field)
|
|||
|
||||
EnterCriticalSection(&m_pGSsetTitle_Crit);
|
||||
|
||||
strncpy(m_GStitleInfoBuffer, s.c_str(), ArraySize(m_GStitleInfoBuffer)-1);
|
||||
m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer)-1] = 0;// make sure null terminated even if text overflows
|
||||
strncpy(m_GStitleInfoBuffer, s.c_str(), countof(m_GStitleInfoBuffer) - 1);
|
||||
m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer) - 1] = 0;// make sure null terminated even if text overflows
|
||||
|
||||
LeaveCriticalSection(&m_pGSsetTitle_Crit);
|
||||
}
|
||||
|
|
|
@ -158,12 +158,13 @@ protected:
|
|||
void GrowVertexBuffer()
|
||||
{
|
||||
m_maxcount = max(10000, m_maxcount * 3/2);
|
||||
m_vertices = (Vertex*)_aligned_realloc(m_vertices, sizeof(Vertex) * m_maxcount, 16);
|
||||
m_vertices = (Vertex*)_aligned_realloc(m_vertices, sizeof(Vertex) * m_maxcount, 32);
|
||||
m_maxcount -= 100;
|
||||
}
|
||||
|
||||
// Returns a pointer to the drawing vertex. Can return NULL!
|
||||
template<uint32 prim> __fi Vertex* BaseDrawingKick(int& count)
|
||||
|
||||
template<uint32 prim> __forceinline Vertex* DrawingKick(bool skip, int& count)
|
||||
{
|
||||
switch(prim)
|
||||
{
|
||||
|
@ -237,7 +238,7 @@ protected:
|
|||
__assume(0);
|
||||
}
|
||||
|
||||
return v;
|
||||
return !skip ? v : NULL;
|
||||
}
|
||||
|
||||
virtual void Draw() = 0;
|
||||
|
|
|
@ -249,7 +249,9 @@ public:
|
|||
ps_sel.clr1 = om_bsel.IsCLR1();
|
||||
ps_sel.fba = context->FBA.FBA;
|
||||
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
|
||||
|
||||
if (UserHacks_AlphaHack) ps_sel.aout = 1;
|
||||
|
||||
if(PRIM->FGE)
|
||||
{
|
||||
ps_sel.fog = 1;
|
||||
|
|
|
@ -38,20 +38,20 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
|
|||
return true;
|
||||
}
|
||||
|
||||
void GSRendererDX11::DoVertexKick()
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererDX11::VertexKick(bool skip)
|
||||
{
|
||||
const bool tme = PRIM->TME;
|
||||
const bool fst = PRIM->FST;
|
||||
|
||||
GSVertexHW11& dst = m_vl.AddTail();
|
||||
|
||||
dst.vi[0] = m_v.vi[0];
|
||||
dst.vi[1] = m_v.vi[1];
|
||||
|
||||
#ifdef USE_UPSCALE_HACKS
|
||||
|
||||
if(tme && fst)
|
||||
{
|
||||
//GSVector4::storel(&dst.ST, m_v.GetUV());
|
||||
|
||||
int Udiff = 0;
|
||||
int Vdiff = 0;
|
||||
int Uadjust = 0;
|
||||
|
@ -95,6 +95,7 @@ void GSRendererDX11::DoVertexKick()
|
|||
else if (Vdiff <= 1) { Vadjust = 1; }
|
||||
}
|
||||
}
|
||||
|
||||
dst.ST.S = (float)m_v.UV.U - Uadjust;
|
||||
dst.ST.T = (float)m_v.UV.V - Vadjust;
|
||||
}
|
||||
|
@ -104,22 +105,20 @@ void GSRendererDX11::DoVertexKick()
|
|||
//dst.XYZ.X += 5;
|
||||
//dst.XYZ.Y += 5;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
if(tme && fst)
|
||||
{
|
||||
GSVector4::storel(&dst.ST, m_v.GetUV());
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template< uint32 prim >
|
||||
void GSRendererDX11::DrawingKick( bool skip )
|
||||
{
|
||||
int count;
|
||||
|
||||
GSVertexHW11* v = BaseDrawingKick<prim>(count);
|
||||
if (skip || !v) return;
|
||||
int count = 0;
|
||||
|
||||
if(GSVertexHW11* v = DrawingKick<prim>(skip, count))
|
||||
{
|
||||
GSVector4i scissor = m_context->scissor.dx10;
|
||||
|
||||
GSVector4i pmin, pmax;
|
||||
|
@ -202,6 +201,7 @@ void GSRendererDX11::DrawingKick( bool skip )
|
|||
}
|
||||
|
||||
m_count += count;
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererDX11::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
|
|
|
@ -36,8 +36,5 @@ public:
|
|||
|
||||
bool CreateDevice(GSDevice* dev);
|
||||
|
||||
template<uint32 prim>
|
||||
void DrawingKick( bool skip );
|
||||
|
||||
void DoVertexKick();
|
||||
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
|
||||
};
|
||||
|
|
|
@ -57,11 +57,9 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
|
|||
return true;
|
||||
}
|
||||
|
||||
void GSRendererDX9::DoVertexKick()
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererDX9::VertexKick(bool skip)
|
||||
{
|
||||
const bool tme = PRIM->TME;
|
||||
const bool fst = PRIM->FST;
|
||||
|
||||
GSVertexHW9& dst = m_vl.AddTail();
|
||||
|
||||
dst.p = GSVector4(((GSVector4i)m_v.XYZ).upl16());
|
||||
|
@ -142,20 +140,17 @@ void GSRendererDX9::DoVertexKick()
|
|||
|
||||
dst.c0 = m_v.RGBAQ.u32[0];
|
||||
dst.c1 = m_v.FOG.u32[1];
|
||||
}
|
||||
|
||||
template< uint32 prim >
|
||||
void GSRendererDX9::DrawingKick( bool skip )
|
||||
{
|
||||
int count;
|
||||
//
|
||||
|
||||
// BaseDrawingKick can never return NULL here because the DrawingKick function
|
||||
// tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only
|
||||
// condition where this function would return NULL).
|
||||
|
||||
GSVertexHW9* v = BaseDrawingKick<prim>(count);
|
||||
if (skip || !v) return;
|
||||
int count = 0;
|
||||
|
||||
if(GSVertexHW9* v = DrawingKick<prim>(skip, count))
|
||||
{
|
||||
GSVector4 scissor = m_context->scissor.dx9;
|
||||
|
||||
GSVector4 pmin, pmax;
|
||||
|
@ -228,6 +223,7 @@ void GSRendererDX9::DrawingKick( bool skip )
|
|||
}
|
||||
|
||||
m_count += count;
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererDX9::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
|
|
|
@ -43,8 +43,5 @@ public:
|
|||
|
||||
bool CreateDevice(GSDevice* dev);
|
||||
|
||||
template<uint32 prim>
|
||||
void DrawingKick( bool skip );
|
||||
|
||||
void DoVertexKick();
|
||||
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
|
||||
};
|
||||
|
|
|
@ -43,10 +43,7 @@ public:
|
|||
InitVertexKick<GSRendererNull>();
|
||||
}
|
||||
|
||||
virtual ~GSRendererNull() {}
|
||||
|
||||
template<uint32 prim>
|
||||
void DrawingKick( bool skip ) {}
|
||||
|
||||
void DoVertexKick() {}
|
||||
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
|
|
@ -94,6 +94,7 @@ GSTexture* GSRendererSW::GetOutput(int i)
|
|||
if(m_dev->ResizeTexture(&m_texture[i], w, h))
|
||||
{
|
||||
uint8* buff = GetTextureBufferLock();
|
||||
|
||||
static int pitch = 1024 * 4;
|
||||
|
||||
GSVector4i r(0, 0, w, h);
|
||||
|
@ -113,6 +114,7 @@ GSTexture* GSRendererSW::GetOutput(int i)
|
|||
|
||||
s_n++;
|
||||
}
|
||||
|
||||
ReleaseTextureBufferLock();
|
||||
}
|
||||
|
||||
|
@ -427,24 +429,22 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
|
|||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::DoVertexKick()
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererSW::VertexKick(bool skip)
|
||||
{
|
||||
const bool tme = PRIM->TME;
|
||||
const bool fst = PRIM->FST;
|
||||
|
||||
const GSDrawingContext& context = *m_context;
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]);
|
||||
|
||||
xy = xy.insert16<3>(m_v.FOG.F);
|
||||
xy = xy.upl16();
|
||||
xy -= context.XYOFFSET;
|
||||
xy -= context->XYOFFSET;
|
||||
|
||||
GSVertexSW& dst = m_vl.AddTail();
|
||||
GSVertexSW v;
|
||||
|
||||
dst.p = GSVector4(xy) * g_pos_scale;
|
||||
v.p = GSVector4(xy) * g_pos_scale;
|
||||
|
||||
dst.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7);
|
||||
v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
|
@ -452,37 +452,31 @@ void GSRendererSW::DoVertexKick()
|
|||
|
||||
if(fst)
|
||||
{
|
||||
dst.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
|
||||
v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
|
||||
q = 1.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.t = GSVector4(m_v.ST.S, m_v.ST.T);
|
||||
dst.t *= GSVector4(0x10000 << context.TEX0.TW, 0x10000 << context.TEX0.TH);
|
||||
v.t = GSVector4(m_v.ST.S, m_v.ST.T);
|
||||
v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
|
||||
q = m_v.RGBAQ.Q;
|
||||
}
|
||||
|
||||
dst.t = dst.t.xyxy(GSVector4::load(q));
|
||||
v.t = v.t.xyxy(GSVector4::load(q));
|
||||
}
|
||||
|
||||
GSVertexSW& dst = m_vl.AddTail();
|
||||
|
||||
dst = v;
|
||||
|
||||
dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion
|
||||
}
|
||||
|
||||
int count = 0;
|
||||
|
||||
template< uint32 prim >
|
||||
void GSRendererSW::DrawingKick( bool skip )
|
||||
{
|
||||
int count;
|
||||
|
||||
// BaseDrawingKick can never return NULL here because the DrawingKick function
|
||||
// tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only
|
||||
// condition where this function would return NULL).
|
||||
|
||||
GSVertexSW* v = BaseDrawingKick<prim>(count);
|
||||
if (skip || !v) return;
|
||||
|
||||
if(!m_dump)
|
||||
if(GSVertexSW* v = DrawingKick<prim>(skip, count))
|
||||
{
|
||||
if(!m_dump)
|
||||
{
|
||||
GSVector4 pmin, pmax;
|
||||
|
||||
switch(prim)
|
||||
|
@ -505,7 +499,7 @@ void GSRendererSW::DrawingKick( bool skip )
|
|||
break;
|
||||
}
|
||||
|
||||
GSVector4 scissor = m_context->scissor.ex;
|
||||
GSVector4 scissor = context->scissor.ex;
|
||||
|
||||
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
|
||||
|
||||
|
@ -534,8 +528,7 @@ void GSRendererSW::DrawingKick( bool skip )
|
|||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
|
@ -602,4 +595,5 @@ void GSRendererSW::DrawingKick( bool skip )
|
|||
}
|
||||
|
||||
m_count += count;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,13 +47,6 @@ public:
|
|||
GSRendererSW();
|
||||
virtual ~GSRendererSW();
|
||||
|
||||
template<uint32 prim>
|
||||
void DrawingKick( bool skip );
|
||||
|
||||
void DoVertexKick();
|
||||
|
||||
void InvalidateTextureCache()
|
||||
{
|
||||
m_tc->RemoveAll();
|
||||
}
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void VertexKick(bool skip);
|
||||
};
|
||||
|
|
|
@ -99,7 +99,7 @@ union GSScanlineSelector
|
|||
}
|
||||
};
|
||||
|
||||
__aligned16 struct GSScanlineParam
|
||||
__aligned32 struct GSScanlineParam
|
||||
{
|
||||
GSScanlineSelector sel;
|
||||
|
||||
|
@ -115,7 +115,7 @@ __aligned16 struct GSScanlineParam
|
|||
uint32 fm, zm;
|
||||
};
|
||||
|
||||
__aligned16 struct GSScanlineEnvironment
|
||||
__aligned32 struct GSScanlineEnvironment
|
||||
{
|
||||
void* vm;
|
||||
const void* tex;
|
||||
|
|
|
@ -88,7 +88,9 @@ void GSSettingsDlg::OnInit()
|
|||
ComboBoxAppend(IDC_RESOLUTION, "Please select...", (LPARAM)&m_modes.back(), true);
|
||||
|
||||
CComPtr<IDirect3D9> d3d;
|
||||
|
||||
d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION));
|
||||
|
||||
if(d3d)
|
||||
{
|
||||
uint32 w = theApp.GetConfig("ModeWidth", 0);
|
||||
|
@ -151,10 +153,13 @@ void GSSettingsDlg::OnInit()
|
|||
SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_SETRANGE, 0, MAKELPARAM(8192, 256));
|
||||
SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("resy", 1024), 0));
|
||||
|
||||
int r=theApp.GetConfig("Renderer", 0);
|
||||
if (r>=0 && r<=2){//DX9
|
||||
int r = theApp.GetConfig("Renderer", 0);
|
||||
|
||||
if(r >= 0 && r <= 2) // DX9
|
||||
{
|
||||
GSDevice9::ForceValidMsaaConfig();
|
||||
m_lastValidMsaa=theApp.GetConfig("msaa", 0);
|
||||
|
||||
m_lastValidMsaa = theApp.GetConfig("msaa", 0);
|
||||
}
|
||||
|
||||
SendMessage(GetDlgItem(m_hWnd, IDC_MSAA), UDM_SETRANGE, 0, MAKELPARAM(16, 0));
|
||||
|
|
|
@ -48,7 +48,14 @@ void GSSetupPrimCodeGenerator::Generate()
|
|||
{
|
||||
for(int i = 0; i < 5; i++)
|
||||
{
|
||||
movaps(Xmm(3 + i), xmmword[&m_shift[i]]);
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
vmovaps(Xmm(3 + i), ptr[&m_shift[i]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
movaps(Xmm(3 + i), ptr[&m_shift[i]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -68,11 +75,119 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
return;
|
||||
}
|
||||
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
// GSVector4 t = dscan.p;
|
||||
|
||||
movaps(xmm0, xmmword[edx + 16]);
|
||||
vmovaps(xmm0, ptr[edx + 16]);
|
||||
|
||||
if(m_en.f)
|
||||
{
|
||||
// GSVector4 df = p.wwww();
|
||||
|
||||
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
// m_env.d4.f = GSVector4i(df * 4.0f).xxzzlh();
|
||||
|
||||
vmulps(xmm2, xmm1, xmm3);
|
||||
vcvttps2dq(xmm2, xmm2);
|
||||
vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vmovdqa(ptr[&m_env.d4.f], xmm2);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// m_env.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
vmulps(xmm2, xmm1, Xmm(4 + i));
|
||||
vcvttps2dq(xmm2, xmm2);
|
||||
vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vmovdqa(ptr[&m_env.d[i].f], xmm2);
|
||||
}
|
||||
}
|
||||
|
||||
if(m_en.z)
|
||||
{
|
||||
// GSVector4 dz = p.zzzz();
|
||||
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
// m_env.d4.z = dz * 4.0f;
|
||||
|
||||
vmulps(xmm1, xmm0, xmm3);
|
||||
vmovdqa(ptr[&m_env.d4.z], xmm1);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// m_env.d[i].z = dz * m_shift[i];
|
||||
|
||||
vmulps(xmm1, xmm0, Xmm(4 + i));
|
||||
vmovdqa(ptr[&m_env.d[i].z], xmm1);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector4 p = vertices[0].p;
|
||||
|
||||
vmovaps(xmm0, ptr[ecx + 16]);
|
||||
|
||||
if(m_en.f)
|
||||
{
|
||||
// m_env.p.f = GSVector4i(p).zzzzh().zzzz();
|
||||
|
||||
vcvttps2dq(xmm1, xmm0);
|
||||
vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vmovdqa(ptr[&m_env.p.f], xmm1);
|
||||
}
|
||||
|
||||
if(m_en.z)
|
||||
{
|
||||
// GSVector4 z = p.zzzz();
|
||||
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
// m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
|
||||
static const float half = 0.5f;
|
||||
|
||||
vmovss(xmm1, dword[&half]);
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vmulps(xmm1, xmm0);
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vpslld(xmm1, 1);
|
||||
|
||||
vcvttps2dq(xmm0, xmm0);
|
||||
vpcmpeqd(xmm2, xmm2);
|
||||
vpsrld(xmm2, 31);
|
||||
vpand(xmm0, xmm2);
|
||||
|
||||
vpor(xmm0, xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.p.z = GSVector4i(z);
|
||||
|
||||
vcvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
|
||||
vmovdqa(ptr[&m_env.p.z], xmm0);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
// GSVector4 t = dscan.p;
|
||||
|
||||
movaps(xmm0, ptr[edx + 16]);
|
||||
|
||||
if(m_en.f)
|
||||
{
|
||||
|
@ -88,7 +203,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
cvttps2dq(xmm2, xmm2);
|
||||
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(xmmword[&m_env.d4.f], xmm2);
|
||||
movdqa(ptr[&m_env.d4.f], xmm2);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
|
@ -99,7 +214,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
cvttps2dq(xmm2, xmm2);
|
||||
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(xmmword[&m_env.d[i].f], xmm2);
|
||||
movdqa(ptr[&m_env.d[i].f], xmm2);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -113,7 +228,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
movaps(xmm1, xmm0);
|
||||
mulps(xmm1, xmm3);
|
||||
movdqa(xmmword[&m_env.d4.z], xmm1);
|
||||
movdqa(ptr[&m_env.d4.z], xmm1);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
|
@ -121,7 +236,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
movaps(xmm1, xmm0);
|
||||
mulps(xmm1, Xmm(4 + i));
|
||||
movdqa(xmmword[&m_env.d[i].z], xmm1);
|
||||
movdqa(ptr[&m_env.d[i].z], xmm1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -129,17 +244,16 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
{
|
||||
// GSVector4 p = vertices[0].p;
|
||||
|
||||
movaps(xmm0, xmmword[ecx + 16]);
|
||||
movaps(xmm0, ptr[ecx + 16]);
|
||||
|
||||
if(m_en.f)
|
||||
{
|
||||
// m_env.p.f = GSVector4i(p).zzzzh().zzzz();
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
cvttps2dq(xmm1, xmm0);
|
||||
pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
movdqa(xmmword[&m_env.p.f], xmm1);
|
||||
movdqa(ptr[&m_env.p.f], xmm1);
|
||||
}
|
||||
|
||||
if(m_en.z)
|
||||
|
@ -174,7 +288,8 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
cvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
|
||||
movdqa(xmmword[&m_env.p.z], xmm0);
|
||||
movdqa(ptr[&m_env.p.z], xmm0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -186,9 +301,73 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
return;
|
||||
}
|
||||
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
// GSVector4 t = dscan.t;
|
||||
|
||||
movaps(xmm0, xmmword[edx + 32]);
|
||||
vmovaps(xmm0, ptr[edx + 32]);
|
||||
|
||||
vmulps(xmm1, xmm0, xmm3);
|
||||
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_env.d4.st = GSVector4i(t * 4.0f);
|
||||
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vmovdqa(ptr[&m_env.d4.st], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.d4.stq = t * 4.0f;
|
||||
|
||||
vmovaps(ptr[&m_env.d4.stq], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
// GSVector4 dq = t.zzzz();
|
||||
|
||||
vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// GSVector4 v = ds/dt * m_shift[i];
|
||||
|
||||
vmulps(xmm2, xmm1, Xmm(4 + i));
|
||||
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_env.d[i].si/ti = GSVector4i(v);
|
||||
|
||||
vcvttps2dq(xmm2, xmm2);
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: vmovdqa(ptr[&m_env.d[i].si], xmm2); break;
|
||||
case 1: vmovdqa(ptr[&m_env.d[i].ti], xmm2); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.d[i].s/t/q = v;
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: vmovaps(ptr[&m_env.d[i].s], xmm2); break;
|
||||
case 1: vmovaps(ptr[&m_env.d[i].t], xmm2); break;
|
||||
case 2: vmovaps(ptr[&m_env.d[i].q], xmm2); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector4 t = dscan.t;
|
||||
|
||||
movaps(xmm0, ptr[edx + 32]);
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
mulps(xmm1, xmm3);
|
||||
|
@ -198,13 +377,13 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
// m_env.d4.st = GSVector4i(t * 4.0f);
|
||||
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
movdqa(xmmword[&m_env.d4.st], xmm1);
|
||||
movdqa(ptr[&m_env.d4.st], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.d4.stq = t * 4.0f;
|
||||
|
||||
movaps(xmmword[&m_env.d4.stq], xmm1);
|
||||
movaps(ptr[&m_env.d4.stq], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
|
@ -231,8 +410,8 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
switch(j)
|
||||
{
|
||||
case 0: movdqa(xmmword[&m_env.d[i].si], xmm2); break;
|
||||
case 1: movdqa(xmmword[&m_env.d[i].ti], xmm2); break;
|
||||
case 0: movdqa(ptr[&m_env.d[i].si], xmm2); break;
|
||||
case 1: movdqa(ptr[&m_env.d[i].ti], xmm2); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -241,9 +420,10 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
switch(j)
|
||||
{
|
||||
case 0: movaps(xmmword[&m_env.d[i].s], xmm2); break;
|
||||
case 1: movaps(xmmword[&m_env.d[i].t], xmm2); break;
|
||||
case 2: movaps(xmmword[&m_env.d[i].q], xmm2); break;
|
||||
case 0: movaps(ptr[&m_env.d[i].s], xmm2); break;
|
||||
case 1: movaps(ptr[&m_env.d[i].t], xmm2); break;
|
||||
case 2: movaps(ptr[&m_env.d[i].q], xmm2); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -257,11 +437,115 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
return;
|
||||
}
|
||||
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
if(m_sel.iip)
|
||||
{
|
||||
// GSVector4 c = dscan.c;
|
||||
|
||||
movaps(xmm0, xmmword[edx]);
|
||||
vmovaps(xmm0, ptr[edx]);
|
||||
|
||||
// m_env.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
||||
|
||||
vmulps(xmm1, xmm0, xmm3);
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0));
|
||||
vpackssdw(xmm1, xmm1);
|
||||
vmovdqa(ptr[&m_env.d4.c], xmm1);
|
||||
|
||||
// xmm3 is not needed anymore
|
||||
|
||||
// GSVector4 dr = c.xxxx();
|
||||
// GSVector4 db = c.zzzz();
|
||||
|
||||
vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
|
||||
|
||||
vmulps(xmm0, xmm2, Xmm(4 + i));
|
||||
vcvttps2dq(xmm0, xmm0);
|
||||
vpackssdw(xmm0, xmm0);
|
||||
|
||||
// GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
|
||||
|
||||
vmulps(xmm1, xmm3, Xmm(4 + i));
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vpackssdw(xmm1, xmm1);
|
||||
|
||||
// m_env.d[i].rb = r.upl16(b);
|
||||
|
||||
vpunpcklwd(xmm0, xmm1);
|
||||
vmovdqa(ptr[&m_env.d[i].rb], xmm0);
|
||||
}
|
||||
|
||||
// GSVector4 c = dscan.c;
|
||||
|
||||
vmovaps(xmm0, ptr[edx]); // not enough regs, have to reload it
|
||||
|
||||
// GSVector4 dg = c.yyyy();
|
||||
// GSVector4 da = c.wwww();
|
||||
|
||||
vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
|
||||
|
||||
vmulps(xmm0, xmm2, Xmm(4 + i));
|
||||
vcvttps2dq(xmm0, xmm0);
|
||||
vpackssdw(xmm0, xmm0);
|
||||
|
||||
// GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
|
||||
|
||||
vmulps(xmm1, xmm3, Xmm(4 + i));
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vpackssdw(xmm1, xmm1);
|
||||
|
||||
// m_env.d[i].ga = g.upl16(a);
|
||||
|
||||
vpunpcklwd(xmm0, xmm1);
|
||||
vmovdqa(ptr[&m_env.d[i].ga], xmm0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector4i c = GSVector4i(vertices[0].c);
|
||||
|
||||
vcvttps2dq(xmm0, ptr[ecx]);
|
||||
|
||||
// c = c.upl16(c.zwxy());
|
||||
|
||||
vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
vpunpcklwd(xmm0, xmm1);
|
||||
|
||||
// if(!tme) c = c.srl16(7);
|
||||
|
||||
if(m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
vpsrlw(xmm0, 7);
|
||||
}
|
||||
|
||||
// m_env.c.rb = c.xxxx();
|
||||
// m_env.c.ga = c.zzzz();
|
||||
|
||||
vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
vmovdqa(ptr[&m_env.c.rb], xmm1);
|
||||
vmovdqa(ptr[&m_env.c.ga], xmm2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_sel.iip)
|
||||
{
|
||||
// GSVector4 c = dscan.c;
|
||||
|
||||
movaps(xmm0, ptr[edx]);
|
||||
movaps(xmm1, xmm0);
|
||||
|
||||
// m_env.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
||||
|
@ -271,7 +555,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
cvttps2dq(xmm2, xmm2);
|
||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0));
|
||||
packssdw(xmm2, xmm2);
|
||||
movdqa(xmmword[&m_env.d4.c], xmm2);
|
||||
movdqa(ptr[&m_env.d4.c], xmm2);
|
||||
|
||||
// xmm3 is not needed anymore
|
||||
|
||||
|
@ -300,12 +584,12 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
// m_env.d[i].rb = r.upl16(b);
|
||||
|
||||
punpcklwd(xmm2, xmm3);
|
||||
movdqa(xmmword[&m_env.d[i].rb], xmm2);
|
||||
movdqa(ptr[&m_env.d[i].rb], xmm2);
|
||||
}
|
||||
|
||||
// GSVector4 c = dscan.c;
|
||||
|
||||
movaps(xmm0, xmmword[edx]); // not enough regs, have to reload it
|
||||
movaps(xmm0, ptr[edx]); // not enough regs, have to reload it
|
||||
movaps(xmm1, xmm0);
|
||||
|
||||
// GSVector4 dg = c.yyyy();
|
||||
|
@ -333,20 +617,19 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
// m_env.d[i].ga = g.upl16(a);
|
||||
|
||||
punpcklwd(xmm2, xmm3);
|
||||
movdqa(xmmword[&m_env.d[i].ga], xmm2);
|
||||
movdqa(ptr[&m_env.d[i].ga], xmm2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector4i c = GSVector4i(vertices[0].c);
|
||||
|
||||
movaps(xmm0, xmmword[ecx]);
|
||||
movaps(xmm0, ptr[ecx]);
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
|
||||
// c = c.upl16(c.zwxy());
|
||||
|
||||
movdqa(xmm1, xmm0);
|
||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
pshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
punpcklwd(xmm0, xmm1);
|
||||
|
||||
// if(!tme) c = c.srl16(7);
|
||||
|
@ -359,11 +642,12 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
// m_env.c.rb = c.xxxx();
|
||||
// m_env.c.ga = c.zzzz();
|
||||
|
||||
movdqa(xmm1, xmm0);
|
||||
pshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
movdqa(xmmword[&m_env.c.rb], xmm0);
|
||||
movdqa(xmmword[&m_env.c.ga], xmm1);
|
||||
pshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
movdqa(ptr[&m_env.c.rb], xmm1);
|
||||
movdqa(ptr[&m_env.c.ga], xmm2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ GSState::GSState()
|
|||
m_sssize += sizeof(m_tr.x);
|
||||
m_sssize += sizeof(m_tr.y);
|
||||
m_sssize += m_mem.m_vmsize;
|
||||
m_sssize += (sizeof(m_path[0].tag) + sizeof(m_path[0].reg)) * ArraySize(m_path);
|
||||
m_sssize += (sizeof(m_path[0].tag) + sizeof(m_path[0].reg)) * countof(m_path);
|
||||
m_sssize += sizeof(m_q);
|
||||
|
||||
PRIM = &m_env.PRIM;
|
||||
|
@ -103,6 +103,7 @@ GSState::~GSState()
|
|||
void GSState::SetRegsMem(uint8* basemem)
|
||||
{
|
||||
ASSERT(basemem);
|
||||
|
||||
m_regs = (GSPrivRegSet*)basemem;
|
||||
}
|
||||
|
||||
|
@ -111,15 +112,16 @@ void GSState::SetIrqCallback(void (*irq)())
|
|||
m_irq = irq;
|
||||
}
|
||||
|
||||
void GSState::SetMultithreaded( bool isMT )
|
||||
void GSState::SetMultithreaded(bool mt)
|
||||
{
|
||||
// Some older versions of PCSX2 didn't properly set the irq callback to NULL
|
||||
// in multithreaded mode (possibly because ZeroGS itself would assert in such
|
||||
// cases), and didn't bind them to a dummy callback either. PCSX2 handles all
|
||||
// IRQs internally when multithreaded anyway -- so let's ignore them here:
|
||||
|
||||
m_mt = isMT;
|
||||
if( isMT )
|
||||
m_mt = mt;
|
||||
|
||||
if(mt)
|
||||
{
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerNull;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerNull;
|
||||
|
@ -136,11 +138,11 @@ void GSState::SetMultithreaded( bool isMT )
|
|||
void GSState::SetFrameSkip(int skip)
|
||||
{
|
||||
if(m_frameskip == skip) return;
|
||||
|
||||
m_frameskip = skip;
|
||||
|
||||
if(skip)
|
||||
{
|
||||
#if !UsePackedRegSwitch
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerNOP;
|
||||
|
@ -148,7 +150,6 @@ void GSState::SetFrameSkip(int skip)
|
|||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP;
|
||||
#endif
|
||||
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerNOP;
|
||||
|
@ -163,7 +164,6 @@ void GSState::SetFrameSkip(int skip)
|
|||
}
|
||||
else
|
||||
{
|
||||
#if !UsePackedRegSwitch
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
|
||||
|
@ -171,7 +171,6 @@ void GSState::SetFrameSkip(int skip)
|
|||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZF3;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZ3;
|
||||
#endif
|
||||
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
|
||||
|
@ -188,7 +187,7 @@ void GSState::SetFrameSkip(int skip)
|
|||
|
||||
void GSState::Reset()
|
||||
{
|
||||
memset(&m_path[0], 0, sizeof(m_path[0]) * ArraySize(m_path));
|
||||
memset(&m_path[0], 0, sizeof(m_path[0]) * countof(m_path));
|
||||
memset(&m_v, 0, sizeof(m_v));
|
||||
|
||||
// PRIM = &m_env.PRIM;
|
||||
|
@ -203,7 +202,6 @@ void GSState::Reset()
|
|||
|
||||
void GSState::ResetHandlers()
|
||||
{
|
||||
#if !UsePackedRegSwitch
|
||||
for(int i = 0; i < countof(m_fpGIFPackedRegHandlers); i++)
|
||||
{
|
||||
m_fpGIFPackedRegHandlers[i] = &GSState::GIFPackedRegHandlerNull;
|
||||
|
@ -224,7 +222,6 @@ void GSState::ResetHandlers()
|
|||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZ3;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP;
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < countof(m_fpGIFRegHandlers); i++)
|
||||
{
|
||||
|
@ -284,7 +281,7 @@ void GSState::ResetHandlers()
|
|||
m_fpGIFRegHandlers[GIF_A_D_REG_TRXDIR] = &GSState::GIFRegHandlerTRXDIR;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_HWREG] = &GSState::GIFRegHandlerHWREG;
|
||||
|
||||
SetMultithreaded( m_mt );
|
||||
SetMultithreaded(m_mt);
|
||||
}
|
||||
|
||||
GSVector4i GSState::GetDisplayRect(int i)
|
||||
|
@ -375,22 +372,24 @@ int GSState::GetFPS()
|
|||
|
||||
// GIFPackedRegHandler*
|
||||
|
||||
void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* r)
|
||||
__forceinline void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* r)
|
||||
{
|
||||
// ASSERT(0);
|
||||
}
|
||||
|
||||
void __fi GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r)
|
||||
__forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r)
|
||||
{
|
||||
#if _M_SSE >= 0x301
|
||||
|
||||
GSVector4i mask = GSVector4i::load(0x0c080400);
|
||||
GSVector4i v = GSVector4i::load<false>(r).shuffle8(mask);
|
||||
|
||||
m_v.RGBAQ.u32[0] = (uint32)GSVector4i::store(v);
|
||||
|
||||
#elif _M_SSE >= 0x200
|
||||
|
||||
GSVector4i v = GSVector4i::load<false>(r) & GSVector4i::x000000ff();
|
||||
|
||||
m_v.RGBAQ.u32[0] = v.rgba32();
|
||||
|
||||
#else
|
||||
|
@ -405,7 +404,7 @@ void __fi GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r)
|
|||
m_v.RGBAQ.Q = m_q;
|
||||
}
|
||||
|
||||
void __fi GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r)
|
||||
__forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r)
|
||||
{
|
||||
#if defined(_M_AMD64)
|
||||
|
||||
|
@ -426,7 +425,7 @@ void __fi GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r)
|
|||
m_q = r->STQ.Q;
|
||||
}
|
||||
|
||||
void __fi GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r)
|
||||
__forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r)
|
||||
{
|
||||
#if _M_SSE >= 0x200
|
||||
|
||||
|
@ -441,7 +440,7 @@ void __fi GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r)
|
|||
#endif
|
||||
}
|
||||
|
||||
void __fi GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r)
|
||||
__forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r)
|
||||
{
|
||||
m_v.XYZ.X = r->XYZF2.X;
|
||||
m_v.XYZ.Y = r->XYZF2.Y;
|
||||
|
@ -451,7 +450,7 @@ void __fi GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r)
|
|||
VertexKick(r->XYZF2.ADC);
|
||||
}
|
||||
|
||||
void __fi GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r)
|
||||
__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r)
|
||||
{
|
||||
m_v.XYZ.X = r->XYZ2.X;
|
||||
m_v.XYZ.Y = r->XYZ2.Y;
|
||||
|
@ -460,17 +459,17 @@ void __fi GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r)
|
|||
VertexKick(r->XYZ2.ADC);
|
||||
}
|
||||
|
||||
void __fi GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* r)
|
||||
__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* r)
|
||||
{
|
||||
m_v.FOG.F = r->FOG.F;
|
||||
}
|
||||
|
||||
void __fi GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* r)
|
||||
__forceinline void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* r)
|
||||
{
|
||||
(this->*m_fpGIFRegHandlers[r->A_D.ADDR])(&r->r);
|
||||
}
|
||||
|
||||
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* r)
|
||||
__forceinline void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* r)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -502,6 +501,8 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
|
|||
|
||||
m_context = &m_env.CTXT[PRIM->CTXT];
|
||||
|
||||
UpdateVertexKick();
|
||||
|
||||
ResetPrim();
|
||||
}
|
||||
|
||||
|
@ -510,22 +511,22 @@ void GSState::GIFRegHandlerPRIM(const GIFReg* r)
|
|||
ApplyPRIM(r->PRIM);
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerRGBAQ(const GIFReg* r)
|
||||
__forceinline void GSState::GIFRegHandlerRGBAQ(const GIFReg* r)
|
||||
{
|
||||
m_v.RGBAQ = (GSVector4i)r->RGBAQ;
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerST(const GIFReg* r)
|
||||
__forceinline void GSState::GIFRegHandlerST(const GIFReg* r)
|
||||
{
|
||||
m_v.ST = (GSVector4i)r->ST;
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerUV(const GIFReg* r)
|
||||
__forceinline void GSState::GIFRegHandlerUV(const GIFReg* r)
|
||||
{
|
||||
m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff;
|
||||
}
|
||||
|
||||
__fi void GSState::GIFRegHandlerXYZF2(const GIFReg* r)
|
||||
void GSState::GIFRegHandlerXYZF2(const GIFReg* r)
|
||||
{
|
||||
/*
|
||||
m_v.XYZ.X = r->XYZF.X;
|
||||
|
@ -540,14 +541,14 @@ __fi void GSState::GIFRegHandlerXYZF2(const GIFReg* r)
|
|||
VertexKick(false);
|
||||
}
|
||||
|
||||
__fi void GSState::GIFRegHandlerXYZ2(const GIFReg* r)
|
||||
void GSState::GIFRegHandlerXYZ2(const GIFReg* r)
|
||||
{
|
||||
m_v.XYZ = (GSVector4i)r->XYZ;
|
||||
|
||||
VertexKick(false);
|
||||
}
|
||||
|
||||
__fi void GSState::ApplyTEX0( uint i, GIFRegTEX0& TEX0 )
|
||||
void GSState::ApplyTEX0(uint i, GIFRegTEX0& TEX0)
|
||||
{
|
||||
// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
|
||||
|
||||
|
@ -578,7 +579,7 @@ __fi void GSState::ApplyTEX0( uint i, GIFRegTEX0& TEX0 )
|
|||
}
|
||||
}
|
||||
|
||||
template<int i> __fi void GSState::GIFRegHandlerTEX0(const GIFReg* r)
|
||||
template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* r)
|
||||
{
|
||||
GIFRegTEX0 TEX0 = r->TEX0;
|
||||
|
||||
|
@ -588,7 +589,7 @@ template<int i> __fi void GSState::GIFRegHandlerTEX0(const GIFReg* r)
|
|||
ApplyTEX0( i, TEX0 );
|
||||
}
|
||||
|
||||
template<int i> __fi void GSState::GIFRegHandlerCLAMP(const GIFReg* r)
|
||||
template<int i> void GSState::GIFRegHandlerCLAMP(const GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP)
|
||||
{
|
||||
|
@ -603,7 +604,7 @@ void GSState::GIFRegHandlerFOG(const GIFReg* r)
|
|||
m_v.FOG = (GSVector4i)r->FOG;
|
||||
}
|
||||
|
||||
__fi void GSState::GIFRegHandlerXYZF3(const GIFReg* r)
|
||||
void GSState::GIFRegHandlerXYZF3(const GIFReg* r)
|
||||
{
|
||||
/*
|
||||
m_v.XYZ.X = r->XYZF.X;
|
||||
|
@ -618,7 +619,7 @@ __fi void GSState::GIFRegHandlerXYZF3(const GIFReg* r)
|
|||
VertexKick(true);
|
||||
}
|
||||
|
||||
__fi void GSState::GIFRegHandlerXYZ3(const GIFReg* r)
|
||||
void GSState::GIFRegHandlerXYZ3(const GIFReg* r)
|
||||
{
|
||||
m_v.XYZ = (GSVector4i)r->XYZ;
|
||||
|
||||
|
@ -629,7 +630,7 @@ void GSState::GIFRegHandlerNOP(const GIFReg* r)
|
|||
{
|
||||
}
|
||||
|
||||
template<int i> __fi void GSState::GIFRegHandlerTEX1(const GIFReg* r)
|
||||
template<int i> void GSState::GIFRegHandlerTEX1(const GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1)
|
||||
{
|
||||
|
@ -639,7 +640,7 @@ template<int i> __fi void GSState::GIFRegHandlerTEX1(const GIFReg* r)
|
|||
m_env.CTXT[i].TEX1 = (GSVector4i)r->TEX1;
|
||||
}
|
||||
|
||||
template<int i> __fi void GSState::GIFRegHandlerTEX2(const GIFReg* r)
|
||||
template<int i> void GSState::GIFRegHandlerTEX2(const GIFReg* r)
|
||||
{
|
||||
// m_env.CTXT[i].TEX2 = r->TEX2; // not used
|
||||
|
||||
|
@ -656,7 +657,7 @@ template<int i> __fi void GSState::GIFRegHandlerTEX2(const GIFReg* r)
|
|||
ApplyTEX0(i, TEX0);
|
||||
}
|
||||
|
||||
template<int i> __fi void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r)
|
||||
template<int i> void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r)
|
||||
{
|
||||
GSVector4i o = (GSVector4i)r->XYOFFSET & GSVector4i::x0000ffff();
|
||||
|
||||
|
@ -670,7 +671,7 @@ template<int i> __fi void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r)
|
|||
m_env.CTXT[i].UpdateScissor();
|
||||
}
|
||||
|
||||
__fi void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r)
|
||||
void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r)
|
||||
{
|
||||
if(r->PRMODECONT != m_env.PRMODECONT)
|
||||
{
|
||||
|
@ -684,9 +685,11 @@ __fi void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r)
|
|||
// if(PRIM->PRIM == 7) printf("Invalid PRMODECONT/PRIM\n");
|
||||
|
||||
m_context = &m_env.CTXT[PRIM->CTXT];
|
||||
|
||||
UpdateVertexKick();
|
||||
}
|
||||
|
||||
__fi void GSState::GIFRegHandlerPRMODE(const GIFReg* r)
|
||||
void GSState::GIFRegHandlerPRMODE(const GIFReg* r)
|
||||
{
|
||||
if(!m_env.PRMODECONT.AC)
|
||||
{
|
||||
|
@ -698,9 +701,11 @@ __fi void GSState::GIFRegHandlerPRMODE(const GIFReg* r)
|
|||
m_env.PRMODE._PRIM = _PRIM;
|
||||
|
||||
m_context = &m_env.CTXT[PRIM->CTXT];
|
||||
|
||||
UpdateVertexKick();
|
||||
}
|
||||
|
||||
__fi void GSState::GIFRegHandlerTEXCLUT(const GIFReg* r)
|
||||
void GSState::GIFRegHandlerTEXCLUT(const GIFReg* r)
|
||||
{
|
||||
if(r->TEXCLUT != m_env.TEXCLUT)
|
||||
{
|
||||
|
@ -730,7 +735,7 @@ template<int i> void GSState::GIFRegHandlerMIPTBP1(const GIFReg* r)
|
|||
m_env.CTXT[i].MIPTBP1 = (GSVector4i)r->MIPTBP1;
|
||||
}
|
||||
|
||||
template<int i> __fi void GSState::GIFRegHandlerMIPTBP2(const GIFReg* r)
|
||||
template<int i> void GSState::GIFRegHandlerMIPTBP2(const GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2)
|
||||
{
|
||||
|
@ -767,7 +772,7 @@ void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* r)
|
|||
// InvalidateTextureCache();
|
||||
}
|
||||
|
||||
template<int i> __fi void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
|
||||
template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR)
|
||||
{
|
||||
|
@ -779,7 +784,7 @@ template<int i> __fi void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
|
|||
m_env.CTXT[i].UpdateScissor();
|
||||
}
|
||||
|
||||
template<int i> __fi void GSState::GIFRegHandlerALPHA(const GIFReg* r)
|
||||
template<int i> void GSState::GIFRegHandlerALPHA(const GIFReg* r)
|
||||
{
|
||||
ASSERT(r->ALPHA.A != 3);
|
||||
ASSERT(r->ALPHA.B != 3);
|
||||
|
@ -1142,66 +1147,6 @@ void GSState::Read(uint8* mem, int len)
|
|||
m_mem.ReadImageX(m_tr.x, m_tr.y, mem, len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG);
|
||||
}
|
||||
|
||||
// Use version 1 of the optimized local > local transfer, as per revision 887.
|
||||
// Later (more optimized?) versions cause a crash in Dark Cloud 2.
|
||||
#if 1
|
||||
void GSState::Move()
|
||||
{
|
||||
// ffxii uses this to move the top/bottom of the scrolling menus offscreen and then blends them back over the text to create a shading effect
|
||||
// guitar hero copies the far end of the board to do a similar blend too
|
||||
|
||||
int sx = m_env.TRXPOS.SSAX;
|
||||
int dx = m_env.TRXPOS.DSAX;
|
||||
int sy = m_env.TRXPOS.SSAY;
|
||||
int dy = m_env.TRXPOS.DSAY;
|
||||
int w = m_env.TRXREG.RRW;
|
||||
int h = m_env.TRXREG.RRH;
|
||||
int xinc = 1;
|
||||
int yinc = 1;
|
||||
|
||||
InvalidateLocalMem(m_env.BITBLTBUF, GSVector4i(sx, sy, sx + w, sy + h));
|
||||
InvalidateVideoMem(m_env.BITBLTBUF, GSVector4i(dx, dy, dx + w, dy + h));
|
||||
|
||||
if(sx < dx) sx += w-1, dx += w-1, xinc = -1;
|
||||
if(sy < dy) sy += h-1, dy += h-1, yinc = -1;
|
||||
|
||||
const GSLocalMemory::psm_t& spsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM];
|
||||
const GSLocalMemory::psm_t& dpsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM];
|
||||
|
||||
if(m_env.BITBLTBUF.SPSM == PSM_PSMCT32 && m_env.BITBLTBUF.DPSM == PSM_PSMCT32)
|
||||
{
|
||||
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w)
|
||||
{
|
||||
DWORD sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
|
||||
int* soffset = spsm.rowOffset[sy & 7];
|
||||
|
||||
DWORD dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
|
||||
int* doffset = dpsm.rowOffset[dy & 7];
|
||||
|
||||
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
|
||||
{
|
||||
m_mem.WritePixel32(dbase + doffset[dx], m_mem.ReadPixel32(sbase + soffset[sx]));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w)
|
||||
{
|
||||
DWORD sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
|
||||
int* soffset = spsm.rowOffset[sy & 7];
|
||||
|
||||
DWORD dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
|
||||
int* doffset = dpsm.rowOffset[dy & 7];
|
||||
|
||||
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
|
||||
{
|
||||
(m_mem.*dpsm.wpa)(dbase + doffset[dx], (m_mem.*spsm.rpa)(sbase + soffset[sx]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
void GSState::Move()
|
||||
{
|
||||
// ffxii uses this to move the top/bottom of the scrolling menus offscreen and then blends them back over the text to create a shading effect
|
||||
|
@ -1346,10 +1291,7 @@ void GSState::Move()
|
|||
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
|
||||
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
|
||||
|
||||
for(int x = 0; x > -w; x--) {
|
||||
printf("%d",x); //Dark Cloud 2 crashes at x = -63
|
||||
d[dcol[x]] = s[scol[x]];
|
||||
}
|
||||
for(int x = 0; x > -w; x--) d[dcol[x]] = s[scol[x]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1412,7 +1354,7 @@ void GSState::Move()
|
|||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void GSState::SoftReset(uint32 mask)
|
||||
{
|
||||
if(mask & 1)
|
||||
|
@ -1508,91 +1450,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
|||
{
|
||||
do
|
||||
{
|
||||
uint32 reg = path.GetReg();
|
||||
|
||||
#if 0
|
||||
// I assume this was some sort of debugging code? Why intercept and perform
|
||||
// special handling for the first three entries in the table, and then do
|
||||
// a LUT for the rest? Either do a switch for the whole table (best idea)
|
||||
// or do a LUT for the whole table.
|
||||
switch(reg)
|
||||
{
|
||||
case GIF_REG_RGBA:
|
||||
GIFPackedRegHandlerRGBA((GIFPackedReg*)mem);
|
||||
break;
|
||||
case GIF_REG_STQ:
|
||||
GIFPackedRegHandlerSTQ((GIFPackedReg*)mem);
|
||||
break;
|
||||
case GIF_REG_UV:
|
||||
GIFPackedRegHandlerUV((GIFPackedReg*)mem);
|
||||
break;
|
||||
default:
|
||||
(this->*m_fpGIFPackedRegHandlers[reg])((GIFPackedReg*)mem);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if UsePackedRegSwitch
|
||||
// This is a switch statement version of the LUT above. Since there are only
|
||||
// 16 entries, this is almost certainly ideal, since the compiler can inline
|
||||
// all the handlers, and PGO will further optimize the switch dispatcher.
|
||||
|
||||
if (FrameSkipIt)
|
||||
{
|
||||
// When skipping frames it looks like we only need to bother with the A_D handler
|
||||
// and the TEX handlers. (and I'm thinking the TEX handlers might not be necessary
|
||||
// if the PCSX2 side of the frameskipper is smart enough anyway).
|
||||
switch(reg)
|
||||
{
|
||||
case GIF_REG_A_D: GIFPackedRegHandlerA_D ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_TEX0_1: GIFRegHandlerTEX0<0> ((GIFReg*)mem); break;
|
||||
case GIF_REG_TEX0_2: GIFRegHandlerTEX0<1> ((GIFReg*)mem); break;
|
||||
|
||||
// Should RGBA/STQ/UV be NOPs when skipping frames? I think so, but maybe the original
|
||||
// switch() (above) was some hack to enable them in frameskipping mode. --air
|
||||
|
||||
case GIF_REG_RGBA: //GIFPackedRegHandlerRGBA ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_STQ: //GIFPackedRegHandlerSTQ ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_UV: //GIFPackedRegHandlerUV ((GIFPackedReg*)mem); break;
|
||||
|
||||
case GIF_REG_XYZF2: //GIFPackedRegHandlerXYZF2((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_XYZ2: //GIFPackedRegHandlerXYZ2 ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_CLAMP_1: //GIFRegHandlerCLAMP<0> ((GIFReg*)mem); break;
|
||||
case GIF_REG_CLAMP_2: //GIFRegHandlerCLAMP<1> ((GIFReg*)mem); break;
|
||||
case GIF_REG_FOG: //GIFPackedRegHandlerFOG ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_XYZF3: //GIFRegHandlerXYZF3 ((GIFReg*)mem); break;
|
||||
case GIF_REG_XYZ3: //GIFRegHandlerXYZ3 ((GIFReg*)mem); break;
|
||||
case GIF_REG_NOP: break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(reg)
|
||||
{
|
||||
case GIF_REG_RGBA: GIFPackedRegHandlerRGBA ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_STQ: GIFPackedRegHandlerSTQ ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_UV: GIFPackedRegHandlerUV ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_XYZF2: GIFPackedRegHandlerXYZF2((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_XYZ2: GIFPackedRegHandlerXYZ2 ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_TEX0_1: GIFRegHandlerTEX0<0> ((GIFReg*)mem); break;
|
||||
case GIF_REG_TEX0_2: GIFRegHandlerTEX0<1> ((GIFReg*)mem); break;
|
||||
case GIF_REG_CLAMP_1: GIFRegHandlerCLAMP<0> ((GIFReg*)mem); break;
|
||||
case GIF_REG_CLAMP_2: GIFRegHandlerCLAMP<1> ((GIFReg*)mem); break;
|
||||
case GIF_REG_FOG: GIFPackedRegHandlerFOG ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_XYZF3: GIFRegHandlerXYZF3 ((GIFReg*)mem); break;
|
||||
case GIF_REG_XYZ3: GIFRegHandlerXYZ3 ((GIFReg*)mem); break;
|
||||
case GIF_REG_A_D: GIFPackedRegHandlerA_D ((GIFPackedReg*)mem); break;
|
||||
case GIF_REG_NOP: break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
// This is the original LUT implementation of the packed reg dispatcher.
|
||||
// Simple and clean, but the switch system below is probably more efficient.
|
||||
|
||||
(this->*m_fpGIFPackedRegHandlers[reg])((GIFPackedReg*)mem);
|
||||
|
||||
#endif
|
||||
(this->*m_fpGIFPackedRegHandlers[path.GetReg()])((GIFPackedReg*)mem);
|
||||
|
||||
mem += sizeof(GIFPackedReg);
|
||||
size--;
|
||||
|
@ -1779,7 +1637,7 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly)
|
|||
WriteState(data, &m_tr.y);
|
||||
WriteState(data, m_mem.m_vm8, m_mem.m_vmsize);
|
||||
|
||||
for(int i = 0; i < ArraySize(m_path); i++)
|
||||
for(int i = 0; i < countof(m_path); i++)
|
||||
{
|
||||
m_path[i].tag.NREG = m_path[i].nreg;
|
||||
m_path[i].tag.NLOOP = m_path[i].nloop;
|
||||
|
@ -1874,7 +1732,7 @@ int GSState::Defrost(const GSFreezeData* fd)
|
|||
|
||||
m_tr.total = 0; // TODO: restore transfer state
|
||||
|
||||
for(int i = 0; i < ArraySize(m_path); i++)
|
||||
for(int i = 0; i < countof(m_path); i++)
|
||||
{
|
||||
ReadState(&m_path[i].tag, data);
|
||||
ReadState(&m_path[i].reg, data);
|
||||
|
@ -1888,6 +1746,8 @@ int GSState::Defrost(const GSFreezeData* fd)
|
|||
|
||||
m_context = &m_env.CTXT[PRIM->CTXT];
|
||||
|
||||
UpdateVertexKick();
|
||||
|
||||
m_env.UpdateDIMX();
|
||||
|
||||
for(int i = 0; i < 2; i++)
|
||||
|
@ -1918,7 +1778,7 @@ GSState::GSTransferBuffer::GSTransferBuffer()
|
|||
{
|
||||
x = y = 0;
|
||||
start = end = total = 0;
|
||||
buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16);
|
||||
buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);
|
||||
}
|
||||
|
||||
GSState::GSTransferBuffer::~GSTransferBuffer()
|
||||
|
|
|
@ -36,17 +36,11 @@
|
|||
#include "GSAlignedClass.h"
|
||||
#include "GSDump.h"
|
||||
|
||||
// Set this to 1 to enable a switch statement instead of a LUT for the packed register handler
|
||||
// in the GifTransfer code. Switch statement is probably faster, but it isn't fully implemented
|
||||
// yet (not properly supporting frameskipping).
|
||||
#define UsePackedRegSwitch 0
|
||||
|
||||
class GSState : public GSAlignedClass<16>
|
||||
class GSState : public GSAlignedClass<32>
|
||||
{
|
||||
#if !UsePackedRegSwitch
|
||||
typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* r);
|
||||
|
||||
GIFPackedRegHandler m_fpGIFPackedRegHandlers[16];
|
||||
#endif
|
||||
|
||||
void GIFPackedRegHandlerNull(const GIFPackedReg* r);
|
||||
void GIFPackedRegHandlerRGBA(const GIFPackedReg* r);
|
||||
|
@ -62,7 +56,7 @@ class GSState : public GSAlignedClass<16>
|
|||
|
||||
GIFRegHandler m_fpGIFRegHandlers[256];
|
||||
|
||||
void ApplyTEX0( uint i, GIFRegTEX0& TEX0 );
|
||||
void ApplyTEX0(uint i, GIFRegTEX0& TEX0);
|
||||
void ApplyPRIM(const GIFRegPRIM& PRIM);
|
||||
|
||||
void GIFRegHandlerNull(const GIFReg* r);
|
||||
|
@ -136,33 +130,67 @@ class GSState : public GSAlignedClass<16>
|
|||
protected:
|
||||
bool IsBadFrame(int& skip, int UserHacks_SkipDraw);
|
||||
|
||||
typedef void (GSState::*DrawingKickPtr)(bool skip);
|
||||
typedef void (GSState::*VertexKickPtr)(bool skip);
|
||||
|
||||
DrawingKickPtr m_dk[8];
|
||||
VertexKickPtr m_vk[8][2][2];
|
||||
VertexKickPtr m_vkf;
|
||||
|
||||
template<class T> void InitVertexKick()
|
||||
{
|
||||
m_dk[GS_POINTLIST] = (DrawingKickPtr)&T::DrawingKick<GS_POINTLIST>;
|
||||
m_dk[GS_LINELIST] = (DrawingKickPtr)&T::DrawingKick<GS_LINELIST>;
|
||||
m_dk[GS_LINESTRIP] = (DrawingKickPtr)&T::DrawingKick<GS_LINESTRIP>;
|
||||
m_dk[GS_TRIANGLELIST] = (DrawingKickPtr)&T::DrawingKick<GS_TRIANGLELIST>;
|
||||
m_dk[GS_TRIANGLESTRIP] = (DrawingKickPtr)&T::DrawingKick<GS_TRIANGLESTRIP>;
|
||||
m_dk[GS_TRIANGLEFAN] = (DrawingKickPtr)&T::DrawingKick<GS_TRIANGLEFAN>;
|
||||
m_dk[GS_SPRITE] = (DrawingKickPtr)&T::DrawingKick<GS_SPRITE>;
|
||||
m_dk[GS_INVALID] = &GSState::DrawingKickNull;
|
||||
m_vk[GS_POINTLIST][0][0] = (VertexKickPtr)&T::VertexKick<GS_POINTLIST, 0, 0>;
|
||||
m_vk[GS_POINTLIST][0][1] = (VertexKickPtr)&T::VertexKick<GS_POINTLIST, 0, 0>;
|
||||
m_vk[GS_POINTLIST][1][0] = (VertexKickPtr)&T::VertexKick<GS_POINTLIST, 1, 0>;
|
||||
m_vk[GS_POINTLIST][1][1] = (VertexKickPtr)&T::VertexKick<GS_POINTLIST, 1, 1>;
|
||||
|
||||
m_vk[GS_LINELIST][0][0] = (VertexKickPtr)&T::VertexKick<GS_LINELIST, 0, 0>;
|
||||
m_vk[GS_LINELIST][0][1] = (VertexKickPtr)&T::VertexKick<GS_LINELIST, 0, 0>;
|
||||
m_vk[GS_LINELIST][1][0] = (VertexKickPtr)&T::VertexKick<GS_LINELIST, 1, 0>;
|
||||
m_vk[GS_LINELIST][1][1] = (VertexKickPtr)&T::VertexKick<GS_LINELIST, 1, 1>;
|
||||
|
||||
m_vk[GS_LINESTRIP][0][0] = (VertexKickPtr)&T::VertexKick<GS_LINESTRIP, 0, 0>;
|
||||
m_vk[GS_LINESTRIP][0][1] = (VertexKickPtr)&T::VertexKick<GS_LINESTRIP, 0, 0>;
|
||||
m_vk[GS_LINESTRIP][1][0] = (VertexKickPtr)&T::VertexKick<GS_LINESTRIP, 1, 0>;
|
||||
m_vk[GS_LINESTRIP][1][1] = (VertexKickPtr)&T::VertexKick<GS_LINESTRIP, 1, 1>;
|
||||
|
||||
m_vk[GS_TRIANGLELIST][0][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLELIST, 0, 0>;
|
||||
m_vk[GS_TRIANGLELIST][0][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLELIST, 0, 0>;
|
||||
m_vk[GS_TRIANGLELIST][1][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLELIST, 1, 0>;
|
||||
m_vk[GS_TRIANGLELIST][1][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLELIST, 1, 1>;
|
||||
|
||||
m_vk[GS_TRIANGLESTRIP][0][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLESTRIP, 0, 0>;
|
||||
m_vk[GS_TRIANGLESTRIP][0][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLESTRIP, 0, 0>;
|
||||
m_vk[GS_TRIANGLESTRIP][1][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLESTRIP, 1, 0>;
|
||||
m_vk[GS_TRIANGLESTRIP][1][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLESTRIP, 1, 1>;
|
||||
|
||||
m_vk[GS_TRIANGLEFAN][0][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLEFAN, 0, 0>;
|
||||
m_vk[GS_TRIANGLEFAN][0][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLEFAN, 0, 0>;
|
||||
m_vk[GS_TRIANGLEFAN][1][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLEFAN, 1, 0>;
|
||||
m_vk[GS_TRIANGLEFAN][1][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLEFAN, 1, 1>;
|
||||
|
||||
m_vk[GS_SPRITE][0][0] = (VertexKickPtr)&T::VertexKick<GS_SPRITE, 0, 0>;
|
||||
m_vk[GS_SPRITE][0][1] = (VertexKickPtr)&T::VertexKick<GS_SPRITE, 0, 0>;
|
||||
m_vk[GS_SPRITE][1][0] = (VertexKickPtr)&T::VertexKick<GS_SPRITE, 1, 0>;
|
||||
m_vk[GS_SPRITE][1][1] = (VertexKickPtr)&T::VertexKick<GS_SPRITE, 1, 1>;
|
||||
|
||||
m_vk[GS_INVALID][0][0] = &GSState::VertexKickNull;
|
||||
m_vk[GS_INVALID][0][1] = &GSState::VertexKickNull;
|
||||
m_vk[GS_INVALID][1][0] = &GSState::VertexKickNull;
|
||||
m_vk[GS_INVALID][1][1] = &GSState::VertexKickNull;
|
||||
}
|
||||
|
||||
void DrawingKickNull(bool skip)
|
||||
// Caches in m_vkf the m_vk table entry indexed by PRIM->PRIM, PRIM->TME and PRIM->FST.
void UpdateVertexKick()
|
||||
{
|
||||
m_vkf = m_vk[PRIM->PRIM][PRIM->TME][PRIM->FST];
|
||||
}
|
||||
|
||||
// Handler that is not expected to run: it unconditionally trips ASSERT(0).
// The skip argument is ignored.
void VertexKickNull(bool skip)
|
||||
{
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
virtual void DoVertexKick()=0;
|
||||
|
||||
__fi void VertexKick(bool skip)
|
||||
void VertexKick(bool skip)
|
||||
{
|
||||
DoVertexKick();
|
||||
(this->*m_dk[PRIM->PRIM])(skip);
|
||||
(this->*m_vkf)(skip);
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -221,6 +249,6 @@ public:
|
|||
void SetFrameSkip(int skip);
|
||||
void SetRegsMem(uint8* basemem);
|
||||
void SetIrqCallback(void (*irq)());
|
||||
void SetMultithreaded(bool isMT=true);
|
||||
void SetMultithreaded(bool mt = true);
|
||||
};
|
||||
|
||||
|
|
|
@ -37,9 +37,12 @@ extern const uint8 clutTableT32I8[128];
|
|||
extern const uint8 clutTableT32I4[16];
|
||||
extern const uint8 clutTableT16I8[32];
|
||||
extern const uint8 clutTableT16I4[16];
|
||||
struct D3D9Blend {
|
||||
|
||||
struct D3D9Blend
|
||||
{
|
||||
int bogus;
|
||||
D3DBLENDOP op;
|
||||
D3DBLEND src, dst;
|
||||
};
|
||||
|
||||
extern const D3D9Blend blendMapD3D9[3*3*3*3];
|
||||
|
|
|
@ -27,6 +27,6 @@ GSTexture::GSTexture()
|
|||
, m_size(0, 0)
|
||||
, m_type(None)
|
||||
, m_msaa(false)
|
||||
, LikelyOffset (false)
|
||||
, LikelyOffset(false)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -836,11 +836,11 @@ GSTextureCache::Source::Source(GSRenderer* r)
|
|||
{
|
||||
memset(m_valid, 0, sizeof(m_valid));
|
||||
|
||||
m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 16);
|
||||
m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 32);
|
||||
|
||||
// m_clut is a heap pointer, so sizeof(m_clut) would only cover the pointer itself;
// clear all 256 uint32 entries that were just allocated.
memset(m_clut, 0, 256 * sizeof(uint32));
|
||||
|
||||
m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 16);
|
||||
m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32);
|
||||
m_write.count = 0;
|
||||
}
|
||||
|
||||
|
@ -1082,7 +1082,7 @@ void GSTextureCache::Target::Update()
|
|||
}
|
||||
else
|
||||
{
|
||||
static uint8* buff = (uint8*)::_aligned_malloc(1024 * 1024 * 4, 16);
|
||||
static uint8* buff = (uint8*)::_aligned_malloc(1024 * 1024 * 4, 32);
|
||||
|
||||
int pitch = ((w + 3) & ~3) * 4;
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ public:
|
|||
FMT_8,
|
||||
};
|
||||
|
||||
class Surface : public GSAlignedClass<16>
|
||||
class Surface : public GSAlignedClass<32>
|
||||
{
|
||||
protected:
|
||||
GSRenderer* m_renderer;
|
||||
|
|
|
@ -253,7 +253,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
|||
|
||||
if(m_buff == NULL)
|
||||
{
|
||||
m_buff = _aligned_malloc(tw * th * sizeof(uint32), 16);
|
||||
m_buff = _aligned_malloc(tw * th * sizeof(uint32), 32);
|
||||
|
||||
if(m_buff == NULL)
|
||||
{
|
||||
|
|
|
@ -137,6 +137,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
|||
}
|
||||
|
||||
VSSetShader(i->second.vs, m_vs_cb);
|
||||
|
||||
IASetInputLayout(i->second.il);
|
||||
}
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ void GSDevice9::SetupIA(const void* vertices, int count, int prim)
|
|||
|
||||
void GSDevice9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
||||
{
|
||||
hash_map< uint32, GSVertexShader9 >::const_iterator i = m_vs.find(sel);
|
||||
hash_map<uint32, GSVertexShader9>::const_iterator i = m_vs.find(sel);
|
||||
|
||||
if(i == m_vs.end())
|
||||
{
|
||||
|
@ -110,6 +110,7 @@ void GSDevice9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
|||
}
|
||||
|
||||
VSSetShader(i->second.vs, (const float*)cb, sizeof(*cb) / sizeof(GSVector4));
|
||||
|
||||
IASetInputLayout(i->second.il);
|
||||
}
|
||||
|
||||
|
|
|
@ -27,26 +27,6 @@ const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
|
|||
const GSVector4 GSVector4::m_x3f800000(_mm_castsi128_ps(_mm_set1_epi32(0x3f800000)));
|
||||
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
||||
|
||||
// Builds an integer vector from a float vector: each of the four packed floats is
// converted to a 32-bit integer with truncation (_mm_cvttps_epi32).
GSVector4i::GSVector4i(const GSVector4& v)
|
||||
{
|
||||
m = _mm_cvttps_epi32(v);
|
||||
}
|
||||
|
||||
// Builds a float vector from an integer vector: each of the four packed 32-bit
// integers is converted to a float (_mm_cvtepi32_ps).
GSVector4::GSVector4(const GSVector4i& v)
|
||||
{
|
||||
m = _mm_cvtepi32_ps(v);
|
||||
}
|
||||
|
||||
// Reinterprets the 128 bits of a float vector as an integer vector; no numeric
// conversion takes place (_mm_castps_si128).
GSVector4i GSVector4i::cast(const GSVector4& v)
|
||||
{
|
||||
return GSVector4i(_mm_castps_si128(v.m));
|
||||
}
|
||||
|
||||
// Reinterprets the 128 bits of an integer vector as a float vector; no numeric
// conversion takes place (_mm_castsi128_ps).
GSVector4 GSVector4::cast(const GSVector4i& v)
|
||||
{
|
||||
return GSVector4(_mm_castsi128_ps(v.m));
|
||||
}
|
||||
|
||||
GSVector4i GSVector4i::fit(int arx, int ary) const
|
||||
{
|
||||
GSVector4i r = *this;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -28,7 +28,7 @@
|
|||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
__aligned16 struct GSVertex
|
||||
__aligned32 struct GSVertex
|
||||
{
|
||||
union
|
||||
{
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
__aligned16 union GSVertexHW9
|
||||
__aligned32 union GSVertexHW9
|
||||
{
|
||||
struct
|
||||
{
|
||||
|
@ -56,7 +56,7 @@ __aligned16 union GSVertexHW9
|
|||
float GetQ() {return p.w;}
|
||||
};
|
||||
|
||||
__aligned16 union GSVertexHW11
|
||||
__aligned32 union GSVertexHW11
|
||||
{
|
||||
struct
|
||||
{
|
||||
|
|
|
@ -31,7 +31,7 @@ public:
|
|||
GSVertexList()
|
||||
: m_count(0)
|
||||
{
|
||||
m_base = _aligned_malloc(sizeof(Vertex) * countof(m_v), 16);
|
||||
m_base = _aligned_malloc(sizeof(Vertex) * countof(m_v), 32);
|
||||
|
||||
for(int i = 0; i < countof(m_v); i++)
|
||||
{
|
||||
|
|
|
@ -23,12 +23,16 @@
|
|||
|
||||
#include "GSVector.h"
|
||||
|
||||
__aligned16 union GSVertexSW
|
||||
__aligned32 union GSVertexSW
|
||||
{
|
||||
struct {GSVector4 c, p, t;};
|
||||
struct {GSVector4 v[3];};
|
||||
struct {float f[12];};
|
||||
|
||||
#if _M_SSE >= 0x500
|
||||
struct {GSVector8 cp, t_;};
|
||||
#endif
|
||||
|
||||
GSVertexSW() {}
|
||||
GSVertexSW(const GSVertexSW& v) {*this = v;}
|
||||
|
||||
|
@ -213,4 +217,3 @@ __forceinline GSVertexSW operator / (const GSVertexSW& v, float f)
|
|||
v0.t = v.t / vf;
|
||||
return v0;
|
||||
}
|
||||
|
||||
|
|
|
@ -120,8 +120,8 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc
|
|||
|
||||
using namespace Xbyak;
|
||||
|
||||
GSVertexTrace::CGSW::CGSW(uint32 key, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
|
||||
: CodeGenerator(maxsize, code)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
|
@ -161,10 +161,10 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* ptr, size_t maxsize)
|
|||
static const float fmin = -FLT_MAX;
|
||||
static const float fmax = FLT_MAX;
|
||||
|
||||
movss(xmm0, xmmword[&fmax]);
|
||||
movss(xmm0, ptr[&fmax]);
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
movss(xmm1, xmmword[&fmin]);
|
||||
movss(xmm1, ptr[&fmin]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
|
@ -202,7 +202,7 @@ L("loop");
|
|||
|
||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, xmmword[edx + 1 * sizeof(GSVertexSW) + 32]);
|
||||
movaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + 32]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
|
@ -213,7 +213,7 @@ L("loop");
|
|||
// min.c = min.c.minv(v[i + j].c);
|
||||
// max.c = max.c.maxv(v[i + j].c);
|
||||
|
||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW)]);
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW)]);
|
||||
|
||||
minps(xmm2, xmm0);
|
||||
maxps(xmm3, xmm0);
|
||||
|
@ -222,7 +222,7 @@ L("loop");
|
|||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 16]);
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + 16]);
|
||||
|
||||
minps(xmm4, xmm0);
|
||||
maxps(xmm5, xmm0);
|
||||
|
@ -232,7 +232,7 @@ L("loop");
|
|||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 32]);
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + 32]);
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
|
@ -265,27 +265,27 @@ L("loop");
|
|||
{
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
psrld(xmm2, 7);
|
||||
movaps(xmmword[eax], xmm2);
|
||||
movaps(ptr[eax], xmm2);
|
||||
|
||||
cvttps2dq(xmm3, xmm3);
|
||||
psrld(xmm3, 7);
|
||||
movaps(xmmword[edx], xmm3);
|
||||
movaps(ptr[edx], xmm3);
|
||||
}
|
||||
|
||||
movaps(xmmword[eax + 16], xmm4);
|
||||
movaps(xmmword[edx + 16], xmm5);
|
||||
movaps(ptr[eax + 16], xmm4);
|
||||
movaps(ptr[edx + 16], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
movaps(xmmword[eax + 32], xmm6);
|
||||
movaps(xmmword[edx + 32], xmm7);
|
||||
movaps(ptr[eax + 32], xmm6);
|
||||
movaps(ptr[edx + 32], xmm7);
|
||||
}
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW9::CGHW9(uint32 key, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
|
||||
: CodeGenerator(maxsize, code)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
|
@ -327,10 +327,10 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* ptr, size_t maxsize)
|
|||
static const float fmin = -FLT_MAX;
|
||||
static const float fmax = FLT_MAX;
|
||||
|
||||
movss(xmm0, xmmword[&fmax]);
|
||||
movss(xmm0, ptr[&fmax]);
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
movss(xmm1, xmmword[&fmin]);
|
||||
movss(xmm1, ptr[&fmin]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
|
@ -368,7 +368,7 @@ L("loop");
|
|||
|
||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, xmmword[edx + 5 * sizeof(GSVertexHW9) + 16]);
|
||||
movaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + 16]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
|
@ -377,7 +377,7 @@ L("loop");
|
|||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW9) + 16]);
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + 16]);
|
||||
|
||||
minps(xmm4, xmm0);
|
||||
maxps(xmm5, xmm0);
|
||||
|
@ -390,7 +390,7 @@ L("loop");
|
|||
|
||||
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW9)]);
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9)]);
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
|
@ -455,15 +455,15 @@ L("loop");
|
|||
punpcklwd(xmm3, xmm0);
|
||||
}
|
||||
|
||||
movaps(xmmword[eax], xmm2);
|
||||
movaps(xmmword[edx], xmm3);
|
||||
movaps(ptr[eax], xmm2);
|
||||
movaps(ptr[edx], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin;
|
||||
// m_max.p = pmax;
|
||||
|
||||
movaps(xmmword[eax + 16], xmm4);
|
||||
movaps(xmmword[edx + 16], xmm5);
|
||||
movaps(ptr[eax + 16], xmm4);
|
||||
movaps(ptr[edx + 16], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
|
@ -473,15 +473,15 @@ L("loop");
|
|||
shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
movaps(xmmword[eax + 32], xmm6);
|
||||
movaps(xmmword[edx + 32], xmm7);
|
||||
movaps(ptr[eax + 32], xmm6);
|
||||
movaps(ptr[edx + 32], xmm7);
|
||||
}
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW11::CGHW11(uint32 key, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize)
|
||||
: CodeGenerator(maxsize, code)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
|
@ -521,10 +521,10 @@ GSVertexTrace::CGHW11::CGHW11(uint32 key, void* ptr, size_t maxsize)
|
|||
static const float fmin = -FLT_MAX;
|
||||
static const float fmax = FLT_MAX;
|
||||
|
||||
movss(xmm0, xmmword[&fmax]);
|
||||
movss(xmm0, ptr[&fmax]);
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
movss(xmm1, xmmword[&fmin]);
|
||||
movss(xmm1, ptr[&fmin]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
|
@ -564,7 +564,7 @@ L("loop");
|
|||
{
|
||||
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW11)]);
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]);
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
|
@ -593,7 +593,7 @@ L("loop");
|
|||
maxps(xmm7, xmm0);
|
||||
}
|
||||
|
||||
movdqa(xmm0, xmmword[edx + j * sizeof(GSVertexHW11) + 16]);
|
||||
movdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]);
|
||||
|
||||
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
|
@ -648,8 +648,8 @@ L("loop");
|
|||
punpcklwd(xmm3, xmm0);
|
||||
}
|
||||
|
||||
movaps(xmmword[eax], xmm2);
|
||||
movaps(xmmword[edx], xmm3);
|
||||
movaps(ptr[eax], xmm2);
|
||||
movaps(ptr[edx], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin.xyww();
|
||||
|
@ -658,16 +658,16 @@ L("loop");
|
|||
shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
movaps(xmmword[eax + 16], xmm4);
|
||||
movaps(xmmword[edx + 16], xmm5);
|
||||
movaps(ptr[eax + 16], xmm4);
|
||||
movaps(ptr[edx + 16], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin;
|
||||
// m_max.t = tmax;
|
||||
|
||||
movaps(xmmword[eax + 32], xmm6);
|
||||
movaps(xmmword[edx + 32], xmm7);
|
||||
movaps(ptr[eax + 32], xmm6);
|
||||
movaps(ptr[edx + 32], xmm7);
|
||||
}
|
||||
|
||||
ret();
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
|
||||
class GSState;
|
||||
|
||||
__aligned16 class GSVertexTrace
|
||||
__aligned32 class GSVertexTrace
|
||||
{
|
||||
struct Vertex {GSVector4i c; GSVector4 p, t;};
|
||||
struct VertexAlpha {int min, max; bool valid;};
|
||||
|
@ -41,14 +41,14 @@ __aligned16 class GSVertexTrace
|
|||
class CGSW : public Xbyak::CodeGenerator
|
||||
{
|
||||
public:
|
||||
CGSW(uint32 key, void* ptr, size_t maxsize);
|
||||
CGSW(uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
class GSVertexTraceMapSW : public GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr>
|
||||
{
|
||||
public:
|
||||
GSVertexTraceMapSW() : GSCodeGeneratorFunctionMap("VertexTraceSW") {}
|
||||
CGSW* Create(uint32 key, void* ptr, size_t maxsize) {return new CGSW(key, ptr, maxsize);}
|
||||
CGSW* Create(uint32 key, void* code, size_t maxsize) {return new CGSW(key, code, maxsize);}
|
||||
};
|
||||
|
||||
class CGHW9 : public Xbyak::CodeGenerator
|
||||
|
@ -63,7 +63,7 @@ __aligned16 class GSVertexTrace
|
|||
{
|
||||
public:
|
||||
GSVertexTraceMapHW9() : GSCodeGeneratorFunctionMap("VertexTraceHW9") {}
|
||||
CGHW9* Create(uint32 key, void* ptr, size_t maxsize) {return new CGHW9(key, ptr, maxsize);}
|
||||
CGHW9* Create(uint32 key, void* code, size_t maxsize) {return new CGHW9(key, code, maxsize);}
|
||||
};
|
||||
|
||||
class CGHW11 : public Xbyak::CodeGenerator
|
||||
|
@ -78,7 +78,7 @@ __aligned16 class GSVertexTrace
|
|||
{
|
||||
public:
|
||||
GSVertexTraceMapHW11() : GSCodeGeneratorFunctionMap("VertexTraceHW11") {}
|
||||
CGHW11* Create(uint32 key, void* ptr, size_t maxsize) {return new CGHW11(key, ptr, maxsize);}
|
||||
CGHW11* Create(uint32 key, void* code, size_t maxsize) {return new CGHW11(key, code, maxsize);}
|
||||
};
|
||||
|
||||
GSVertexTraceMapSW m_map_sw;
|
||||
|
|
|
@ -174,6 +174,7 @@ GSVector4i GSWnd::GetClientRect()
|
|||
|
||||
// Returns FALSE if the window has no title, or if the window title is under the strict
|
||||
// management of the emulator.
|
||||
|
||||
bool GSWnd::SetWindowText(const char* title)
|
||||
{
|
||||
if( !m_IsManaged ) return false;
|
||||
|
|
|
@ -57,6 +57,7 @@
|
|||
#include <algorithm>
|
||||
|
||||
// Let's take advantage of the work that's already been done on making things cross-platform by bringing this in.
|
||||
|
||||
#include "Pcsx2Defs.h"
|
||||
|
||||
using namespace std;
|
||||
|
@ -126,7 +127,7 @@ typedef signed long long int64;
|
|||
|
||||
#define D3DCOLORWRITEENABLE_RGBA (D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_ALPHA)
|
||||
|
||||
#define USE_UPSCALE_HACKS //Hacks intended to fix upscaling / rendering glitches in HW renderers
|
||||
#define USE_UPSCALE_HACKS // Hacks intended to fix upscaling / rendering glitches in HW renderers
|
||||
|
||||
// dxsdk beta missing these:
|
||||
#define D3D11_SHADER_MACRO D3D10_SHADER_MACRO
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
#ifndef XBYAK_H_
|
||||
#define XBYAK_H_
|
||||
#ifndef XBYAK_XBYAK_H_
|
||||
#define XBYAK_XBYAK_H_
|
||||
/*!
|
||||
@file xbyak.h
|
||||
@brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
|
||||
@author herumi
|
||||
@version $Revision: 1.157 $
|
||||
@version $Revision: 1.238 $
|
||||
@url http://homepage1.nifty.com/herumi/soft/xbyak.html
|
||||
@date $Date: 2008/12/30 04:53:11 $
|
||||
@date $Date: 2011/02/04 03:46:09 $
|
||||
@note modified new BSD license
|
||||
http://www.opensource.org/licenses/bsd-license.php
|
||||
*/
|
||||
|
@ -15,9 +15,12 @@
|
|||
#include <assert.h>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#ifdef __GNUC__
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <algorithm>
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#elif defined(__GNUC__)
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
@ -45,13 +48,6 @@
|
|||
#pragma warning(disable : 4127) /* condition is constant(for "if" trick) */
|
||||
#endif
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#ifndef NUM_OF_ARRAY
|
||||
// template<class T, int N>
|
||||
// size_t num_of_array(const T (&)[N]) { return N; }
|
||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
|
||||
#endif
|
||||
|
||||
namespace Xbyak {
|
||||
|
@ -59,29 +55,35 @@ namespace Xbyak {
|
|||
#include "xbyak_bin2hex.h"
|
||||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 2048,
|
||||
VERSION = 0x2070, /* 0xABCD = A.BC(D) */
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x2990, /* 0xABCD = A.BC(D) */
|
||||
};
|
||||
/*
|
||||
#ifndef MIE_DEFINED_UINT32
|
||||
#define MIE_DEFINED_UINT32
|
||||
#ifdef _MSC_VER
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
#define MIE_INTEGER_TYPE_DEFINED
|
||||
#ifdef _MSC_VER
|
||||
typedef unsigned __int64 uint64;
|
||||
#else
|
||||
typedef __int64 sint64;
|
||||
#else
|
||||
typedef unsigned long long uint64;
|
||||
#endif
|
||||
typedef unsigned int uint32;
|
||||
typedef unsigned short uint16;
|
||||
typedef unsigned char uint8;
|
||||
#ifndef MIE_ALIGN
|
||||
typedef long long sint64;
|
||||
#endif
|
||||
typedef unsigned int uint32;
|
||||
typedef unsigned short uint16;
|
||||
typedef unsigned char uint8;
|
||||
#endif
|
||||
*/
|
||||
#ifndef MIE_ALIGN
|
||||
#ifdef _MSC_VER
|
||||
#define MIE_ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define MIE_ALIGN(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
#ifndef MIE_PACK // for shufps
|
||||
#define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w))
|
||||
#endif
|
||||
|
||||
enum Error {
|
||||
ERR_NONE = 0,
|
||||
ERR_BAD_ADDRESSING,
|
||||
|
@ -101,6 +103,10 @@ enum Error {
|
|||
ERR_CANT_USE_64BIT_DISP,
|
||||
ERR_OFFSET_IS_TOO_BIG,
|
||||
ERR_MEM_SIZE_IS_NOT_SPECIFIED,
|
||||
ERR_BAD_MEM_SIZE,
|
||||
ERR_BAD_ST_COMBINATION,
|
||||
ERR_OVER_LOCAL_LABEL,
|
||||
ERR_UNDER_LOCAL_LABEL,
|
||||
ERR_INTERNAL
|
||||
};
|
||||
|
||||
|
@ -125,6 +131,10 @@ static inline const char *ConvertErrorToString(Error err)
|
|||
"can't use 64bit disp(use (void*))",
|
||||
"offset is too big",
|
||||
"MEM size is not specified",
|
||||
"bad mem size",
|
||||
"bad st combination",
|
||||
"over local label",
|
||||
"under local label",
|
||||
"internal error",
|
||||
};
|
||||
if (err < 0 || err > ERR_INTERNAL) return 0;
|
||||
|
@ -135,7 +145,7 @@ namespace inner {
|
|||
|
||||
enum { debug = 1 };
|
||||
|
||||
static inline uint32 GetPtrDist(const void *p1, const void *p2 = 0)
|
||||
static inline uint32 GetPtrDist(const void *p1, const void *p2)
|
||||
{
|
||||
uint64 diff = static_cast<const char *>(p1) - static_cast<const char *>(p2);
|
||||
#ifdef XBYAK64
|
||||
|
@ -145,6 +155,7 @@ static inline uint32 GetPtrDist(const void *p1, const void *p2 = 0)
|
|||
}
|
||||
|
||||
// True when x, read as a signed 32-bit value, lies in [-128, 127] (i.e. it fits in a sign-extended 8-bit value).
static inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; }
|
||||
// True when x, read as a signed 64-bit value, lies in [-2^31, 2^31 - 1] (i.e. it fits in a sign-extended 32-bit value).
static inline bool IsInInt32(uint64 x) { return 0xFFFFFFFF80000000ULL <= x || x <= 0x7FFFFFFFU; }
|
||||
|
||||
}
|
||||
|
||||
|
@ -163,7 +174,8 @@ public:
|
|||
REG = 1 << 3,
|
||||
MMX = 1 << 4,
|
||||
XMM = 1 << 5,
|
||||
FPU = 1 << 6
|
||||
FPU = 1 << 6,
|
||||
YMM = 1 << 7
|
||||
};
|
||||
enum Code {
|
||||
#ifdef XBYAK64
|
||||
|
@ -191,10 +203,11 @@ public:
|
|||
bool isNone() const { return kind_ == 0; }
|
||||
bool isMMX() const { return is(MMX); }
|
||||
bool isXMM() const { return is(XMM); }
|
||||
bool isYMM() const { return is(YMM); }
|
||||
bool isREG(int bit = 0) const { return is(REG, bit); }
|
||||
bool isMEM(int bit = 0) const { return is(MEM, bit); }
|
||||
bool isFPU() const { return is(FPU); }
|
||||
bool isExt8bit() const { return ext8bit_ != 0; }
|
||||
Operand changeBit(int bit) const { return Operand(idx_, static_cast<Kind>(kind_), bit, ext8bit_); }
|
||||
// any bit is acceptable if bit == 0
|
||||
bool is(int kind, uint32 bit = 0) const
|
||||
{
|
||||
|
@ -216,12 +229,18 @@ public:
|
|||
{ "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
|
||||
};
|
||||
return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx_];
|
||||
} else if (isMMX()) {
|
||||
static const char tbl[8][4] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" };
|
||||
} else if (isYMM()) {
|
||||
static const char tbl[16][5] = { "ym0", "ym1", "ym2", "ym3", "ym4", "ym5", "ym6", "ym7", "ym8", "ym9", "ym10", "ym11", "ym12", "ym13", "ym14", "ym15" };
|
||||
return tbl[idx_];
|
||||
} else if (isXMM()) {
|
||||
static const char tbl[16][5] = { "xm0", "xm1", "xm2", "xm3", "xm4", "xm5", "xm6", "xm7", "xm8", "xm9", "xm10", "xm11", "xm12", "xm13", "xm14", "xm15" };
|
||||
return tbl[idx_];
|
||||
} else if (isMMX()) {
|
||||
static const char tbl[8][4] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" };
|
||||
return tbl[idx_];
|
||||
} else if (isFPU()) {
|
||||
static const char tbl[8][4] = { "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7" };
|
||||
return tbl[idx_];
|
||||
}
|
||||
throw ERR_INTERNAL;
|
||||
}
|
||||
|
@ -229,14 +248,15 @@ public:
|
|||
|
||||
class Reg : public Operand {
|
||||
void operator=(const Reg&);
|
||||
bool hasRex() const { return isExt8bit() | isREG(64) | isExtIdx(); }
|
||||
public:
|
||||
Reg() { }
|
||||
Reg(int idx, Kind kind, int bit = 0, int ext8bit = 0) : Operand(idx, kind, bit, ext8bit) { }
|
||||
// reg = this
|
||||
uint8 getRex(const Reg& index = Reg(), const Reg& base = Reg()) const
|
||||
Reg changeBit(int bit) const { return Reg(getIdx(), getKind(), bit, isExt8bit()); }
|
||||
bool isExtIdx() const { return getIdx() > 7; }
|
||||
uint8 getRex(const Reg& base = Reg()) const
|
||||
{
|
||||
if ((!isExt8bit() && !index.isExt8bit() && !base.isExt8bit()) && (getIdx() | index.getIdx() | base.getIdx()) < 8) return 0;
|
||||
return uint8(0x40 | ((getIdx() >> 3) << 2)| ((index.getIdx() >> 3) << 1) | (base.getIdx() >> 3));
|
||||
return (hasRex() || base.hasRex()) ? uint8(0x40 | ((isREG(64) | base.isREG(64)) ? 8 : 0) | (isExtIdx() ? 4 : 0)| (base.isExtIdx() ? 1 : 0)) : 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -261,7 +281,19 @@ public:
|
|||
class Xmm : public Mmx {
|
||||
void operator=(const Xmm&);
|
||||
public:
|
||||
explicit Xmm(int idx) : Mmx(idx, Operand::XMM, 128) { }
|
||||
explicit Xmm(int idx, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
|
||||
};
|
||||
|
||||
// Operand type for a YMM register: forwards idx to Xmm with kind Operand::YMM and a bit width of 256.
class Ymm : public Xmm {
|
||||
// Copy-assignment is declared in the private section (no definition is visible here).
void operator=(const Ymm&);
|
||||
public:
|
||||
explicit Ymm(int idx) : Xmm(idx, Operand::YMM, 256) { }
|
||||
};
|
||||
|
||||
// Operand type for an FPU register: forwards idx to Reg with kind Operand::FPU and a bit width of 32.
class Fpu : public Reg {
|
||||
// Copy-assignment is declared in the private section (no definition is visible here).
void operator=(const Fpu&);
|
||||
public:
|
||||
explicit Fpu(int idx) : Reg(idx, Operand::FPU, 32) { }
|
||||
};
|
||||
|
||||
// register for addressing(32bit or 64bit)
|
||||
|
@ -307,7 +339,7 @@ private:
|
|||
{
|
||||
return operator+(r, -static_cast<int>(disp));
|
||||
}
|
||||
void operator=(const Reg32e&); // don't call
|
||||
void operator=(const Reg32e&);
|
||||
public:
|
||||
explicit Reg32e(int idx, int bit)
|
||||
: Reg(idx, REG, bit)
|
||||
|
@ -362,7 +394,7 @@ struct RegRip {
|
|||
|
||||
class CodeArray {
|
||||
enum {
|
||||
ALIGN_SIZE = 16,
|
||||
ALIGN_PAGE_SIZE = 4096,
|
||||
MAX_FIXED_BUF_SIZE = 8
|
||||
};
|
||||
enum Type {
|
||||
|
@ -381,13 +413,12 @@ protected:
|
|||
public:
|
||||
CodeArray(size_t maxSize = MAX_FIXED_BUF_SIZE, void *userPtr = 0)
|
||||
: type_(userPtr ? USER_BUF : maxSize <= MAX_FIXED_BUF_SIZE ? FIXED_BUF : ALLOC_BUF)
|
||||
, allocPtr_(type_ == ALLOC_BUF ? new uint8[maxSize + ALIGN_SIZE] : 0)
|
||||
, allocPtr_(type_ == ALLOC_BUF ? new uint8[maxSize + ALIGN_PAGE_SIZE] : 0)
|
||||
, maxSize_(maxSize)
|
||||
, top_(type_ == ALLOC_BUF ? getAlignedAddress(allocPtr_) : type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : buf_)
|
||||
, top_(type_ == ALLOC_BUF ? getAlignedAddress(allocPtr_, ALIGN_PAGE_SIZE) : type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : buf_)
|
||||
, size_(0)
|
||||
{
|
||||
if (type_ == ALLOC_BUF && !protect(top_, maxSize, true)) {
|
||||
// fprintf(stderr, "can't protect (addr=%p, size=%u, canExec=%d)\n", addr, size, canExec);
|
||||
throw ERR_CANT_PROTECT;
|
||||
}
|
||||
}
|
||||
|
@ -452,19 +483,19 @@ public:
|
|||
/*
|
||||
@param data [in] address of jmp data
|
||||
@param disp [in] offset from the next of jmp
|
||||
@param isShort [in] true if short jmp
|
||||
@param size [in] write size(1, 2, 4, 8)
|
||||
*/
|
||||
void rewrite(uint8 *data, uint32 disp, bool isShort)
|
||||
void rewrite(uint8 *data, uint64 disp, size_t size)
|
||||
{
|
||||
if (isShort) {
|
||||
data[0] = static_cast<uint8>(disp);
|
||||
} else {
|
||||
data[0] = static_cast<uint8>(disp);
|
||||
data[1] = static_cast<uint8>(disp >> 8);
|
||||
data[2] = static_cast<uint8>(disp >> 16);
|
||||
data[3] = static_cast<uint8>(disp >> 24);
|
||||
if (size != 1 && size != 2 && size != 4 && size != 8) throw ERR_BAD_PARAMETER;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
data[i] = static_cast<uint8>(disp >> (i * 8));
|
||||
}
|
||||
}
|
||||
void updateRegField(uint8 regIdx) const
|
||||
{
|
||||
*top_ = (*top_ & B11000111) | ((regIdx << 3) & B00111000);
|
||||
}
|
||||
/**
|
||||
change exec permission of memory
|
||||
@param addr [in] buffer address
|
||||
|
@ -474,15 +505,15 @@ public:
|
|||
*/
|
||||
static inline bool protect(const void *addr, size_t size, bool canExec)
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
#if defined(_WIN32)
|
||||
DWORD oldProtect;
|
||||
return VirtualProtect(const_cast<void*>(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0;
|
||||
#elif defined(__GNUC__)
|
||||
size_t pageSize = sysconf(_SC_PAGESIZE);
|
||||
size_t iaddr = reinterpret_cast<size_t>(addr);
|
||||
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
||||
int mode = PROT_READ | PROT_WRITE | (canExec ? PROT_EXEC : 0);
|
||||
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
||||
#elif defined(_WIN32)
|
||||
DWORD oldProtect;
|
||||
return VirtualProtect(const_cast<void*>(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
|
@ -493,7 +524,7 @@ public:
|
|||
@param alignedSize [in] power of two
|
||||
@return aligned addr by alignedSize
|
||||
*/
|
||||
static inline uint8 *getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE)
|
||||
static inline uint8 *getAlignedAddress(uint8 *addr, size_t alignedSize = 16)
|
||||
{
|
||||
return reinterpret_cast<uint8*>((reinterpret_cast<size_t>(addr) + alignedSize - 1) & ~(alignedSize - static_cast<size_t>(1)));
|
||||
}
|
||||
|
@ -521,11 +552,7 @@ public:
|
|||
uint64 getDisp() const { return disp_; }
|
||||
uint8 getRex() const { return rex_; }
|
||||
bool is64bitDisp() const { return is64bitDisp_; } // for moffset
|
||||
#ifdef XBYAK64
|
||||
void setRex(uint8 rex) { rex_ = rex; }
|
||||
#else
|
||||
void setRex(uint8) { }
|
||||
#endif
|
||||
};
|
||||
|
||||
class AddressFrame {
|
||||
|
@ -536,7 +563,11 @@ public:
|
|||
explicit AddressFrame(uint32 bit) : bit_(bit) { }
|
||||
Address operator[](const void *disp) const
|
||||
{
|
||||
Reg32e r(Reg(), Reg(), 0, inner::GetPtrDist(disp));
|
||||
size_t adr = reinterpret_cast<size_t>(disp);
|
||||
#ifdef XBYAK64
|
||||
if (adr > 0xFFFFFFFFU) throw ERR_OFFSET_IS_TOO_BIG;
|
||||
#endif
|
||||
Reg32e r(Reg(), Reg(), 0, static_cast<uint32>(adr));
|
||||
return operator[](r);
|
||||
}
|
||||
#ifdef XBYAK64
|
||||
|
@ -587,7 +618,8 @@ public:
|
|||
} else if (mod == mod10 || (mod == mod00 && r.isNone())) {
|
||||
frame.dd(r.disp_);
|
||||
}
|
||||
frame.setRex(Reg().getRex(r.index_, r));
|
||||
uint8 rex = ((r.getIdx() | r.index_.getIdx()) < 8) ? 0 : uint8(0x40 | ((r.index_.getIdx() >> 3) << 1) | (r.getIdx() >> 3));
|
||||
frame.setRex(rex);
|
||||
return frame;
|
||||
}
|
||||
};
|
||||
|
@ -600,6 +632,12 @@ struct JmpLabel {
|
|||
class Label {
|
||||
CodeArray *base_;
|
||||
int anonymousCount_; // for @@, @f, @b
|
||||
enum {
|
||||
maxStack = 10
|
||||
};
|
||||
int stack_[maxStack];
|
||||
int stackPos_;
|
||||
int usedCount_;
|
||||
int localCount_; // for .***
|
||||
typedef std::map<const std::string, const uint8*> DefinedList;
|
||||
typedef std::multimap<const std::string, const JmpLabel> UndefinedList;
|
||||
|
@ -628,15 +666,22 @@ public:
|
|||
Label()
|
||||
: base_(0)
|
||||
, anonymousCount_(0)
|
||||
, stackPos_(1)
|
||||
, usedCount_(0)
|
||||
, localCount_(0)
|
||||
{
|
||||
}
|
||||
void incLocalCount() { localCount_++; }
|
||||
void decLocalCount() { localCount_--; }
|
||||
void set(CodeArray *base)
|
||||
void enterLocal()
|
||||
{
|
||||
base_ = base;
|
||||
if (stackPos_ == maxStack) throw ERR_OVER_LOCAL_LABEL;
|
||||
localCount_ = stack_[stackPos_++] = ++usedCount_;
|
||||
}
|
||||
void leaveLocal()
|
||||
{
|
||||
if (stackPos_ == 1) throw ERR_UNDER_LOCAL_LABEL;
|
||||
localCount_ = stack_[--stackPos_ - 1];
|
||||
}
|
||||
void set(CodeArray *base) { base_ = base; }
|
||||
void define(const char *label, const uint8 *address)
|
||||
{
|
||||
std::string newLabel(label);
|
||||
|
@ -657,8 +702,9 @@ public:
|
|||
const JmpLabel *jmp = &itr->second;
|
||||
uint32 disp = inner::GetPtrDist(address, jmp->endOfJmp);
|
||||
if (jmp->isShort && !inner::IsInDisp8(disp)) throw ERR_LABEL_IS_TOO_FAR;
|
||||
uint8 *data = jmp->endOfJmp - (jmp->isShort ? 1 : 4);
|
||||
base_->rewrite(data, disp, jmp->isShort);
|
||||
size_t jmpSize = jmp->isShort ? 1 : 4;
|
||||
uint8 *data = jmp->endOfJmp - jmpSize;
|
||||
base_->rewrite(data, disp, jmpSize);
|
||||
undefinedList_.erase(itr);
|
||||
}
|
||||
}
|
||||
|
@ -689,22 +735,22 @@ public:
|
|||
static inline std::string toStr(int num)
|
||||
{
|
||||
char buf[16];
|
||||
static const char fmt[] = ".%08x";
|
||||
#ifdef _WIN32
|
||||
#if _MSC_VER < 1400
|
||||
_snprintf(buf, sizeof(buf), fmt, num);
|
||||
_snprintf
|
||||
#else
|
||||
_snprintf_s(buf, sizeof(buf), fmt, num);
|
||||
_snprintf_s
|
||||
#endif
|
||||
#else
|
||||
snprintf(buf, sizeof(buf), fmt, num);
|
||||
snprintf
|
||||
#endif
|
||||
(buf, sizeof(buf), ".%08x", num);
|
||||
return buf;
|
||||
}
|
||||
};
|
||||
|
||||
class CodeGenerator : public CodeArray {
|
||||
protected:
|
||||
public:
|
||||
enum LabelType {
|
||||
T_SHORT,
|
||||
T_NEAR,
|
||||
|
@ -747,36 +793,44 @@ private:
|
|||
{
|
||||
return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM());
|
||||
}
|
||||
void if16bit(const Operand& reg1, const Operand& reg2)
|
||||
{
|
||||
// except movsx(16bit, 32/64bit)
|
||||
if ((reg1.isBit(16) && !reg2.isBit(i32e)) || (reg2.isBit(16) && !reg1.isBit(i32e))) db(0x66);
|
||||
}
|
||||
void rexAddr(const Address& addr, const Reg& reg = Reg())
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
if (addr.is32bit_) db(0x67);
|
||||
#endif
|
||||
if16bit(reg, addr);
|
||||
uint32 rex = addr.getRex() | reg.getRex();
|
||||
if (reg.isREG(64)) rex |= 0x48;
|
||||
if (rex) db(rex);
|
||||
}
|
||||
void rex(const Operand& op1, const Operand& op2 = Operand())
|
||||
{
|
||||
if (op1.isMEM()) {
|
||||
rexAddr(static_cast<const Address&>(op1), static_cast<const Reg&>(op2));
|
||||
} else if (op2.isMEM()) {
|
||||
rexAddr(static_cast<const Address&>(op2), static_cast<const Reg&>(op1));
|
||||
uint8 rex = 0;
|
||||
const Operand *p1 = &op1, *p2 = &op2;
|
||||
if (p1->isMEM()) std::swap(p1, p2);
|
||||
if (p1->isMEM()) throw ERR_BAD_COMBINATION;
|
||||
if (p2->isMEM()) {
|
||||
const Address& addr = static_cast<const Address&>(*p2);
|
||||
if (BIT == 64 && addr.is32bit_) db(0x67);
|
||||
rex = addr.getRex() | static_cast<const Reg&>(*p1).getRex();
|
||||
} else {
|
||||
const Reg& reg1 = static_cast<const Reg&>(op1);
|
||||
const Reg& reg2 = static_cast<const Reg&>(op2);
|
||||
// ModRM(reg, base);
|
||||
if16bit(reg1, reg2);
|
||||
uint8 rex = reg2.getRex(Reg(), reg1);
|
||||
if (reg1.isREG(64) || reg2.isREG(64)) rex |= 0x48;
|
||||
rex = static_cast<const Reg&>(op2).getRex(static_cast<const Reg&>(op1));
|
||||
}
|
||||
// except movsx(16bit, 32/64bit)
|
||||
if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66);
|
||||
if (rex) db(rex);
|
||||
}
|
||||
enum AVXtype {
|
||||
PP_NONE = 1 << 0,
|
||||
PP_66 = 1 << 1,
|
||||
PP_F3 = 1 << 2,
|
||||
PP_F2 = 1 << 3,
|
||||
MM_RESERVED = 1 << 4,
|
||||
MM_0F = 1 << 5,
|
||||
MM_0F38 = 1 << 6,
|
||||
MM_0F3A = 1 << 7
|
||||
};
|
||||
void vex(bool r, int idx, bool is256, int type, bool x = false, bool b = false, int w = 1)
|
||||
{
|
||||
uint32 pp = (type & PP_66) ? 1 : (type & PP_F3) ? 2 : (type & PP_F2) ? 3 : 0;
|
||||
uint32 vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
|
||||
if (!b && !x && !w && (type & MM_0F)) {
|
||||
db(0xC5); db((r ? 0 : 0x80) | vvvv);
|
||||
} else {
|
||||
uint32 mmmm = (type & MM_0F) ? 1 : (type & MM_0F38) ? 2 : (type & MM_0F3A) ? 3 : 0;
|
||||
db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv);
|
||||
}
|
||||
}
|
||||
Label label_;
|
||||
bool isInDisp16(uint32 x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
|
||||
|
@ -792,10 +846,8 @@ private:
|
|||
if (addr.is64bitDisp()) throw ERR_CANT_USE_64BIT_DISP;
|
||||
rex(addr, reg);
|
||||
db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
|
||||
uint8 t = *addr.getCode();
|
||||
assert((t & ~0xC7) == 0); /* 0b11000111 */
|
||||
db(t | ((reg.getIdx() & 7) << 3)); // update reg field
|
||||
db(addr.getCode() + 1, static_cast<int>(addr.getSize()) - 1);
|
||||
addr.updateRegField(static_cast<uint8>(reg.getIdx()));
|
||||
db(addr.getCode(), static_cast<int>(addr.getSize()));
|
||||
}
|
||||
void opJmp(const char *label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
|
||||
{
|
||||
|
@ -835,13 +887,13 @@ private:
|
|||
if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) {
|
||||
db(shortCode);
|
||||
db(0);
|
||||
rewrite(top + shortHeaderSize, disp - shortJmpSize, true);
|
||||
rewrite(top + shortHeaderSize, disp - shortJmpSize, 1);
|
||||
} else {
|
||||
if (type == T_SHORT) throw ERR_LABEL_IS_TOO_FAR;
|
||||
if (longPref) db(longPref);
|
||||
db(longCode);
|
||||
dd(0);
|
||||
rewrite(top + longHeaderSize, disp - longJmpSize, false);
|
||||
rewrite(top + longHeaderSize, disp - longJmpSize, 4);
|
||||
}
|
||||
}
|
||||
/* preCode is for SSSE3/SSE4 */
|
||||
|
@ -864,8 +916,7 @@ private:
|
|||
}
|
||||
void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE)
|
||||
{
|
||||
pref = mmx.isXMM() ? pref : NONE;
|
||||
opGen(mmx, op, code, pref, isXMMorMMX_MEM, imm8, preCode);
|
||||
opGen(mmx, op, code, mmx.isXMM() ? pref : NONE, isXMMorMMX_MEM, imm8, preCode);
|
||||
}
|
||||
void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
|
||||
{
|
||||
|
@ -887,14 +938,14 @@ private:
|
|||
opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, B00111010);
|
||||
}
|
||||
}
|
||||
void opR_ModM(const Operand& op, int bit, uint8 mod, int ext, int code0, int code1 = NONE, int code2 = NONE)
|
||||
void opR_ModM(const Operand& op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false)
|
||||
{
|
||||
int opBit = op.getBit();
|
||||
if (disableRex && opBit == 64) opBit = 32;
|
||||
if (op.isREG(bit)) {
|
||||
rex(op);
|
||||
db(code0 | (op.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
|
||||
db(getModRM(mod, ext, op.getIdx()));
|
||||
opModR(Reg(ext, Operand::REG, opBit), static_cast<const Reg&>(op).changeBit(opBit), code0, code1, code2);
|
||||
} else if (op.isMEM()) {
|
||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code0, code1, code2);
|
||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, opBit), code0, code1, code2);
|
||||
} else {
|
||||
throw ERR_BAD_COMBINATION;
|
||||
}
|
||||
|
@ -902,13 +953,13 @@ private:
|
|||
void opShift(const Operand& op, int imm, int ext)
|
||||
{
|
||||
verifyMemHasSize(op);
|
||||
opR_ModM(op, 0, 3, ext, (B11000000 | ((imm == 1 ? 1 : 0) << 4)));
|
||||
opR_ModM(op, 0, ext, (B11000000 | ((imm == 1 ? 1 : 0) << 4)));
|
||||
if (imm != 1) db(imm);
|
||||
}
|
||||
void opShift(const Operand& op, const Reg8& cl, int ext)
|
||||
{
|
||||
if (cl.getIdx() != Operand::CL) throw ERR_BAD_COMBINATION;
|
||||
opR_ModM(op, 0, 3, ext, B11010010);
|
||||
opR_ModM(op, 0, ext, B11010010);
|
||||
}
|
||||
void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE)
|
||||
{
|
||||
|
@ -941,20 +992,19 @@ private:
|
|||
verifyMemHasSize(op);
|
||||
uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
|
||||
if (op.getBit() < immBit) throw ERR_IMM_IS_TOO_BIG;
|
||||
if (op.isREG()) {
|
||||
if (immBit == 16 && op.isBit(32)) immBit = 32; /* don't use MEM16 if 32bit mode */
|
||||
}
|
||||
if (op.isREG(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
|
||||
if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al
|
||||
rex(op);
|
||||
db(code | 4 | (immBit == 8 ? 0 : 1));
|
||||
} else {
|
||||
int tmp = (op.getBit() > immBit && 32 > immBit) ? 2 : 0;
|
||||
opR_ModM(op, 0, 3, ext, B10000000 | tmp);
|
||||
int tmp = immBit < (std::min)(op.getBit(), 32U) ? 2 : 0;
|
||||
opR_ModM(op, 0, ext, B10000000 | tmp);
|
||||
}
|
||||
db(imm, immBit / 8);
|
||||
}
|
||||
void opIncDec(const Operand& op, int code, int ext)
|
||||
{
|
||||
verifyMemHasSize(op);
|
||||
#ifndef XBYAK64
|
||||
if (op.isREG() && !op.isBit(8)) {
|
||||
rex(op); db(code | op.getIdx());
|
||||
|
@ -964,21 +1014,15 @@ private:
|
|||
code = B11111110;
|
||||
if (op.isREG()) {
|
||||
opModR(Reg(ext, Operand::REG, op.getBit()), static_cast<const Reg&>(op), code);
|
||||
} else if (op.isMEM() && op.getBit() > 0) {
|
||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code);
|
||||
} else {
|
||||
throw ERR_BAD_COMBINATION;
|
||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code);
|
||||
}
|
||||
}
|
||||
void opPushPop(const Operand& op, int code, int ext, int alt)
|
||||
{
|
||||
if (op.isREG()) {
|
||||
#ifdef XBYAK64
|
||||
if (op.isBit(16)) db(0x66);
|
||||
if (static_cast<const Reg&>(op).getIdx() >= 8) db(0x41);
|
||||
#else
|
||||
rex(op);
|
||||
#endif
|
||||
db(alt | (op.getIdx() & 7));
|
||||
} else if (op.isMEM()) {
|
||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code);
|
||||
|
@ -990,16 +1034,51 @@ private:
|
|||
{
|
||||
if (op.isMEM() && op.getBit() == 0) throw ERR_MEM_SIZE_IS_NOT_SPECIFIED;
|
||||
}
|
||||
protected:
|
||||
void opMovxx(const Reg& reg, const Operand& op, uint8 code)
|
||||
{
|
||||
int w = op.isBit(16);
|
||||
bool cond = reg.isREG() && (reg.getBit() > op.getBit());
|
||||
opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
|
||||
}
|
||||
void opFpuMem(const Address& addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext)
|
||||
{
|
||||
if (addr.is64bitDisp()) throw ERR_CANT_USE_64BIT_DISP;
|
||||
uint8 code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
|
||||
if (!code) throw ERR_BAD_MEM_SIZE;
|
||||
if (m64ext && addr.isBit(64)) ext = m64ext;
|
||||
|
||||
rex(addr, st0);
|
||||
db(code);
|
||||
addr.updateRegField(ext);
|
||||
db(addr.getCode(), static_cast<int>(addr.getSize()));
|
||||
}
|
||||
// like yasm not nasm
|
||||
// use code1 if reg1 == st0
|
||||
// use code2 if reg1 != st0 && reg2 == st0
|
||||
void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32 code1, uint32 code2)
|
||||
{
|
||||
uint32 code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0;
|
||||
if (!code) throw ERR_BAD_ST_COMBINATION;
|
||||
db(uint8(code >> 8));
|
||||
db(uint8(code | (reg1.getIdx() | reg2.getIdx())));
|
||||
}
|
||||
void opFpu(const Fpu& reg, uint8 code1, uint8 code2)
|
||||
{
|
||||
db(code1); db(code2 | reg.getIdx());
|
||||
}
|
||||
public:
|
||||
unsigned int getVersion() const { return VERSION; }
|
||||
using CodeArray::db;
|
||||
const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
|
||||
const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
||||
const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
|
||||
const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7;
|
||||
const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7;
|
||||
const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
|
||||
const Reg16 ax, cx, dx, bx, sp, bp, si, di;
|
||||
const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
|
||||
const AddressFrame ptr, byte, word, dword, qword, xmmword;
|
||||
const AddressFrame ptr, byte, word, dword, qword;
|
||||
const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
|
||||
#ifdef XBYAK64
|
||||
const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
|
||||
const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
|
||||
|
@ -1007,7 +1086,9 @@ protected:
|
|||
const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b;
|
||||
const Reg8 spl, bpl, sil, dil;
|
||||
const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
|
||||
const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15;
|
||||
const Ymm ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
|
||||
const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience
|
||||
const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15;
|
||||
const RegRip rip;
|
||||
#endif
|
||||
|
||||
|
@ -1015,8 +1096,8 @@ protected:
|
|||
{
|
||||
label_.define(label, getCurr());
|
||||
}
|
||||
void inLocalLabel() { label_.incLocalCount(); }
|
||||
void outLocalLabel() { label_.decLocalCount(); }
|
||||
void inLocalLabel() { label_.enterLocal(); }
|
||||
void outLocalLabel() { label_.leaveLocal(); }
|
||||
void jmp(const char *label, LabelType type = T_AUTO)
|
||||
{
|
||||
opJmp(label, type, B11101011, B11101001, 0);
|
||||
|
@ -1027,7 +1108,11 @@ protected:
|
|||
}
|
||||
void jmp(const Operand& op)
|
||||
{
|
||||
opR_ModM(op, i32e, 3, 4, 0xFF);
|
||||
opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true);
|
||||
}
|
||||
void call(const Operand& op)
|
||||
{
|
||||
opR_ModM(op, 16 | i32e, 2, 0xFF, NONE, NONE, true);
|
||||
}
|
||||
// (REG|MEM, REG)
|
||||
void test(const Operand& op, const Reg& reg)
|
||||
|
@ -1042,10 +1127,9 @@ protected:
|
|||
rex(op);
|
||||
db(B10101000 | (op.isBit(8) ? 0 : 1));
|
||||
} else {
|
||||
opR_ModM(op, 0, 3, 0, B11110110);
|
||||
opR_ModM(op, 0, 0, B11110110);
|
||||
}
|
||||
int size = op.getBit() / 8; if (size > 4) size = 4;
|
||||
db(imm, size);
|
||||
db(imm, (std::min)(op.getBit() / 8, 4U));
|
||||
}
|
||||
void ret(int imm = 0)
|
||||
{
|
||||
|
@ -1134,24 +1218,39 @@ protected:
|
|||
opRM_RM(reg1, reg2, B10001000);
|
||||
}
|
||||
}
|
||||
void mov(const Operand& op, uint64 imm)
|
||||
void mov(const Operand& op,
|
||||
#ifdef XBYAK64
|
||||
uint64
|
||||
#else
|
||||
uint32
|
||||
#endif
|
||||
imm)
|
||||
{
|
||||
verifyMemHasSize(op);
|
||||
if (op.isREG()) {
|
||||
int w = op.isBit(8) ? 0 : 1;
|
||||
rex(op); db(B10110000 | (w << 3) | (op.getIdx() & 7));
|
||||
rex(op);
|
||||
int code, size;
|
||||
#ifdef XBYAK64
|
||||
if (op.isBit(64) && inner::IsInInt32(imm)) {
|
||||
db(B11000111);
|
||||
code = B11000000;
|
||||
size = 4;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
code = B10110000 | ((op.isBit(8) ? 0 : 1) << 3);
|
||||
size = op.getBit() / 8;
|
||||
}
|
||||
|
||||
db(code | (op.getIdx() & 7));
|
||||
db(imm, size);
|
||||
} else if (op.isMEM()) {
|
||||
opModM(static_cast<const Address&>(op), Reg(0, Operand::REG, op.getBit()), B11000110);
|
||||
int size = op.getBit() / 8; if (size > 4) size = 4;
|
||||
db(static_cast<uint32>(imm), size);
|
||||
} else {
|
||||
throw ERR_BAD_COMBINATION;
|
||||
}
|
||||
db(imm, op.getBit() / 8);
|
||||
}
|
||||
void opMovxx(const Reg& reg, const Operand& op, uint8 code)
|
||||
{
|
||||
int w = op.isBit(16);
|
||||
bool cond = reg.isREG() && (reg.getBit() > op.getBit());
|
||||
opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
|
||||
}
|
||||
void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, B11000111); }
|
||||
#ifdef XBYAK64
|
||||
|
@ -1180,20 +1279,17 @@ protected:
|
|||
}
|
||||
void call(const char *label)
|
||||
{
|
||||
opJmp(label, T_NEAR, 0, B10011010, 0);
|
||||
opJmp(label, T_NEAR, 0, B11101000, 0);
|
||||
}
|
||||
void call(const void *addr)
|
||||
{
|
||||
opJmp(addr, T_NEAR, 0, B11101000, 0);
|
||||
}
|
||||
void call(const Operand& op)
|
||||
{
|
||||
opR_ModM(op, 16 | i32e, 3, 2, B11111111);
|
||||
}
|
||||
// special case
|
||||
void movd(const Address& addr, const Mmx& mmx)
|
||||
{
|
||||
opModM(addr, Reg(mmx.getIdx(), Operand::REG, mmx.getBit() / 8), 0x0F, B01111110);
|
||||
if (mmx.isXMM()) db(0x66);
|
||||
opModM(addr, mmx, 0x0F, B01111110);
|
||||
}
|
||||
void movd(const Reg32& reg, const Mmx& mmx)
|
||||
{
|
||||
|
@ -1202,8 +1298,8 @@ protected:
|
|||
}
|
||||
void movd(const Mmx& mmx, const Address& addr)
|
||||
{
|
||||
ASSERT(!addr.isBit(32)); // don't use dword ptr, bogus, won't output 0x66 for xmm dest op
|
||||
opModM(addr, Reg(mmx.getIdx(), Operand::REG, mmx.getBit() / 8), 0x0F, B01101110);
|
||||
if (mmx.isXMM()) db(0x66);
|
||||
opModM(addr, mmx, 0x0F, B01101110);
|
||||
}
|
||||
void movd(const Mmx& mmx, const Reg32& reg)
|
||||
{
|
||||
|
@ -1225,8 +1321,31 @@ protected:
|
|||
}
|
||||
void movq(const Address& addr, const Mmx& mmx)
|
||||
{
|
||||
opModM(addr, Reg(mmx.getIdx(), Operand::REG, mmx.getBit() / 8), 0x0F, mmx.isXMM() ? B11010110 : B01111111);
|
||||
if (mmx.isXMM()) db(0x66);
|
||||
opModM(addr, mmx, 0x0F, mmx.isXMM() ? B11010110 : B01111111);
|
||||
}
|
||||
#ifdef XBYAK64
|
||||
void movq(const Reg64& reg, const Mmx& mmx)
|
||||
{
|
||||
if (mmx.isXMM()) db(0x66);
|
||||
opModR(mmx, reg, 0x0F, B01111110);
|
||||
}
|
||||
void movq(const Mmx& mmx, const Reg64& reg)
|
||||
{
|
||||
if (mmx.isXMM()) db(0x66);
|
||||
opModR(mmx, reg, 0x0F, B01101110);
|
||||
}
|
||||
void pextrq(const Operand& op, const Xmm& xmm, uint8 imm)
|
||||
{
|
||||
if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION;
|
||||
opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, B00111010); // force to 64bit
|
||||
}
|
||||
void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm)
|
||||
{
|
||||
if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION;
|
||||
opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, B00111010); // force to 64bit
|
||||
}
|
||||
#endif
|
||||
// MMX2 : pextrw : reg, mmx/xmm, imm
|
||||
// SSE4 : pextrw, pextrb, pextrd, extractps : reg/mem, mmx/xmm, imm
|
||||
void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); }
|
||||
|
@ -1270,7 +1389,7 @@ protected:
|
|||
bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
|
||||
if (!is16bit && !(reg.isREG(i32e) && (op.isREG(i32e) || op.isMEM()))) throw ERR_BAD_COMBINATION;
|
||||
if (is16bit) db(0x66);
|
||||
db(0xF3); opModRM(Reg(reg.getIdx(), Operand::REG, i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, 0x0F, 0xB8);
|
||||
db(0xF3); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, 0x0F, 0xB8);
|
||||
}
|
||||
void crc32(const Reg32e& reg, const Operand& op)
|
||||
{
|
||||
|
@ -1278,17 +1397,86 @@ protected:
|
|||
db(0xF2);
|
||||
opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1));
|
||||
}
|
||||
public:
|
||||
void vextractps(const Operand& op, const Xmm& xmm, uint8 imm)
|
||||
{
|
||||
if (!(op.isREG(32) || op.isMEM()) || xmm.isYMM()) throw ERR_BAD_COMBINATION;
|
||||
opAVX_X_XM_IMM(xmm, cvtReg(op, op.isREG(), Operand::XMM), MM_0F3A | PP_66, 0x17, false, 0, imm);
|
||||
}
|
||||
// support (x, x, x/m), (y, y, y/m)
|
||||
void opAVX_X_X_XM(const Xmm& xm1, const Operand& op1, const Operand& op2, int type, int code0, bool supportYMM, int w = -1)
|
||||
{
|
||||
const Xmm *xm2;
|
||||
const Operand *op;
|
||||
if (op2.isNone()) {
|
||||
xm2 = &xm1;
|
||||
op = &op1;
|
||||
} else {
|
||||
if (!(op1.isXMM() || (supportYMM && op1.isYMM()))) throw ERR_BAD_COMBINATION;
|
||||
xm2 = static_cast<const Xmm*>(&op1);
|
||||
op = &op2;
|
||||
}
|
||||
// (xm1, xm2, op)
|
||||
if (!((xm1.isXMM() && xm2->isXMM()) || (supportYMM && xm1.isYMM() && xm2->isYMM()))) throw ERR_BAD_COMBINATION;
|
||||
bool x, b;
|
||||
if (op->isMEM()) {
|
||||
const Address& addr = *static_cast<const Address*>(op);
|
||||
uint8 rex = addr.getRex();
|
||||
x = (rex & 2) != 0;
|
||||
b = (rex & 1) != 0;
|
||||
if (BIT == 64 && addr.is32bit_) db(0x67);
|
||||
if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0;
|
||||
} else {
|
||||
x = false;
|
||||
b = static_cast<const Reg*>(op)->isExtIdx();
|
||||
}
|
||||
if (w == -1) w = 0;
|
||||
vex(xm1.isExtIdx(), xm2->getIdx(), xm1.isYMM(), type, x, b, w);
|
||||
db(code0);
|
||||
if (op->isMEM()) {
|
||||
const Address& addr = *static_cast<const Address*>(op);
|
||||
addr.updateRegField(static_cast<uint8>(xm1.getIdx()));
|
||||
db(addr.getCode(), static_cast<int>(addr.getSize()));
|
||||
} else {
|
||||
db(getModRM(3, xm1.getIdx(), op->getIdx()));
|
||||
}
|
||||
}
|
||||
// if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return op
|
||||
const Operand& cvtReg(const Operand& op, bool cvt, Operand::Kind kind) const
|
||||
{
|
||||
if (!cvt) return op;
|
||||
static const Xmm* xmTbl[] = {
|
||||
&xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7,
|
||||
#ifdef XBYAK64
|
||||
&xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15
|
||||
#endif
|
||||
};
|
||||
static const Ymm* ymTbl[] = {
|
||||
&ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7,
|
||||
#ifdef XBYAK64
|
||||
&ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15
|
||||
#endif
|
||||
};
|
||||
return (kind == Operand::XMM) ? *xmTbl[op.getIdx()] : *ymTbl[op.getIdx()];
|
||||
}
|
||||
// support (x, x/m, imm), (y, y/m, imm)
|
||||
void opAVX_X_XM_IMM(const Xmm& xmm, const Operand& op, int type, int code, bool supportYMM, int w = -1, int imm = NONE)
|
||||
{
|
||||
opAVX_X_X_XM(xmm, xmm.isXMM() ? xm0 : ym0, op, type, code, supportYMM, w); if (imm != NONE) db((uint8)imm);
|
||||
}
|
||||
enum { NONE = 256 };
|
||||
public:
|
||||
CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0)
|
||||
: CodeArray(maxSize, userPtr)
|
||||
, mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7)
|
||||
, xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7)
|
||||
, ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7)
|
||||
, xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7) // for my convenience
|
||||
, ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7) // for my convenience
|
||||
, eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI)
|
||||
, ax(Operand::EAX), cx(Operand::ECX), dx(Operand::EDX), bx(Operand::EBX), sp(Operand::ESP), bp(Operand::EBP), si(Operand::ESI), di(Operand::EDI)
|
||||
, al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH)
|
||||
, ptr(0), byte(8), word(16), dword(32), qword(64), xmmword(128)
|
||||
, ptr(0), byte(8), word(16), dword(32), qword(64)
|
||||
, st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7)
|
||||
#ifdef XBYAK64
|
||||
, rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15)
|
||||
, r8d(Operand::R8D), r9d(Operand::R9D), r10d(Operand::R10D), r11d(Operand::R11D), r12d(Operand::R12D), r13d(Operand::R13D), r14d(Operand::R14D), r15d(Operand::R15D)
|
||||
|
@ -1296,7 +1484,9 @@ public:
|
|||
, r8b(Operand::R8B), r9b(Operand::R9B), r10b(Operand::R10B), r11b(Operand::R11B), r12b(Operand::R12B), r13b(Operand::R13B), r14b(Operand::R14B), r15b(Operand::R15B)
|
||||
, spl(Operand::SPL, 1), bpl(Operand::BPL, 1), sil(Operand::SIL, 1), dil(Operand::DIL, 1)
|
||||
, xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15)
|
||||
, ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15)
|
||||
, xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15) // for my convenience
|
||||
, ym8(ymm8), ym9(ymm9), ym10(ymm10), ym11(ymm11), ym12(ymm12), ym13(ymm13), ym14(ymm14), ym15(ymm15) // for my convenience
|
||||
, rip()
|
||||
#endif
|
||||
{
|
||||
|
@ -1309,7 +1499,7 @@ public:
|
|||
// if (hasUndefinedLabel()) throw ERR_LABEL_IS_NOT_FOUND;
|
||||
return top_;
|
||||
}
|
||||
#ifdef TEST_NM
|
||||
#ifdef XBYAK_TEST
|
||||
void dump(bool doClear = true)
|
||||
{
|
||||
CodeArray::dump();
|
||||
|
@ -1322,7 +1512,7 @@ public:
|
|||
void align(int x = 16)
|
||||
{
|
||||
if (x != 4 && x != 8 && x != 16 && x != 32) throw ERR_BAD_ALIGN;
|
||||
while (inner::GetPtrDist(getCurr()) % x) {
|
||||
while (size_t(getCurr()) % x) {
|
||||
nop();
|
||||
}
|
||||
}
|
||||
|
@ -1335,4 +1525,4 @@ public:
|
|||
|
||||
} // end of namespace
|
||||
|
||||
#endif // XBYAK_H_
|
||||
#endif // XBYAK_XBYAK_H_
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "2.07"; }
|
||||
const char *getVersionString() const { return "2.99"; }
|
||||
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
|
||||
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
|
||||
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
|
||||
|
@ -184,88 +184,94 @@ void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0
|
|||
void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x66); }
|
||||
void cmovo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 0); }
|
||||
void jo(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); }
|
||||
void seto(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 0); }
|
||||
void seto(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 0); }
|
||||
void cmovno(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 1); }
|
||||
void jno(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); }
|
||||
void setno(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 1); }
|
||||
void setno(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 1); }
|
||||
void cmovb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
|
||||
void jb(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
|
||||
void setb(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 2); }
|
||||
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); }
|
||||
void cmovc(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
|
||||
void jc(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
|
||||
void setc(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); }
|
||||
void cmovnae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
|
||||
void jnae(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
|
||||
void setnae(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 2); }
|
||||
void setnae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); }
|
||||
void cmovnb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
|
||||
void jnb(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
|
||||
void setnb(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 3); }
|
||||
void setnb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); }
|
||||
void cmovae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
|
||||
void jae(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
|
||||
void setae(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 3); }
|
||||
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); }
|
||||
void cmovnc(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
|
||||
void jnc(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
|
||||
void setnc(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); }
|
||||
void cmove(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); }
|
||||
void je(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
|
||||
void sete(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 4); }
|
||||
void sete(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 4); }
|
||||
void cmovz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); }
|
||||
void jz(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
|
||||
void setz(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 4); }
|
||||
void setz(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 4); }
|
||||
void cmovne(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); }
|
||||
void jne(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
|
||||
void setne(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 5); }
|
||||
void setne(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 5); }
|
||||
void cmovnz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); }
|
||||
void jnz(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
|
||||
void setnz(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 5); }
|
||||
void setnz(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 5); }
|
||||
void cmovbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); }
|
||||
void jbe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
|
||||
void setbe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 6); }
|
||||
void setbe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 6); }
|
||||
void cmovna(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); }
|
||||
void jna(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
|
||||
void setna(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 6); }
|
||||
void setna(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 6); }
|
||||
void cmovnbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); }
|
||||
void jnbe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
|
||||
void setnbe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 7); }
|
||||
void setnbe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 7); }
|
||||
void cmova(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); }
|
||||
void ja(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
|
||||
void seta(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 7); }
|
||||
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 7); }
|
||||
void cmovs(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 8); }
|
||||
void js(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); }
|
||||
void sets(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 8); }
|
||||
void sets(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 8); }
|
||||
void cmovns(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 9); }
|
||||
void jns(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); }
|
||||
void setns(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 9); }
|
||||
void setns(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 9); }
|
||||
void cmovp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); }
|
||||
void jp(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
|
||||
void setp(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 10); }
|
||||
void setp(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 10); }
|
||||
void cmovpe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); }
|
||||
void jpe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
|
||||
void setpe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 10); }
|
||||
void setpe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 10); }
|
||||
void cmovnp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); }
|
||||
void jnp(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
|
||||
void setnp(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 11); }
|
||||
void setnp(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 11); }
|
||||
void cmovpo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); }
|
||||
void jpo(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
|
||||
void setpo(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 11); }
|
||||
void setpo(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 11); }
|
||||
void cmovl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); }
|
||||
void jl(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
|
||||
void setl(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 12); }
|
||||
void setl(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 12); }
|
||||
void cmovnge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); }
|
||||
void jnge(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
|
||||
void setnge(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 12); }
|
||||
void setnge(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 12); }
|
||||
void cmovnl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); }
|
||||
void jnl(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
|
||||
void setnl(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 13); }
|
||||
void setnl(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 13); }
|
||||
void cmovge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); }
|
||||
void jge(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
|
||||
void setge(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 13); }
|
||||
void setge(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 13); }
|
||||
void cmovle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); }
|
||||
void jle(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
|
||||
void setle(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 14); }
|
||||
void setle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 14); }
|
||||
void cmovng(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); }
|
||||
void jng(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
|
||||
void setng(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 14); }
|
||||
void setng(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 14); }
|
||||
void cmovnle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
|
||||
void jnle(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
|
||||
void setnle(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 15); }
|
||||
void setnle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); }
|
||||
void cmovg(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
|
||||
void jg(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
|
||||
void setg(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 15); }
|
||||
void setg(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); }
|
||||
#ifdef XBYAK64
|
||||
// cdqe: writes the byte sequence 48 98 through db().
void cdqe()
{
	db(0x48);
	db(0x98);
}
|
||||
#else
|
||||
|
@ -308,12 +314,57 @@ void mwait() { db(0x0F); db(0x01); db(0xC9); }
|
|||
// rdmsr: writes the byte sequence 0F 32 through db().
void rdmsr()
{
	db(0x0F);
	db(0x32);
}
|
||||
// rdpmc: writes the byte sequence 0F 33 through db().
void rdpmc()
{
	db(0x0F);
	db(0x33);
}
|
||||
// rdtsc: writes the byte sequence 0F 31 through db().
void rdtsc()
{
	db(0x0F);
	db(0x31);
}
|
||||
// rdtscp: writes the byte sequence 0F 01 F9 through db().
void rdtscp()
{
	db(0x0F);
	db(0x01);
	db(0xF9);
}
|
||||
void wait() { db(0x9B); }
|
||||
void wbinvd() { db(0x0F); db(0x09); }
|
||||
void wrmsr() { db(0x0F); db(0x30); }
|
||||
void xlatb() { db(0xD7); }
|
||||
void popf() { db(0x9D); }
|
||||
void pushf() { db(0x9C); }
|
||||
// vzeroall: writes the byte sequence C5 FC 77 through db().
void vzeroall()
{
	db(0xC5);
	db(0xFC);
	db(0x77);
}
|
||||
// vzeroupper: writes the byte sequence C5 F8 77 through db().
void vzeroupper()
{
	db(0xC5);
	db(0xF8);
	db(0x77);
}
|
||||
// xgetbv: writes the byte sequence 0F 01 D0 through db().
void xgetbv()
{
	db(0x0F);
	db(0x01);
	db(0xD0);
}
|
||||
void f2xm1() { db(0xD9); db(0xF0); }
|
||||
void fabs() { db(0xD9); db(0xE1); }
|
||||
void faddp() { db(0xDE); db(0xC1); }
|
||||
void fchs() { db(0xD9); db(0xE0); }
|
||||
void fcom() { db(0xD8); db(0xD1); }
|
||||
void fcomp() { db(0xD8); db(0xD9); }
|
||||
void fcompp() { db(0xDE); db(0xD9); }
|
||||
void fcos() { db(0xD9); db(0xFF); }
|
||||
void fdecstp() { db(0xD9); db(0xF6); }
|
||||
void fdivp() { db(0xDE); db(0xF9); }
|
||||
void fdivrp() { db(0xDE); db(0xF1); }
|
||||
void fincstp() { db(0xD9); db(0xF7); }
|
||||
void fld1() { db(0xD9); db(0xE8); }
|
||||
void fldl2t() { db(0xD9); db(0xE9); }
|
||||
void fldl2e() { db(0xD9); db(0xEA); }
|
||||
void fldpi() { db(0xD9); db(0xEB); }
|
||||
void fldlg2() { db(0xD9); db(0xEC); }
|
||||
void fldln2() { db(0xD9); db(0xED); }
|
||||
void fldz() { db(0xD9); db(0xEE); }
|
||||
void fmulp() { db(0xDE); db(0xC9); }
|
||||
void fnop() { db(0xD9); db(0xD0); }
|
||||
void fpatan() { db(0xD9); db(0xF3); }
|
||||
void fprem() { db(0xD9); db(0xF8); }
|
||||
void fprem1() { db(0xD9); db(0xF5); }
|
||||
void fptan() { db(0xD9); db(0xF2); }
|
||||
void frndint() { db(0xD9); db(0xFC); }
|
||||
void fscale() { db(0xD9); db(0xFD); }
|
||||
void fsin() { db(0xD9); db(0xFE); }
|
||||
void fsincos() { db(0xD9); db(0xFB); }
|
||||
void fsqrt() { db(0xD9); db(0xFA); }
|
||||
void fsubp() { db(0xDE); db(0xE9); }
|
||||
void fsubrp() { db(0xDE); db(0xE1); }
|
||||
void ftst() { db(0xD9); db(0xE4); }
|
||||
void fucom() { db(0xDD); db(0xE1); }
|
||||
void fucomp() { db(0xDD); db(0xE9); }
|
||||
void fucompp() { db(0xDA); db(0xE9); }
|
||||
void fxam() { db(0xD9); db(0xE5); }
|
||||
void fxch() { db(0xD9); db(0xC9); }
|
||||
void fxtract() { db(0xD9); db(0xF4); }
|
||||
void fyl2x() { db(0xD9); db(0xF1); }
|
||||
void fyl2xp1() { db(0xD9); db(0xF9); }
|
||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); }
|
||||
|
@ -332,12 +383,12 @@ void xor(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); }
|
|||
void xor(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); }
|
||||
void dec(const Operand& op) { opIncDec(op, 0x48, 1); }
|
||||
void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
|
||||
void div(const Operand& op) { opR_ModM(op, 0, 3, 6, 0xF6); }
|
||||
void idiv(const Operand& op) { opR_ModM(op, 0, 3, 7, 0xF6); }
|
||||
void imul(const Operand& op) { opR_ModM(op, 0, 3, 5, 0xF6); }
|
||||
void mul(const Operand& op) { opR_ModM(op, 0, 3, 4, 0xF6); }
|
||||
void neg(const Operand& op) { opR_ModM(op, 0, 3, 3, 0xF6); }
|
||||
void not(const Operand& op) { opR_ModM(op, 0, 3, 2, 0xF6); }
|
||||
// div: hands the operand to opR_ModM together with the constants 0, 6 and 0xF6.
void div(const Operand& op)
{
	opR_ModM(op, 0, 6, 0xF6);
}
|
||||
void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
|
||||
void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); }
|
||||
void mul(const Operand& op) { opR_ModM(op, 0, 4, 0xF6); }
|
||||
void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); }
|
||||
void not(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); }
|
||||
void rcl(const Operand& op, int imm) { opShift(op, imm, 2); }
|
||||
void rcl(const Operand& op, const Reg8& cl) { opShift(op, cl, 2); }
|
||||
void rcr(const Operand& op, int imm) { opShift(op, imm, 3); }
|
||||
|
@ -360,52 +411,57 @@ void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0
|
|||
void shrd(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xAC, &cl); }
|
||||
void bsf(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); }
|
||||
void bsr(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); }
|
||||
void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, 256, 0x38); }
|
||||
void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, 256, 0x38); }
|
||||
void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, 256, 0x38); }
|
||||
void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, 256, 0x38); }
|
||||
void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, 256, 0x38); }
|
||||
void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, 256, 0x38); }
|
||||
void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, 256, 0x38); }
|
||||
void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, 256, 0x38); }
|
||||
void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, 256, 0x38); }
|
||||
void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, 256, 0x38); }
|
||||
void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, 0x66, 256, 0x38); }
|
||||
void pmulhrsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0B, 0x66, 256, 0x38); }
|
||||
void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, 256, 0x38); }
|
||||
void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, 256, 0x38); }
|
||||
void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, 256, 0x38); }
|
||||
void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); }
|
||||
void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); }
|
||||
void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); }
|
||||
void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, NONE, 0x38); }
|
||||
void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, NONE, 0x38); }
|
||||
void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, NONE, 0x38); }
|
||||
void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, NONE, 0x38); }
|
||||
void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, NONE, 0x38); }
|
||||
void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, NONE, 0x38); }
|
||||
void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, NONE, 0x38); }
|
||||
void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, 0x66, NONE, 0x38); }
|
||||
void pmulhrsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0B, 0x66, NONE, 0x38); }
|
||||
void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); }
|
||||
void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); }
|
||||
void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); }
|
||||
// palignr: forwards mmx/op to opMMX with the constants 0x0f and 0x66, the
// immediate truncated to uint8, and 0x3a.
void palignr(const Mmx& mmx, const Operand& op, int imm)
{
	opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8>(imm), 0x3a);
}
|
||||
void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void packusdw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2B, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void ptest(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x17, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x20, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x22, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x23, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x24, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x25, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x30, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x31, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x32, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x33, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x34, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x35, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pminsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x38, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pminsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x39, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pminuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3A, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pminud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3B, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmaxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3D, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmaxuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3E, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmaxud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3F, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmuldq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x28, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmulld(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void packusdw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2B, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void ptest(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x17, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovsxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x20, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovsxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovsxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x22, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovsxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x23, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovsxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x24, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovsxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x25, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovzxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x30, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovzxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x31, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovzxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x32, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovzxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x33, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovzxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x34, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmovzxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x35, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pminsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x38, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pminsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x39, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pminuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3A, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pminud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3B, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmaxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3D, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmaxuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3E, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmaxud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3F, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmuldq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x28, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pmulld(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void aesdec(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDE, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void aesdeclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void aesenc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDC, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void aesenclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDD, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void aesimc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDB, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
|
@ -420,6 +476,8 @@ void pcmpestrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x60
|
|||
void pcmpestri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void pcmpistrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void pcmpistri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void aeskeygenassist(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
// ldmxcsr: calls opModM on addr with a Reg32 constructed from 2 and the code
// bytes 0x0F, 0xAE.
void ldmxcsr(const Address& addr)
{
	opModM(addr, Reg32(2), 0x0F, 0xAE);
}
|
||||
// stmxcsr: calls opModM on addr with a Reg32 constructed from 3 and the code
// bytes 0x0F, 0xAE.
void stmxcsr(const Address& addr)
{
	opModM(addr, Reg32(3), 0x0F, 0xAE);
}
|
||||
// clflush: calls opModM on addr with a Reg32 constructed from 7 and the code
// bytes 0x0F, 0xAE.
void clflush(const Address& addr)
{
	opModM(addr, Reg32(7), 0x0F, 0xAE);
}
|
||||
|
@ -427,3 +485,540 @@ void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getId
|
|||
void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); }
|
||||
// movsx: delegates to opMovxx with the code value 0xBE.
void movsx(const Reg& reg, const Operand& op)
{
	opMovxx(reg, op, 0xBE);
}
|
||||
// movzx: delegates to opMovxx with the code value 0xB6.
void movzx(const Reg& reg, const Operand& op)
{
	opMovxx(reg, op, 0xB6);
}
|
||||
void fadd(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 0, 0); }
|
||||
void fiadd(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 0, 0); }
|
||||
void fcom(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 2, 0); }
|
||||
void fcomp(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 3, 0); }
|
||||
void fdiv(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 6, 0); }
|
||||
void fidiv(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 6, 0); }
|
||||
void fdivr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 7, 0); }
|
||||
void fidivr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 7, 0); }
|
||||
void ficom(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 2, 0); }
|
||||
void ficomp(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 3, 0); }
|
||||
void fild(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 0, 5); }
|
||||
void fist(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0x00, 2, 0); }
|
||||
void fistp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 3, 7); }
|
||||
void fisttp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDD, 1, 0); }
|
||||
void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); }
|
||||
void fmul(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 1, 0); }
|
||||
void fimul(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 1, 0); }
|
||||
void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); }
|
||||
void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
|
||||
void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
|
||||
void fisub(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 4, 0); }
|
||||
void fsubr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 5, 0); }
|
||||
void fisubr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 5, 0); }
|
||||
void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC0); }
|
||||
void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
|
||||
void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); }
|
||||
void fcmove(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC8, 0x00C8); }
|
||||
void fcmovbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD0, 0x00D0); }
|
||||
void fcmovu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD8, 0x00D8); }
|
||||
void fcmovnb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC0, 0x00C0); }
|
||||
void fcmovne(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC8, 0x00C8); }
|
||||
void fcmovnbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD0, 0x00D0); }
|
||||
void fcmovnu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD8, 0x00D8); }
|
||||
void fcomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBF0, 0x00F0); }
|
||||
void fcomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFF0, 0x00F0); }
|
||||
void fucomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBE8, 0x00E8); }
|
||||
void fucomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFE8, 0x00E8); }
|
||||
void fdiv(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F0, 0xDCF8); }
|
||||
void fdivp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF8); }
|
||||
void fdivr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F8, 0xDCF0); }
|
||||
void fdivrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF0); }
|
||||
void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC8); }
|
||||
void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); }
|
||||
void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
|
||||
void fsubp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE8); }
|
||||
void fsubr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E8, 0xDCE0); }
|
||||
void fsubrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE0); }
|
||||
void fcom(const Fpu& reg) { opFpu(reg, 0xD8, 0xD0); }
|
||||
void fcomp(const Fpu& reg) { opFpu(reg, 0xD8, 0xD8); }
|
||||
void ffree(const Fpu& reg) { opFpu(reg, 0xDD, 0xC0); }
|
||||
void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); }
|
||||
void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); }
|
||||
void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
|
||||
void fucom(const Fpu& reg) { opFpu(reg, 0xDD, 0xE0); }
|
||||
void fucomp(const Fpu& reg) { opFpu(reg, 0xDD, 0xE8); }
|
||||
void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
|
||||
void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x58, true); }
|
||||
void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x58, true); }
|
||||
void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x58, false); }
|
||||
void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x58, false); }
|
||||
void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5C, true); }
|
||||
void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5C, true); }
|
||||
void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5C, false); }
|
||||
void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5C, false); }
|
||||
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x59, true); }
|
||||
void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x59, true); }
|
||||
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x59, false); }
|
||||
void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x59, false); }
|
||||
void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5E, true); }
|
||||
void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5E, true); }
|
||||
void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5E, false); }
|
||||
void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5E, false); }
|
||||
void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5F, true); }
|
||||
void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5F, true); }
|
||||
void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5F, false); }
|
||||
void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5F, false); }
|
||||
void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5D, true); }
|
||||
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5D, true); }
|
||||
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5D, false); }
|
||||
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5D, false); }
|
||||
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x54, true); }
|
||||
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x54, true); }
|
||||
// vandnpd: passes its operands to opAVX_X_X_XM; op2 defaults to an empty Operand.
void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x55, true);
}
|
||||
// vandnps: passes its operands to opAVX_X_X_XM; op2 defaults to an empty Operand.
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x55, true);
}
|
||||
// vorpd: passes its operands to opAVX_X_X_XM; op2 defaults to an empty Operand.
void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x56, true);
}
|
||||
// vorps: passes its operands to opAVX_X_X_XM; op2 defaults to an empty Operand.
void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x56, true);
}
|
||||
// vxorpd: passes its operands to opAVX_X_X_XM; op2 defaults to an empty Operand.
void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x57, true);
}
|
||||
// vxorps: passes its operands to opAVX_X_X_XM; op2 defaults to an empty Operand.
void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x57, true);
}
|
||||
// vblendpd (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vblendpd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0D, true, 0);
	db(imm);
}
|
||||
// vblendpd (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vblendpd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0D, true, 0);
	db(imm);
}
|
||||
// vblendps (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vblendps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0C, true, 0);
	db(imm);
}
|
||||
// vblendps (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vblendps(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0C, true, 0);
	db(imm);
}
|
||||
// vdppd (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vdppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x41, false, 0);
	db(imm);
}
|
||||
// vdppd (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vdppd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x41, false, 0);
	db(imm);
}
|
||||
// vdpps (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vdpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x40, true, 0);
	db(imm);
}
|
||||
// vdpps (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vdpps(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x40, true, 0);
	db(imm);
}
|
||||
// vmpsadbw (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, false, 0);
	db(imm);
}
|
||||
// vmpsadbw (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, false, 0);
	db(imm);
}
|
||||
// vpblendw (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, false, 0);
	db(imm);
}
|
||||
// vpblendw (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, false, 0);
	db(imm);
}
|
||||
// vroundsd (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vroundsd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0B, false, 0);
	db(imm);
}
|
||||
// vroundsd (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vroundsd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0B, false, 0);
	db(imm);
}
|
||||
// vroundss (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vroundss(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0A, false, 0);
	db(imm);
}
|
||||
// vroundss (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vroundss(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0A, false, 0);
	db(imm);
}
|
||||
// vpclmulqdq (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vpclmulqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x44, false, 0);
	db(imm);
}
|
||||
// vpclmulqdq (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vpclmulqdq(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x44, false, 0);
	db(imm);
}
|
||||
// vpermilps (three operands, no immediate): delegates to opAVX_X_X_XM.
void vpermilps(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0C, true, 0);
}
|
||||
// vpermilpd (three operands, no immediate): delegates to opAVX_X_X_XM.
void vpermilpd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0D, true, 0);
}
|
||||
// vcmppd (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vcmppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xC2, true, -1);
	db(imm);
}
|
||||
// vcmppd (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vcmppd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xC2, true, -1);
	db(imm);
}
|
||||
// vcmpps (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vcmpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F, 0xC2, true, -1);
	db(imm);
}
|
||||
// vcmpps (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vcmpps(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0xC2, true, -1);
	db(imm);
}
|
||||
// vcmpsd (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vcmpsd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F2, 0xC2, false, -1);
	db(imm);
}
|
||||
// vcmpsd (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vcmpsd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F2, 0xC2, false, -1);
	db(imm);
}
|
||||
// vcmpss (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vcmpss(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F3, 0xC2, false, -1);
	db(imm);
}
|
||||
// vcmpss (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vcmpss(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0xC2, false, -1);
	db(imm);
}
|
||||
// vcvtsd2ss (three operands): delegates to opAVX_X_X_XM.
void vcvtsd2ss(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F2, 0x5A, false, -1);
}
|
||||
// vcvtsd2ss (two operands): xmm is supplied as both leading register arguments.
void vcvtsd2ss(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F2, 0x5A, false, -1);
}
|
||||
// vcvtss2sd (three operands): delegates to opAVX_X_X_XM.
void vcvtss2sd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F3, 0x5A, false, -1);
}
|
||||
// vcvtss2sd (two operands): xmm is supplied as both leading register arguments.
void vcvtss2sd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x5A, false, -1);
}
|
||||
// vinsertps (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vinsertps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x21, false, 0);
	db(imm);
}
|
||||
// vinsertps (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vinsertps(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x21, false, 0);
	db(imm);
}
|
||||
// vpacksswb (three operands): delegates to opAVX_X_X_XM.
void vpacksswb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x63, false, -1);
}
|
||||
// vpacksswb (two operands): xmm is supplied as both leading register arguments.
void vpacksswb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x63, false, -1);
}
|
||||
// vpackssdw (three operands): delegates to opAVX_X_X_XM.
void vpackssdw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6B, false, -1);
}
|
||||
// vpackssdw (two operands): xmm is supplied as both leading register arguments.
void vpackssdw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6B, false, -1);
}
|
||||
// vpackuswb (three operands): delegates to opAVX_X_X_XM.
void vpackuswb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x67, false, -1);
}
|
||||
// vpackuswb (two operands): xmm is supplied as both leading register arguments.
void vpackuswb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x67, false, -1);
}
|
||||
// vpackusdw (three operands): delegates to opAVX_X_X_XM.
void vpackusdw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x2B, false, -1);
}
|
||||
// vpackusdw (two operands): xmm is supplied as both leading register arguments.
void vpackusdw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x2B, false, -1);
}
|
||||
// vpaddb (three operands): delegates to opAVX_X_X_XM.
void vpaddb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFC, false, -1);
}
|
||||
// vpaddb (two operands): xmm is supplied as both leading register arguments.
void vpaddb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFC, false, -1);
}
|
||||
// vpaddw (three operands): delegates to opAVX_X_X_XM.
void vpaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFD, false, -1);
}
|
||||
// vpaddw (two operands): xmm is supplied as both leading register arguments.
void vpaddw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFD, false, -1);
}
|
||||
// vpaddd (three operands): delegates to opAVX_X_X_XM.
void vpaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFE, false, -1);
}
|
||||
// vpaddd (two operands): xmm is supplied as both leading register arguments.
void vpaddd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFE, false, -1);
}
|
||||
// vpaddq (three operands): delegates to opAVX_X_X_XM.
void vpaddq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD4, false, -1);
}
|
||||
// vpaddq (two operands): xmm is supplied as both leading register arguments.
void vpaddq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD4, false, -1);
}
|
||||
// vpaddsb (three operands): delegates to opAVX_X_X_XM.
void vpaddsb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEC, false, -1);
}
|
||||
// vpaddsb (two operands): xmm is supplied as both leading register arguments.
void vpaddsb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEC, false, -1);
}
|
||||
// vpaddsw (three operands): delegates to opAVX_X_X_XM.
void vpaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xED, false, -1);
}
|
||||
// vpaddsw (two operands): xmm is supplied as both leading register arguments.
void vpaddsw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xED, false, -1);
}
|
||||
// vpaddusb (three operands): delegates to opAVX_X_X_XM.
void vpaddusb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDC, false, -1);
}
|
||||
// vpaddusb (two operands): xmm is supplied as both leading register arguments.
void vpaddusb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDC, false, -1);
}
|
||||
// vpaddusw (three operands): delegates to opAVX_X_X_XM.
void vpaddusw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDD, false, -1);
}
|
||||
// vpaddusw (two operands): xmm is supplied as both leading register arguments.
void vpaddusw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDD, false, -1);
}
|
||||
// vpalignr (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vpalignr(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0F, false, -1);
	db(imm);
}
|
||||
// vpalignr (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vpalignr(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0F, false, -1);
	db(imm);
}
|
||||
// vpand (three operands): delegates to opAVX_X_X_XM.
void vpand(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDB, false, -1);
}
|
||||
// vpand (two operands): xmm is supplied as both leading register arguments.
void vpand(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDB, false, -1);
}
|
||||
// vpandn (three operands): delegates to opAVX_X_X_XM.
void vpandn(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDF, false, -1);
}
|
||||
// vpandn (two operands): xmm is supplied as both leading register arguments.
void vpandn(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDF, false, -1);
}
|
||||
// vpavgb (three operands): delegates to opAVX_X_X_XM.
void vpavgb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE0, false, -1);
}
|
||||
// vpavgb (two operands): xmm is supplied as both leading register arguments.
void vpavgb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE0, false, -1);
}
|
||||
// vpavgw (three operands): delegates to opAVX_X_X_XM.
void vpavgw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE3, false, -1);
}
|
||||
// vpavgw (two operands): xmm is supplied as both leading register arguments.
void vpavgw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE3, false, -1);
}
|
||||
// vpcmpeqb (three operands): delegates to opAVX_X_X_XM.
void vpcmpeqb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x74, false, -1);
}
|
||||
// vpcmpeqb (two operands): xmm is supplied as both leading register arguments.
void vpcmpeqb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x74, false, -1);
}
|
||||
// vpcmpeqw (three operands): delegates to opAVX_X_X_XM.
void vpcmpeqw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x75, false, -1);
}
|
||||
// vpcmpeqw (two operands): xmm is supplied as both leading register arguments.
void vpcmpeqw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x75, false, -1);
}
|
||||
// vpcmpeqd (three operands): delegates to opAVX_X_X_XM.
void vpcmpeqd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x76, false, -1);
}
|
||||
// vpcmpeqd (two operands): xmm is supplied as both leading register arguments.
void vpcmpeqd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x76, false, -1);
}
|
||||
// vpcmpeqq (three operands): delegates to opAVX_X_X_XM.
void vpcmpeqq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x29, false, -1);
}
|
||||
// vpcmpeqq (two operands): xmm is supplied as both leading register arguments.
void vpcmpeqq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x29, false, -1);
}
|
||||
// vpcmpgtb (three operands): delegates to opAVX_X_X_XM.
void vpcmpgtb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x64, false, -1);
}
|
||||
// vpcmpgtb (two operands): xmm is supplied as both leading register arguments.
void vpcmpgtb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x64, false, -1);
}
|
||||
// vpcmpgtw (three operands): delegates to opAVX_X_X_XM.
void vpcmpgtw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x65, false, -1);
}
|
||||
// vpcmpgtw (two operands): xmm is supplied as both leading register arguments.
void vpcmpgtw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x65, false, -1);
}
|
||||
// vpcmpgtd (three operands): delegates to opAVX_X_X_XM.
void vpcmpgtd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x66, false, -1);
}
|
||||
// vpcmpgtd (two operands): xmm is supplied as both leading register arguments.
void vpcmpgtd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x66, false, -1);
}
|
||||
// vpcmpgtq (three operands): delegates to opAVX_X_X_XM.
void vpcmpgtq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x37, false, -1);
}
|
||||
// vpcmpgtq (two operands): xmm is supplied as both leading register arguments.
void vpcmpgtq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x37, false, -1);
}
|
||||
// vphaddw (three operands): delegates to opAVX_X_X_XM.
void vphaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x01, false, -1);
}
|
||||
// vphaddw (two operands): xmm is supplied as both leading register arguments.
void vphaddw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x01, false, -1);
}
|
||||
// vphaddd (three operands): delegates to opAVX_X_X_XM.
void vphaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x02, false, -1);
}
|
||||
// vphaddd (two operands): xmm is supplied as both leading register arguments.
void vphaddd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x02, false, -1);
}
|
||||
// vphaddsw (three operands): delegates to opAVX_X_X_XM.
void vphaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x03, false, -1);
}
|
||||
// vphaddsw (two operands): xmm is supplied as both leading register arguments.
void vphaddsw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x03, false, -1);
}
|
||||
// vphsubw (three operands): delegates to opAVX_X_X_XM.
void vphsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x05, false, -1);
}
|
||||
// vphsubw (two operands): xmm is supplied as both leading register arguments.
void vphsubw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x05, false, -1);
}
|
||||
// vphsubd (three operands): delegates to opAVX_X_X_XM.
void vphsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x06, false, -1);
}
|
||||
// vphsubd (two operands): xmm is supplied as both leading register arguments.
void vphsubd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x06, false, -1);
}
|
||||
// vphsubsw (three operands): delegates to opAVX_X_X_XM.
void vphsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x07, false, -1);
}
|
||||
// vphsubsw (two operands): xmm is supplied as both leading register arguments.
void vphsubsw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x07, false, -1);
}
|
||||
// vpmaddwd (three operands): delegates to opAVX_X_X_XM.
void vpmaddwd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF5, false, -1);
}
|
||||
// vpmaddwd (two operands): xmm is supplied as both leading register arguments.
void vpmaddwd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF5, false, -1);
}
|
||||
// vpmaddubsw (three operands): delegates to opAVX_X_X_XM.
void vpmaddubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x04, false, -1);
}
|
||||
// vpmaddubsw (two operands): xmm is supplied as both leading register arguments.
void vpmaddubsw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x04, false, -1);
}
|
||||
// vpmaxsb (three operands): delegates to opAVX_X_X_XM.
void vpmaxsb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3C, false, -1);
}
|
||||
// vpmaxsb (two operands): xmm is supplied as both leading register arguments.
void vpmaxsb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3C, false, -1);
}
|
||||
// vpmaxsw (three operands): delegates to opAVX_X_X_XM.
void vpmaxsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEE, false, -1);
}
|
||||
// vpmaxsw (two operands): xmm is supplied as both leading register arguments.
void vpmaxsw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEE, false, -1);
}
|
||||
// vpmaxsd (three operands): delegates to opAVX_X_X_XM.
void vpmaxsd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3D, false, -1);
}
|
||||
// vpmaxsd (two operands): xmm is supplied as both leading register arguments.
void vpmaxsd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3D, false, -1);
}
|
||||
// vpmaxub (three operands): delegates to opAVX_X_X_XM.
void vpmaxub(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDE, false, -1);
}
|
||||
// vpmaxub (two operands): xmm is supplied as both leading register arguments.
void vpmaxub(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDE, false, -1);
}
|
||||
// vpmaxuw (three operands): delegates to opAVX_X_X_XM.
void vpmaxuw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3E, false, -1);
}
|
||||
// vpmaxuw (two operands): xmm is supplied as both leading register arguments.
void vpmaxuw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3E, false, -1);
}
|
||||
// vpmaxud (three operands): delegates to opAVX_X_X_XM.
void vpmaxud(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3F, false, -1);
}
|
||||
// vpmaxud (two operands): xmm is supplied as both leading register arguments.
void vpmaxud(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3F, false, -1);
}
|
||||
// vpminsb (three operands): delegates to opAVX_X_X_XM.
void vpminsb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x38, false, -1);
}
|
||||
// vpminsb (two operands): xmm is supplied as both leading register arguments.
void vpminsb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x38, false, -1);
}
|
||||
// vpminsw (three operands): delegates to opAVX_X_X_XM.
void vpminsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEA, false, -1);
}
|
||||
// vpminsw (two operands): xmm is supplied as both leading register arguments.
void vpminsw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEA, false, -1);
}
|
||||
// vpminsd (three operands): delegates to opAVX_X_X_XM.
void vpminsd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x39, false, -1);
}
|
||||
// vpminsd (two operands): xmm is supplied as both leading register arguments.
void vpminsd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x39, false, -1);
}
|
||||
// vpminub (three operands): delegates to opAVX_X_X_XM.
void vpminub(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDA, false, -1);
}
|
||||
// vpminub (two operands): xmm is supplied as both leading register arguments.
void vpminub(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDA, false, -1);
}
|
||||
// vpminuw (three operands): delegates to opAVX_X_X_XM.
void vpminuw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3A, false, -1);
}
|
||||
// vpminuw (two operands): xmm is supplied as both leading register arguments.
void vpminuw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3A, false, -1);
}
|
||||
// vpminud (three operands): delegates to opAVX_X_X_XM.
void vpminud(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3B, false, -1);
}
|
||||
// vpminud (two operands): xmm is supplied as both leading register arguments.
void vpminud(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3B, false, -1);
}
|
||||
// vpmulhuw (three operands): delegates to opAVX_X_X_XM.
void vpmulhuw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE4, false, -1);
}
|
||||
// vpmulhuw (two operands): xmm is supplied as both leading register arguments.
void vpmulhuw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE4, false, -1);
}
|
||||
// vpmulhrsw (three operands): delegates to opAVX_X_X_XM.
void vpmulhrsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0B, false, -1);
}
|
||||
// vpmulhrsw (two operands): xmm is supplied as both leading register arguments.
void vpmulhrsw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0B, false, -1);
}
|
||||
// vpmulhw (three operands): delegates to opAVX_X_X_XM.
void vpmulhw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE5, false, -1);
}
|
||||
// vpmulhw (two operands): xmm is supplied as both leading register arguments.
void vpmulhw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE5, false, -1);
}
|
||||
// vpmullw (three operands): delegates to opAVX_X_X_XM.
void vpmullw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD5, false, -1);
}
|
||||
// vpmullw (two operands): xmm is supplied as both leading register arguments.
void vpmullw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD5, false, -1);
}
|
||||
// vpmulld (three operands): delegates to opAVX_X_X_XM.
void vpmulld(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x40, false, -1);
}
|
||||
// vpmulld (two operands): xmm is supplied as both leading register arguments.
void vpmulld(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x40, false, -1);
}
|
||||
// vpmuludq (three operands): delegates to opAVX_X_X_XM.
void vpmuludq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF4, false, -1);
}
|
||||
// vpmuludq (two operands): xmm is supplied as both leading register arguments.
void vpmuludq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF4, false, -1);
}
|
||||
// vpmuldq (three operands): delegates to opAVX_X_X_XM.
void vpmuldq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x28, false, -1);
}
|
||||
// vpmuldq (two operands): xmm is supplied as both leading register arguments.
void vpmuldq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x28, false, -1);
}
|
||||
// vpor (three operands): delegates to opAVX_X_X_XM.
void vpor(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEB, false, -1);
}
|
||||
// vpor (two operands): xmm is supplied as both leading register arguments.
void vpor(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEB, false, -1);
}
|
||||
// vpsadbw (three operands): delegates to opAVX_X_X_XM.
void vpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF6, false, -1);
}
|
||||
// vpsadbw (two operands): xmm is supplied as both leading register arguments.
void vpsadbw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF6, false, -1);
}
|
||||
// vpshufb (three operands): delegates to opAVX_X_X_XM.
void vpshufb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x00, false, -1); }
// vpshufb (two operands): xmm is supplied as both leading register arguments.
// Added so vpshufb offers the same shorthand overload as the other paired
// mnemonics in this list (e.g. vpsadbw, vpsignb), which it previously lacked.
// NOTE(review): assumes no two-operand vpshufb is defined elsewhere in the class - confirm.
void vpshufb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x00, false, -1); }
|
||||
// vpsignb (three operands): delegates to opAVX_X_X_XM.
void vpsignb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x08, false, -1);
}
|
||||
// vpsignb (two operands): xmm is supplied as both leading register arguments.
void vpsignb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x08, false, -1);
}
|
||||
// vpsignw (three operands): delegates to opAVX_X_X_XM.
void vpsignw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x09, false, -1);
}
|
||||
// vpsignw (two operands): xmm is supplied as both leading register arguments.
void vpsignw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x09, false, -1);
}
|
||||
// vpsignd (three operands): delegates to opAVX_X_X_XM.
void vpsignd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0A, false, -1);
}
|
||||
// vpsignd (two operands): xmm is supplied as both leading register arguments.
void vpsignd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0A, false, -1);
}
|
||||
// vpsllw (three operands): delegates to opAVX_X_X_XM.
void vpsllw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF1, false, -1);
}
|
||||
// vpsllw (two operands): xmm is supplied as both leading register arguments.
void vpsllw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF1, false, -1);
}
|
||||
// vpslld (three operands): delegates to opAVX_X_X_XM.
void vpslld(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF2, false, -1);
}
|
||||
// vpslld (two operands): xmm is supplied as both leading register arguments.
void vpslld(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF2, false, -1);
}
|
||||
// vpsllq (three operands): delegates to opAVX_X_X_XM.
void vpsllq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF3, false, -1);
}
|
||||
// vpsllq (two operands): xmm is supplied as both leading register arguments.
void vpsllq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF3, false, -1);
}
|
||||
// vpsraw (three operands): delegates to opAVX_X_X_XM.
void vpsraw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE1, false, -1);
}
|
||||
// vpsraw (two operands): xmm is supplied as both leading register arguments.
void vpsraw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE1, false, -1);
}
|
||||
// vpsrad (three operands): delegates to opAVX_X_X_XM.
void vpsrad(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE2, false, -1);
}
|
||||
// vpsrad (two operands): xmm is supplied as both leading register arguments.
void vpsrad(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE2, false, -1);
}
|
||||
// vpsrlw (three operands): delegates to opAVX_X_X_XM.
void vpsrlw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD1, false, -1);
}
|
||||
// vpsrlw (two operands): xmm is supplied as both leading register arguments.
void vpsrlw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD1, false, -1);
}
|
||||
// vpsrld (three operands): delegates to opAVX_X_X_XM.
void vpsrld(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD2, false, -1);
}
|
||||
// vpsrld (two operands): xmm is supplied as both leading register arguments.
void vpsrld(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD2, false, -1);
}
|
||||
// vpsrlq (three operands): delegates to opAVX_X_X_XM.
void vpsrlq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD3, false, -1);
}
|
||||
// vpsrlq (two operands): xmm is supplied as both leading register arguments.
void vpsrlq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD3, false, -1);
}
|
||||
// vpsubb (three operands): delegates to opAVX_X_X_XM.
void vpsubb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF8, false, -1);
}
|
||||
// vpsubb (two operands): xmm is supplied as both leading register arguments.
void vpsubb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF8, false, -1);
}
|
||||
// vpsubw (three operands): delegates to opAVX_X_X_XM.
void vpsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF9, false, -1);
}
|
||||
// vpsubw (two operands): xmm is supplied as both leading register arguments.
void vpsubw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF9, false, -1);
}
|
||||
// vpsubd (three operands): delegates to opAVX_X_X_XM.
void vpsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFA, false, -1);
}
|
||||
// vpsubd (two operands): xmm is supplied as both leading register arguments.
void vpsubd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFA, false, -1);
}
|
||||
// vpsubq (three operands): delegates to opAVX_X_X_XM.
void vpsubq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFB, false, -1);
}
|
||||
// vpsubq (two operands): xmm is supplied as both leading register arguments.
void vpsubq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFB, false, -1);
}
|
||||
// vpsubsb (three operands): delegates to opAVX_X_X_XM.
void vpsubsb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE8, false, -1);
}
|
||||
// vpsubsb (two operands): xmm is supplied as both leading register arguments.
void vpsubsb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE8, false, -1);
}
|
||||
// vpsubsw (three operands): delegates to opAVX_X_X_XM.
void vpsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE9, false, -1);
}
|
||||
// vpsubsw (two operands): xmm is supplied as both leading register arguments.
void vpsubsw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE9, false, -1);
}
|
||||
// vpsubusb (three operands): delegates to opAVX_X_X_XM.
void vpsubusb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD8, false, -1);
}
|
||||
// vpsubusb (two operands): xmm is supplied as both leading register arguments.
void vpsubusb(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD8, false, -1);
}
|
||||
// vpsubusw (three operands): delegates to opAVX_X_X_XM.
void vpsubusw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD9, false, -1);
}
|
||||
// vpsubusw (two operands): xmm is supplied as both leading register arguments.
void vpsubusw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD9, false, -1);
}
|
||||
// vpunpckhbw (three operands): delegates to opAVX_X_X_XM.
void vpunpckhbw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x68, false, -1);
}
|
||||
// vpunpckhbw (two operands): xmm is supplied as both leading register arguments.
void vpunpckhbw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x68, false, -1);
}
|
||||
// vpunpckhwd (three operands): delegates to opAVX_X_X_XM.
void vpunpckhwd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x69, false, -1);
}
|
||||
// vpunpckhwd (two operands): xmm is supplied as both leading register arguments.
void vpunpckhwd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x69, false, -1);
}
|
||||
// vpunpckhdq (three operands): delegates to opAVX_X_X_XM.
void vpunpckhdq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6A, false, -1);
}
|
||||
// vpunpckhdq (two operands): xmm is supplied as both leading register arguments.
void vpunpckhdq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6A, false, -1);
}
|
||||
// vpunpckhqdq (three operands): delegates to opAVX_X_X_XM.
void vpunpckhqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6D, false, -1);
}
|
||||
// vpunpckhqdq (two operands): xmm is supplied as both leading register arguments.
void vpunpckhqdq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6D, false, -1);
}
|
||||
// vpunpcklbw (three operands): delegates to opAVX_X_X_XM.
void vpunpcklbw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x60, false, -1);
}
|
||||
// vpunpcklbw (two operands): xmm is supplied as both leading register arguments.
void vpunpcklbw(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x60, false, -1);
}
|
||||
// vpunpcklwd (three operands): delegates to opAVX_X_X_XM.
void vpunpcklwd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x61, false, -1);
}
|
||||
// vpunpcklwd (two operands): xmm is supplied as both leading register arguments.
void vpunpcklwd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x61, false, -1);
}
|
||||
// vpunpckldq (three operands): delegates to opAVX_X_X_XM.
void vpunpckldq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x62, false, -1);
}
|
||||
// vpunpckldq (two operands): xmm is supplied as both leading register arguments.
void vpunpckldq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x62, false, -1);
}
|
||||
// vpunpcklqdq (three operands): delegates to opAVX_X_X_XM.
void vpunpcklqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6C, false, -1);
}
|
||||
// vpunpcklqdq (two operands): xmm is supplied as both leading register arguments.
void vpunpcklqdq(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6C, false, -1);
}
|
||||
// vpxor (three operands): delegates to opAVX_X_X_XM.
void vpxor(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEF, false, -1);
}
|
||||
// vpxor (two operands): xmm is supplied as both leading register arguments.
void vpxor(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEF, false, -1);
}
|
||||
// vrcpss (three operands): delegates to opAVX_X_X_XM.
void vrcpss(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F3, 0x53, false, -1);
}
|
||||
// vrcpss (two operands): xmm is supplied as both leading register arguments.
void vrcpss(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x53, false, -1);
}
|
||||
// vrsqrtss (three operands): delegates to opAVX_X_X_XM.
void vrsqrtss(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F3, 0x52, false, -1);
}
|
||||
// vrsqrtss (two operands): xmm is supplied as both leading register arguments.
void vrsqrtss(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x52, false, -1);
}
|
||||
// vshufpd (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vshufpd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xC6, true, -1);
	db(imm);
}
|
||||
// vshufpd (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vshufpd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xC6, true, -1);
	db(imm);
}
|
||||
// vshufps (three operands): delegates to opAVX_X_X_XM, then calls db(imm).
void vshufps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F, 0xC6, true, -1);
	db(imm);
}
|
||||
// vshufps (two operands): xmm is supplied as both leading register arguments; db(imm) follows.
void vshufps(const Xmm& xmm, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0xC6, true, -1);
	db(imm);
}
|
||||
// vsqrtsd (three operands): delegates to opAVX_X_X_XM.
void vsqrtsd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F2, 0x51, false, -1);
}
|
||||
// vsqrtsd (two operands): xmm is supplied as both leading register arguments.
void vsqrtsd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F2, 0x51, false, -1);
}
|
||||
// vsqrtss (three operands): delegates to opAVX_X_X_XM.
void vsqrtss(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F3, 0x51, false, -1);
}
|
||||
// vsqrtss (two operands): xmm is supplied as both leading register arguments.
void vsqrtss(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x51, false, -1);
}
|
||||
// vunpckhpd (three operands): delegates to opAVX_X_X_XM.
void vunpckhpd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x15, true, -1);
}
|
||||
// vunpckhpd (two operands): xmm is supplied as both leading register arguments.
void vunpckhpd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x15, true, -1);
}
|
||||
// vunpckhps (three operands): delegates to opAVX_X_X_XM.
void vunpckhps(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F, 0x15, true, -1);
}
|
||||
// vunpckhps (two operands): xmm is supplied as both leading register arguments.
void vunpckhps(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0x15, true, -1);
}
|
||||
// vunpcklpd (three operands): delegates to opAVX_X_X_XM.
void vunpcklpd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x14, true, -1);
}
|
||||
// vunpcklpd (two operands): xmm is supplied as both leading register arguments.
void vunpcklpd(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x14, true, -1);
}
|
||||
// vunpcklps (three operands): delegates to opAVX_X_X_XM.
void vunpcklps(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	opAVX_X_X_XM(xm1, xm2, op, MM_0F, 0x14, true, -1);
}
|
||||
// vunpcklps (two operands): xmm is supplied as both leading register arguments.
void vunpcklps(const Xmm& xmm, const Operand& op)
{
	opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0x14, true, -1);
}
|
||||
// vaeskeygenassist: delegates to opAVX_X_XM_IMM, forwarding imm as the final argument.
void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm)
{
	opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0xDF, false, 0, imm);
}
|
||||
// vroundpd: delegates to opAVX_X_XM_IMM, forwarding imm as the final argument.
void vroundpd(const Xmm& xm, const Operand& op, uint8 imm)
{
	opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x09, true, 0, imm);
}
|
||||
// vroundps: delegates to opAVX_X_XM_IMM, forwarding imm as the final argument.
void vroundps(const Xmm& xm, const Operand& op, uint8 imm)
{
	opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x08, true, 0, imm);
}
|
||||
// vpermilpd (immediate form): delegates to opAVX_X_XM_IMM, forwarding imm as the final argument.
void vpermilpd(const Xmm& xm, const Operand& op, uint8 imm)
{
	opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x05, true, 0, imm);
}
|
||||
// vpermilps (immediate form): delegates to opAVX_X_XM_IMM, forwarding imm as the final argument.
void vpermilps(const Xmm& xm, const Operand& op, uint8 imm)
{
	opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x04, true, 0, imm);
}
|
||||
// vpcmpestri: delegates to opAVX_X_XM_IMM, forwarding imm as the final argument.
void vpcmpestri(const Xmm& xm, const Operand& op, uint8 imm)
{
	opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x61, false, 0, imm);
}
|
||||
// vpcmpestrm: delegates to opAVX_X_XM_IMM, forwarding imm as the final argument.
void vpcmpestrm(const Xmm& xm, const Operand& op, uint8 imm)
{
	opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x60, false, 0, imm);
}
|
||||
// vpcmpistri: delegates to opAVX_X_XM_IMM, forwarding imm as the final argument.
void vpcmpistri(const Xmm& xm, const Operand& op, uint8 imm)
{
	opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x63, false, 0, imm);
}
|
||||
// vpcmpistrm: delegates to opAVX_X_XM_IMM, forwarding imm as the final argument.
void vpcmpistrm(const Xmm& xm, const Operand& op, uint8 imm)
{
	opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x62, false, 0, imm);
}
|
||||
// vtestps: delegates to opAVX_X_XM_IMM without passing an immediate.
void vtestps(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x0E, true, 0);
}
|
||||
// vtestpd: delegates to opAVX_X_XM_IMM without passing an immediate.
void vtestpd(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x0F, true, 0);
}
|
||||
// vcomisd: delegates to opAVX_X_XM_IMM without passing an immediate.
void vcomisd(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x2F, false, -1);
}
|
||||
// vcomiss: delegates to opAVX_X_XM_IMM without passing an immediate.
void vcomiss(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F, 0x2F, false, -1);
}
|
||||
// vcvtdq2ps: delegates to opAVX_X_XM_IMM without passing an immediate.
void vcvtdq2ps(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F, 0x5B, true, -1);
}
|
||||
// vcvtps2dq: delegates to opAVX_X_XM_IMM without passing an immediate.
void vcvtps2dq(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x5B, true, -1);
}
|
||||
// vcvttps2dq: delegates to opAVX_X_XM_IMM without passing an immediate.
void vcvttps2dq(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x5B, true, -1);
}
|
||||
// vmovapd: delegates to opAVX_X_XM_IMM without passing an immediate.
void vmovapd(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x28, true, -1);
}
|
||||
// vmovaps: delegates to opAVX_X_XM_IMM without passing an immediate.
void vmovaps(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F, 0x28, true, -1);
}
|
||||
// vmovddup: delegates to opAVX_X_XM_IMM without passing an immediate.
void vmovddup(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x12, true, -1);
}
|
||||
// vmovdqa: delegates to opAVX_X_XM_IMM without passing an immediate.
void vmovdqa(const Xmm& xm, const Operand& op)
{
	opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x6F, true, -1);
}
|
||||
void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x6F, true, -1); }
|
||||
void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x16, true, -1); }
|
||||
void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x12, true, -1); }
|
||||
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x10, true, -1); }
|
||||
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x10, true, -1); }
|
||||
// Packed absolute value (byte/word/dword) and vphminposuw; 66 / 0F38 flags.
void vpabsb(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x1C, false, -1);
}
void vpabsw(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x1D, false, -1);
}
void vpabsd(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x1E, false, -1);
}
void vphminposuw(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x41, false, -1);
}
|
||||
// vpmovsx* family: opcode bytes 0x20..0x25 with the 66 / 0F38 flags.
void vpmovsxbw(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x20, false, -1);
}
void vpmovsxbd(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x21, false, -1);
}
void vpmovsxbq(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x22, false, -1);
}
void vpmovsxwd(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x23, false, -1);
}
void vpmovsxwq(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x24, false, -1);
}
void vpmovsxdq(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x25, false, -1);
}
|
||||
// vpmovzx* family: opcode bytes 0x30..0x35 with the 66 / 0F38 flags.
void vpmovzxbw(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x30, false, -1);
}
void vpmovzxbd(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x31, false, -1);
}
void vpmovzxbq(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x32, false, -1);
}
void vpmovzxwd(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x33, false, -1);
}
void vpmovzxwq(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x34, false, -1);
}
void vpmovzxdq(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x35, false, -1);
}
|
||||
void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, false, -1, imm); }
|
||||
void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, false, -1, imm); }
|
||||
void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, false, -1, imm); }
|
||||
// vptest: two-operand form, 66 / 0F38 flags, opcode byte 0x17.
void vptest(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F38, 0x17, false, -1);
}
|
||||
void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x53, true, -1); }
|
||||
void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x52, true, -1); }
|
||||
void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x51, true, -1); }
|
||||
void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x51, true, -1); }
|
||||
// Unordered scalar compares; same opcode byte (0x2E), the sd form adds the 66 flag.
void vucomisd(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, PP_66 | MM_0F, 0x2E, false, -1);
}
void vucomiss(const Xmm& xm, const Operand& op) {
	opAVX_X_XM_IMM(xm, op, MM_0F, 0x2E, false, -1);
}
|
||||
void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x29, true, -1); }
|
||||
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F, 0x29, true, -1); }
|
||||
void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x7F, true, -1); }
|
||||
void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_F3, 0x7F, true, -1); }
|
||||
void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x11, true, -1); }
|
||||
void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F, 0x11, true, -1); }
|
||||
void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0xD0, true, -1); }
|
||||
void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0xD0, true, -1); }
|
||||
void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x7C, true, -1); }
|
||||
void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x7C, true, -1); }
|
||||
void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x7D, true, -1); }
|
||||
void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x7D, true, -1); }
|
||||
// AES round wrappers: opcode bytes 0xDC..0xDF with the 66 / 0F38 flags.
void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xDC, false, 0);
}
void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xDD, false, 0);
}
void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xDE, false, 0);
}
void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xDF, false, 0);
}
|
||||
void vmaskmovps(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x2C, true, 0); }
|
||||
void vmaskmovps(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x2E, true, 0); }
|
||||
void vmaskmovpd(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x2D, true, 0); }
|
||||
void vmaskmovpd(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x2F, true, 0); }
|
||||
void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x16, false); }
|
||||
void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x17, false); }
|
||||
void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F, 0x16, false); }
|
||||
void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F, 0x17, false); }
|
||||
void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x12, false); }
|
||||
void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x13, false); }
|
||||
void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F, 0x12, false); }
|
||||
void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F, 0x13, false); }
|
||||
// vfmadd{132,213,231}{pd,ps,sd,ss}. Table pattern visible in the arguments:
// opcode high nibble 9/A/B follows the 132/213/231 operand order; the low
// nibble is 8 for packed and 9 for scalar; the last argument is 1 for the
// double-precision (pd/sd) entries and 0 for single-precision (ps/ss).
void vfmadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x98, true, 1);
}
void vfmadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xA8, true, 1);
}
void vfmadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xB8, true, 1);
}
void vfmadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x98, true, 0);
}
void vfmadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xA8, true, 0);
}
void vfmadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xB8, true, 0);
}
void vfmadd132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x99, false, 1);
}
void vfmadd213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xA9, false, 1);
}
void vfmadd231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xB9, false, 1);
}
void vfmadd132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x99, false, 0);
}
void vfmadd213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xA9, false, 0);
}
void vfmadd231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xB9, false, 0);
}
|
||||
// vfmaddsub{132,213,231}{pd,ps}: opcode low nibble 6; the last argument is
// 1 for pd and 0 for ps.
void vfmaddsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x96, true, 1);
}
void vfmaddsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xA6, true, 1);
}
void vfmaddsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xB6, true, 1);
}
void vfmaddsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x96, true, 0);
}
void vfmaddsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xA6, true, 0);
}
void vfmaddsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xB6, true, 0);
}
|
||||
// vfmsubadd{132,213,231}{pd,ps}: opcode low nibble 7; the last argument is
// 1 for pd and 0 for ps.
void vfmsubadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x97, true, 1);
}
void vfmsubadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xA7, true, 1);
}
void vfmsubadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xB7, true, 1);
}
void vfmsubadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x97, true, 0);
}
void vfmsubadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xA7, true, 0);
}
void vfmsubadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xB7, true, 0);
}
|
||||
// vfmsub{132,213,231}{pd,ps,sd,ss}: opcode low nibble A (packed) or
// B (scalar); the last argument is 1 for pd/sd and 0 for ps/ss.
void vfmsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x9A, true, 1);
}
void vfmsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xAA, true, 1);
}
void vfmsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xBA, true, 1);
}
void vfmsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x9A, true, 0);
}
void vfmsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xAA, true, 0);
}
void vfmsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xBA, true, 0);
}
void vfmsub132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x9B, false, 1);
}
void vfmsub213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xAB, false, 1);
}
void vfmsub231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xBB, false, 1);
}
void vfmsub132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0x9B, false, 0);
}
void vfmsub213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xAB, false, 0);
}
void vfmsub231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) {
	opAVX_X_X_XM(xmm, op1, op2, PP_66 | MM_0F38, 0xBB, false, 0);
}
|
||||
void vfnmadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9C, true, 1); }
|
||||
void vfnmadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAC, true, 1); }
|
||||
void vfnmadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBC, true, 1); }
|
||||
void vfnmadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9C, true, 0); }
|
||||
void vfnmadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAC, true, 0); }
|
||||
void vfnmadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBC, true, 0); }
|
||||
void vfnmadd132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9D, false, 1); }
|
||||
void vfnmadd213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAD, false, 1); }
|
||||
void vfnmadd231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBD, false, 1); }
|
||||
void vfnmadd132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9D, false, 0); }
|
||||
void vfnmadd213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAD, false, 0); }
|
||||
void vfnmadd231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBD, false, 0); }
|
||||
void vfnmsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9E, true, 1); }
|
||||
void vfnmsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAE, true, 1); }
|
||||
void vfnmsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBE, true, 1); }
|
||||
void vfnmsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9E, true, 0); }
|
||||
void vfnmsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAE, true, 0); }
|
||||
void vfnmsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBE, true, 0); }
|
||||
void vfnmsub132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9F, false, 1); }
|
||||
void vfnmsub213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAF, false, 1); }
|
||||
void vfnmsub231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBF, false, 1); }
|
||||
void vfnmsub132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9F, false, 0); }
|
||||
void vfnmsub213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAF, false, 0); }
|
||||
void vfnmsub231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBF, false, 0); }
|
||||
// vaesimc: two-operand form, 66 / 0F38 flags, opcode byte 0xDB.
void vaesimc(const Xmm& x, const Operand& op) {
	opAVX_X_XM_IMM(x, op, PP_66 | MM_0F38, 0xDB, false, 0);
}
|
||||
void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x1A, true, 0); }
|
||||
void vbroadcastsd(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x19, true, 0); }
|
||||
void vbroadcastss(const Xmm& x, const Address& addr) { opAVX_X_XM_IMM(x, addr, MM_0F38 | PP_66, 0x18, true, 0); }
|
||||
void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_XM_IMM(y, cvtReg(op, op.isXMM(), Operand::YMM), MM_0F3A | PP_66, 0x19, true, 0, imm); }
|
||||
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, cvtReg(op, op.isXMM(), Operand::YMM), MM_0F3A | PP_66, 0x18, true, 0); db(imm); }
|
||||
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, MM_0F3A | PP_66, 0x06, true, 0); db(imm); }
|
||||
// vlddqu: the middle operand is xm0 or ym0, chosen to match the kind of x.
void vlddqu(const Xmm& x, const Address& addr) {
	opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, PP_F2 | MM_0F, 0xF0, true, 0);
}
|
||||
void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, MM_0F, 0xAE, false, -1); }
|
||||
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); }
|
||||
// vmaskmovdqu: register-only form; xm0 fills the unused middle operand.
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) {
	opAVX_X_X_XM(x1, xm0, x2, PP_66 | MM_0F, 0xF7, false, -1);
}
|
||||
// vpextrb: op must be an i32e-sized register or memory, otherwise
// ERR_BAD_COMBINATION is thrown. The immediate is appended with db().
void vpextrb(const Operand& op, const Xmm& x, uint8 imm) {
	if (!(op.isREG(i32e) || op.isMEM())) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), PP_66 | MM_0F3A, 0x14, false);
	db(imm);
}
|
||||
// vpextrw r, x, imm8 — register-destination form, opcode 66 0F C5 /r ib.
// For this opcode the destination general register is encoded in ModRM.reg
// and the source XMM register in ModRM.r/m, the same layout vpmovmskb uses.
// The register index therefore goes in the first operand slot and x in the
// last. The previous order (x, xm0, Xmm(r)) is the layout of the 0F3A 15
// memory form and encoded the wrong registers here.
void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false); db(imm); }
|
||||
// vpextrw, Address-first overload: 66 / 0F3A flags, opcode byte 0x15,
// immediate appended with db().
void vpextrw(const Address& addr, const Xmm& x, uint8 imm) {
	opAVX_X_X_XM(x, xm0, addr, PP_66 | MM_0F3A, 0x15, false);
	db(imm);
}
|
||||
// vpextrd: op must be a 32-bit register or memory, otherwise
// ERR_BAD_COMBINATION is thrown. The immediate is appended with db().
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) {
	if (!(op.isREG(32) || op.isMEM())) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), PP_66 | MM_0F3A, 0x16, false, 0);
	db(imm);
}
|
||||
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }
|
||||
void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }
|
||||
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F | PP_66, 0xC4, false); db(imm); }
|
||||
void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F | PP_66, 0xC4, false); db(imm); }
|
||||
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 0); db(imm); }
|
||||
void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 0); db(imm); }
|
||||
// vpmovmskb: the general register index goes in the first operand slot
// (wrapped as an Xmm) and the source vector register in the last.
void vpmovmskb(const Reg32e& r, const Xmm& x) {
	opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, PP_66 | MM_0F, 0xD7, false);
}
|
||||
void vpslldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm7, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); }
|
||||
void vpslldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm7, x, x, MM_0F | PP_66, 0x73, false); db(imm); }
|
||||
void vpsrldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm3, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); }
|
||||
void vpsrldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm3, x, x, MM_0F | PP_66, 0x73, false); db(imm); }
|
||||
// Left shifts by immediate: xm6 in the first slot; opcode bytes
// 0x71 / 0x72 / 0x73 for the w / d / q forms. The one-register overloads
// delegate with x as both operands.
void vpsllw(const Xmm& x1, const Xmm& x2, uint8 imm) {
	opAVX_X_X_XM(xm6, x1, x2, PP_66 | MM_0F, 0x71, false);
	db(imm);
}
void vpsllw(const Xmm& x, uint8 imm) {
	vpsllw(x, x, imm);
}
void vpslld(const Xmm& x1, const Xmm& x2, uint8 imm) {
	opAVX_X_X_XM(xm6, x1, x2, PP_66 | MM_0F, 0x72, false);
	db(imm);
}
void vpslld(const Xmm& x, uint8 imm) {
	vpslld(x, x, imm);
}
void vpsllq(const Xmm& x1, const Xmm& x2, uint8 imm) {
	opAVX_X_X_XM(xm6, x1, x2, PP_66 | MM_0F, 0x73, false);
	db(imm);
}
void vpsllq(const Xmm& x, uint8 imm) {
	vpsllq(x, x, imm);
}
|
||||
// Arithmetic right shifts by immediate: xm4 in the first slot; opcode bytes
// 0x71 / 0x72 for the w / d forms. The one-register overloads delegate with
// x as both operands.
void vpsraw(const Xmm& x1, const Xmm& x2, uint8 imm) {
	opAVX_X_X_XM(xm4, x1, x2, PP_66 | MM_0F, 0x71, false);
	db(imm);
}
void vpsraw(const Xmm& x, uint8 imm) {
	vpsraw(x, x, imm);
}
void vpsrad(const Xmm& x1, const Xmm& x2, uint8 imm) {
	opAVX_X_X_XM(xm4, x1, x2, PP_66 | MM_0F, 0x72, false);
	db(imm);
}
void vpsrad(const Xmm& x, uint8 imm) {
	vpsrad(x, x, imm);
}
|
||||
// Logical right shifts by immediate: xm2 in the first slot; opcode bytes
// 0x71 / 0x72 / 0x73 for the w / d / q forms. The one-register overloads
// delegate with x as both operands.
void vpsrlw(const Xmm& x1, const Xmm& x2, uint8 imm) {
	opAVX_X_X_XM(xm2, x1, x2, PP_66 | MM_0F, 0x71, false);
	db(imm);
}
void vpsrlw(const Xmm& x, uint8 imm) {
	vpsrlw(x, x, imm);
}
void vpsrld(const Xmm& x1, const Xmm& x2, uint8 imm) {
	opAVX_X_X_XM(xm2, x1, x2, PP_66 | MM_0F, 0x72, false);
	db(imm);
}
void vpsrld(const Xmm& x, uint8 imm) {
	vpsrld(x, x, imm);
}
void vpsrlq(const Xmm& x1, const Xmm& x2, uint8 imm) {
	opAVX_X_X_XM(xm2, x1, x2, PP_66 | MM_0F, 0x73, false);
	db(imm);
}
void vpsrlq(const Xmm& x, uint8 imm) {
	vpsrlq(x, x, imm);
}
|
||||
// Variable blends. The selector register x4 is emitted as a trailing byte
// holding its index shifted left by 4. The three-operand overloads reuse x1
// as the first source by delegating to the four-operand form.
void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) {
	opAVX_X_X_XM(x1, x2, op, PP_66 | MM_0F3A, 0x4B, true);
	db(x4.getIdx() << 4);
}
void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4) {
	vblendvpd(x1, x1, op, x4);
}
void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) {
	opAVX_X_X_XM(x1, x2, op, PP_66 | MM_0F3A, 0x4A, true);
	db(x4.getIdx() << 4);
}
void vblendvps(const Xmm& x1, const Operand& op, const Xmm& x4) {
	vblendvps(x1, x1, op, x4);
}
void vpblendvb(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) {
	opAVX_X_X_XM(x1, x2, op, PP_66 | MM_0F3A, 0x4C, false);
	db(x4.getIdx() << 4);
}
void vpblendvb(const Xmm& x1, const Operand& op, const Xmm& x4) {
	vpblendvb(x1, x1, op, x4);
}
|
||||
void vmovd(const Xmm& x, const Reg32& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 0); }
|
||||
void vmovd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x6E, false, 0); }
|
||||
void vmovd(const Reg32& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 0); }
|
||||
void vmovd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x7E, false, 0); }
|
||||
// vmovhlps / vmovlhps: the optional third operand must be an XMM register,
// otherwise ERR_BAD_COMBINATION is thrown.
void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) {
	if (!(op.isNone() || op.isXMM())) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x1, x2, op, MM_0F, 0x12, false);
}
void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) {
	if (!(op.isNone() || op.isXMM())) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x1, x2, op, MM_0F, 0x16, false);
}
|
||||
// vmovmskpd / vmovmskps: r must satisfy isBit(i32e), otherwise
// ERR_BAD_COMBINATION is thrown. The general register index is wrapped as an
// Xmm or Ymm to match the kind of the source x, and xm0 / ym0 fills the
// middle operand accordingly.
void vmovmskpd(const Reg& r, const Xmm& x) {
	if (!r.isBit(i32e)) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), x.isXMM() ? xm0 : ym0, x, PP_66 | MM_0F, 0x50, true, 0);
}
void vmovmskps(const Reg& r, const Xmm& x) {
	if (!r.isBit(i32e)) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), x.isXMM() ? xm0 : ym0, x, MM_0F, 0x50, true, 0);
}
|
||||
void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); }
|
||||
void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }
|
||||
void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }
|
||||
void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F38 | PP_66, 0x2A, false); }
|
||||
void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x10, false); }
|
||||
void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x10, false); }
|
||||
void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x11, false); }
|
||||
void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x10, false); }
|
||||
void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x10, false); }
|
||||
void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x11, false); }
|
||||
void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2D, false, 0); }
|
||||
void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 0); }
|
||||
void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 0); }
|
||||
void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 0); }
|
||||
void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, cvtReg(op2, op2.isREG(), Operand::XMM), MM_0F | PP_F3, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); }
|
||||
void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, cvtReg(op2, op2.isREG(), Operand::XMM), MM_0F | PP_F2, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); }
|
||||
void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, cvtReg(op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM), MM_0F, 0x5A, true); }
|
||||
void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, cvtReg(op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM), MM_0F | PP_F3, 0xE6, true); }
|
||||
void vcvtpd2ps(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0x5A, true); }
|
||||
void vcvtpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_F2, 0xE6, true); }
|
||||
void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0xE6, true); }
|
||||
#ifdef XBYAK64
// Wrappers compiled only when XBYAK64 is defined: vmovq, vpextrq / vpinsrq
// and the Reg64 overloads of the scalar float -> integer conversions.
// The Reg64 forms pass 1 as the last argument where the Reg32 forms pass 0.
void vmovq(const Xmm& x, const Reg64& reg) {
	opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), PP_66 | MM_0F, 0x6E, false, 1);
}
void vmovq(const Xmm& x, const Address& addr) {
	opAVX_X_X_XM(x, xm0, addr, PP_F3 | MM_0F, 0x7E, false, -1);
}
void vmovq(const Reg64& reg, const Xmm& x) {
	opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), PP_66 | MM_0F, 0x7E, false, 1);
}
void vmovq(const Address& addr, const Xmm& x) {
	opAVX_X_X_XM(x, xm0, addr, PP_66 | MM_0F, 0xD6, false, -1);
}
void vmovq(const Xmm& x1, const Xmm& x2) {
	opAVX_X_X_XM(x1, xm0, x2, PP_F3 | MM_0F, 0x7E, false, -1);
}
// op must be a 64-bit register or memory, otherwise ERR_BAD_COMBINATION.
void vpextrq(const Operand& op, const Xmm& x, uint8 imm) {
	if (!(op.isREG(64) || op.isMEM())) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), PP_66 | MM_0F3A, 0x16, false, 1);
	db(imm);
}
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) {
	if (!(op.isREG(64) || op.isMEM())) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), PP_66 | MM_0F3A, 0x22, false, 1);
	db(imm);
}
// In-place form: x is both destination and first source.
void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) {
	vpinsrq(x, x, op, imm);
}
void vcvtss2si(const Reg64& r, const Operand& op) {
	opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, PP_F3 | MM_0F, 0x2D, false, 1);
}
void vcvttss2si(const Reg64& r, const Operand& op) {
	opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, PP_F3 | MM_0F, 0x2C, false, 1);
}
void vcvtsd2si(const Reg64& r, const Operand& op) {
	opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, PP_F2 | MM_0F, 0x2D, false, 1);
}
void vcvttsd2si(const Reg64& r, const Operand& op) {
	opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, PP_F2 | MM_0F, 0x2C, false, 1);
}
#endif
|
||||
|
|
|
@ -2,9 +2,10 @@
|
|||
#define XBYAK_XBYAK_UTIL_H_
|
||||
|
||||
/**
|
||||
utility class for Xbyak
|
||||
@note this header is under construction
|
||||
utility class and functions for Xbyak
|
||||
@note this header is UNDER CONSTRUCTION!
|
||||
*/
|
||||
#include "xbyak/xbyak.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
||||
|
@ -29,10 +30,17 @@
|
|||
#include <intrin.h> // for __cpuid
|
||||
#endif
|
||||
#else
|
||||
#if __GNUC_PREREQ(4, 3)
|
||||
// Fallback for toolchains whose headers do not provide __GNUC_PREREQ.
// It must evaluate to true only when the compiler's own version
// (__GNUC__.__GNUC_MINOR__) is at least major.minor. The previous fallback
// expanded to the non-zero constant (major << 16) + minor, so every
// "#if __GNUC_PREREQ(...)" test passed regardless of the compiler version.
#ifndef __GNUC_PREREQ
#define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
#endif
|
||||
#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
|
||||
#include <cpuid.h>
|
||||
#else
|
||||
#define __cpuid(eaxIn, a, b, c, d) __asm__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
|
||||
#if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm'
|
||||
// 32-bit Apple variant: EBX cannot be named as an output here, so it is saved
// and restored around cpuid and the value is handed back through ESI ("=S").
// cpuid writes its second result to EBX, so that is the register that must be
// copied to ESI before the pop. The previous code copied EBP, which left `b`
// holding an unrelated value.
#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
|
||||
#else
|
||||
#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -43,6 +51,10 @@ namespace Xbyak { namespace util {
|
|||
*/
|
||||
class Cpu {
|
||||
unsigned int type_;
|
||||
unsigned int get32bitAsBE(const char *x) const
|
||||
{
|
||||
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
||||
}
|
||||
public:
|
||||
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
||||
{
|
||||
|
@ -64,11 +76,17 @@ public:
|
|||
tSSE41 = 1 << 7,
|
||||
tSSE42 = 1 << 8,
|
||||
tPOPCNT = 1 << 9,
|
||||
tAESNI = 1 << 10,
|
||||
tSSE5 = 1 << 11,
|
||||
tOSXSACE = 1 << 12,
|
||||
tPCLMULQDQ = 1 << 13,
|
||||
tAVX = 1 << 14,
|
||||
tFMA = 1 << 15,
|
||||
|
||||
t3DN = 1 << 16,
|
||||
tE3DN = 1 << 17,
|
||||
tSSE4a = 1 << 18,
|
||||
tSSE5 = 1 << 11,
|
||||
tRDTSCP = 1 << 19,
|
||||
|
||||
tINTEL = 1 << 24,
|
||||
tAMD = 1 << 25
|
||||
|
@ -80,28 +98,39 @@ public:
|
|||
getCpuid(0, data);
|
||||
static const char intel[] = "ntel";
|
||||
static const char amd[] = "cAMD";
|
||||
if (data[2] == *reinterpret_cast<const unsigned int*>(amd)) {
|
||||
if (data[2] == get32bitAsBE(amd)) {
|
||||
type_ |= tAMD;
|
||||
getCpuid(0x80000001, data);
|
||||
if (data[3] & (1 << 31)) type_ |= t3DN;
|
||||
if (data[3] & (1 << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1 << 30)) type_ |= tE3DN;
|
||||
if (data[3] & (1 << 22)) type_ |= tMMX2;
|
||||
if (data[3] & (1U << 31)) type_ |= t3DN;
|
||||
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1U << 30)) type_ |= tE3DN;
|
||||
if (data[3] & (1U << 22)) type_ |= tMMX2;
|
||||
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
|
||||
}
|
||||
if (data[2] == *reinterpret_cast<const unsigned int*>(intel)) {
|
||||
if (data[2] == get32bitAsBE(intel)) {
|
||||
type_ |= tINTEL;
|
||||
getCpuid(0x80000001, data);
|
||||
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
|
||||
}
|
||||
getCpuid(1, data);
|
||||
if (data[2] & (1 << 0)) type_ |= tSSE3;
|
||||
if (data[2] & (1 << 9)) type_ |= tSSSE3;
|
||||
if (data[2] & (1 << 19)) type_ |= tSSE41;
|
||||
if (data[2] & (1 << 20)) type_ |= tSSE42;
|
||||
if (data[2] & (1 << 23)) type_ |= tPOPCNT;
|
||||
|
||||
if (data[3] & (1 << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1 << 23)) type_ |= tMMX;
|
||||
if (data[3] & (1 << 25)) type_ |= tMMX2 | tSSE;
|
||||
if (data[3] & (1 << 26)) type_ |= tSSE2;
|
||||
if (data[2] & (1U << 0)) type_ |= tSSE3;
|
||||
if (data[2] & (1U << 9)) type_ |= tSSSE3;
|
||||
if (data[2] & (1U << 19)) type_ |= tSSE41;
|
||||
if (data[2] & (1U << 20)) type_ |= tSSE42;
|
||||
if (data[2] & (1U << 23)) type_ |= tPOPCNT;
|
||||
if (data[2] & (1U << 25)) type_ |= tAESNI;
|
||||
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||
if (data[2] & (1U << 27)) type_ |= tOSXSACE;
|
||||
#if _M_SSE >= 0x500
|
||||
// QQQ
|
||||
// should check XFEATURE_ENABLED_MASK[2:1] = '11b' by xgetbv
|
||||
if (data[2] & (1U << 28)) type_ |= tAVX;
|
||||
if (data[2] & (1U << 12)) type_ |= tFMA;
|
||||
#endif
|
||||
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1U << 23)) type_ |= tMMX;
|
||||
if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE;
|
||||
if (data[3] & (1U << 26)) type_ |= tSSE2;
|
||||
}
|
||||
bool has(Type type) const
|
||||
{
|
||||
|
@ -109,6 +138,40 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class Clock {
|
||||
public:
|
||||
static inline uint64 getRdtsc()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#else
|
||||
unsigned int eax, edx;
|
||||
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
#endif
|
||||
}
|
||||
Clock()
|
||||
: clock_(0)
|
||||
, count_(0)
|
||||
{
|
||||
}
|
||||
void begin()
|
||||
{
|
||||
clock_ -= getRdtsc();
|
||||
}
|
||||
void end()
|
||||
{
|
||||
clock_ += getRdtsc();
|
||||
count_++;
|
||||
}
|
||||
int getCount() const { return count_; }
|
||||
uint64 getClock() const { return clock_; }
|
||||
void clear() { count_ = 0; clock_ = 0; }
|
||||
private:
|
||||
uint64 clock_;
|
||||
int count_;
|
||||
};
|
||||
|
||||
#ifdef XBYAK32
|
||||
|
||||
namespace local {
|
||||
|
@ -133,53 +196,47 @@ XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebp)
|
|||
#undef XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG
|
||||
} // end of local
|
||||
|
||||
template<class Gen>
|
||||
struct EnableSetEip : public Gen {
|
||||
EnableSetEip(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0)
|
||||
: Gen(maxSize, userPtr)
|
||||
{
|
||||
}
|
||||
/**
|
||||
get pid to out register
|
||||
@note out = eax or ecx or edx
|
||||
*/
|
||||
void setEipTo(const Xbyak::Reg32& out)
|
||||
{
|
||||
/**
|
||||
get eip to out register
|
||||
@note out is not esp
|
||||
*/
|
||||
template<class T>
|
||||
void setEipTo(T *self, const Xbyak::Reg32& out)
|
||||
{
|
||||
#if 0
|
||||
Gen::call(Gen::getCurr() + 5);
|
||||
Gen::pop(out);
|
||||
self->call("@f");
|
||||
self->L("@@");
|
||||
self->pop(out);
|
||||
#else
|
||||
int idx = out.getIdx();
|
||||
switch (idx) {
|
||||
case Xbyak::Operand::EAX:
|
||||
Gen::call((void*)local::set_eip_to_eax);
|
||||
self->call((void*)local::set_eip_to_eax);
|
||||
break;
|
||||
case Xbyak::Operand::ECX:
|
||||
Gen::call((void*)local::set_eip_to_ecx);
|
||||
self->call((void*)local::set_eip_to_ecx);
|
||||
break;
|
||||
case Xbyak::Operand::EDX:
|
||||
Gen::call((void*)local::set_eip_to_edx);
|
||||
self->call((void*)local::set_eip_to_edx);
|
||||
break;
|
||||
case Xbyak::Operand::EBX:
|
||||
Gen::call((void*)local::set_eip_to_ebx);
|
||||
self->call((void*)local::set_eip_to_ebx);
|
||||
break;
|
||||
case Xbyak::Operand::ESI:
|
||||
Gen::call((void*)local::set_eip_to_esi);
|
||||
self->call((void*)local::set_eip_to_esi);
|
||||
break;
|
||||
case Xbyak::Operand::EDI:
|
||||
Gen::call((void*)local::set_eip_to_edi);
|
||||
self->call((void*)local::set_eip_to_edi);
|
||||
break;
|
||||
case Xbyak::Operand::EBP:
|
||||
Gen::call((void*)local::set_eip_to_ebp);
|
||||
self->call((void*)local::set_eip_to_ebp);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
} } // end of util
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in New Issue