mirror of https://github.com/PCSX2/pcsx2.git
Mostly code cleanups, XBYAK 2.99, VEX conversion for the SW renderer (3-5% faster), and a GSState::Move fix for the Dark Cloud 2 invention crash.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4287 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
e2d36a53a4
commit
ca7abd983a
|
@ -193,6 +193,7 @@ static const int __pagesize = PCSX2_PAGESIZE;
|
||||||
|
|
||||||
# define __aligned(alig) __declspec(align(alig))
|
# define __aligned(alig) __declspec(align(alig))
|
||||||
# define __aligned16 __declspec(align(16))
|
# define __aligned16 __declspec(align(16))
|
||||||
|
# define __aligned32 __declspec(align(32))
|
||||||
# define __pagealigned __declspec(align(PCSX2_PAGESIZE))
|
# define __pagealigned __declspec(align(PCSX2_PAGESIZE))
|
||||||
|
|
||||||
// Deprecated; use __aligned instead.
|
// Deprecated; use __aligned instead.
|
||||||
|
|
|
@ -153,7 +153,7 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
||||||
{
|
{
|
||||||
GSDevice* dev = NULL;
|
GSDevice* dev = NULL;
|
||||||
|
|
||||||
if( renderer == -1 )
|
if(renderer == -1)
|
||||||
{
|
{
|
||||||
renderer = theApp.GetConfig("renderer", 0);
|
renderer = theApp.GetConfig("renderer", 0);
|
||||||
}
|
}
|
||||||
|
@ -167,6 +167,7 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
||||||
// GSopen call then they'll get corrupted graphics, but that's not my problem.
|
// GSopen call then they'll get corrupted graphics, but that's not my problem.
|
||||||
|
|
||||||
delete s_gs;
|
delete s_gs;
|
||||||
|
|
||||||
s_gs = NULL;
|
s_gs = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -178,20 +179,25 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
||||||
case 12: case 13: new GSDeviceNull(); break;
|
case 12: case 13: new GSDeviceNull(); break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !dev ) return -1;
|
if(!dev) return -1;
|
||||||
|
|
||||||
if( !s_gs )
|
if(!s_gs)
|
||||||
{
|
{
|
||||||
switch(renderer)
|
switch(renderer)
|
||||||
{
|
{
|
||||||
default:
|
default:
|
||||||
case 0: s_gs = new GSRendererDX9(); break;
|
case 0:
|
||||||
case 3: s_gs = new GSRendererDX11(); break;
|
s_gs = new GSRendererDX9();
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
s_gs = new GSRendererDX11();
|
||||||
|
break;
|
||||||
case 2: case 5: case 8: case 11: case 13:
|
case 2: case 5: case 8: case 11: case 13:
|
||||||
s_gs = new GSRendererNull(); break;
|
s_gs = new GSRendererNull();
|
||||||
|
break;
|
||||||
case 1: case 4: case 7: case 10: case 12:
|
case 1: case 4: case 7: case 10: case 12:
|
||||||
s_gs = new GSRendererSW(); break;
|
s_gs = new GSRendererSW();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
s_renderer = renderer;
|
s_renderer = renderer;
|
||||||
|
@ -519,72 +525,6 @@ EXPORT_C GSsetFrameLimit(int limit)
|
||||||
|
|
||||||
#ifdef _WINDOWS
|
#ifdef _WINDOWS
|
||||||
|
|
||||||
// Returns false if the window's been closed or an invalid packet was encountered.
|
|
||||||
static __forceinline bool LoopDatPacket_Thingamajig(HWND hWnd, uint8 (&regs)[0x2000], vector<uint8>& buff, FILE* fp, long start)
|
|
||||||
{
|
|
||||||
switch(fgetc(fp))
|
|
||||||
{
|
|
||||||
case EOF:
|
|
||||||
fseek(fp, start, 0);
|
|
||||||
return !!IsWindowVisible(hWnd);
|
|
||||||
|
|
||||||
case 0:
|
|
||||||
{
|
|
||||||
uint32 index = fgetc(fp);
|
|
||||||
uint32 size;
|
|
||||||
|
|
||||||
fread(&size, 4, 1, fp);
|
|
||||||
|
|
||||||
switch(index)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
{
|
|
||||||
if(buff.size() < 0x4000) buff.resize(0x4000);
|
|
||||||
uint32 addr = 0x4000 - size;
|
|
||||||
fread(&buff[0] + addr, size, 1, fp);
|
|
||||||
GSgifTransfer1(&buff[0], addr);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 1:
|
|
||||||
if(buff.size() < size) buff.resize(size);
|
|
||||||
fread(&buff[0], size, 1, fp);
|
|
||||||
GSgifTransfer2(&buff[0], size / 16);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 2:
|
|
||||||
if(buff.size() < size) buff.resize(size);
|
|
||||||
fread(&buff[0], size, 1, fp);
|
|
||||||
GSgifTransfer3(&buff[0], size / 16);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 1:
|
|
||||||
GSvsync(fgetc(fp));
|
|
||||||
return !!IsWindowVisible(hWnd);
|
|
||||||
|
|
||||||
case 2:
|
|
||||||
{
|
|
||||||
uint32 size;
|
|
||||||
fread(&size, 4, 1, fp);
|
|
||||||
if(buff.size() < size) buff.resize(size);
|
|
||||||
GSreadFIFO2(&buff[0], size / 16);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 3:
|
|
||||||
fread(regs, 0x2000, 1, fp);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// lpszCmdLine:
|
// lpszCmdLine:
|
||||||
// First parameter is the renderer.
|
// First parameter is the renderer.
|
||||||
// Second parameter is the gs file to load and run.
|
// Second parameter is the gs file to load and run.
|
||||||
|
@ -634,7 +574,73 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
|
||||||
|
|
||||||
GSvsync(1);
|
GSvsync(1);
|
||||||
|
|
||||||
while( LoopDatPacket_Thingamajig(hWnd, regs, buff, fp, start) ) ;
|
bool exit = false;
|
||||||
|
|
||||||
|
while(!exit)
|
||||||
|
{
|
||||||
|
uint32 index;
|
||||||
|
uint32 size;
|
||||||
|
uint32 addr;
|
||||||
|
|
||||||
|
int pos;
|
||||||
|
|
||||||
|
switch(fgetc(fp))
|
||||||
|
{
|
||||||
|
case EOF:
|
||||||
|
fseek(fp, start, 0);
|
||||||
|
exit = !IsWindowVisible(hWnd);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0:
|
||||||
|
index = fgetc(fp);
|
||||||
|
fread(&size, 4, 1, fp);
|
||||||
|
|
||||||
|
switch(index)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
if(buff.size() < 0x4000) buff.resize(0x4000);
|
||||||
|
addr = 0x4000 - size;
|
||||||
|
fread(buff.data() + addr, size, 1, fp);
|
||||||
|
GSgifTransfer1(buff.data(), addr);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
if(buff.size() < size) buff.resize(size);
|
||||||
|
fread(buff.data(), size, 1, fp);
|
||||||
|
GSgifTransfer2(buff.data(), size / 16);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
if(buff.size() < size) buff.resize(size);
|
||||||
|
fread(buff.data(), size, 1, fp);
|
||||||
|
GSgifTransfer3(buff.data(), size / 16);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
if(buff.size() < size) buff.resize(size);
|
||||||
|
fread(buff.data(), size, 1, fp);
|
||||||
|
GSgifTransfer(buff.data(), size / 16);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
GSvsync(fgetc(fp));
|
||||||
|
exit = !IsWindowVisible(hWnd);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
fread(&size, 4, 1, fp);
|
||||||
|
if(buff.size() < size) buff.resize(size);
|
||||||
|
GSreadFIFO2(&buff[0], size / 16);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
fread(regs, 0x2000, 1, fp);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
GSclose();
|
GSclose();
|
||||||
GSshutdown();
|
GSshutdown();
|
||||||
|
@ -672,7 +678,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
|
||||||
{PSM_PSMZ16S, "16ZS"},
|
{PSM_PSMZ16S, "16ZS"},
|
||||||
};
|
};
|
||||||
|
|
||||||
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16);
|
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);
|
||||||
|
|
||||||
for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i;
|
for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i;
|
||||||
|
|
||||||
|
@ -809,7 +815,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
|
||||||
{
|
{
|
||||||
GSLocalMemory mem;
|
GSLocalMemory mem;
|
||||||
|
|
||||||
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16);
|
uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);
|
||||||
|
|
||||||
for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i;
|
for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i;
|
||||||
|
|
||||||
|
|
|
@ -77,6 +77,7 @@ enum GIF_REG
|
||||||
GIF_REG_CLAMP_1 = 0x08,
|
GIF_REG_CLAMP_1 = 0x08,
|
||||||
GIF_REG_CLAMP_2 = 0x09,
|
GIF_REG_CLAMP_2 = 0x09,
|
||||||
GIF_REG_FOG = 0x0a,
|
GIF_REG_FOG = 0x0a,
|
||||||
|
GIF_REG_INVALID = 0x0b,
|
||||||
GIF_REG_XYZF3 = 0x0c,
|
GIF_REG_XYZF3 = 0x0c,
|
||||||
GIF_REG_XYZ3 = 0x0d,
|
GIF_REG_XYZ3 = 0x0d,
|
||||||
GIF_REG_A_D = 0x0e,
|
GIF_REG_A_D = 0x0e,
|
||||||
|
@ -1077,7 +1078,7 @@ REG128_SET(GIFPackedReg)
|
||||||
GIFPackedNOP NOP;
|
GIFPackedNOP NOP;
|
||||||
REG_SET_END
|
REG_SET_END
|
||||||
|
|
||||||
__aligned16 struct GIFPath
|
__aligned32 struct GIFPath
|
||||||
{
|
{
|
||||||
GIFTag tag;
|
GIFTag tag;
|
||||||
uint32 reg;
|
uint32 reg;
|
||||||
|
@ -1107,8 +1108,11 @@ __aligned16 struct GIFPath
|
||||||
if((++reg & 0xf) == nreg)
|
if((++reg & 0xf) == nreg)
|
||||||
{
|
{
|
||||||
reg = 0;
|
reg = 0;
|
||||||
|
|
||||||
if(--nloop == 0)
|
if(--nloop == 0)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -1201,7 +1201,7 @@ public:
|
||||||
|
|
||||||
#else
|
#else
|
||||||
/*
|
/*
|
||||||
__aligned16 uint32 block[8 * 8];
|
__aligned32 uint32 block[8 * 8];
|
||||||
|
|
||||||
UnpackBlock4HL(src, srcpitch, block);
|
UnpackBlock4HL(src, srcpitch, block);
|
||||||
|
|
||||||
|
@ -1316,7 +1316,7 @@ public:
|
||||||
|
|
||||||
#else
|
#else
|
||||||
/*
|
/*
|
||||||
__aligned16 uint32 block[8 * 8];
|
__aligned32 uint32 block[8 * 8];
|
||||||
|
|
||||||
UnpackBlock4HH(src, srcpitch, block);
|
UnpackBlock4HH(src, srcpitch, block);
|
||||||
|
|
||||||
|
@ -1467,7 +1467,7 @@ public:
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
__aligned16 uint8 block[16 * 16];
|
__aligned32 uint8 block[16 * 16];
|
||||||
|
|
||||||
ReadBlock8<true>(src, (uint8*)block, sizeof(block) / 16);
|
ReadBlock8<true>(src, (uint8*)block, sizeof(block) / 16);
|
||||||
|
|
||||||
|
@ -1542,7 +1542,7 @@ public:
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
__aligned16 uint8 block[(32 / 2) * 16];
|
__aligned32 uint8 block[(32 / 2) * 16];
|
||||||
|
|
||||||
ReadBlock4<true>(src, (uint8*)block, sizeof(block) / 16);
|
ReadBlock4<true>(src, (uint8*)block, sizeof(block) / 16);
|
||||||
|
|
||||||
|
@ -1583,7 +1583,7 @@ public:
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
__aligned16 uint32 block[8 * 8];
|
__aligned32 uint32 block[8 * 8];
|
||||||
|
|
||||||
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
|
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||||
|
|
||||||
|
@ -1624,7 +1624,7 @@ public:
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
__aligned16 uint32 block[8 * 8];
|
__aligned32 uint32 block[8 * 8];
|
||||||
|
|
||||||
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
|
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||||
|
|
||||||
|
@ -1665,7 +1665,7 @@ public:
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
__aligned16 uint32 block[8 * 8];
|
__aligned32 uint32 block[8 * 8];
|
||||||
|
|
||||||
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
|
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||||
|
|
||||||
|
|
|
@ -68,7 +68,8 @@ void GSCaptureDlg::OnInit()
|
||||||
|
|
||||||
ComboBoxAppend(IDC_CODECS, "Uncompressed", 0, true);
|
ComboBoxAppend(IDC_CODECS, "Uncompressed", 0, true);
|
||||||
|
|
||||||
CoInitialize(0);
|
CoInitialize(0); // this is obviously wrong here, each thread should call this on start, and where is CoUninitialize?
|
||||||
|
|
||||||
BeginEnumSysDev(CLSID_VideoCompressorCategory, moniker)
|
BeginEnumSysDev(CLSID_VideoCompressorCategory, moniker)
|
||||||
{
|
{
|
||||||
Codec c;
|
Codec c;
|
||||||
|
@ -195,6 +196,7 @@ bool GSCaptureDlg::OnCommand(HWND hWnd, UINT id, UINT code)
|
||||||
if (ris != 2)
|
if (ris != 2)
|
||||||
{
|
{
|
||||||
wstring s = wstring(c.DisplayName.m_str);
|
wstring s = wstring(c.DisplayName.m_str);
|
||||||
|
|
||||||
theApp.SetConfig("CaptureVideoCodecDisplayName", string(s.begin(), s.end()).c_str());
|
theApp.SetConfig("CaptureVideoCodecDisplayName", string(s.begin(), s.end()).c_str());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
@ -126,7 +126,7 @@ void GSClut::Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
|
|
||||||
void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ASSERT(TEX0.CSA == 0);
|
ASSERT(TEX0.CSA == 0);
|
||||||
|
|
||||||
|
@ -135,7 +135,7 @@ void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TE
|
||||||
|
|
||||||
void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ASSERT(TEX0.CSA < 16);
|
ASSERT(TEX0.CSA < 16);
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
|
|
||||||
class GSLocalMemory;
|
class GSLocalMemory;
|
||||||
|
|
||||||
__aligned16 class GSClut : public GSAlignedClass<16>
|
__aligned32 class GSClut : public GSAlignedClass<32>
|
||||||
{
|
{
|
||||||
GSLocalMemory* m_mem;
|
GSLocalMemory* m_mem;
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ __aligned16 class GSClut : public GSAlignedClass<16>
|
||||||
uint32* m_buff32;
|
uint32* m_buff32;
|
||||||
uint64* m_buff64;
|
uint64* m_buff64;
|
||||||
|
|
||||||
__aligned16 struct WriteState
|
__aligned32 struct WriteState
|
||||||
{
|
{
|
||||||
GIFRegTEX0 TEX0;
|
GIFRegTEX0 TEX0;
|
||||||
GIFRegTEXCLUT TEXCLUT;
|
GIFRegTEXCLUT TEXCLUT;
|
||||||
|
@ -45,7 +45,7 @@ __aligned16 class GSClut : public GSAlignedClass<16>
|
||||||
bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
||||||
} m_write;
|
} m_write;
|
||||||
|
|
||||||
__aligned16 struct ReadState
|
__aligned32 struct ReadState
|
||||||
{
|
{
|
||||||
GIFRegTEX0 TEX0;
|
GIFRegTEX0 TEX0;
|
||||||
GIFRegTEXA TEXA;
|
GIFRegTEXA TEXA;
|
||||||
|
|
|
@ -145,8 +145,11 @@ void GSDevice::Recycle(GSTexture* t)
|
||||||
if(t)
|
if(t)
|
||||||
{
|
{
|
||||||
t->last_frame_used = m_frame;
|
t->last_frame_used = m_frame;
|
||||||
|
|
||||||
m_pool.push_front(t);
|
m_pool.push_front(t);
|
||||||
|
|
||||||
//printf("%d\n",m_pool.size());
|
//printf("%d\n",m_pool.size());
|
||||||
|
|
||||||
while(m_pool.size() > 300)
|
while(m_pool.size() > 300)
|
||||||
{
|
{
|
||||||
delete m_pool.back();
|
delete m_pool.back();
|
||||||
|
@ -159,9 +162,11 @@ void GSDevice::Recycle(GSTexture* t)
|
||||||
void GSDevice::AgePool()
|
void GSDevice::AgePool()
|
||||||
{
|
{
|
||||||
m_frame++;
|
m_frame++;
|
||||||
while (m_pool.size() > 20 && m_frame - m_pool.back()->last_frame_used > 10)
|
|
||||||
|
while(m_pool.size() > 20 && m_frame - m_pool.back()->last_frame_used > 10)
|
||||||
{
|
{
|
||||||
delete m_pool.back();
|
delete m_pool.back();
|
||||||
|
|
||||||
m_pool.pop_back();
|
m_pool.pop_back();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,7 @@ struct InterlaceConstantBuffer
|
||||||
|
|
||||||
#pragma pack(pop)
|
#pragma pack(pop)
|
||||||
|
|
||||||
class GSDevice : public GSAlignedClass<16>
|
class GSDevice : public GSAlignedClass<32>
|
||||||
{
|
{
|
||||||
list<GSTexture*> m_pool;
|
list<GSTexture*> m_pool;
|
||||||
|
|
||||||
|
@ -66,7 +66,7 @@ protected:
|
||||||
struct {size_t stride, start, count, limit;} m_vertices;
|
struct {size_t stride, start, count, limit;} m_vertices;
|
||||||
uint32 m_msaa;
|
uint32 m_msaa;
|
||||||
DXGI_SAMPLE_DESC m_msaa_desc;
|
DXGI_SAMPLE_DESC m_msaa_desc;
|
||||||
unsigned m_frame; // for ageing the pool
|
unsigned int m_frame; // for ageing the pool
|
||||||
|
|
||||||
virtual GSTexture* Create(int type, int w, int h, bool msaa, int format) = 0;
|
virtual GSTexture* Create(int type, int w, int h, bool msaa, int format) = 0;
|
||||||
|
|
||||||
|
|
|
@ -229,8 +229,10 @@ bool GSDevice11::Create(GSWnd* wnd)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_msaa_desc.Count == 1)
|
if(m_msaa_desc.Count == 1)
|
||||||
|
{
|
||||||
m_msaa = 0;
|
m_msaa = 0;
|
||||||
|
}
|
||||||
|
|
||||||
// convert
|
// convert
|
||||||
|
|
||||||
|
@ -378,7 +380,7 @@ bool GSDevice11::Create(GSWnd* wnd)
|
||||||
|
|
||||||
if(m_wnd->IsManaged())
|
if(m_wnd->IsManaged())
|
||||||
{
|
{
|
||||||
SetExclusive( !theApp.GetConfig("windowed", 1) );
|
SetExclusive(!theApp.GetConfig("windowed", 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -392,11 +394,14 @@ bool GSDevice11::Reset(int w, int h)
|
||||||
if(m_swapchain)
|
if(m_swapchain)
|
||||||
{
|
{
|
||||||
DXGI_SWAP_CHAIN_DESC scd;
|
DXGI_SWAP_CHAIN_DESC scd;
|
||||||
|
|
||||||
memset(&scd, 0, sizeof(scd));
|
memset(&scd, 0, sizeof(scd));
|
||||||
|
|
||||||
m_swapchain->GetDesc(&scd);
|
m_swapchain->GetDesc(&scd);
|
||||||
m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0);
|
m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0);
|
||||||
|
|
||||||
CComPtr<ID3D11Texture2D> backbuffer;
|
CComPtr<ID3D11Texture2D> backbuffer;
|
||||||
|
|
||||||
if(FAILED(m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer)))
|
if(FAILED(m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer)))
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
|
@ -422,9 +427,12 @@ void GSDevice11::SetExclusive(bool isExcl)
|
||||||
m_swapchain->ResizeTarget(&desc);
|
m_swapchain->ResizeTarget(&desc);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
HRESULT hr = m_swapchain->SetFullscreenState( isExcl, NULL );
|
HRESULT hr = m_swapchain->SetFullscreenState(isExcl, NULL);
|
||||||
|
|
||||||
if(hr == DXGI_ERROR_NOT_CURRENTLY_AVAILABLE)
|
if(hr == DXGI_ERROR_NOT_CURRENTLY_AVAILABLE)
|
||||||
|
{
|
||||||
fprintf(stderr, "(GSdx10) SetExclusive(%s) failed; request unavailable.", isExcl ? "true" : "false");
|
fprintf(stderr, "(GSdx10) SetExclusive(%s) failed; request unavailable.", isExcl ? "true" : "false");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice11::Flip()
|
void GSDevice11::Flip()
|
||||||
|
@ -885,10 +893,13 @@ void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
|
||||||
void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
|
void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
|
||||||
{
|
{
|
||||||
ID3D11ShaderResourceView* srv = NULL;
|
ID3D11ShaderResourceView* srv = NULL;
|
||||||
if (sr) srv = *(GSTexture11*)sr;
|
|
||||||
|
|
||||||
if (m_state.ps_srv[i] != srv) {
|
if(sr) srv = *(GSTexture11*)sr;
|
||||||
|
|
||||||
|
if(m_state.ps_srv[i] != srv)
|
||||||
|
{
|
||||||
m_state.ps_srv[i] = srv;
|
m_state.ps_srv[i] = srv;
|
||||||
|
|
||||||
m_srv_changed = true;
|
m_srv_changed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -914,13 +925,17 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
|
||||||
m_ctx->PSSetShader(ps, NULL, 0);
|
m_ctx->PSSetShader(ps, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_srv_changed) {
|
if (m_srv_changed)
|
||||||
|
{
|
||||||
m_ctx->PSSetShaderResources(0, 3, m_state.ps_srv);
|
m_ctx->PSSetShaderResources(0, 3, m_state.ps_srv);
|
||||||
|
|
||||||
m_srv_changed = false;
|
m_srv_changed = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_ss_changed) {
|
if(m_ss_changed)
|
||||||
|
{
|
||||||
m_ctx->PSSetSamplers(0, 3, m_state.ps_ss);
|
m_ctx->PSSetSamplers(0, 3, m_state.ps_ss);
|
||||||
|
|
||||||
m_ss_changed = false;
|
m_ss_changed = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -982,8 +997,8 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
|
||||||
|
|
||||||
vp.TopLeftX = 0;
|
vp.TopLeftX = 0;
|
||||||
vp.TopLeftY = 0;
|
vp.TopLeftY = 0;
|
||||||
vp.Width = (FLOAT)rt->GetWidth();
|
vp.Width = (float)rt->GetWidth();
|
||||||
vp.Height = (FLOAT)rt->GetHeight();
|
vp.Height = (float)rt->GetHeight();
|
||||||
vp.MinDepth = 0.0f;
|
vp.MinDepth = 0.0f;
|
||||||
vp.MaxDepth = 1.0f;
|
vp.MaxDepth = 1.0f;
|
||||||
|
|
||||||
|
|
|
@ -31,7 +31,6 @@ GSDevice9::GSDevice9()
|
||||||
|
|
||||||
memset(&m_pp, 0, sizeof(m_pp));
|
memset(&m_pp, 0, sizeof(m_pp));
|
||||||
memset(&m_d3dcaps, 0, sizeof(m_d3dcaps));
|
memset(&m_d3dcaps, 0, sizeof(m_d3dcaps));
|
||||||
|
|
||||||
memset(&m_state, 0, sizeof(m_state));
|
memset(&m_state, 0, sizeof(m_state));
|
||||||
|
|
||||||
m_state.bf = 0xffffffff;
|
m_state.bf = 0xffffffff;
|
||||||
|
@ -39,81 +38,109 @@ GSDevice9::GSDevice9()
|
||||||
|
|
||||||
GSDevice9::~GSDevice9()
|
GSDevice9::~GSDevice9()
|
||||||
{
|
{
|
||||||
for_each(m_mskfix.begin(), m_mskfix.end(), delete_second());
|
|
||||||
|
|
||||||
for_each(m_om_bs.begin(), m_om_bs.end(), delete_second());
|
for_each(m_om_bs.begin(), m_om_bs.end(), delete_second());
|
||||||
for_each(m_om_dss.begin(), m_om_dss.end(), delete_second());
|
for_each(m_om_dss.begin(), m_om_dss.end(), delete_second());
|
||||||
for_each(m_ps_ss.begin(), m_ps_ss.end(), delete_second());
|
for_each(m_ps_ss.begin(), m_ps_ss.end(), delete_second());
|
||||||
|
for_each(m_mskfix.begin(), m_mskfix.end(), delete_second());
|
||||||
|
|
||||||
if(m_state.vs_cb) _aligned_free(m_state.vs_cb);
|
if(m_state.vs_cb) _aligned_free(m_state.vs_cb);
|
||||||
if(m_state.ps_cb) _aligned_free(m_state.ps_cb);
|
if(m_state.ps_cb) _aligned_free(m_state.ps_cb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if supported and null != msaa_desc, msaa_desc will contain requested Count and Quality
|
||||||
|
|
||||||
|
static bool IsMsaaSupported(IDirect3D9* d3d, D3DFORMAT depth_format, uint msaaCount, DXGI_SAMPLE_DESC* msaa_desc = NULL)
|
||||||
|
{
|
||||||
|
if(msaaCount > 16) return false;
|
||||||
|
|
||||||
//if supported and null!=msaa_desc, msaa_desc will contain requested Count and Quality
|
|
||||||
static bool IsMsaaSupported(CComPtr<IDirect3D9>& d3d, D3DFORMAT depth_format, uint msaaCount, OUT DXGI_SAMPLE_DESC* msaa_desc=NULL){
|
|
||||||
D3DCAPS9 d3dcaps;
|
D3DCAPS9 d3dcaps;
|
||||||
|
|
||||||
if (msaaCount>16) return false;
|
|
||||||
|
|
||||||
memset(&d3dcaps, 0, sizeof(d3dcaps));
|
memset(&d3dcaps, 0, sizeof(d3dcaps));
|
||||||
|
|
||||||
d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &d3dcaps);
|
d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &d3dcaps);
|
||||||
|
|
||||||
DWORD quality[2] = {0, 0};
|
DWORD quality[2] = {0, 0};
|
||||||
|
|
||||||
if(SUCCEEDED(d3d->CheckDeviceMultiSampleType(d3dcaps.AdapterOrdinal, d3dcaps.DeviceType, D3DFMT_A8R8G8B8, TRUE, (D3DMULTISAMPLE_TYPE)msaaCount, &quality[0])) && quality[0] >0
|
if(SUCCEEDED(d3d->CheckDeviceMultiSampleType(d3dcaps.AdapterOrdinal, d3dcaps.DeviceType, D3DFMT_A8R8G8B8, TRUE, (D3DMULTISAMPLE_TYPE)msaaCount, &quality[0])) && quality[0] > 0
|
||||||
&& SUCCEEDED(d3d->CheckDeviceMultiSampleType(d3dcaps.AdapterOrdinal, d3dcaps.DeviceType, depth_format, TRUE, (D3DMULTISAMPLE_TYPE)msaaCount, &quality[1])) && quality[1] >0
|
&& SUCCEEDED(d3d->CheckDeviceMultiSampleType(d3dcaps.AdapterOrdinal, d3dcaps.DeviceType, depth_format, TRUE, (D3DMULTISAMPLE_TYPE)msaaCount, &quality[1])) && quality[1] > 0)
|
||||||
){
|
{
|
||||||
if (msaa_desc){
|
if(msaa_desc)
|
||||||
msaa_desc->Count = msaaCount;
|
{
|
||||||
msaa_desc->Quality = std::min<DWORD>(quality[0] - 1, quality[1] - 1);
|
msaa_desc->Count = msaaCount;
|
||||||
|
msaa_desc->Quality = std::min<DWORD>(quality[0] - 1, quality[1] - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool TestDepthFormat(CComPtr<IDirect3D9> &d3d, D3DFORMAT format)
|
static bool TestDepthFormat(IDirect3D9* d3d, D3DFORMAT format)
|
||||||
{
|
{
|
||||||
if (FAILED(d3d->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, format)))
|
if(FAILED(d3d->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, format)))
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
if (FAILED(d3d->CheckDepthStencilMatch(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, D3DFMT_X8R8G8B8, D3DFMT_X8R8G8B8, format)))
|
}
|
||||||
|
|
||||||
|
if(FAILED(d3d->CheckDepthStencilMatch(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, D3DFMT_X8R8G8B8, D3DFMT_X8R8G8B8, format)))
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static D3DFORMAT BestD3dFormat(IDirect3D9* d3d, int msaaCount = 0, DXGI_SAMPLE_DESC* msaa_desc = NULL)
|
||||||
|
{
|
||||||
|
// In descending order of preference
|
||||||
|
|
||||||
//In descending order of preference
|
static D3DFORMAT fmts[] =
|
||||||
static D3DFORMAT s_DX9formatsToSearch[]={D3DFMT_D32, D3DFMT_D32F_LOCKABLE, D3DFMT_D24S8};
|
{
|
||||||
|
D3DFMT_D32,
|
||||||
|
D3DFMT_D32F_LOCKABLE,
|
||||||
|
D3DFMT_D24S8
|
||||||
|
};
|
||||||
|
|
||||||
static D3DFORMAT BestD3dFormat(CComPtr<IDirect3D9>& d3d, int msaaCount=0, OUT DXGI_SAMPLE_DESC* msaa_desc=NULL){
|
if(1 == msaaCount) msaaCount = 0;
|
||||||
if(!d3d) return D3DFMT_UNKNOWN;
|
|
||||||
if (1==msaaCount) msaaCount=0;
|
|
||||||
|
|
||||||
for (int i=0; i<sizeof(s_DX9formatsToSearch); i++)
|
for(int i = 0; i < sizeof(fmts); i++)
|
||||||
if (TestDepthFormat(d3d, s_DX9formatsToSearch[i]) && (!msaaCount || IsMsaaSupported(d3d, s_DX9formatsToSearch[i], msaaCount, msaa_desc)))
|
{
|
||||||
return s_DX9formatsToSearch[i];
|
if(TestDepthFormat(d3d, fmts[i]) && (!msaaCount || IsMsaaSupported(d3d, fmts[i], msaaCount, msaa_desc)))
|
||||||
|
{
|
||||||
|
return fmts[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return D3DFMT_UNKNOWN;
|
return D3DFMT_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
//return: 32, 24, or 0 if not supported. if 1==msaa, considered as msaa=0
|
// return: 32, 24, or 0 if not supported. if 1==msaa, considered as msaa=0
|
||||||
uint GSDevice9::GetMaxDepth(uint msaa=0){
|
|
||||||
|
uint GSDevice9::GetMaxDepth(uint msaa = 0)
|
||||||
|
{
|
||||||
CComPtr<IDirect3D9> d3d;
|
CComPtr<IDirect3D9> d3d;
|
||||||
|
|
||||||
d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION));
|
d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION));
|
||||||
|
|
||||||
D3DFORMAT f=BestD3dFormat(d3d, msaa);
|
switch(BestD3dFormat(d3d, msaa))
|
||||||
switch (f){
|
{
|
||||||
case D3DFMT_D32: case D3DFMT_D32F_LOCKABLE: return 32;
|
case D3DFMT_D32:
|
||||||
case D3DFMT_D24S8: return 24;
|
case D3DFMT_D32F_LOCKABLE:
|
||||||
|
return 32;
|
||||||
|
case D3DFMT_D24S8:
|
||||||
|
return 24;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice9::ForceValidMsaaConfig(){
|
void GSDevice9::ForceValidMsaaConfig()
|
||||||
if (0==GetMaxDepth(theApp.GetConfig("msaa", 0)))
|
{
|
||||||
theApp.SetConfig("msaa", 0);//replace invalid msaa value in ini file with 0.
|
if(0 == GetMaxDepth(theApp.GetConfig("msaa", 0)))
|
||||||
|
{
|
||||||
|
theApp.SetConfig("msaa", 0); // replace invalid msaa value in ini file with 0.
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
bool GSDevice9::Create(GSWnd* wnd)
|
bool GSDevice9::Create(GSWnd* wnd)
|
||||||
|
@ -128,17 +155,26 @@ bool GSDevice9::Create(GSWnd* wnd)
|
||||||
m_d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION));
|
m_d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION));
|
||||||
|
|
||||||
if(!m_d3d) return false;
|
if(!m_d3d) return false;
|
||||||
|
|
||||||
ForceValidMsaaConfig();
|
ForceValidMsaaConfig();
|
||||||
//Get best format/depth for msaa. Assumption is that if the resulting depth is 24 instead of possible 32,
|
|
||||||
// the user was already warned when she selected it. (Lower res z buffer without warning is unacceptable).
|
// Get best format/depth for msaa. Assumption is that if the resulting depth is 24 instead of possible 32,
|
||||||
m_depth_format=BestD3dFormat(m_d3d, m_msaa, &m_msaa_desc);
|
// the user was already warned when she selected it. (Lower res z buffer without warning is unacceptable).
|
||||||
if (D3DFMT_UNKNOWN == m_depth_format){
|
|
||||||
//can't find a format with requested msaa, try without.
|
m_depth_format = BestD3dFormat(m_d3d, m_msaa, &m_msaa_desc);
|
||||||
m_depth_format = BestD3dFormat(m_d3d, 0);
|
|
||||||
if (D3DFMT_UNKNOWN == m_depth_format)
|
if(D3DFMT_UNKNOWN == m_depth_format)
|
||||||
return false;
|
{
|
||||||
|
// can't find a format with requested msaa, try without.
|
||||||
|
|
||||||
m_msaa=0;
|
m_depth_format = BestD3dFormat(m_d3d, 0);
|
||||||
|
|
||||||
|
if(D3DFMT_UNKNOWN == m_depth_format)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_msaa = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(&m_d3dcaps, 0, sizeof(m_d3dcaps));
|
memset(&m_d3dcaps, 0, sizeof(m_d3dcaps));
|
||||||
|
@ -180,7 +216,6 @@ bool GSDevice9::Create(GSWnd* wnd)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if(!Reset(1, 1))
|
if(!Reset(1, 1))
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
|
@ -274,7 +309,8 @@ bool GSDevice9::Create(GSWnd* wnd)
|
||||||
|
|
||||||
void GSDevice9::SetVsync(bool enable)
|
void GSDevice9::SetVsync(bool enable)
|
||||||
{
|
{
|
||||||
if( m_vsync == enable ) return;
|
if(m_vsync == enable) return;
|
||||||
|
|
||||||
__super::SetVsync(enable);
|
__super::SetVsync(enable);
|
||||||
|
|
||||||
// Clever trick: Delete the backbuffer, so that the next Present will fail and
|
// Clever trick: Delete the backbuffer, so that the next Present will fail and
|
||||||
|
@ -282,6 +318,7 @@ void GSDevice9::SetVsync(bool enable)
|
||||||
// vsync settings. :)
|
// vsync settings. :)
|
||||||
|
|
||||||
delete m_backbuffer;
|
delete m_backbuffer;
|
||||||
|
|
||||||
m_backbuffer = NULL;
|
m_backbuffer = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,6 +330,7 @@ bool GSDevice9::Reset(int w, int h)
|
||||||
HRESULT hr;
|
HRESULT hr;
|
||||||
|
|
||||||
int mode = (!m_wnd->IsManaged() || theApp.GetConfig("windowed", 1)) ? Windowed : Fullscreen;
|
int mode = (!m_wnd->IsManaged() || theApp.GetConfig("windowed", 1)) ? Windowed : Fullscreen;
|
||||||
|
|
||||||
if(mode == DontCare)
|
if(mode == DontCare)
|
||||||
{
|
{
|
||||||
mode = m_pp.Windowed ? Windowed : Fullscreen;
|
mode = m_pp.Windowed ? Windowed : Fullscreen;
|
||||||
|
@ -707,11 +745,11 @@ void GSDevice9::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, c
|
||||||
|
|
||||||
IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
|
IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
|
||||||
IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP);
|
IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP);
|
||||||
|
IASetInputLayout(m_convert.il);
|
||||||
|
|
||||||
// vs
|
// vs
|
||||||
|
|
||||||
VSSetShader(m_convert.vs, NULL, 0);
|
VSSetShader(m_convert.vs, NULL, 0);
|
||||||
IASetInputLayout(m_convert.il);
|
|
||||||
|
|
||||||
// ps
|
// ps
|
||||||
|
|
||||||
|
@ -904,7 +942,7 @@ void GSDevice9::VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int
|
||||||
{
|
{
|
||||||
if(m_state.vs_cb) _aligned_free(m_state.vs_cb);
|
if(m_state.vs_cb) _aligned_free(m_state.vs_cb);
|
||||||
|
|
||||||
m_state.vs_cb = (float*)_aligned_malloc(size, 16);
|
m_state.vs_cb = (float*)_aligned_malloc(size, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_state.vs_cb_len = vs_cb_len;
|
m_state.vs_cb_len = vs_cb_len;
|
||||||
|
@ -926,10 +964,13 @@ void GSDevice9::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
|
||||||
void GSDevice9::PSSetShaderResource(int i, GSTexture* sr)
|
void GSDevice9::PSSetShaderResource(int i, GSTexture* sr)
|
||||||
{
|
{
|
||||||
IDirect3DTexture9* srv = NULL;
|
IDirect3DTexture9* srv = NULL;
|
||||||
if (sr) srv = *(GSTexture9*)sr;
|
|
||||||
|
|
||||||
if (m_state.ps_srvs[i] != srv) {
|
if(sr) srv = *(GSTexture9*)sr;
|
||||||
|
|
||||||
|
if(m_state.ps_srvs[i] != srv)
|
||||||
|
{
|
||||||
m_state.ps_srvs[i] = srv;
|
m_state.ps_srvs[i] = srv;
|
||||||
|
|
||||||
m_dev->SetTexture(i, srv);
|
m_dev->SetTexture(i, srv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -953,7 +994,7 @@ void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int p
|
||||||
{
|
{
|
||||||
if(m_state.ps_cb) _aligned_free(m_state.ps_cb);
|
if(m_state.ps_cb) _aligned_free(m_state.ps_cb);
|
||||||
|
|
||||||
m_state.ps_cb = (float*)_aligned_malloc(size, 16);
|
m_state.ps_cb = (float*)_aligned_malloc(size, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_state.ps_cb_len = ps_cb_len;
|
m_state.ps_cb_len = ps_cb_len;
|
||||||
|
|
|
@ -30,7 +30,7 @@ class GSDeviceDX : public GSDevice
|
||||||
public:
|
public:
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
|
|
||||||
__aligned16 struct VSConstantBuffer
|
__aligned32 struct VSConstantBuffer
|
||||||
{
|
{
|
||||||
GSVector4 VertexScale;
|
GSVector4 VertexScale;
|
||||||
GSVector4 VertexOffset;
|
GSVector4 VertexOffset;
|
||||||
|
@ -86,7 +86,7 @@ public:
|
||||||
VSSelector() : key(0) {}
|
VSSelector() : key(0) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
__aligned16 struct PSConstantBuffer
|
__aligned32 struct PSConstantBuffer
|
||||||
{
|
{
|
||||||
GSVector4 FogColor_AREF;
|
GSVector4 FogColor_AREF;
|
||||||
GSVector4 HalfTexel;
|
GSVector4 HalfTexel;
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -67,10 +67,10 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
|
||||||
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
|
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
|
||||||
void clamp16(const Xmm& a, const Xmm& temp);
|
void clamp16(const Xmm& a, const Xmm& temp);
|
||||||
void alltrue();
|
void alltrue();
|
||||||
void blend8(const Xmm& a, const Xmm& b);
|
|
||||||
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
|
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
|
||||||
void blend8r(const Xmm& b, const Xmm& a);
|
|
||||||
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
|
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
|
||||||
|
void blend8(const Xmm& a, const Xmm& b);
|
||||||
|
void blend8r(const Xmm& b, const Xmm& a);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize);
|
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize);
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
|
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
|
|
||||||
__aligned16 class GSDrawingContext
|
__aligned32 class GSDrawingContext
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
GIFRegXYOFFSET XYOFFSET;
|
GIFRegXYOFFSET XYOFFSET;
|
||||||
|
@ -43,7 +43,7 @@ public:
|
||||||
GIFRegFRAME FRAME;
|
GIFRegFRAME FRAME;
|
||||||
GIFRegZBUF ZBUF;
|
GIFRegZBUF ZBUF;
|
||||||
|
|
||||||
__aligned16 struct
|
__aligned32 struct
|
||||||
{
|
{
|
||||||
GSVector4i dx10;
|
GSVector4i dx10;
|
||||||
GSVector4 dx9;
|
GSVector4 dx9;
|
||||||
|
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
|
|
||||||
__aligned16 class GSDrawingEnvironment
|
__aligned32 class GSDrawingEnvironment
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
GIFRegPRIM PRIM;
|
GIFRegPRIM PRIM;
|
||||||
|
|
|
@ -56,14 +56,14 @@ uint32 GSLocalMemory::pageOffset16SZ[32][64][64];
|
||||||
uint32 GSLocalMemory::pageOffset8[32][64][128];
|
uint32 GSLocalMemory::pageOffset8[32][64][128];
|
||||||
uint32 GSLocalMemory::pageOffset4[32][128][128];
|
uint32 GSLocalMemory::pageOffset4[32][128][128];
|
||||||
|
|
||||||
int GSLocalMemory::rowOffset32[2048];
|
int GSLocalMemory::rowOffset32[4096];
|
||||||
int GSLocalMemory::rowOffset32Z[2048];
|
int GSLocalMemory::rowOffset32Z[4096];
|
||||||
int GSLocalMemory::rowOffset16[2048];
|
int GSLocalMemory::rowOffset16[4096];
|
||||||
int GSLocalMemory::rowOffset16S[2048];
|
int GSLocalMemory::rowOffset16S[4096];
|
||||||
int GSLocalMemory::rowOffset16Z[2048];
|
int GSLocalMemory::rowOffset16Z[4096];
|
||||||
int GSLocalMemory::rowOffset16SZ[2048];
|
int GSLocalMemory::rowOffset16SZ[4096];
|
||||||
int GSLocalMemory::rowOffset8[2][2048];
|
int GSLocalMemory::rowOffset8[2][4096];
|
||||||
int GSLocalMemory::rowOffset4[2][2048];
|
int GSLocalMemory::rowOffset4[2][4096];
|
||||||
|
|
||||||
short GSLocalMemory::blockOffset32[256];
|
short GSLocalMemory::blockOffset32[256];
|
||||||
short GSLocalMemory::blockOffset32Z[256];
|
short GSLocalMemory::blockOffset32Z[256];
|
||||||
|
@ -116,44 +116,44 @@ GSLocalMemory::GSLocalMemory()
|
||||||
|
|
||||||
for(int x = 0; x < countof(rowOffset32); x++)
|
for(int x = 0; x < countof(rowOffset32); x++)
|
||||||
{
|
{
|
||||||
rowOffset32[x] = (int)PixelAddress32(x, 0, 0, 32) - (int)PixelAddress32(0, 0, 0, 32);
|
rowOffset32[x] = (int)PixelAddress32(x & 0x7ff, 0, 0, 32) - (int)PixelAddress32(0, 0, 0, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int x = 0; x < countof(rowOffset32Z); x++)
|
for(int x = 0; x < countof(rowOffset32Z); x++)
|
||||||
{
|
{
|
||||||
rowOffset32Z[x] = (int)PixelAddress32Z(x, 0, 0, 32) - (int)PixelAddress32Z(0, 0, 0, 32);
|
rowOffset32Z[x] = (int)PixelAddress32Z(x & 0x7ff, 0, 0, 32) - (int)PixelAddress32Z(0, 0, 0, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int x = 0; x < countof(rowOffset16); x++)
|
for(int x = 0; x < countof(rowOffset16); x++)
|
||||||
{
|
{
|
||||||
rowOffset16[x] = (int)PixelAddress16(x, 0, 0, 32) - (int)PixelAddress16(0, 0, 0, 32);
|
rowOffset16[x] = (int)PixelAddress16(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16(0, 0, 0, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int x = 0; x < countof(rowOffset16S); x++)
|
for(int x = 0; x < countof(rowOffset16S); x++)
|
||||||
{
|
{
|
||||||
rowOffset16S[x] = (int)PixelAddress16S(x, 0, 0, 32) - (int)PixelAddress16S(0, 0, 0, 32);
|
rowOffset16S[x] = (int)PixelAddress16S(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16S(0, 0, 0, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int x = 0; x < countof(rowOffset16Z); x++)
|
for(int x = 0; x < countof(rowOffset16Z); x++)
|
||||||
{
|
{
|
||||||
rowOffset16Z[x] = (int)PixelAddress16Z(x, 0, 0, 32) - (int)PixelAddress16Z(0, 0, 0, 32);
|
rowOffset16Z[x] = (int)PixelAddress16Z(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16Z(0, 0, 0, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int x = 0; x < countof(rowOffset16SZ); x++)
|
for(int x = 0; x < countof(rowOffset16SZ); x++)
|
||||||
{
|
{
|
||||||
rowOffset16SZ[x] = (int)PixelAddress16SZ(x, 0, 0, 32) - (int)PixelAddress16SZ(0, 0, 0, 32);
|
rowOffset16SZ[x] = (int)PixelAddress16SZ(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16SZ(0, 0, 0, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int x = 0; x < countof(rowOffset8[0]); x++)
|
for(int x = 0; x < countof(rowOffset8[0]); x++)
|
||||||
{
|
{
|
||||||
rowOffset8[0][x] = (int)PixelAddress8(x, 0, 0, 32) - (int)PixelAddress8(0, 0, 0, 32);
|
rowOffset8[0][x] = (int)PixelAddress8(x & 0x7ff, 0, 0, 32) - (int)PixelAddress8(0, 0, 0, 32);
|
||||||
rowOffset8[1][x] = (int)PixelAddress8(x, 2, 0, 32) - (int)PixelAddress8(0, 2, 0, 32);
|
rowOffset8[1][x] = (int)PixelAddress8(x & 0x7ff, 2, 0, 32) - (int)PixelAddress8(0, 2, 0, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int x = 0; x < countof(rowOffset4[0]); x++)
|
for(int x = 0; x < countof(rowOffset4[0]); x++)
|
||||||
{
|
{
|
||||||
rowOffset4[0][x] = (int)PixelAddress4(x, 0, 0, 32) - (int)PixelAddress4(0, 0, 0, 32);
|
rowOffset4[0][x] = (int)PixelAddress4(x & 0x7ff, 0, 0, 32) - (int)PixelAddress4(0, 0, 0, 32);
|
||||||
rowOffset4[1][x] = (int)PixelAddress4(x, 2, 0, 32) - (int)PixelAddress4(0, 2, 0, 32);
|
rowOffset4[1][x] = (int)PixelAddress4(x & 0x7ff, 2, 0, 32) - (int)PixelAddress4(0, 2, 0, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int x = 0; x < countof(blockOffset32); x++)
|
for(int x = 0; x < countof(blockOffset32); x++)
|
||||||
|
@ -459,7 +459,7 @@ GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
||||||
return i->second;
|
return i->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSOffset* o = (GSOffset*)_aligned_malloc(sizeof(GSOffset), 16);
|
GSOffset* o = (GSOffset*)_aligned_malloc(sizeof(GSOffset), 32);
|
||||||
|
|
||||||
o->hash = hash;
|
o->hash = hash;
|
||||||
|
|
||||||
|
@ -474,9 +474,9 @@ GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
||||||
|
|
||||||
pixelAddress pa = m_psm[psm].pa;
|
pixelAddress pa = m_psm[psm].pa;
|
||||||
|
|
||||||
for(int i = 0; i < 2048; i++)
|
for(int i = 0; i < 4096; i++)
|
||||||
{
|
{
|
||||||
o->pixel.row[i] = (int)pa(0, i, bp, bw);
|
o->pixel.row[i] = (int)pa(0, i & 0x7ff, bp, bw);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i = 0; i < 8; i++)
|
for(int i = 0; i < 8; i++)
|
||||||
|
@ -513,7 +513,7 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
|
||||||
return i->second;
|
return i->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSPixelOffset4* o = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 16);
|
GSPixelOffset4* o = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 32);
|
||||||
|
|
||||||
o->hash = hash;
|
o->hash = hash;
|
||||||
|
|
||||||
|
@ -628,7 +628,7 @@ void GSLocalMemory::WriteImageLeftRight(int l, int r, int y, int h, const uint8*
|
||||||
template<int psm, int bsx, int bsy, int trbpp>
|
template<int psm, int bsx, int bsy, int trbpp>
|
||||||
void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
||||||
{
|
{
|
||||||
__aligned16 uint8 buff[64]; // merge buffer for one column
|
__aligned32 uint8 buff[64]; // merge buffer for one column
|
||||||
|
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
uint32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
uint32 bw = BITBLTBUF.DBW;
|
||||||
|
@ -1438,7 +1438,7 @@ void GSLocalMemory::ReadTexture24(const GSOffset* RESTRICT o, const GSVector4i&
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
__aligned16 uint16 block[16 * 8];
|
__aligned32 uint16 block[16 * 8];
|
||||||
|
|
||||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||||
{
|
{
|
||||||
|
@ -1451,7 +1451,7 @@ void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i&
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture16S(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture16S(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
__aligned16 uint16 block[16 * 8];
|
__aligned32 uint16 block[16 * 8];
|
||||||
|
|
||||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||||
{
|
{
|
||||||
|
@ -1548,7 +1548,7 @@ void GSLocalMemory::ReadTexture24Z(const GSOffset* RESTRICT o, const GSVector4i&
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
__aligned16 uint16 block[16 * 8];
|
__aligned32 uint16 block[16 * 8];
|
||||||
|
|
||||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||||
{
|
{
|
||||||
|
@ -1561,7 +1561,7 @@ void GSLocalMemory::ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i&
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
__aligned16 uint16 block[16 * 8];
|
__aligned32 uint16 block[16 * 8];
|
||||||
|
|
||||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||||
{
|
{
|
||||||
|
@ -1576,14 +1576,14 @@ void GSLocalMemory::ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadBlock32<true>(BlockPtr(bp), dst, dstpitch);
|
ReadBlock32<true>(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
if(TEXA.AEM)
|
if(TEXA.AEM)
|
||||||
{
|
{
|
||||||
|
@ -1597,7 +1597,7 @@ void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, cons
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
__aligned16 uint16 block[16 * 8];
|
__aligned32 uint16 block[16 * 8];
|
||||||
|
|
||||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||||
|
|
||||||
|
@ -1606,7 +1606,7 @@ void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, cons
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock16S(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock16S(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
__aligned16 uint16 block[16 * 8];
|
__aligned32 uint16 block[16 * 8];
|
||||||
|
|
||||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||||
|
|
||||||
|
@ -1615,49 +1615,49 @@ void GSLocalMemory::ReadTextureBlock16S(uint32 bp, uint8* dst, int dstpitch, con
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock8(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock8(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadAndExpandBlock8_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
ReadAndExpandBlock8_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadAndExpandBlock4_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
ReadAndExpandBlock4_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock8H(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock8H(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadAndExpandBlock8H_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
ReadAndExpandBlock8H_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4HL(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4HL(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadAndExpandBlock4HL_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
ReadAndExpandBlock4HL_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadAndExpandBlock4HH_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
ReadAndExpandBlock4HH_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock32Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock32Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadBlock32<true>(BlockPtr(bp), dst, dstpitch);
|
ReadBlock32<true>(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock24Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock24Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
if(TEXA.AEM)
|
if(TEXA.AEM)
|
||||||
{
|
{
|
||||||
|
@ -1671,7 +1671,7 @@ void GSLocalMemory::ReadTextureBlock24Z(uint32 bp, uint8* dst, int dstpitch, con
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock16Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock16Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
__aligned16 uint16 block[16 * 8];
|
__aligned32 uint16 block[16 * 8];
|
||||||
|
|
||||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||||
|
|
||||||
|
@ -1680,7 +1680,7 @@ void GSLocalMemory::ReadTextureBlock16Z(uint32 bp, uint8* dst, int dstpitch, con
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock16SZ(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock16SZ(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
__aligned16 uint16 block[16 * 8];
|
__aligned32 uint16 block[16 * 8];
|
||||||
|
|
||||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||||
|
|
||||||
|
@ -1823,28 +1823,28 @@ void GSLocalMemory::ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, cons
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadBlock4P(BlockPtr(bp), dst, dstpitch);
|
ReadBlock4P(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock8HP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock8HP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadBlock8HP(BlockPtr(bp), dst, dstpitch);
|
ReadBlock8HP(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4HLP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4HLP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadBlock4HLP(BlockPtr(bp), dst, dstpitch);
|
ReadBlock4HLP(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4HHP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4HHP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(16);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ReadBlock4HHP(BlockPtr(bp), dst, dstpitch);
|
ReadBlock4HHP(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
@ -1855,7 +1855,7 @@ HRESULT GSLocalMemory::SaveBMP(const string& fn, uint32 bp, uint32 bw, uint32 ps
|
||||||
{
|
{
|
||||||
int pitch = w * 4;
|
int pitch = w * 4;
|
||||||
int size = pitch * h;
|
int size = pitch * h;
|
||||||
void* bits = ::_aligned_malloc(size, 16);
|
void* bits = _aligned_malloc(size, 32);
|
||||||
|
|
||||||
GIFRegTEX0 TEX0;
|
GIFRegTEX0 TEX0;
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ struct GSOffset
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
int row[2048]; // yn (n = 0 1 2 ...)
|
int row[4096]; // yn (n = 0 1 2 ...) NOTE: this wraps around above 2048, only transfers should address the upper half (dark cloud 2 inventing)
|
||||||
int* col[8]; // rowOffset*
|
int* col[8]; // rowOffset*
|
||||||
} pixel;
|
} pixel;
|
||||||
|
|
||||||
|
@ -116,14 +116,14 @@ protected:
|
||||||
static uint32 pageOffset8[32][64][128];
|
static uint32 pageOffset8[32][64][128];
|
||||||
static uint32 pageOffset4[32][128][128];
|
static uint32 pageOffset4[32][128][128];
|
||||||
|
|
||||||
static int rowOffset32[2048];
|
static int rowOffset32[4096];
|
||||||
static int rowOffset32Z[2048];
|
static int rowOffset32Z[4096];
|
||||||
static int rowOffset16[2048];
|
static int rowOffset16[4096];
|
||||||
static int rowOffset16S[2048];
|
static int rowOffset16S[4096];
|
||||||
static int rowOffset16Z[2048];
|
static int rowOffset16Z[4096];
|
||||||
static int rowOffset16SZ[2048];
|
static int rowOffset16SZ[4096];
|
||||||
static int rowOffset8[2][2048];
|
static int rowOffset8[2][4096];
|
||||||
static int rowOffset4[2][2048];
|
static int rowOffset4[2][4096];
|
||||||
|
|
||||||
static short blockOffset32[256];
|
static short blockOffset32[256];
|
||||||
static short blockOffset32Z[256];
|
static short blockOffset32Z[256];
|
||||||
|
|
|
@ -29,18 +29,20 @@
|
||||||
// Using a spinning finish on the main (MTGS) thread is apparently a big win still, over trying
|
// Using a spinning finish on the main (MTGS) thread is apparently a big win still, over trying
|
||||||
// to wait out all the pending m_finished semaphores. It leaves one spinwait in the rasterizer,
|
// to wait out all the pending m_finished semaphores. It leaves one spinwait in the rasterizer,
|
||||||
// but that's still worlds better than 2-6 spinning threads like before.
|
// but that's still worlds better than 2-6 spinning threads like before.
|
||||||
#define UseSpinningFinish 1
|
|
||||||
|
#define UseSpinningFinish
|
||||||
|
|
||||||
// Set this to 1 to remove a lot of non-const div/modulus ops from the rasterization process.
|
// Set this to 1 to remove a lot of non-const div/modulus ops from the rasterization process.
|
||||||
// Might likely be a measurable speedup but limits threading to 1, 2, 4, and 8 threads.
|
// Might likely be a measurable speedup but limits threading to 1, 2, 4, and 8 threads.
|
||||||
// note by rama: Speedup is around 5% on average.
|
// note by rama: Speedup is around 5% on average.
|
||||||
#define UseConstThreadCount 0
|
|
||||||
|
|
||||||
#if UseConstThreadCount
|
// #define UseConstThreadCount
|
||||||
|
|
||||||
|
#ifdef UseConstThreadCount
|
||||||
// ThreadsConst - const number of threads. User-configured threads (in GSdx panel) must match
|
// ThreadsConst - const number of threads. User-configured threads (in GSdx panel) must match
|
||||||
// this value if UseConstThreadCount is enabled. [yeah, it's hacky for now]
|
// this value if UseConstThreadCount is enabled. [yeah, it's hacky for now]
|
||||||
static const int ThreadsConst = 2;
|
static const int ThreadsConst = 2;
|
||||||
static const int ThreadMaskConst = ThreadsConst-1;
|
static const int ThreadMaskConst = ThreadsConst - 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads)
|
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads)
|
||||||
|
@ -57,11 +59,15 @@ GSRasterizer::~GSRasterizer()
|
||||||
|
|
||||||
__forceinline bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
|
__forceinline bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
|
||||||
{
|
{
|
||||||
#if UseConstThreadCount
|
#ifdef UseConstThreadCount
|
||||||
return (ThreadMaskConst==0) || ((scanline & ThreadMaskConst) == m_id);
|
|
||||||
#else
|
return ThreadMaskConst == 0 || (scanline & ThreadMaskConst) == m_id;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
return (scanline % m_threads) == m_id;
|
return (scanline % m_threads) == m_id;
|
||||||
#endif
|
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::Draw(const GSRasterizerData* data)
|
void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||||
|
@ -871,7 +877,7 @@ void GSRasterizerMT::ThreadProc()
|
||||||
{
|
{
|
||||||
// _mm_setcsr(MXCSR);
|
// _mm_setcsr(MXCSR);
|
||||||
|
|
||||||
while( true )
|
while(true)
|
||||||
{
|
{
|
||||||
sem_wait(&m_semaphore);
|
sem_wait(&m_semaphore);
|
||||||
|
|
||||||
|
@ -879,10 +885,15 @@ void GSRasterizerMT::ThreadProc()
|
||||||
|
|
||||||
__super::Draw(m_data);
|
__super::Draw(m_data);
|
||||||
|
|
||||||
if( UseSpinningFinish )
|
#ifdef UseSpinningFinish
|
||||||
_interlockedbittestandreset( &m_sync, m_id );
|
|
||||||
else
|
_interlockedbittestandreset(&m_sync, m_id);
|
||||||
sem_post(&m_finished);
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
sem_post(&m_finished);
|
||||||
|
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
sem_post(&m_stopped);
|
sem_post(&m_stopped);
|
||||||
|
@ -917,33 +928,36 @@ void GSRasterizerList::Draw(const GSRasterizerData* data)
|
||||||
|
|
||||||
m_sync = m_syncstart;
|
m_sync = m_syncstart;
|
||||||
|
|
||||||
for(unsigned i=1; i<size(); ++i)
|
for(size_t i = 1; i < size(); i++)
|
||||||
{
|
{
|
||||||
(*this)[i]->Draw(data);
|
(*this)[i]->Draw(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
(*this)[0]->Draw(data);
|
(*this)[0]->Draw(data);
|
||||||
|
|
||||||
if( UseSpinningFinish )
|
#ifdef UseSpinningFinish
|
||||||
|
|
||||||
|
while(m_sync) _mm_pause();
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
for(size_t i = 1; i < size(); i++)
|
||||||
{
|
{
|
||||||
while(m_sync) _mm_pause();
|
sem_wait(&m_finished);
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for(unsigned i=1; i<size(); ++i )
|
|
||||||
sem_wait(&m_finished);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
m_stats.ticks = __rdtsc() - start;
|
m_stats.ticks = __rdtsc() - start;
|
||||||
|
|
||||||
for(unsigned i=0; i<size(); ++i)
|
for(size_t i = 0; i < size(); i++)
|
||||||
{
|
{
|
||||||
GSRasterizerStats s;
|
GSRasterizerStats s;
|
||||||
|
|
||||||
(*this)[i]->GetStats(s);
|
(*this)[i]->GetStats(s);
|
||||||
|
|
||||||
m_stats.pixels += s.pixels;
|
m_stats.pixels += s.pixels;
|
||||||
m_stats.prims = max(m_stats.prims, s.prims);
|
m_stats.prims = std::max<int>(m_stats.prims, s.prims);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,7 @@
|
||||||
#include "pthread.h"
|
#include "pthread.h"
|
||||||
#include "semaphore.h"
|
#include "semaphore.h"
|
||||||
|
|
||||||
__aligned16 class GSRasterizerData
|
__aligned32 class GSRasterizerData
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
GSVector4i scissor;
|
GSVector4i scissor;
|
||||||
|
@ -50,7 +50,7 @@ public:
|
||||||
virtual void PrintStats() = 0;
|
virtual void PrintStats() = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
class IDrawScanline : public GSAlignedClass<16>
|
class IDrawScanline : public GSAlignedClass<32>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef void (__fastcall *DrawScanlineStaticPtr)(int right, int left, int top, const GSVertexSW& v);
|
typedef void (__fastcall *DrawScanlineStaticPtr)(int right, int left, int top, const GSVertexSW& v);
|
||||||
|
@ -153,9 +153,11 @@ public:
|
||||||
push_back(new GSRasterizer(new DS(parent, 0), 0, threads));
|
push_back(new GSRasterizer(new DS(parent, 0), 0, threads));
|
||||||
|
|
||||||
m_syncstart = 0;
|
m_syncstart = 0;
|
||||||
|
|
||||||
for(int i = 1; i < threads; i++)
|
for(int i = 1; i < threads; i++)
|
||||||
{
|
{
|
||||||
push_back(new GSRasterizerMT(new DS(parent, i), i, threads, m_finished, m_sync));
|
push_back(new GSRasterizerMT(new DS(parent, i), i, threads, m_finished, m_sync));
|
||||||
|
|
||||||
_interlockedbittestandset(&m_syncstart, i);
|
_interlockedbittestandset(&m_syncstart, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
|
|
||||||
GSRenderer::GSRenderer()
|
GSRenderer::GSRenderer()
|
||||||
: GSState()
|
: GSState()
|
||||||
, m_tex_buff( (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 16) )
|
, m_tex_buff((uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32))
|
||||||
, m_vt(this)
|
, m_vt(this)
|
||||||
, m_dev(NULL)
|
, m_dev(NULL)
|
||||||
, m_shader(0)
|
, m_shader(0)
|
||||||
|
@ -61,9 +61,10 @@ GSRenderer::~GSRenderer()
|
||||||
m_dev->Reset(1, 1, GSDevice::Windowed);
|
m_dev->Reset(1, 1, GSDevice::Windowed);
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
_aligned_free( m_tex_buff );
|
_aligned_free(m_tex_buff);
|
||||||
|
|
||||||
delete m_dev;
|
delete m_dev;
|
||||||
|
|
||||||
DeleteCriticalSection(&m_pGSsetTitle_Crit);
|
DeleteCriticalSection(&m_pGSsetTitle_Crit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -220,13 +221,6 @@ bool GSRenderer::Merge(int field)
|
||||||
r.bottom = r.top + y;
|
r.bottom = r.top + y;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Breaks the blur filter, and actually makes games blurry again.
|
|
||||||
// This might have to do with earlier changes to device size detection.
|
|
||||||
/*if(blurdetected && i == 1)
|
|
||||||
{
|
|
||||||
r += GSVector4i(0, 1).xyxy();
|
|
||||||
}*/
|
|
||||||
|
|
||||||
GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy();
|
GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy();
|
||||||
|
|
||||||
src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy();
|
src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy();
|
||||||
|
@ -380,8 +374,8 @@ void GSRenderer::VSync(int field)
|
||||||
|
|
||||||
EnterCriticalSection(&m_pGSsetTitle_Crit);
|
EnterCriticalSection(&m_pGSsetTitle_Crit);
|
||||||
|
|
||||||
strncpy(m_GStitleInfoBuffer, s.c_str(), ArraySize(m_GStitleInfoBuffer)-1);
|
strncpy(m_GStitleInfoBuffer, s.c_str(), countof(m_GStitleInfoBuffer) - 1);
|
||||||
m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer)-1] = 0;// make sure null terminated even if text overflows
|
m_GStitleInfoBuffer[sizeof(m_GStitleInfoBuffer) - 1] = 0;// make sure null terminated even if text overflows
|
||||||
|
|
||||||
LeaveCriticalSection(&m_pGSsetTitle_Crit);
|
LeaveCriticalSection(&m_pGSsetTitle_Crit);
|
||||||
}
|
}
|
||||||
|
|
|
@ -158,12 +158,13 @@ protected:
|
||||||
void GrowVertexBuffer()
|
void GrowVertexBuffer()
|
||||||
{
|
{
|
||||||
m_maxcount = max(10000, m_maxcount * 3/2);
|
m_maxcount = max(10000, m_maxcount * 3/2);
|
||||||
m_vertices = (Vertex*)_aligned_realloc(m_vertices, sizeof(Vertex) * m_maxcount, 16);
|
m_vertices = (Vertex*)_aligned_realloc(m_vertices, sizeof(Vertex) * m_maxcount, 32);
|
||||||
m_maxcount -= 100;
|
m_maxcount -= 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a pointer to the drawing vertex. Can return NULL!
|
// Returns a pointer to the drawing vertex. Can return NULL!
|
||||||
template<uint32 prim> __fi Vertex* BaseDrawingKick(int& count)
|
|
||||||
|
template<uint32 prim> __forceinline Vertex* DrawingKick(bool skip, int& count)
|
||||||
{
|
{
|
||||||
switch(prim)
|
switch(prim)
|
||||||
{
|
{
|
||||||
|
@ -237,7 +238,7 @@ protected:
|
||||||
__assume(0);
|
__assume(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return v;
|
return !skip ? v : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void Draw() = 0;
|
virtual void Draw() = 0;
|
||||||
|
|
|
@ -249,7 +249,9 @@ public:
|
||||||
ps_sel.clr1 = om_bsel.IsCLR1();
|
ps_sel.clr1 = om_bsel.IsCLR1();
|
||||||
ps_sel.fba = context->FBA.FBA;
|
ps_sel.fba = context->FBA.FBA;
|
||||||
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
|
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
|
||||||
|
|
||||||
if (UserHacks_AlphaHack) ps_sel.aout = 1;
|
if (UserHacks_AlphaHack) ps_sel.aout = 1;
|
||||||
|
|
||||||
if(PRIM->FGE)
|
if(PRIM->FGE)
|
||||||
{
|
{
|
||||||
ps_sel.fog = 1;
|
ps_sel.fog = 1;
|
||||||
|
|
|
@ -38,20 +38,20 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRendererDX11::DoVertexKick()
|
template<uint32 prim, uint32 tme, uint32 fst>
|
||||||
|
void GSRendererDX11::VertexKick(bool skip)
|
||||||
{
|
{
|
||||||
const bool tme = PRIM->TME;
|
|
||||||
const bool fst = PRIM->FST;
|
|
||||||
|
|
||||||
GSVertexHW11& dst = m_vl.AddTail();
|
GSVertexHW11& dst = m_vl.AddTail();
|
||||||
|
|
||||||
dst.vi[0] = m_v.vi[0];
|
dst.vi[0] = m_v.vi[0];
|
||||||
dst.vi[1] = m_v.vi[1];
|
dst.vi[1] = m_v.vi[1];
|
||||||
|
|
||||||
#ifdef USE_UPSCALE_HACKS
|
#ifdef USE_UPSCALE_HACKS
|
||||||
|
|
||||||
if(tme && fst)
|
if(tme && fst)
|
||||||
{
|
{
|
||||||
//GSVector4::storel(&dst.ST, m_v.GetUV());
|
//GSVector4::storel(&dst.ST, m_v.GetUV());
|
||||||
|
|
||||||
int Udiff = 0;
|
int Udiff = 0;
|
||||||
int Vdiff = 0;
|
int Vdiff = 0;
|
||||||
int Uadjust = 0;
|
int Uadjust = 0;
|
||||||
|
@ -95,6 +95,7 @@ void GSRendererDX11::DoVertexKick()
|
||||||
else if (Vdiff <= 1) { Vadjust = 1; }
|
else if (Vdiff <= 1) { Vadjust = 1; }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dst.ST.S = (float)m_v.UV.U - Uadjust;
|
dst.ST.S = (float)m_v.UV.U - Uadjust;
|
||||||
dst.ST.T = (float)m_v.UV.V - Vadjust;
|
dst.ST.T = (float)m_v.UV.V - Vadjust;
|
||||||
}
|
}
|
||||||
|
@ -104,104 +105,103 @@ void GSRendererDX11::DoVertexKick()
|
||||||
//dst.XYZ.X += 5;
|
//dst.XYZ.X += 5;
|
||||||
//dst.XYZ.Y += 5;
|
//dst.XYZ.Y += 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
if(tme && fst)
|
if(tme && fst)
|
||||||
{
|
{
|
||||||
GSVector4::storel(&dst.ST, m_v.GetUV());
|
GSVector4::storel(&dst.ST, m_v.GetUV());
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
|
|
||||||
template< uint32 prim >
|
int count = 0;
|
||||||
void GSRendererDX11::DrawingKick( bool skip )
|
|
||||||
{
|
if(GSVertexHW11* v = DrawingKick<prim>(skip, count))
|
||||||
int count;
|
|
||||||
|
|
||||||
GSVertexHW11* v = BaseDrawingKick<prim>(count);
|
|
||||||
if (skip || !v) return;
|
|
||||||
|
|
||||||
GSVector4i scissor = m_context->scissor.dx10;
|
|
||||||
|
|
||||||
GSVector4i pmin, pmax;
|
|
||||||
|
|
||||||
#if _M_SSE >= 0x401
|
|
||||||
|
|
||||||
GSVector4i v0, v1, v2;
|
|
||||||
|
|
||||||
switch(prim)
|
|
||||||
{
|
{
|
||||||
case GS_POINTLIST:
|
GSVector4i scissor = m_context->scissor.dx10;
|
||||||
v0 = GSVector4i::load((int)v[0].p.xy).upl16();
|
|
||||||
pmin = v0;
|
GSVector4i pmin, pmax;
|
||||||
pmax = v0;
|
|
||||||
break;
|
#if _M_SSE >= 0x401
|
||||||
case GS_LINELIST:
|
|
||||||
case GS_LINESTRIP:
|
GSVector4i v0, v1, v2;
|
||||||
case GS_SPRITE:
|
|
||||||
v0 = GSVector4i::load((int)v[0].p.xy);
|
switch(prim)
|
||||||
v1 = GSVector4i::load((int)v[1].p.xy);
|
{
|
||||||
pmin = v0.min_u16(v1).upl16();
|
case GS_POINTLIST:
|
||||||
pmax = v0.max_u16(v1).upl16();
|
v0 = GSVector4i::load((int)v[0].p.xy).upl16();
|
||||||
break;
|
pmin = v0;
|
||||||
case GS_TRIANGLELIST:
|
pmax = v0;
|
||||||
case GS_TRIANGLESTRIP:
|
break;
|
||||||
case GS_TRIANGLEFAN:
|
case GS_LINELIST:
|
||||||
v0 = GSVector4i::load((int)v[0].p.xy);
|
case GS_LINESTRIP:
|
||||||
v1 = GSVector4i::load((int)v[1].p.xy);
|
case GS_SPRITE:
|
||||||
v2 = GSVector4i::load((int)v[2].p.xy);
|
v0 = GSVector4i::load((int)v[0].p.xy);
|
||||||
pmin = v0.min_u16(v1).min_u16(v2).upl16();
|
v1 = GSVector4i::load((int)v[1].p.xy);
|
||||||
pmax = v0.max_u16(v1).max_u16(v2).upl16();
|
pmin = v0.min_u16(v1).upl16();
|
||||||
break;
|
pmax = v0.max_u16(v1).upl16();
|
||||||
|
break;
|
||||||
|
case GS_TRIANGLELIST:
|
||||||
|
case GS_TRIANGLESTRIP:
|
||||||
|
case GS_TRIANGLEFAN:
|
||||||
|
v0 = GSVector4i::load((int)v[0].p.xy);
|
||||||
|
v1 = GSVector4i::load((int)v[1].p.xy);
|
||||||
|
v2 = GSVector4i::load((int)v[2].p.xy);
|
||||||
|
pmin = v0.min_u16(v1).min_u16(v2).upl16();
|
||||||
|
pmax = v0.max_u16(v1).max_u16(v2).upl16();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
switch(prim)
|
||||||
|
{
|
||||||
|
case GS_POINTLIST:
|
||||||
|
pmin.x = v[0].p.x;
|
||||||
|
pmin.y = v[0].p.y;
|
||||||
|
pmax.x = v[0].p.x;
|
||||||
|
pmax.y = v[0].p.y;
|
||||||
|
break;
|
||||||
|
case GS_LINELIST:
|
||||||
|
case GS_LINESTRIP:
|
||||||
|
case GS_SPRITE:
|
||||||
|
pmin.x = std::min<uint16>(v[0].p.x, v[1].p.x);
|
||||||
|
pmin.y = std::min<uint16>(v[0].p.y, v[1].p.y);
|
||||||
|
pmax.x = std::max<uint16>(v[0].p.x, v[1].p.x);
|
||||||
|
pmax.y = std::max<uint16>(v[0].p.y, v[1].p.y);
|
||||||
|
break;
|
||||||
|
case GS_TRIANGLELIST:
|
||||||
|
case GS_TRIANGLESTRIP:
|
||||||
|
case GS_TRIANGLEFAN:
|
||||||
|
pmin.x = std::min<uint16>(std::min<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
|
||||||
|
pmin.y = std::min<uint16>(std::min<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
|
||||||
|
pmax.x = std::max<uint16>(std::max<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
|
||||||
|
pmax.y = std::max<uint16>(std::max<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy());
|
||||||
|
|
||||||
|
switch(prim)
|
||||||
|
{
|
||||||
|
case GS_TRIANGLELIST:
|
||||||
|
case GS_TRIANGLESTRIP:
|
||||||
|
case GS_TRIANGLEFAN:
|
||||||
|
case GS_SPRITE:
|
||||||
|
test |= pmin == pmax;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(test.mask() & 0xff)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_count += count;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
switch(prim)
|
|
||||||
{
|
|
||||||
case GS_POINTLIST:
|
|
||||||
pmin.x = v[0].p.x;
|
|
||||||
pmin.y = v[0].p.y;
|
|
||||||
pmax.x = v[0].p.x;
|
|
||||||
pmax.y = v[0].p.y;
|
|
||||||
break;
|
|
||||||
case GS_LINELIST:
|
|
||||||
case GS_LINESTRIP:
|
|
||||||
case GS_SPRITE:
|
|
||||||
pmin.x = std::min<uint16>(v[0].p.x, v[1].p.x);
|
|
||||||
pmin.y = std::min<uint16>(v[0].p.y, v[1].p.y);
|
|
||||||
pmax.x = std::max<uint16>(v[0].p.x, v[1].p.x);
|
|
||||||
pmax.y = std::max<uint16>(v[0].p.y, v[1].p.y);
|
|
||||||
break;
|
|
||||||
case GS_TRIANGLELIST:
|
|
||||||
case GS_TRIANGLESTRIP:
|
|
||||||
case GS_TRIANGLEFAN:
|
|
||||||
pmin.x = std::min<uint16>(std::min<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
|
|
||||||
pmin.y = std::min<uint16>(std::min<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
|
|
||||||
pmax.x = std::max<uint16>(std::max<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
|
|
||||||
pmax.y = std::max<uint16>(std::max<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy());
|
|
||||||
|
|
||||||
switch(prim)
|
|
||||||
{
|
|
||||||
case GS_TRIANGLELIST:
|
|
||||||
case GS_TRIANGLESTRIP:
|
|
||||||
case GS_TRIANGLEFAN:
|
|
||||||
case GS_SPRITE:
|
|
||||||
test |= pmin == pmax;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(test.mask() & 0xff)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_count += count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRendererDX11::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
void GSRendererDX11::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||||
|
|
|
@ -36,8 +36,5 @@ public:
|
||||||
|
|
||||||
bool CreateDevice(GSDevice* dev);
|
bool CreateDevice(GSDevice* dev);
|
||||||
|
|
||||||
template<uint32 prim>
|
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
|
||||||
void DrawingKick( bool skip );
|
|
||||||
|
|
||||||
void DoVertexKick();
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -57,11 +57,9 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRendererDX9::DoVertexKick()
|
template<uint32 prim, uint32 tme, uint32 fst>
|
||||||
|
void GSRendererDX9::VertexKick(bool skip)
|
||||||
{
|
{
|
||||||
const bool tme = PRIM->TME;
|
|
||||||
const bool fst = PRIM->FST;
|
|
||||||
|
|
||||||
GSVertexHW9& dst = m_vl.AddTail();
|
GSVertexHW9& dst = m_vl.AddTail();
|
||||||
|
|
||||||
dst.p = GSVector4(((GSVector4i)m_v.XYZ).upl16());
|
dst.p = GSVector4(((GSVector4i)m_v.XYZ).upl16());
|
||||||
|
@ -142,92 +140,90 @@ void GSRendererDX9::DoVertexKick()
|
||||||
|
|
||||||
dst.c0 = m_v.RGBAQ.u32[0];
|
dst.c0 = m_v.RGBAQ.u32[0];
|
||||||
dst.c1 = m_v.FOG.u32[1];
|
dst.c1 = m_v.FOG.u32[1];
|
||||||
}
|
|
||||||
|
|
||||||
template< uint32 prim >
|
//
|
||||||
void GSRendererDX9::DrawingKick( bool skip )
|
|
||||||
{
|
|
||||||
int count;
|
|
||||||
|
|
||||||
// BaseDrawingKick can never return NULL here because the DrawingKick function
|
// BaseDrawingKick can never return NULL here because the DrawingKick function
|
||||||
// tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only
|
// tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only
|
||||||
// condition where this function would return NULL).
|
// condition where this function would return NULL).
|
||||||
|
|
||||||
GSVertexHW9* v = BaseDrawingKick<prim>(count);
|
int count = 0;
|
||||||
if (skip || !v) return;
|
|
||||||
|
if(GSVertexHW9* v = DrawingKick<prim>(skip, count))
|
||||||
GSVector4 scissor = m_context->scissor.dx9;
|
|
||||||
|
|
||||||
GSVector4 pmin, pmax;
|
|
||||||
|
|
||||||
switch(prim)
|
|
||||||
{
|
{
|
||||||
case GS_POINTLIST:
|
GSVector4 scissor = m_context->scissor.dx9;
|
||||||
pmin = v[0].p;
|
|
||||||
pmax = v[0].p;
|
GSVector4 pmin, pmax;
|
||||||
break;
|
|
||||||
case GS_LINELIST:
|
switch(prim)
|
||||||
case GS_LINESTRIP:
|
{
|
||||||
case GS_SPRITE:
|
case GS_POINTLIST:
|
||||||
pmin = v[0].p.min(v[1].p);
|
pmin = v[0].p;
|
||||||
pmax = v[0].p.max(v[1].p);
|
pmax = v[0].p;
|
||||||
break;
|
break;
|
||||||
case GS_TRIANGLELIST:
|
case GS_LINELIST:
|
||||||
case GS_TRIANGLESTRIP:
|
case GS_LINESTRIP:
|
||||||
case GS_TRIANGLEFAN:
|
case GS_SPRITE:
|
||||||
pmin = v[0].p.min(v[1].p).min(v[2].p);
|
pmin = v[0].p.min(v[1].p);
|
||||||
pmax = v[0].p.max(v[1].p).max(v[2].p);
|
pmax = v[0].p.max(v[1].p);
|
||||||
break;
|
break;
|
||||||
|
case GS_TRIANGLELIST:
|
||||||
|
case GS_TRIANGLESTRIP:
|
||||||
|
case GS_TRIANGLEFAN:
|
||||||
|
pmin = v[0].p.min(v[1].p).min(v[2].p);
|
||||||
|
pmax = v[0].p.max(v[1].p).max(v[2].p);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
|
||||||
|
|
||||||
|
switch(prim)
|
||||||
|
{
|
||||||
|
case GS_TRIANGLELIST:
|
||||||
|
case GS_TRIANGLESTRIP:
|
||||||
|
case GS_TRIANGLEFAN:
|
||||||
|
case GS_SPRITE:
|
||||||
|
test |= pmin == pmax;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(test.mask() & 3)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch(prim)
|
||||||
|
{
|
||||||
|
case GS_POINTLIST:
|
||||||
|
break;
|
||||||
|
case GS_LINELIST:
|
||||||
|
case GS_LINESTRIP:
|
||||||
|
if(PRIM->IIP == 0) {v[0].c0 = v[1].c0;}
|
||||||
|
break;
|
||||||
|
case GS_TRIANGLELIST:
|
||||||
|
case GS_TRIANGLESTRIP:
|
||||||
|
case GS_TRIANGLEFAN:
|
||||||
|
if(PRIM->IIP == 0) {v[0].c0 = v[1].c0 = v[2].c0;}
|
||||||
|
break;
|
||||||
|
case GS_SPRITE:
|
||||||
|
if(PRIM->IIP == 0) {v[0].c0 = v[1].c0;}
|
||||||
|
v[0].p.z = v[1].p.z;
|
||||||
|
v[0].p.w = v[1].p.w;
|
||||||
|
v[0].c1 = v[1].c1;
|
||||||
|
v[2] = v[1];
|
||||||
|
v[3] = v[1];
|
||||||
|
v[1].p.y = v[0].p.y;
|
||||||
|
v[1].t.y = v[0].t.y;
|
||||||
|
v[2].p.x = v[0].p.x;
|
||||||
|
v[2].t.x = v[0].t.x;
|
||||||
|
v[4] = v[1];
|
||||||
|
v[5] = v[2];
|
||||||
|
count += 4;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_count += count;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
|
|
||||||
|
|
||||||
switch(prim)
|
|
||||||
{
|
|
||||||
case GS_TRIANGLELIST:
|
|
||||||
case GS_TRIANGLESTRIP:
|
|
||||||
case GS_TRIANGLEFAN:
|
|
||||||
case GS_SPRITE:
|
|
||||||
test |= pmin == pmax;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(test.mask() & 3)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch(prim)
|
|
||||||
{
|
|
||||||
case GS_POINTLIST:
|
|
||||||
break;
|
|
||||||
case GS_LINELIST:
|
|
||||||
case GS_LINESTRIP:
|
|
||||||
if(PRIM->IIP == 0) {v[0].c0 = v[1].c0;}
|
|
||||||
break;
|
|
||||||
case GS_TRIANGLELIST:
|
|
||||||
case GS_TRIANGLESTRIP:
|
|
||||||
case GS_TRIANGLEFAN:
|
|
||||||
if(PRIM->IIP == 0) {v[0].c0 = v[1].c0 = v[2].c0;}
|
|
||||||
break;
|
|
||||||
case GS_SPRITE:
|
|
||||||
if(PRIM->IIP == 0) {v[0].c0 = v[1].c0;}
|
|
||||||
v[0].p.z = v[1].p.z;
|
|
||||||
v[0].p.w = v[1].p.w;
|
|
||||||
v[0].c1 = v[1].c1;
|
|
||||||
v[2] = v[1];
|
|
||||||
v[3] = v[1];
|
|
||||||
v[1].p.y = v[0].p.y;
|
|
||||||
v[1].t.y = v[0].t.y;
|
|
||||||
v[2].p.x = v[0].p.x;
|
|
||||||
v[2].t.x = v[0].t.x;
|
|
||||||
v[4] = v[1];
|
|
||||||
v[5] = v[2];
|
|
||||||
count += 4;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_count += count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRendererDX9::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
void GSRendererDX9::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||||
|
|
|
@ -43,8 +43,5 @@ public:
|
||||||
|
|
||||||
bool CreateDevice(GSDevice* dev);
|
bool CreateDevice(GSDevice* dev);
|
||||||
|
|
||||||
template<uint32 prim>
|
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
|
||||||
void DrawingKick( bool skip );
|
|
||||||
|
|
||||||
void DoVertexKick();
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -43,10 +43,7 @@ public:
|
||||||
InitVertexKick<GSRendererNull>();
|
InitVertexKick<GSRendererNull>();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~GSRendererNull() {}
|
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip)
|
||||||
|
{
|
||||||
template<uint32 prim>
|
}
|
||||||
void DrawingKick( bool skip ) {}
|
|
||||||
|
|
||||||
void DoVertexKick() {}
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -94,6 +94,7 @@ GSTexture* GSRendererSW::GetOutput(int i)
|
||||||
if(m_dev->ResizeTexture(&m_texture[i], w, h))
|
if(m_dev->ResizeTexture(&m_texture[i], w, h))
|
||||||
{
|
{
|
||||||
uint8* buff = GetTextureBufferLock();
|
uint8* buff = GetTextureBufferLock();
|
||||||
|
|
||||||
static int pitch = 1024 * 4;
|
static int pitch = 1024 * 4;
|
||||||
|
|
||||||
GSVector4i r(0, 0, w, h);
|
GSVector4i r(0, 0, w, h);
|
||||||
|
@ -113,6 +114,7 @@ GSTexture* GSRendererSW::GetOutput(int i)
|
||||||
|
|
||||||
s_n++;
|
s_n++;
|
||||||
}
|
}
|
||||||
|
|
||||||
ReleaseTextureBufferLock();
|
ReleaseTextureBufferLock();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -427,24 +429,22 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRendererSW::DoVertexKick()
|
template<uint32 prim, uint32 tme, uint32 fst>
|
||||||
|
void GSRendererSW::VertexKick(bool skip)
|
||||||
{
|
{
|
||||||
const bool tme = PRIM->TME;
|
const GSDrawingContext* context = m_context;
|
||||||
const bool fst = PRIM->FST;
|
|
||||||
|
|
||||||
const GSDrawingContext& context = *m_context;
|
|
||||||
|
|
||||||
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]);
|
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]);
|
||||||
|
|
||||||
xy = xy.insert16<3>(m_v.FOG.F);
|
xy = xy.insert16<3>(m_v.FOG.F);
|
||||||
xy = xy.upl16();
|
xy = xy.upl16();
|
||||||
xy -= context.XYOFFSET;
|
xy -= context->XYOFFSET;
|
||||||
|
|
||||||
GSVertexSW& dst = m_vl.AddTail();
|
GSVertexSW v;
|
||||||
|
|
||||||
dst.p = GSVector4(xy) * g_pos_scale;
|
v.p = GSVector4(xy) * g_pos_scale;
|
||||||
|
|
||||||
dst.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7);
|
v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7);
|
||||||
|
|
||||||
if(tme)
|
if(tme)
|
||||||
{
|
{
|
||||||
|
@ -452,37 +452,31 @@ void GSRendererSW::DoVertexKick()
|
||||||
|
|
||||||
if(fst)
|
if(fst)
|
||||||
{
|
{
|
||||||
dst.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
|
v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
|
||||||
q = 1.0f;
|
q = 1.0f;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
dst.t = GSVector4(m_v.ST.S, m_v.ST.T);
|
v.t = GSVector4(m_v.ST.S, m_v.ST.T);
|
||||||
dst.t *= GSVector4(0x10000 << context.TEX0.TW, 0x10000 << context.TEX0.TH);
|
v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
|
||||||
q = m_v.RGBAQ.Q;
|
q = m_v.RGBAQ.Q;
|
||||||
}
|
}
|
||||||
|
|
||||||
dst.t = dst.t.xyxy(GSVector4::load(q));
|
v.t = v.t.xyxy(GSVector4::load(q));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GSVertexSW& dst = m_vl.AddTail();
|
||||||
|
|
||||||
|
dst = v;
|
||||||
|
|
||||||
dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion
|
dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion
|
||||||
}
|
|
||||||
|
|
||||||
|
int count = 0;
|
||||||
template< uint32 prim >
|
|
||||||
void GSRendererSW::DrawingKick( bool skip )
|
if(GSVertexSW* v = DrawingKick<prim>(skip, count))
|
||||||
{
|
|
||||||
int count;
|
|
||||||
|
|
||||||
// BaseDrawingKick can never return NULL here because the DrawingKick function
|
|
||||||
// tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only
|
|
||||||
// condition where this function would return NULL).
|
|
||||||
|
|
||||||
GSVertexSW* v = BaseDrawingKick<prim>(count);
|
|
||||||
if (skip || !v) return;
|
|
||||||
|
|
||||||
if(!m_dump)
|
|
||||||
{
|
{
|
||||||
|
if(!m_dump)
|
||||||
|
{
|
||||||
GSVector4 pmin, pmax;
|
GSVector4 pmin, pmax;
|
||||||
|
|
||||||
switch(prim)
|
switch(prim)
|
||||||
|
@ -505,7 +499,7 @@ void GSRendererSW::DrawingKick( bool skip )
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSVector4 scissor = m_context->scissor.ex;
|
GSVector4 scissor = context->scissor.ex;
|
||||||
|
|
||||||
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
|
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
|
||||||
|
|
||||||
|
@ -529,77 +523,77 @@ void GSRendererSW::DrawingKick( bool skip )
|
||||||
test |= tmp == tmp.yxwz();
|
test |= tmp == tmp.yxwz();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(test.mask() & 3)
|
if(test.mask() & 3)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch(prim)
|
|
||||||
{
|
|
||||||
case GS_POINTLIST:
|
|
||||||
break;
|
|
||||||
case GS_LINELIST:
|
|
||||||
case GS_LINESTRIP:
|
|
||||||
if(PRIM->IIP == 0) {v[0].c = v[1].c;}
|
|
||||||
break;
|
|
||||||
case GS_TRIANGLELIST:
|
|
||||||
case GS_TRIANGLESTRIP:
|
|
||||||
case GS_TRIANGLEFAN:
|
|
||||||
if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;}
|
|
||||||
break;
|
|
||||||
case GS_SPRITE:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(m_count < 30 && m_count >= 3)
|
|
||||||
{
|
|
||||||
GSVertexSW* v = &m_vertices[m_count - 3];
|
|
||||||
|
|
||||||
int tl = 0;
|
|
||||||
int br = 0;
|
|
||||||
|
|
||||||
bool isquad = false;
|
|
||||||
|
|
||||||
switch(prim)
|
switch(prim)
|
||||||
{
|
{
|
||||||
|
case GS_POINTLIST:
|
||||||
|
break;
|
||||||
|
case GS_LINELIST:
|
||||||
|
case GS_LINESTRIP:
|
||||||
|
if(PRIM->IIP == 0) {v[0].c = v[1].c;}
|
||||||
|
break;
|
||||||
|
case GS_TRIANGLELIST:
|
||||||
case GS_TRIANGLESTRIP:
|
case GS_TRIANGLESTRIP:
|
||||||
case GS_TRIANGLEFAN:
|
case GS_TRIANGLEFAN:
|
||||||
case GS_TRIANGLELIST:
|
if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;}
|
||||||
isquad = GSVertexSW::IsQuad(v, tl, br);
|
break;
|
||||||
|
case GS_SPRITE:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(isquad)
|
if(m_count < 30 && m_count >= 3)
|
||||||
{
|
{
|
||||||
m_count -= 3;
|
GSVertexSW* v = &m_vertices[m_count - 3];
|
||||||
|
|
||||||
if(m_count > 0)
|
int tl = 0;
|
||||||
|
int br = 0;
|
||||||
|
|
||||||
|
bool isquad = false;
|
||||||
|
|
||||||
|
switch(prim)
|
||||||
{
|
{
|
||||||
tl += m_count;
|
case GS_TRIANGLESTRIP:
|
||||||
br += m_count;
|
case GS_TRIANGLEFAN:
|
||||||
|
case GS_TRIANGLELIST:
|
||||||
Flush();
|
isquad = GSVertexSW::IsQuad(v, tl, br);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(tl != 0) m_vertices[0] = m_vertices[tl];
|
if(isquad)
|
||||||
if(br != 1) m_vertices[1] = m_vertices[br];
|
{
|
||||||
|
m_count -= 3;
|
||||||
|
|
||||||
m_count = 2;
|
if(m_count > 0)
|
||||||
|
{
|
||||||
|
tl += m_count;
|
||||||
|
br += m_count;
|
||||||
|
|
||||||
uint32 tmp = PRIM->PRIM;
|
Flush();
|
||||||
PRIM->PRIM = GS_SPRITE;
|
}
|
||||||
|
|
||||||
Flush();
|
if(tl != 0) m_vertices[0] = m_vertices[tl];
|
||||||
|
if(br != 1) m_vertices[1] = m_vertices[br];
|
||||||
|
|
||||||
PRIM->PRIM = tmp;
|
m_count = 2;
|
||||||
|
|
||||||
m_perfmon.Put(GSPerfMon::Quad, 1);
|
uint32 tmp = PRIM->PRIM;
|
||||||
|
PRIM->PRIM = GS_SPRITE;
|
||||||
|
|
||||||
return;
|
Flush();
|
||||||
|
|
||||||
|
PRIM->PRIM = tmp;
|
||||||
|
|
||||||
|
m_perfmon.Put(GSPerfMon::Quad, 1);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
m_count += count;
|
m_count += count;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,13 +47,6 @@ public:
|
||||||
GSRendererSW();
|
GSRendererSW();
|
||||||
virtual ~GSRendererSW();
|
virtual ~GSRendererSW();
|
||||||
|
|
||||||
template<uint32 prim>
|
template<uint32 prim, uint32 tme, uint32 fst>
|
||||||
void DrawingKick( bool skip );
|
void VertexKick(bool skip);
|
||||||
|
|
||||||
void DoVertexKick();
|
|
||||||
|
|
||||||
void InvalidateTextureCache()
|
|
||||||
{
|
|
||||||
m_tc->RemoveAll();
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -99,7 +99,7 @@ union GSScanlineSelector
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
__aligned16 struct GSScanlineParam
|
__aligned32 struct GSScanlineParam
|
||||||
{
|
{
|
||||||
GSScanlineSelector sel;
|
GSScanlineSelector sel;
|
||||||
|
|
||||||
|
@ -115,7 +115,7 @@ __aligned16 struct GSScanlineParam
|
||||||
uint32 fm, zm;
|
uint32 fm, zm;
|
||||||
};
|
};
|
||||||
|
|
||||||
__aligned16 struct GSScanlineEnvironment
|
__aligned32 struct GSScanlineEnvironment
|
||||||
{
|
{
|
||||||
void* vm;
|
void* vm;
|
||||||
const void* tex;
|
const void* tex;
|
||||||
|
|
|
@ -88,7 +88,9 @@ void GSSettingsDlg::OnInit()
|
||||||
ComboBoxAppend(IDC_RESOLUTION, "Please select...", (LPARAM)&m_modes.back(), true);
|
ComboBoxAppend(IDC_RESOLUTION, "Please select...", (LPARAM)&m_modes.back(), true);
|
||||||
|
|
||||||
CComPtr<IDirect3D9> d3d;
|
CComPtr<IDirect3D9> d3d;
|
||||||
|
|
||||||
d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION));
|
d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION));
|
||||||
|
|
||||||
if(d3d)
|
if(d3d)
|
||||||
{
|
{
|
||||||
uint32 w = theApp.GetConfig("ModeWidth", 0);
|
uint32 w = theApp.GetConfig("ModeWidth", 0);
|
||||||
|
@ -151,10 +153,13 @@ void GSSettingsDlg::OnInit()
|
||||||
SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_SETRANGE, 0, MAKELPARAM(8192, 256));
|
SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_SETRANGE, 0, MAKELPARAM(8192, 256));
|
||||||
SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("resy", 1024), 0));
|
SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("resy", 1024), 0));
|
||||||
|
|
||||||
int r=theApp.GetConfig("Renderer", 0);
|
int r = theApp.GetConfig("Renderer", 0);
|
||||||
if (r>=0 && r<=2){//DX9
|
|
||||||
|
if(r >= 0 && r <= 2) // DX9
|
||||||
|
{
|
||||||
GSDevice9::ForceValidMsaaConfig();
|
GSDevice9::ForceValidMsaaConfig();
|
||||||
m_lastValidMsaa=theApp.GetConfig("msaa", 0);
|
|
||||||
|
m_lastValidMsaa = theApp.GetConfig("msaa", 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
SendMessage(GetDlgItem(m_hWnd, IDC_MSAA), UDM_SETRANGE, 0, MAKELPARAM(16, 0));
|
SendMessage(GetDlgItem(m_hWnd, IDC_MSAA), UDM_SETRANGE, 0, MAKELPARAM(16, 0));
|
||||||
|
|
|
@ -48,7 +48,14 @@ void GSSetupPrimCodeGenerator::Generate()
|
||||||
{
|
{
|
||||||
for(int i = 0; i < 5; i++)
|
for(int i = 0; i < 5; i++)
|
||||||
{
|
{
|
||||||
movaps(Xmm(3 + i), xmmword[&m_shift[i]]);
|
if(m_cpu.has(util::Cpu::tAVX))
|
||||||
|
{
|
||||||
|
vmovaps(Xmm(3 + i), ptr[&m_shift[i]]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
movaps(Xmm(3 + i), ptr[&m_shift[i]]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,113 +75,221 @@ void GSSetupPrimCodeGenerator::Depth()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!m_sel.sprite)
|
if(m_cpu.has(util::Cpu::tAVX))
|
||||||
{
|
{
|
||||||
// GSVector4 t = dscan.p;
|
if(!m_sel.sprite)
|
||||||
|
|
||||||
movaps(xmm0, xmmword[edx + 16]);
|
|
||||||
|
|
||||||
if(m_en.f)
|
|
||||||
{
|
{
|
||||||
// GSVector4 df = p.wwww();
|
// GSVector4 t = dscan.p;
|
||||||
|
|
||||||
movaps(xmm1, xmm0);
|
vmovaps(xmm0, ptr[edx + 16]);
|
||||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
|
||||||
|
|
||||||
// m_env.d4.f = GSVector4i(df * 4.0f).xxzzlh();
|
if(m_en.f)
|
||||||
|
|
||||||
movaps(xmm2, xmm1);
|
|
||||||
mulps(xmm2, xmm3);
|
|
||||||
cvttps2dq(xmm2, xmm2);
|
|
||||||
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
|
||||||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
|
||||||
movdqa(xmmword[&m_env.d4.f], xmm2);
|
|
||||||
|
|
||||||
for(int i = 0; i < 4; i++)
|
|
||||||
{
|
{
|
||||||
// m_env.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
// GSVector4 df = p.wwww();
|
||||||
|
|
||||||
movaps(xmm2, xmm1);
|
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||||
mulps(xmm2, Xmm(4 + i));
|
|
||||||
cvttps2dq(xmm2, xmm2);
|
// m_env.d4.f = GSVector4i(df * 4.0f).xxzzlh();
|
||||||
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
|
||||||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
vmulps(xmm2, xmm1, xmm3);
|
||||||
movdqa(xmmword[&m_env.d[i].f], xmm2);
|
vcvttps2dq(xmm2, xmm2);
|
||||||
|
vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
|
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
|
vmovdqa(ptr[&m_env.d4.f], xmm2);
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
// m_env.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||||
|
|
||||||
|
vmulps(xmm2, xmm1, Xmm(4 + i));
|
||||||
|
vcvttps2dq(xmm2, xmm2);
|
||||||
|
vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
|
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
|
vmovdqa(ptr[&m_env.d[i].f], xmm2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(m_en.z)
|
||||||
|
{
|
||||||
|
// GSVector4 dz = p.zzzz();
|
||||||
|
|
||||||
|
vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
|
||||||
|
// m_env.d4.z = dz * 4.0f;
|
||||||
|
|
||||||
|
vmulps(xmm1, xmm0, xmm3);
|
||||||
|
vmovdqa(ptr[&m_env.d4.z], xmm1);
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
// m_env.d[i].z = dz * m_shift[i];
|
||||||
|
|
||||||
|
vmulps(xmm1, xmm0, Xmm(4 + i));
|
||||||
|
vmovdqa(ptr[&m_env.d[i].z], xmm1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
if(m_en.z)
|
|
||||||
{
|
{
|
||||||
// GSVector4 dz = p.zzzz();
|
// GSVector4 p = vertices[0].p;
|
||||||
|
|
||||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
vmovaps(xmm0, ptr[ecx + 16]);
|
||||||
|
|
||||||
// m_env.d4.z = dz * 4.0f;
|
if(m_en.f)
|
||||||
|
|
||||||
movaps(xmm1, xmm0);
|
|
||||||
mulps(xmm1, xmm3);
|
|
||||||
movdqa(xmmword[&m_env.d4.z], xmm1);
|
|
||||||
|
|
||||||
for(int i = 0; i < 4; i++)
|
|
||||||
{
|
{
|
||||||
// m_env.d[i].z = dz * m_shift[i];
|
// m_env.p.f = GSVector4i(p).zzzzh().zzzz();
|
||||||
|
|
||||||
movaps(xmm1, xmm0);
|
vcvttps2dq(xmm1, xmm0);
|
||||||
mulps(xmm1, Xmm(4 + i));
|
vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
movdqa(xmmword[&m_env.d[i].z], xmm1);
|
vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
vmovdqa(ptr[&m_env.p.f], xmm1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(m_en.z)
|
||||||
|
{
|
||||||
|
// GSVector4 z = p.zzzz();
|
||||||
|
|
||||||
|
vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
|
||||||
|
if(m_sel.zoverflow)
|
||||||
|
{
|
||||||
|
// m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||||
|
|
||||||
|
static const float half = 0.5f;
|
||||||
|
|
||||||
|
vmovss(xmm1, dword[&half]);
|
||||||
|
vshufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
vmulps(xmm1, xmm0);
|
||||||
|
vcvttps2dq(xmm1, xmm1);
|
||||||
|
vpslld(xmm1, 1);
|
||||||
|
|
||||||
|
vcvttps2dq(xmm0, xmm0);
|
||||||
|
vpcmpeqd(xmm2, xmm2);
|
||||||
|
vpsrld(xmm2, 31);
|
||||||
|
vpand(xmm0, xmm2);
|
||||||
|
|
||||||
|
vpor(xmm0, xmm1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// m_env.p.z = GSVector4i(z);
|
||||||
|
|
||||||
|
vcvttps2dq(xmm0, xmm0);
|
||||||
|
}
|
||||||
|
|
||||||
|
vmovdqa(ptr[&m_env.p.z], xmm0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// GSVector4 p = vertices[0].p;
|
if(!m_sel.sprite)
|
||||||
|
|
||||||
movaps(xmm0, xmmword[ecx + 16]);
|
|
||||||
|
|
||||||
if(m_en.f)
|
|
||||||
{
|
{
|
||||||
// m_env.p.f = GSVector4i(p).zzzzh().zzzz();
|
// GSVector4 t = dscan.p;
|
||||||
|
|
||||||
movaps(xmm1, xmm0);
|
movaps(xmm0, ptr[edx + 16]);
|
||||||
cvttps2dq(xmm1, xmm1);
|
|
||||||
pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
if(m_en.f)
|
||||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
{
|
||||||
movdqa(xmmword[&m_env.p.f], xmm1);
|
// GSVector4 df = p.wwww();
|
||||||
|
|
||||||
|
movaps(xmm1, xmm0);
|
||||||
|
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||||
|
|
||||||
|
// m_env.d4.f = GSVector4i(df * 4.0f).xxzzlh();
|
||||||
|
|
||||||
|
movaps(xmm2, xmm1);
|
||||||
|
mulps(xmm2, xmm3);
|
||||||
|
cvttps2dq(xmm2, xmm2);
|
||||||
|
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
|
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
|
movdqa(ptr[&m_env.d4.f], xmm2);
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
// m_env.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||||
|
|
||||||
|
movaps(xmm2, xmm1);
|
||||||
|
mulps(xmm2, Xmm(4 + i));
|
||||||
|
cvttps2dq(xmm2, xmm2);
|
||||||
|
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
|
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
|
movdqa(ptr[&m_env.d[i].f], xmm2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(m_en.z)
|
||||||
|
{
|
||||||
|
// GSVector4 dz = p.zzzz();
|
||||||
|
|
||||||
|
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
|
||||||
|
// m_env.d4.z = dz * 4.0f;
|
||||||
|
|
||||||
|
movaps(xmm1, xmm0);
|
||||||
|
mulps(xmm1, xmm3);
|
||||||
|
movdqa(ptr[&m_env.d4.z], xmm1);
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
// m_env.d[i].z = dz * m_shift[i];
|
||||||
|
|
||||||
|
movaps(xmm1, xmm0);
|
||||||
|
mulps(xmm1, Xmm(4 + i));
|
||||||
|
movdqa(ptr[&m_env.d[i].z], xmm1);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
if(m_en.z)
|
|
||||||
{
|
{
|
||||||
// GSVector4 z = p.zzzz();
|
// GSVector4 p = vertices[0].p;
|
||||||
|
|
||||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
movaps(xmm0, ptr[ecx + 16]);
|
||||||
|
|
||||||
if(m_sel.zoverflow)
|
if(m_en.f)
|
||||||
{
|
{
|
||||||
// m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
// m_env.p.f = GSVector4i(p).zzzzh().zzzz();
|
||||||
|
|
||||||
static const float half = 0.5f;
|
cvttps2dq(xmm1, xmm0);
|
||||||
|
pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
movss(xmm1, dword[&half]);
|
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
movdqa(ptr[&m_env.p.f], xmm1);
|
||||||
mulps(xmm1, xmm0);
|
|
||||||
cvttps2dq(xmm1, xmm1);
|
|
||||||
pslld(xmm1, 1);
|
|
||||||
|
|
||||||
cvttps2dq(xmm0, xmm0);
|
|
||||||
pcmpeqd(xmm2, xmm2);
|
|
||||||
psrld(xmm2, 31);
|
|
||||||
pand(xmm0, xmm2);
|
|
||||||
|
|
||||||
por(xmm0, xmm1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// m_env.p.z = GSVector4i(z);
|
|
||||||
|
|
||||||
cvttps2dq(xmm0, xmm0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
movdqa(xmmword[&m_env.p.z], xmm0);
|
if(m_en.z)
|
||||||
|
{
|
||||||
|
// GSVector4 z = p.zzzz();
|
||||||
|
|
||||||
|
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
|
||||||
|
if(m_sel.zoverflow)
|
||||||
|
{
|
||||||
|
// m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||||
|
|
||||||
|
static const float half = 0.5f;
|
||||||
|
|
||||||
|
movss(xmm1, dword[&half]);
|
||||||
|
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
mulps(xmm1, xmm0);
|
||||||
|
cvttps2dq(xmm1, xmm1);
|
||||||
|
pslld(xmm1, 1);
|
||||||
|
|
||||||
|
cvttps2dq(xmm0, xmm0);
|
||||||
|
pcmpeqd(xmm2, xmm2);
|
||||||
|
psrld(xmm2, 31);
|
||||||
|
pand(xmm0, xmm2);
|
||||||
|
|
||||||
|
por(xmm0, xmm1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// m_env.p.z = GSVector4i(z);
|
||||||
|
|
||||||
|
cvttps2dq(xmm0, xmm0);
|
||||||
|
}
|
||||||
|
|
||||||
|
movdqa(ptr[&m_env.p.z], xmm0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -186,64 +301,129 @@ void GSSetupPrimCodeGenerator::Texture()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// GSVector4 t = dscan.t;
|
if(m_cpu.has(util::Cpu::tAVX))
|
||||||
|
|
||||||
movaps(xmm0, xmmword[edx + 32]);
|
|
||||||
|
|
||||||
movaps(xmm1, xmm0);
|
|
||||||
mulps(xmm1, xmm3);
|
|
||||||
|
|
||||||
if(m_sel.fst)
|
|
||||||
{
|
{
|
||||||
// m_env.d4.st = GSVector4i(t * 4.0f);
|
// GSVector4 t = dscan.t;
|
||||||
|
|
||||||
cvttps2dq(xmm1, xmm1);
|
vmovaps(xmm0, ptr[edx + 32]);
|
||||||
movdqa(xmmword[&m_env.d4.st], xmm1);
|
|
||||||
|
vmulps(xmm1, xmm0, xmm3);
|
||||||
|
|
||||||
|
if(m_sel.fst)
|
||||||
|
{
|
||||||
|
// m_env.d4.st = GSVector4i(t * 4.0f);
|
||||||
|
|
||||||
|
vcvttps2dq(xmm1, xmm1);
|
||||||
|
vmovdqa(ptr[&m_env.d4.st], xmm1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// m_env.d4.stq = t * 4.0f;
|
||||||
|
|
||||||
|
vmovaps(ptr[&m_env.d4.stq], xmm1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||||
|
{
|
||||||
|
// GSVector4 ds = t.xxxx();
|
||||||
|
// GSVector4 dt = t.yyyy();
|
||||||
|
// GSVector4 dq = t.zzzz();
|
||||||
|
|
||||||
|
vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j));
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
// GSVector4 v = ds/dt * m_shift[i];
|
||||||
|
|
||||||
|
vmulps(xmm2, xmm1, Xmm(4 + i));
|
||||||
|
|
||||||
|
if(m_sel.fst)
|
||||||
|
{
|
||||||
|
// m_env.d[i].si/ti = GSVector4i(v);
|
||||||
|
|
||||||
|
vcvttps2dq(xmm2, xmm2);
|
||||||
|
|
||||||
|
switch(j)
|
||||||
|
{
|
||||||
|
case 0: vmovdqa(ptr[&m_env.d[i].si], xmm2); break;
|
||||||
|
case 1: vmovdqa(ptr[&m_env.d[i].ti], xmm2); break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// m_env.d[i].s/t/q = v;
|
||||||
|
|
||||||
|
switch(j)
|
||||||
|
{
|
||||||
|
case 0: vmovaps(ptr[&m_env.d[i].s], xmm2); break;
|
||||||
|
case 1: vmovaps(ptr[&m_env.d[i].t], xmm2); break;
|
||||||
|
case 2: vmovaps(ptr[&m_env.d[i].q], xmm2); break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// m_env.d4.stq = t * 4.0f;
|
// GSVector4 t = dscan.t;
|
||||||
|
|
||||||
movaps(xmmword[&m_env.d4.stq], xmm1);
|
movaps(xmm0, ptr[edx + 32]);
|
||||||
}
|
|
||||||
|
|
||||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
|
||||||
{
|
|
||||||
// GSVector4 ds = t.xxxx();
|
|
||||||
// GSVector4 dt = t.yyyy();
|
|
||||||
// GSVector4 dq = t.zzzz();
|
|
||||||
|
|
||||||
movaps(xmm1, xmm0);
|
movaps(xmm1, xmm0);
|
||||||
shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j));
|
mulps(xmm1, xmm3);
|
||||||
|
|
||||||
for(int i = 0; i < 4; i++)
|
if(m_sel.fst)
|
||||||
{
|
{
|
||||||
// GSVector4 v = ds/dt * m_shift[i];
|
// m_env.d4.st = GSVector4i(t * 4.0f);
|
||||||
|
|
||||||
movaps(xmm2, xmm1);
|
cvttps2dq(xmm1, xmm1);
|
||||||
mulps(xmm2, Xmm(4 + i));
|
movdqa(ptr[&m_env.d4.st], xmm1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// m_env.d4.stq = t * 4.0f;
|
||||||
|
|
||||||
if(m_sel.fst)
|
movaps(ptr[&m_env.d4.stq], xmm1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||||
|
{
|
||||||
|
// GSVector4 ds = t.xxxx();
|
||||||
|
// GSVector4 dt = t.yyyy();
|
||||||
|
// GSVector4 dq = t.zzzz();
|
||||||
|
|
||||||
|
movaps(xmm1, xmm0);
|
||||||
|
shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j));
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
// m_env.d[i].si/ti = GSVector4i(v);
|
// GSVector4 v = ds/dt * m_shift[i];
|
||||||
|
|
||||||
cvttps2dq(xmm2, xmm2);
|
movaps(xmm2, xmm1);
|
||||||
|
mulps(xmm2, Xmm(4 + i));
|
||||||
|
|
||||||
switch(j)
|
if(m_sel.fst)
|
||||||
{
|
{
|
||||||
case 0: movdqa(xmmword[&m_env.d[i].si], xmm2); break;
|
// m_env.d[i].si/ti = GSVector4i(v);
|
||||||
case 1: movdqa(xmmword[&m_env.d[i].ti], xmm2); break;
|
|
||||||
|
cvttps2dq(xmm2, xmm2);
|
||||||
|
|
||||||
|
switch(j)
|
||||||
|
{
|
||||||
|
case 0: movdqa(ptr[&m_env.d[i].si], xmm2); break;
|
||||||
|
case 1: movdqa(ptr[&m_env.d[i].ti], xmm2); break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
else
|
||||||
else
|
|
||||||
{
|
|
||||||
// m_env.d[i].s/t/q = v;
|
|
||||||
|
|
||||||
switch(j)
|
|
||||||
{
|
{
|
||||||
case 0: movaps(xmmword[&m_env.d[i].s], xmm2); break;
|
// m_env.d[i].s/t/q = v;
|
||||||
case 1: movaps(xmmword[&m_env.d[i].t], xmm2); break;
|
|
||||||
case 2: movaps(xmmword[&m_env.d[i].q], xmm2); break;
|
switch(j)
|
||||||
|
{
|
||||||
|
case 0: movaps(ptr[&m_env.d[i].s], xmm2); break;
|
||||||
|
case 1: movaps(ptr[&m_env.d[i].t], xmm2); break;
|
||||||
|
case 2: movaps(ptr[&m_env.d[i].q], xmm2); break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -257,113 +437,217 @@ void GSSetupPrimCodeGenerator::Color()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(m_sel.iip)
|
if(m_cpu.has(util::Cpu::tAVX))
|
||||||
{
|
{
|
||||||
// GSVector4 c = dscan.c;
|
if(m_sel.iip)
|
||||||
|
|
||||||
movaps(xmm0, xmmword[edx]);
|
|
||||||
movaps(xmm1, xmm0);
|
|
||||||
|
|
||||||
// m_env.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
|
||||||
|
|
||||||
movaps(xmm2, xmm0);
|
|
||||||
mulps(xmm2, xmm3);
|
|
||||||
cvttps2dq(xmm2, xmm2);
|
|
||||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0));
|
|
||||||
packssdw(xmm2, xmm2);
|
|
||||||
movdqa(xmmword[&m_env.d4.c], xmm2);
|
|
||||||
|
|
||||||
// xmm3 is not needed anymore
|
|
||||||
|
|
||||||
// GSVector4 dr = c.xxxx();
|
|
||||||
// GSVector4 db = c.zzzz();
|
|
||||||
|
|
||||||
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
|
||||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
|
||||||
|
|
||||||
for(int i = 0; i < 4; i++)
|
|
||||||
{
|
{
|
||||||
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
|
// GSVector4 c = dscan.c;
|
||||||
|
|
||||||
movaps(xmm2, xmm0);
|
vmovaps(xmm0, ptr[edx]);
|
||||||
mulps(xmm2, Xmm(4 + i));
|
|
||||||
cvttps2dq(xmm2, xmm2);
|
|
||||||
packssdw(xmm2, xmm2);
|
|
||||||
|
|
||||||
// GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
|
// m_env.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
||||||
|
|
||||||
movaps(xmm3, xmm1);
|
vmulps(xmm1, xmm0, xmm3);
|
||||||
mulps(xmm3, Xmm(4 + i));
|
vcvttps2dq(xmm1, xmm1);
|
||||||
cvttps2dq(xmm3, xmm3);
|
vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
packssdw(xmm3, xmm3);
|
vpackssdw(xmm1, xmm1);
|
||||||
|
vmovdqa(ptr[&m_env.d4.c], xmm1);
|
||||||
|
|
||||||
// m_env.d[i].rb = r.upl16(b);
|
// xmm3 is not needed anymore
|
||||||
|
|
||||||
punpcklwd(xmm2, xmm3);
|
// GSVector4 dr = c.xxxx();
|
||||||
movdqa(xmmword[&m_env.d[i].rb], xmm2);
|
// GSVector4 db = c.zzzz();
|
||||||
|
|
||||||
|
vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
|
||||||
|
|
||||||
|
vmulps(xmm0, xmm2, Xmm(4 + i));
|
||||||
|
vcvttps2dq(xmm0, xmm0);
|
||||||
|
vpackssdw(xmm0, xmm0);
|
||||||
|
|
||||||
|
// GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
|
||||||
|
|
||||||
|
vmulps(xmm1, xmm3, Xmm(4 + i));
|
||||||
|
vcvttps2dq(xmm1, xmm1);
|
||||||
|
vpackssdw(xmm1, xmm1);
|
||||||
|
|
||||||
|
// m_env.d[i].rb = r.upl16(b);
|
||||||
|
|
||||||
|
vpunpcklwd(xmm0, xmm1);
|
||||||
|
vmovdqa(ptr[&m_env.d[i].rb], xmm0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// GSVector4 c = dscan.c;
|
||||||
|
|
||||||
|
vmovaps(xmm0, ptr[edx]); // not enough regs, have to reload it
|
||||||
|
|
||||||
|
// GSVector4 dg = c.yyyy();
|
||||||
|
// GSVector4 da = c.wwww();
|
||||||
|
|
||||||
|
vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||||
|
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
|
||||||
|
|
||||||
|
vmulps(xmm0, xmm2, Xmm(4 + i));
|
||||||
|
vcvttps2dq(xmm0, xmm0);
|
||||||
|
vpackssdw(xmm0, xmm0);
|
||||||
|
|
||||||
|
// GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
|
||||||
|
|
||||||
|
vmulps(xmm1, xmm3, Xmm(4 + i));
|
||||||
|
vcvttps2dq(xmm1, xmm1);
|
||||||
|
vpackssdw(xmm1, xmm1);
|
||||||
|
|
||||||
|
// m_env.d[i].ga = g.upl16(a);
|
||||||
|
|
||||||
|
vpunpcklwd(xmm0, xmm1);
|
||||||
|
vmovdqa(ptr[&m_env.d[i].ga], xmm0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
// GSVector4 c = dscan.c;
|
|
||||||
|
|
||||||
movaps(xmm0, xmmword[edx]); // not enough regs, have to reload it
|
|
||||||
movaps(xmm1, xmm0);
|
|
||||||
|
|
||||||
// GSVector4 dg = c.yyyy();
|
|
||||||
// GSVector4 da = c.wwww();
|
|
||||||
|
|
||||||
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
|
||||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
|
||||||
|
|
||||||
for(int i = 0; i < 4; i++)
|
|
||||||
{
|
{
|
||||||
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
|
// GSVector4i c = GSVector4i(vertices[0].c);
|
||||||
|
|
||||||
movaps(xmm2, xmm0);
|
vcvttps2dq(xmm0, ptr[ecx]);
|
||||||
mulps(xmm2, Xmm(4 + i));
|
|
||||||
cvttps2dq(xmm2, xmm2);
|
|
||||||
packssdw(xmm2, xmm2);
|
|
||||||
|
|
||||||
// GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
|
// c = c.upl16(c.zwxy());
|
||||||
|
|
||||||
movaps(xmm3, xmm1);
|
vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2));
|
||||||
mulps(xmm3, Xmm(4 + i));
|
vpunpcklwd(xmm0, xmm1);
|
||||||
cvttps2dq(xmm3, xmm3);
|
|
||||||
packssdw(xmm3, xmm3);
|
|
||||||
|
|
||||||
// m_env.d[i].ga = g.upl16(a);
|
// if(!tme) c = c.srl16(7);
|
||||||
|
|
||||||
punpcklwd(xmm2, xmm3);
|
if(m_sel.tfx == TFX_NONE)
|
||||||
movdqa(xmmword[&m_env.d[i].ga], xmm2);
|
{
|
||||||
|
vpsrlw(xmm0, 7);
|
||||||
|
}
|
||||||
|
|
||||||
|
// m_env.c.rb = c.xxxx();
|
||||||
|
// m_env.c.ga = c.zzzz();
|
||||||
|
|
||||||
|
vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
|
||||||
|
vmovdqa(ptr[&m_env.c.rb], xmm1);
|
||||||
|
vmovdqa(ptr[&m_env.c.ga], xmm2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// GSVector4i c = GSVector4i(vertices[0].c);
|
if(m_sel.iip)
|
||||||
|
|
||||||
movaps(xmm0, xmmword[ecx]);
|
|
||||||
cvttps2dq(xmm0, xmm0);
|
|
||||||
|
|
||||||
// c = c.upl16(c.zwxy());
|
|
||||||
|
|
||||||
movdqa(xmm1, xmm0);
|
|
||||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(1, 0, 3, 2));
|
|
||||||
punpcklwd(xmm0, xmm1);
|
|
||||||
|
|
||||||
// if(!tme) c = c.srl16(7);
|
|
||||||
|
|
||||||
if(m_sel.tfx == TFX_NONE)
|
|
||||||
{
|
{
|
||||||
psrlw(xmm0, 7);
|
// GSVector4 c = dscan.c;
|
||||||
|
|
||||||
|
movaps(xmm0, ptr[edx]);
|
||||||
|
movaps(xmm1, xmm0);
|
||||||
|
|
||||||
|
// m_env.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
||||||
|
|
||||||
|
movaps(xmm2, xmm0);
|
||||||
|
mulps(xmm2, xmm3);
|
||||||
|
cvttps2dq(xmm2, xmm2);
|
||||||
|
pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
|
packssdw(xmm2, xmm2);
|
||||||
|
movdqa(ptr[&m_env.d4.c], xmm2);
|
||||||
|
|
||||||
|
// xmm3 is not needed anymore
|
||||||
|
|
||||||
|
// GSVector4 dr = c.xxxx();
|
||||||
|
// GSVector4 db = c.zzzz();
|
||||||
|
|
||||||
|
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
|
||||||
|
|
||||||
|
movaps(xmm2, xmm0);
|
||||||
|
mulps(xmm2, Xmm(4 + i));
|
||||||
|
cvttps2dq(xmm2, xmm2);
|
||||||
|
packssdw(xmm2, xmm2);
|
||||||
|
|
||||||
|
// GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
|
||||||
|
|
||||||
|
movaps(xmm3, xmm1);
|
||||||
|
mulps(xmm3, Xmm(4 + i));
|
||||||
|
cvttps2dq(xmm3, xmm3);
|
||||||
|
packssdw(xmm3, xmm3);
|
||||||
|
|
||||||
|
// m_env.d[i].rb = r.upl16(b);
|
||||||
|
|
||||||
|
punpcklwd(xmm2, xmm3);
|
||||||
|
movdqa(ptr[&m_env.d[i].rb], xmm2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// GSVector4 c = dscan.c;
|
||||||
|
|
||||||
|
movaps(xmm0, ptr[edx]); // not enough regs, have to reload it
|
||||||
|
movaps(xmm1, xmm0);
|
||||||
|
|
||||||
|
// GSVector4 dg = c.yyyy();
|
||||||
|
// GSVector4 da = c.wwww();
|
||||||
|
|
||||||
|
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||||
|
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
|
||||||
|
|
||||||
|
movaps(xmm2, xmm0);
|
||||||
|
mulps(xmm2, Xmm(4 + i));
|
||||||
|
cvttps2dq(xmm2, xmm2);
|
||||||
|
packssdw(xmm2, xmm2);
|
||||||
|
|
||||||
|
// GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
|
||||||
|
|
||||||
|
movaps(xmm3, xmm1);
|
||||||
|
mulps(xmm3, Xmm(4 + i));
|
||||||
|
cvttps2dq(xmm3, xmm3);
|
||||||
|
packssdw(xmm3, xmm3);
|
||||||
|
|
||||||
|
// m_env.d[i].ga = g.upl16(a);
|
||||||
|
|
||||||
|
punpcklwd(xmm2, xmm3);
|
||||||
|
movdqa(ptr[&m_env.d[i].ga], xmm2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// GSVector4i c = GSVector4i(vertices[0].c);
|
||||||
|
|
||||||
// m_env.c.rb = c.xxxx();
|
movaps(xmm0, ptr[ecx]);
|
||||||
// m_env.c.ga = c.zzzz();
|
cvttps2dq(xmm0, xmm0);
|
||||||
|
|
||||||
movdqa(xmm1, xmm0);
|
// c = c.upl16(c.zwxy());
|
||||||
pshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
|
||||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
pshufd(xmm1, xmm0, _MM_SHUFFLE(1, 0, 3, 2));
|
||||||
movdqa(xmmword[&m_env.c.rb], xmm0);
|
punpcklwd(xmm0, xmm1);
|
||||||
movdqa(xmmword[&m_env.c.ga], xmm1);
|
|
||||||
|
// if(!tme) c = c.srl16(7);
|
||||||
|
|
||||||
|
if(m_sel.tfx == TFX_NONE)
|
||||||
|
{
|
||||||
|
psrlw(xmm0, 7);
|
||||||
|
}
|
||||||
|
|
||||||
|
// m_env.c.rb = c.xxxx();
|
||||||
|
// m_env.c.ga = c.zzzz();
|
||||||
|
|
||||||
|
pshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
pshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
|
||||||
|
movdqa(ptr[&m_env.c.rb], xmm1);
|
||||||
|
movdqa(ptr[&m_env.c.ga], xmm2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -84,7 +84,7 @@ GSState::GSState()
|
||||||
m_sssize += sizeof(m_tr.x);
|
m_sssize += sizeof(m_tr.x);
|
||||||
m_sssize += sizeof(m_tr.y);
|
m_sssize += sizeof(m_tr.y);
|
||||||
m_sssize += m_mem.m_vmsize;
|
m_sssize += m_mem.m_vmsize;
|
||||||
m_sssize += (sizeof(m_path[0].tag) + sizeof(m_path[0].reg)) * ArraySize(m_path);
|
m_sssize += (sizeof(m_path[0].tag) + sizeof(m_path[0].reg)) * countof(m_path);
|
||||||
m_sssize += sizeof(m_q);
|
m_sssize += sizeof(m_q);
|
||||||
|
|
||||||
PRIM = &m_env.PRIM;
|
PRIM = &m_env.PRIM;
|
||||||
|
@ -103,6 +103,7 @@ GSState::~GSState()
|
||||||
void GSState::SetRegsMem(uint8* basemem)
|
void GSState::SetRegsMem(uint8* basemem)
|
||||||
{
|
{
|
||||||
ASSERT(basemem);
|
ASSERT(basemem);
|
||||||
|
|
||||||
m_regs = (GSPrivRegSet*)basemem;
|
m_regs = (GSPrivRegSet*)basemem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -111,84 +112,82 @@ void GSState::SetIrqCallback(void (*irq)())
|
||||||
m_irq = irq;
|
m_irq = irq;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::SetMultithreaded( bool isMT )
|
void GSState::SetMultithreaded(bool mt)
|
||||||
{
|
{
|
||||||
// Some older versions of PCSX2 didn't properly set the irq callback to NULL
|
// Some older versions of PCSX2 didn't properly set the irq callback to NULL
|
||||||
// in multithreaded mode (possibly because ZeroGS itself would assert in such
|
// in multithreaded mode (possibly because ZeroGS itself would assert in such
|
||||||
// cases), and didn't bind them to a dummy callback either. PCSX2 handles all
|
// cases), and didn't bind them to a dummy callback either. PCSX2 handles all
|
||||||
// IRQs internally when multithreaded anyway -- so let's ignore them here:
|
// IRQs internally when multithreaded anyway -- so let's ignore them here:
|
||||||
|
|
||||||
m_mt = isMT;
|
m_mt = mt;
|
||||||
if( isMT )
|
|
||||||
|
if(mt)
|
||||||
{
|
{
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerNull;
|
m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerNull;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerNull;
|
m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerNull;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerNull;
|
m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerNull;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerSIGNAL;
|
m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerSIGNAL;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerFINISH;
|
m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerFINISH;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerLABEL;
|
m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerLABEL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::SetFrameSkip(int skip)
|
void GSState::SetFrameSkip(int skip)
|
||||||
{
|
{
|
||||||
if(m_frameskip == skip) return;
|
if(m_frameskip == skip) return;
|
||||||
|
|
||||||
m_frameskip = skip;
|
m_frameskip = skip;
|
||||||
|
|
||||||
if(skip)
|
if(skip)
|
||||||
{
|
{
|
||||||
#if !UsePackedRegSwitch
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP;
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP;
|
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerNOP;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerNOP;
|
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerNOP;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerNOP;
|
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerNOP;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerNOP;
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP;
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerNOP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerNOP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerNOP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerNOP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerNOP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerNOP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerNOP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerNOP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerNOP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerNOP;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#if !UsePackedRegSwitch
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2;
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2;
|
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
|
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
|
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZF3;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZF3;
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZ3;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZ3;
|
|
||||||
#endif
|
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
|
||||||
|
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
|
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
|
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3;
|
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3;
|
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
|
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::Reset()
|
void GSState::Reset()
|
||||||
{
|
{
|
||||||
memset(&m_path[0], 0, sizeof(m_path[0]) * ArraySize(m_path));
|
memset(&m_path[0], 0, sizeof(m_path[0]) * countof(m_path));
|
||||||
memset(&m_v, 0, sizeof(m_v));
|
memset(&m_v, 0, sizeof(m_v));
|
||||||
|
|
||||||
// PRIM = &m_env.PRIM;
|
// PRIM = &m_env.PRIM;
|
||||||
|
@ -203,88 +202,86 @@ void GSState::Reset()
|
||||||
|
|
||||||
void GSState::ResetHandlers()
|
void GSState::ResetHandlers()
|
||||||
{
|
{
|
||||||
#if !UsePackedRegSwitch
|
|
||||||
for(int i = 0; i < countof(m_fpGIFPackedRegHandlers); i++)
|
for(int i = 0; i < countof(m_fpGIFPackedRegHandlers); i++)
|
||||||
{
|
{
|
||||||
m_fpGIFPackedRegHandlers[i] = &GSState::GIFPackedRegHandlerNull;
|
m_fpGIFPackedRegHandlers[i] = &GSState::GIFPackedRegHandlerNull;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = (GIFPackedRegHandler)&GSState::GIFRegHandlerPRIM;
|
m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = (GIFPackedRegHandler)&GSState::GIFRegHandlerPRIM;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA;
|
m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ;
|
m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV;
|
m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2;
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2;
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = (GIFPackedRegHandler)&GSState::GIFRegHandlerTEX0<0>;
|
m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = (GIFPackedRegHandler)&GSState::GIFRegHandlerTEX0<0>;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = (GIFPackedRegHandler)&GSState::GIFRegHandlerTEX0<1>;
|
m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = (GIFPackedRegHandler)&GSState::GIFRegHandlerTEX0<1>;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
|
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
|
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
|
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZF3;
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZF3;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZ3;
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZ3;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D;
|
m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D;
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP;
|
m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP;
|
||||||
#endif
|
|
||||||
|
|
||||||
for(int i = 0; i < countof(m_fpGIFRegHandlers); i++)
|
for(int i = 0; i < countof(m_fpGIFRegHandlers); i++)
|
||||||
{
|
{
|
||||||
m_fpGIFRegHandlers[i] = &GSState::GIFRegHandlerNull;
|
m_fpGIFRegHandlers[i] = &GSState::GIFRegHandlerNull;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
|
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
|
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
|
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
|
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_2] = &GSState::GIFRegHandlerCLAMP<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_2] = &GSState::GIFRegHandlerCLAMP<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_FOG] = &GSState::GIFRegHandlerFOG;
|
m_fpGIFRegHandlers[GIF_A_D_REG_FOG] = &GSState::GIFRegHandlerFOG;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_NOP] = &GSState::GIFRegHandlerNOP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_NOP] = &GSState::GIFRegHandlerNOP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_1] = &GSState::GIFRegHandlerTEX1<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_1] = &GSState::GIFRegHandlerTEX1<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_2] = &GSState::GIFRegHandlerTEX1<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_2] = &GSState::GIFRegHandlerTEX1<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_1] = &GSState::GIFRegHandlerTEX2<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_1] = &GSState::GIFRegHandlerTEX2<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_2] = &GSState::GIFRegHandlerTEX2<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_2] = &GSState::GIFRegHandlerTEX2<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_1] = &GSState::GIFRegHandlerXYOFFSET<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_1] = &GSState::GIFRegHandlerXYOFFSET<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_2] = &GSState::GIFRegHandlerXYOFFSET<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_2] = &GSState::GIFRegHandlerXYOFFSET<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
|
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
|
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEXCLUT] = &GSState::GIFRegHandlerTEXCLUT;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEXCLUT] = &GSState::GIFRegHandlerTEXCLUT;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_SCANMSK] = &GSState::GIFRegHandlerSCANMSK;
|
m_fpGIFRegHandlers[GIF_A_D_REG_SCANMSK] = &GSState::GIFRegHandlerSCANMSK;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_1] = &GSState::GIFRegHandlerMIPTBP1<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_1] = &GSState::GIFRegHandlerMIPTBP1<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_2] = &GSState::GIFRegHandlerMIPTBP1<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_2] = &GSState::GIFRegHandlerMIPTBP1<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_1] = &GSState::GIFRegHandlerMIPTBP2<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_1] = &GSState::GIFRegHandlerMIPTBP2<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_2] = &GSState::GIFRegHandlerMIPTBP2<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_2] = &GSState::GIFRegHandlerMIPTBP2<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEXA] = &GSState::GIFRegHandlerTEXA;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEXA] = &GSState::GIFRegHandlerTEXA;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_FOGCOL] = &GSState::GIFRegHandlerFOGCOL;
|
m_fpGIFRegHandlers[GIF_A_D_REG_FOGCOL] = &GSState::GIFRegHandlerFOGCOL;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEXFLUSH] = &GSState::GIFRegHandlerTEXFLUSH;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEXFLUSH] = &GSState::GIFRegHandlerTEXFLUSH;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_1] = &GSState::GIFRegHandlerSCISSOR<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_1] = &GSState::GIFRegHandlerSCISSOR<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_2] = &GSState::GIFRegHandlerSCISSOR<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_2] = &GSState::GIFRegHandlerSCISSOR<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_1] = &GSState::GIFRegHandlerALPHA<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_1] = &GSState::GIFRegHandlerALPHA<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_2] = &GSState::GIFRegHandlerALPHA<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_2] = &GSState::GIFRegHandlerALPHA<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_DIMX] = &GSState::GIFRegHandlerDIMX;
|
m_fpGIFRegHandlers[GIF_A_D_REG_DIMX] = &GSState::GIFRegHandlerDIMX;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_DTHE] = &GSState::GIFRegHandlerDTHE;
|
m_fpGIFRegHandlers[GIF_A_D_REG_DTHE] = &GSState::GIFRegHandlerDTHE;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_COLCLAMP] = &GSState::GIFRegHandlerCOLCLAMP;
|
m_fpGIFRegHandlers[GIF_A_D_REG_COLCLAMP] = &GSState::GIFRegHandlerCOLCLAMP;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEST_1] = &GSState::GIFRegHandlerTEST<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEST_1] = &GSState::GIFRegHandlerTEST<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEST_2] = &GSState::GIFRegHandlerTEST<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TEST_2] = &GSState::GIFRegHandlerTEST<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_PABE] = &GSState::GIFRegHandlerPABE;
|
m_fpGIFRegHandlers[GIF_A_D_REG_PABE] = &GSState::GIFRegHandlerPABE;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_FBA_1] = &GSState::GIFRegHandlerFBA<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_FBA_1] = &GSState::GIFRegHandlerFBA<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_FBA_2] = &GSState::GIFRegHandlerFBA<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_FBA_2] = &GSState::GIFRegHandlerFBA<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_1] = &GSState::GIFRegHandlerFRAME<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_1] = &GSState::GIFRegHandlerFRAME<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_2] = &GSState::GIFRegHandlerFRAME<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_2] = &GSState::GIFRegHandlerFRAME<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_1] = &GSState::GIFRegHandlerZBUF<0>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_1] = &GSState::GIFRegHandlerZBUF<0>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_2] = &GSState::GIFRegHandlerZBUF<1>;
|
m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_2] = &GSState::GIFRegHandlerZBUF<1>;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_BITBLTBUF] = &GSState::GIFRegHandlerBITBLTBUF;
|
m_fpGIFRegHandlers[GIF_A_D_REG_BITBLTBUF] = &GSState::GIFRegHandlerBITBLTBUF;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TRXPOS] = &GSState::GIFRegHandlerTRXPOS;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TRXPOS] = &GSState::GIFRegHandlerTRXPOS;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TRXREG] = &GSState::GIFRegHandlerTRXREG;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TRXREG] = &GSState::GIFRegHandlerTRXREG;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_TRXDIR] = &GSState::GIFRegHandlerTRXDIR;
|
m_fpGIFRegHandlers[GIF_A_D_REG_TRXDIR] = &GSState::GIFRegHandlerTRXDIR;
|
||||||
m_fpGIFRegHandlers[GIF_A_D_REG_HWREG] = &GSState::GIFRegHandlerHWREG;
|
m_fpGIFRegHandlers[GIF_A_D_REG_HWREG] = &GSState::GIFRegHandlerHWREG;
|
||||||
|
|
||||||
SetMultithreaded( m_mt );
|
SetMultithreaded(m_mt);
|
||||||
}
|
}
|
||||||
|
|
||||||
GSVector4i GSState::GetDisplayRect(int i)
|
GSVector4i GSState::GetDisplayRect(int i)
|
||||||
|
@ -375,22 +372,24 @@ int GSState::GetFPS()
|
||||||
|
|
||||||
// GIFPackedRegHandler*
|
// GIFPackedRegHandler*
|
||||||
|
|
||||||
void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* r)
|
__forceinline void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* r)
|
||||||
{
|
{
|
||||||
// ASSERT(0);
|
// ASSERT(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void __fi GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r)
|
__forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x301
|
#if _M_SSE >= 0x301
|
||||||
|
|
||||||
GSVector4i mask = GSVector4i::load(0x0c080400);
|
GSVector4i mask = GSVector4i::load(0x0c080400);
|
||||||
GSVector4i v = GSVector4i::load<false>(r).shuffle8(mask);
|
GSVector4i v = GSVector4i::load<false>(r).shuffle8(mask);
|
||||||
|
|
||||||
m_v.RGBAQ.u32[0] = (uint32)GSVector4i::store(v);
|
m_v.RGBAQ.u32[0] = (uint32)GSVector4i::store(v);
|
||||||
|
|
||||||
#elif _M_SSE >= 0x200
|
#elif _M_SSE >= 0x200
|
||||||
|
|
||||||
GSVector4i v = GSVector4i::load<false>(r) & GSVector4i::x000000ff();
|
GSVector4i v = GSVector4i::load<false>(r) & GSVector4i::x000000ff();
|
||||||
|
|
||||||
m_v.RGBAQ.u32[0] = v.rgba32();
|
m_v.RGBAQ.u32[0] = v.rgba32();
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
@ -405,7 +404,7 @@ void __fi GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r)
|
||||||
m_v.RGBAQ.Q = m_q;
|
m_v.RGBAQ.Q = m_q;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __fi GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r)
|
__forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r)
|
||||||
{
|
{
|
||||||
#if defined(_M_AMD64)
|
#if defined(_M_AMD64)
|
||||||
|
|
||||||
|
@ -426,7 +425,7 @@ void __fi GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r)
|
||||||
m_q = r->STQ.Q;
|
m_q = r->STQ.Q;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __fi GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r)
|
__forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x200
|
#if _M_SSE >= 0x200
|
||||||
|
|
||||||
|
@ -441,7 +440,7 @@ void __fi GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void __fi GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r)
|
__forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r)
|
||||||
{
|
{
|
||||||
m_v.XYZ.X = r->XYZF2.X;
|
m_v.XYZ.X = r->XYZF2.X;
|
||||||
m_v.XYZ.Y = r->XYZF2.Y;
|
m_v.XYZ.Y = r->XYZF2.Y;
|
||||||
|
@ -451,7 +450,7 @@ void __fi GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r)
|
||||||
VertexKick(r->XYZF2.ADC);
|
VertexKick(r->XYZF2.ADC);
|
||||||
}
|
}
|
||||||
|
|
||||||
void __fi GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r)
|
__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r)
|
||||||
{
|
{
|
||||||
m_v.XYZ.X = r->XYZ2.X;
|
m_v.XYZ.X = r->XYZ2.X;
|
||||||
m_v.XYZ.Y = r->XYZ2.Y;
|
m_v.XYZ.Y = r->XYZ2.Y;
|
||||||
|
@ -460,17 +459,17 @@ void __fi GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r)
|
||||||
VertexKick(r->XYZ2.ADC);
|
VertexKick(r->XYZ2.ADC);
|
||||||
}
|
}
|
||||||
|
|
||||||
void __fi GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* r)
|
__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* r)
|
||||||
{
|
{
|
||||||
m_v.FOG.F = r->FOG.F;
|
m_v.FOG.F = r->FOG.F;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __fi GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* r)
|
__forceinline void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* r)
|
||||||
{
|
{
|
||||||
(this->*m_fpGIFRegHandlers[r->A_D.ADDR])(&r->r);
|
(this->*m_fpGIFRegHandlers[r->A_D.ADDR])(&r->r);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* r)
|
__forceinline void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* r)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -502,6 +501,8 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
|
||||||
|
|
||||||
m_context = &m_env.CTXT[PRIM->CTXT];
|
m_context = &m_env.CTXT[PRIM->CTXT];
|
||||||
|
|
||||||
|
UpdateVertexKick();
|
||||||
|
|
||||||
ResetPrim();
|
ResetPrim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -510,22 +511,22 @@ void GSState::GIFRegHandlerPRIM(const GIFReg* r)
|
||||||
ApplyPRIM(r->PRIM);
|
ApplyPRIM(r->PRIM);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFRegHandlerRGBAQ(const GIFReg* r)
|
__forceinline void GSState::GIFRegHandlerRGBAQ(const GIFReg* r)
|
||||||
{
|
{
|
||||||
m_v.RGBAQ = (GSVector4i)r->RGBAQ;
|
m_v.RGBAQ = (GSVector4i)r->RGBAQ;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFRegHandlerST(const GIFReg* r)
|
__forceinline void GSState::GIFRegHandlerST(const GIFReg* r)
|
||||||
{
|
{
|
||||||
m_v.ST = (GSVector4i)r->ST;
|
m_v.ST = (GSVector4i)r->ST;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFRegHandlerUV(const GIFReg* r)
|
__forceinline void GSState::GIFRegHandlerUV(const GIFReg* r)
|
||||||
{
|
{
|
||||||
m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff;
|
m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff;
|
||||||
}
|
}
|
||||||
|
|
||||||
__fi void GSState::GIFRegHandlerXYZF2(const GIFReg* r)
|
void GSState::GIFRegHandlerXYZF2(const GIFReg* r)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
m_v.XYZ.X = r->XYZF.X;
|
m_v.XYZ.X = r->XYZF.X;
|
||||||
|
@ -540,14 +541,14 @@ __fi void GSState::GIFRegHandlerXYZF2(const GIFReg* r)
|
||||||
VertexKick(false);
|
VertexKick(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
__fi void GSState::GIFRegHandlerXYZ2(const GIFReg* r)
|
void GSState::GIFRegHandlerXYZ2(const GIFReg* r)
|
||||||
{
|
{
|
||||||
m_v.XYZ = (GSVector4i)r->XYZ;
|
m_v.XYZ = (GSVector4i)r->XYZ;
|
||||||
|
|
||||||
VertexKick(false);
|
VertexKick(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
__fi void GSState::ApplyTEX0( uint i, GIFRegTEX0& TEX0 )
|
void GSState::ApplyTEX0(uint i, GIFRegTEX0& TEX0)
|
||||||
{
|
{
|
||||||
// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
|
// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
|
||||||
|
|
||||||
|
@ -578,7 +579,7 @@ __fi void GSState::ApplyTEX0( uint i, GIFRegTEX0& TEX0 )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int i> __fi void GSState::GIFRegHandlerTEX0(const GIFReg* r)
|
template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* r)
|
||||||
{
|
{
|
||||||
GIFRegTEX0 TEX0 = r->TEX0;
|
GIFRegTEX0 TEX0 = r->TEX0;
|
||||||
|
|
||||||
|
@ -588,7 +589,7 @@ template<int i> __fi void GSState::GIFRegHandlerTEX0(const GIFReg* r)
|
||||||
ApplyTEX0( i, TEX0 );
|
ApplyTEX0( i, TEX0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int i> __fi void GSState::GIFRegHandlerCLAMP(const GIFReg* r)
|
template<int i> void GSState::GIFRegHandlerCLAMP(const GIFReg* r)
|
||||||
{
|
{
|
||||||
if(PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP)
|
if(PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP)
|
||||||
{
|
{
|
||||||
|
@ -603,7 +604,7 @@ void GSState::GIFRegHandlerFOG(const GIFReg* r)
|
||||||
m_v.FOG = (GSVector4i)r->FOG;
|
m_v.FOG = (GSVector4i)r->FOG;
|
||||||
}
|
}
|
||||||
|
|
||||||
__fi void GSState::GIFRegHandlerXYZF3(const GIFReg* r)
|
void GSState::GIFRegHandlerXYZF3(const GIFReg* r)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
m_v.XYZ.X = r->XYZF.X;
|
m_v.XYZ.X = r->XYZF.X;
|
||||||
|
@ -618,7 +619,7 @@ __fi void GSState::GIFRegHandlerXYZF3(const GIFReg* r)
|
||||||
VertexKick(true);
|
VertexKick(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
__fi void GSState::GIFRegHandlerXYZ3(const GIFReg* r)
|
void GSState::GIFRegHandlerXYZ3(const GIFReg* r)
|
||||||
{
|
{
|
||||||
m_v.XYZ = (GSVector4i)r->XYZ;
|
m_v.XYZ = (GSVector4i)r->XYZ;
|
||||||
|
|
||||||
|
@ -629,7 +630,7 @@ void GSState::GIFRegHandlerNOP(const GIFReg* r)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int i> __fi void GSState::GIFRegHandlerTEX1(const GIFReg* r)
|
template<int i> void GSState::GIFRegHandlerTEX1(const GIFReg* r)
|
||||||
{
|
{
|
||||||
if(PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1)
|
if(PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1)
|
||||||
{
|
{
|
||||||
|
@ -639,7 +640,7 @@ template<int i> __fi void GSState::GIFRegHandlerTEX1(const GIFReg* r)
|
||||||
m_env.CTXT[i].TEX1 = (GSVector4i)r->TEX1;
|
m_env.CTXT[i].TEX1 = (GSVector4i)r->TEX1;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int i> __fi void GSState::GIFRegHandlerTEX2(const GIFReg* r)
|
template<int i> void GSState::GIFRegHandlerTEX2(const GIFReg* r)
|
||||||
{
|
{
|
||||||
// m_env.CTXT[i].TEX2 = r->TEX2; // not used
|
// m_env.CTXT[i].TEX2 = r->TEX2; // not used
|
||||||
|
|
||||||
|
@ -656,7 +657,7 @@ template<int i> __fi void GSState::GIFRegHandlerTEX2(const GIFReg* r)
|
||||||
ApplyTEX0(i, TEX0);
|
ApplyTEX0(i, TEX0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int i> __fi void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r)
|
template<int i> void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r)
|
||||||
{
|
{
|
||||||
GSVector4i o = (GSVector4i)r->XYOFFSET & GSVector4i::x0000ffff();
|
GSVector4i o = (GSVector4i)r->XYOFFSET & GSVector4i::x0000ffff();
|
||||||
|
|
||||||
|
@ -670,7 +671,7 @@ template<int i> __fi void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r)
|
||||||
m_env.CTXT[i].UpdateScissor();
|
m_env.CTXT[i].UpdateScissor();
|
||||||
}
|
}
|
||||||
|
|
||||||
__fi void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r)
|
void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r)
|
||||||
{
|
{
|
||||||
if(r->PRMODECONT != m_env.PRMODECONT)
|
if(r->PRMODECONT != m_env.PRMODECONT)
|
||||||
{
|
{
|
||||||
|
@ -684,9 +685,11 @@ __fi void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r)
|
||||||
// if(PRIM->PRIM == 7) printf("Invalid PRMODECONT/PRIM\n");
|
// if(PRIM->PRIM == 7) printf("Invalid PRMODECONT/PRIM\n");
|
||||||
|
|
||||||
m_context = &m_env.CTXT[PRIM->CTXT];
|
m_context = &m_env.CTXT[PRIM->CTXT];
|
||||||
|
|
||||||
|
UpdateVertexKick();
|
||||||
}
|
}
|
||||||
|
|
||||||
__fi void GSState::GIFRegHandlerPRMODE(const GIFReg* r)
|
void GSState::GIFRegHandlerPRMODE(const GIFReg* r)
|
||||||
{
|
{
|
||||||
if(!m_env.PRMODECONT.AC)
|
if(!m_env.PRMODECONT.AC)
|
||||||
{
|
{
|
||||||
|
@ -698,9 +701,11 @@ __fi void GSState::GIFRegHandlerPRMODE(const GIFReg* r)
|
||||||
m_env.PRMODE._PRIM = _PRIM;
|
m_env.PRMODE._PRIM = _PRIM;
|
||||||
|
|
||||||
m_context = &m_env.CTXT[PRIM->CTXT];
|
m_context = &m_env.CTXT[PRIM->CTXT];
|
||||||
|
|
||||||
|
UpdateVertexKick();
|
||||||
}
|
}
|
||||||
|
|
||||||
__fi void GSState::GIFRegHandlerTEXCLUT(const GIFReg* r)
|
void GSState::GIFRegHandlerTEXCLUT(const GIFReg* r)
|
||||||
{
|
{
|
||||||
if(r->TEXCLUT != m_env.TEXCLUT)
|
if(r->TEXCLUT != m_env.TEXCLUT)
|
||||||
{
|
{
|
||||||
|
@ -730,7 +735,7 @@ template<int i> void GSState::GIFRegHandlerMIPTBP1(const GIFReg* r)
|
||||||
m_env.CTXT[i].MIPTBP1 = (GSVector4i)r->MIPTBP1;
|
m_env.CTXT[i].MIPTBP1 = (GSVector4i)r->MIPTBP1;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int i> __fi void GSState::GIFRegHandlerMIPTBP2(const GIFReg* r)
|
template<int i> void GSState::GIFRegHandlerMIPTBP2(const GIFReg* r)
|
||||||
{
|
{
|
||||||
if(PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2)
|
if(PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2)
|
||||||
{
|
{
|
||||||
|
@ -767,7 +772,7 @@ void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* r)
|
||||||
// InvalidateTextureCache();
|
// InvalidateTextureCache();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int i> __fi void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
|
template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
|
||||||
{
|
{
|
||||||
if(PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR)
|
if(PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR)
|
||||||
{
|
{
|
||||||
|
@ -779,7 +784,7 @@ template<int i> __fi void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
|
||||||
m_env.CTXT[i].UpdateScissor();
|
m_env.CTXT[i].UpdateScissor();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int i> __fi void GSState::GIFRegHandlerALPHA(const GIFReg* r)
|
template<int i> void GSState::GIFRegHandlerALPHA(const GIFReg* r)
|
||||||
{
|
{
|
||||||
ASSERT(r->ALPHA.A != 3);
|
ASSERT(r->ALPHA.A != 3);
|
||||||
ASSERT(r->ALPHA.B != 3);
|
ASSERT(r->ALPHA.B != 3);
|
||||||
|
@ -1142,66 +1147,6 @@ void GSState::Read(uint8* mem, int len)
|
||||||
m_mem.ReadImageX(m_tr.x, m_tr.y, mem, len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG);
|
m_mem.ReadImageX(m_tr.x, m_tr.y, mem, len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use version 1 of the optimized local > local transfer, as per revision 887.
|
|
||||||
// Later (more optimized?) versions cause a crash in Dark Cloud 2.
|
|
||||||
#if 1
|
|
||||||
void GSState::Move()
|
|
||||||
{
|
|
||||||
// ffxii uses this to move the top/bottom of the scrolling menus offscreen and then blends them back over the text to create a shading effect
|
|
||||||
// guitar hero copies the far end of the board to do a similar blend too
|
|
||||||
|
|
||||||
int sx = m_env.TRXPOS.SSAX;
|
|
||||||
int dx = m_env.TRXPOS.DSAX;
|
|
||||||
int sy = m_env.TRXPOS.SSAY;
|
|
||||||
int dy = m_env.TRXPOS.DSAY;
|
|
||||||
int w = m_env.TRXREG.RRW;
|
|
||||||
int h = m_env.TRXREG.RRH;
|
|
||||||
int xinc = 1;
|
|
||||||
int yinc = 1;
|
|
||||||
|
|
||||||
InvalidateLocalMem(m_env.BITBLTBUF, GSVector4i(sx, sy, sx + w, sy + h));
|
|
||||||
InvalidateVideoMem(m_env.BITBLTBUF, GSVector4i(dx, dy, dx + w, dy + h));
|
|
||||||
|
|
||||||
if(sx < dx) sx += w-1, dx += w-1, xinc = -1;
|
|
||||||
if(sy < dy) sy += h-1, dy += h-1, yinc = -1;
|
|
||||||
|
|
||||||
const GSLocalMemory::psm_t& spsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM];
|
|
||||||
const GSLocalMemory::psm_t& dpsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM];
|
|
||||||
|
|
||||||
if(m_env.BITBLTBUF.SPSM == PSM_PSMCT32 && m_env.BITBLTBUF.DPSM == PSM_PSMCT32)
|
|
||||||
{
|
|
||||||
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w)
|
|
||||||
{
|
|
||||||
DWORD sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
|
|
||||||
int* soffset = spsm.rowOffset[sy & 7];
|
|
||||||
|
|
||||||
DWORD dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
|
|
||||||
int* doffset = dpsm.rowOffset[dy & 7];
|
|
||||||
|
|
||||||
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
|
|
||||||
{
|
|
||||||
m_mem.WritePixel32(dbase + doffset[dx], m_mem.ReadPixel32(sbase + soffset[sx]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w)
|
|
||||||
{
|
|
||||||
DWORD sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
|
|
||||||
int* soffset = spsm.rowOffset[sy & 7];
|
|
||||||
|
|
||||||
DWORD dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
|
|
||||||
int* doffset = dpsm.rowOffset[dy & 7];
|
|
||||||
|
|
||||||
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
|
|
||||||
{
|
|
||||||
(m_mem.*dpsm.wpa)(dbase + doffset[dx], (m_mem.*spsm.rpa)(sbase + soffset[sx]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
void GSState::Move()
|
void GSState::Move()
|
||||||
{
|
{
|
||||||
// ffxii uses this to move the top/bottom of the scrolling menus offscreen and then blends them back over the text to create a shading effect
|
// ffxii uses this to move the top/bottom of the scrolling menus offscreen and then blends them back over the text to create a shading effect
|
||||||
|
@ -1346,10 +1291,7 @@ void GSState::Move()
|
||||||
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
|
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
|
||||||
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
|
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
|
||||||
|
|
||||||
for(int x = 0; x > -w; x--) {
|
for(int x = 0; x > -w; x--) d[dcol[x]] = s[scol[x]];
|
||||||
printf("%d",x); //Dark Cloud 2 crashes at x = -63
|
|
||||||
d[dcol[x]] = s[scol[x]];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1412,7 +1354,7 @@ void GSState::Move()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
void GSState::SoftReset(uint32 mask)
|
void GSState::SoftReset(uint32 mask)
|
||||||
{
|
{
|
||||||
if(mask & 1)
|
if(mask & 1)
|
||||||
|
@ -1508,91 +1450,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
||||||
{
|
{
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
uint32 reg = path.GetReg();
|
(this->*m_fpGIFPackedRegHandlers[path.GetReg()])((GIFPackedReg*)mem);
|
||||||
|
|
||||||
#if 0
|
|
||||||
// I assume this was some sort of debugging code? Why intercept and perform
|
|
||||||
// special handling for the first three entries in the table, and then do
|
|
||||||
// a LUT for the rest? Either do a switch for the whole table (best idea)
|
|
||||||
// or do a LUT for the whole table.
|
|
||||||
switch(reg)
|
|
||||||
{
|
|
||||||
case GIF_REG_RGBA:
|
|
||||||
GIFPackedRegHandlerRGBA((GIFPackedReg*)mem);
|
|
||||||
break;
|
|
||||||
case GIF_REG_STQ:
|
|
||||||
GIFPackedRegHandlerSTQ((GIFPackedReg*)mem);
|
|
||||||
break;
|
|
||||||
case GIF_REG_UV:
|
|
||||||
GIFPackedRegHandlerUV((GIFPackedReg*)mem);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
(this->*m_fpGIFPackedRegHandlers[reg])((GIFPackedReg*)mem);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if UsePackedRegSwitch
|
|
||||||
// This is a switch statement version of the LUT above. Since there are only
|
|
||||||
// 16 entries, this is almost certainly ideal, since the compiler can inline
|
|
||||||
// all the handlers, and PGO will further optimize the switch dispatcher.
|
|
||||||
|
|
||||||
if (FrameSkipIt)
|
|
||||||
{
|
|
||||||
// When skipping frames it looks like we only need to bother with the A_D handler
|
|
||||||
// and the TEX handlers. (and I'm thinking the TEX handlers might not be necessary
|
|
||||||
// if the PCSX2 side of the frameskipper is smart enough anyway).
|
|
||||||
switch(reg)
|
|
||||||
{
|
|
||||||
case GIF_REG_A_D: GIFPackedRegHandlerA_D ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_TEX0_1: GIFRegHandlerTEX0<0> ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_TEX0_2: GIFRegHandlerTEX0<1> ((GIFReg*)mem); break;
|
|
||||||
|
|
||||||
// Should RGBA/STQ/UV be NOPs when skipping frames? I think so, but maybe the original
|
|
||||||
// switch() (above) was some hack to enable them in frameskipping mode. --air
|
|
||||||
|
|
||||||
case GIF_REG_RGBA: //GIFPackedRegHandlerRGBA ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_STQ: //GIFPackedRegHandlerSTQ ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_UV: //GIFPackedRegHandlerUV ((GIFPackedReg*)mem); break;
|
|
||||||
|
|
||||||
case GIF_REG_XYZF2: //GIFPackedRegHandlerXYZF2((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_XYZ2: //GIFPackedRegHandlerXYZ2 ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_CLAMP_1: //GIFRegHandlerCLAMP<0> ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_CLAMP_2: //GIFRegHandlerCLAMP<1> ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_FOG: //GIFPackedRegHandlerFOG ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_XYZF3: //GIFRegHandlerXYZF3 ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_XYZ3: //GIFRegHandlerXYZ3 ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_NOP: break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
switch(reg)
|
|
||||||
{
|
|
||||||
case GIF_REG_RGBA: GIFPackedRegHandlerRGBA ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_STQ: GIFPackedRegHandlerSTQ ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_UV: GIFPackedRegHandlerUV ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_XYZF2: GIFPackedRegHandlerXYZF2((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_XYZ2: GIFPackedRegHandlerXYZ2 ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_TEX0_1: GIFRegHandlerTEX0<0> ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_TEX0_2: GIFRegHandlerTEX0<1> ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_CLAMP_1: GIFRegHandlerCLAMP<0> ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_CLAMP_2: GIFRegHandlerCLAMP<1> ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_FOG: GIFPackedRegHandlerFOG ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_XYZF3: GIFRegHandlerXYZF3 ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_XYZ3: GIFRegHandlerXYZ3 ((GIFReg*)mem); break;
|
|
||||||
case GIF_REG_A_D: GIFPackedRegHandlerA_D ((GIFPackedReg*)mem); break;
|
|
||||||
case GIF_REG_NOP: break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
|
|
||||||
// This is the original LUT implementation of the packed reg dispatcher.
|
|
||||||
// Simple and clean, but the switch system below is probably more efficient.
|
|
||||||
|
|
||||||
(this->*m_fpGIFPackedRegHandlers[reg])((GIFPackedReg*)mem);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
mem += sizeof(GIFPackedReg);
|
mem += sizeof(GIFPackedReg);
|
||||||
size--;
|
size--;
|
||||||
|
@ -1779,7 +1637,7 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly)
|
||||||
WriteState(data, &m_tr.y);
|
WriteState(data, &m_tr.y);
|
||||||
WriteState(data, m_mem.m_vm8, m_mem.m_vmsize);
|
WriteState(data, m_mem.m_vm8, m_mem.m_vmsize);
|
||||||
|
|
||||||
for(int i = 0; i < ArraySize(m_path); i++)
|
for(int i = 0; i < countof(m_path); i++)
|
||||||
{
|
{
|
||||||
m_path[i].tag.NREG = m_path[i].nreg;
|
m_path[i].tag.NREG = m_path[i].nreg;
|
||||||
m_path[i].tag.NLOOP = m_path[i].nloop;
|
m_path[i].tag.NLOOP = m_path[i].nloop;
|
||||||
|
@ -1874,7 +1732,7 @@ int GSState::Defrost(const GSFreezeData* fd)
|
||||||
|
|
||||||
m_tr.total = 0; // TODO: restore transfer state
|
m_tr.total = 0; // TODO: restore transfer state
|
||||||
|
|
||||||
for(int i = 0; i < ArraySize(m_path); i++)
|
for(int i = 0; i < countof(m_path); i++)
|
||||||
{
|
{
|
||||||
ReadState(&m_path[i].tag, data);
|
ReadState(&m_path[i].tag, data);
|
||||||
ReadState(&m_path[i].reg, data);
|
ReadState(&m_path[i].reg, data);
|
||||||
|
@ -1888,6 +1746,8 @@ int GSState::Defrost(const GSFreezeData* fd)
|
||||||
|
|
||||||
m_context = &m_env.CTXT[PRIM->CTXT];
|
m_context = &m_env.CTXT[PRIM->CTXT];
|
||||||
|
|
||||||
|
UpdateVertexKick();
|
||||||
|
|
||||||
m_env.UpdateDIMX();
|
m_env.UpdateDIMX();
|
||||||
|
|
||||||
for(int i = 0; i < 2; i++)
|
for(int i = 0; i < 2; i++)
|
||||||
|
@ -1918,7 +1778,7 @@ GSState::GSTransferBuffer::GSTransferBuffer()
|
||||||
{
|
{
|
||||||
x = y = 0;
|
x = y = 0;
|
||||||
start = end = total = 0;
|
start = end = total = 0;
|
||||||
buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16);
|
buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
GSState::GSTransferBuffer::~GSTransferBuffer()
|
GSState::GSTransferBuffer::~GSTransferBuffer()
|
||||||
|
|
|
@ -36,17 +36,11 @@
|
||||||
#include "GSAlignedClass.h"
|
#include "GSAlignedClass.h"
|
||||||
#include "GSDump.h"
|
#include "GSDump.h"
|
||||||
|
|
||||||
// Set this to 1 to enable a switch statement instead of a LUT for the packed register handler
|
class GSState : public GSAlignedClass<32>
|
||||||
// in the GifTransfer code. Switch statement is probably faster, but it isn't fully implemented
|
|
||||||
// yet (not properly supporting frameskipping).
|
|
||||||
#define UsePackedRegSwitch 0
|
|
||||||
|
|
||||||
class GSState : public GSAlignedClass<16>
|
|
||||||
{
|
{
|
||||||
#if !UsePackedRegSwitch
|
|
||||||
typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* r);
|
typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* r);
|
||||||
|
|
||||||
GIFPackedRegHandler m_fpGIFPackedRegHandlers[16];
|
GIFPackedRegHandler m_fpGIFPackedRegHandlers[16];
|
||||||
#endif
|
|
||||||
|
|
||||||
void GIFPackedRegHandlerNull(const GIFPackedReg* r);
|
void GIFPackedRegHandlerNull(const GIFPackedReg* r);
|
||||||
void GIFPackedRegHandlerRGBA(const GIFPackedReg* r);
|
void GIFPackedRegHandlerRGBA(const GIFPackedReg* r);
|
||||||
|
@ -62,7 +56,7 @@ class GSState : public GSAlignedClass<16>
|
||||||
|
|
||||||
GIFRegHandler m_fpGIFRegHandlers[256];
|
GIFRegHandler m_fpGIFRegHandlers[256];
|
||||||
|
|
||||||
void ApplyTEX0( uint i, GIFRegTEX0& TEX0 );
|
void ApplyTEX0(uint i, GIFRegTEX0& TEX0);
|
||||||
void ApplyPRIM(const GIFRegPRIM& PRIM);
|
void ApplyPRIM(const GIFRegPRIM& PRIM);
|
||||||
|
|
||||||
void GIFRegHandlerNull(const GIFReg* r);
|
void GIFRegHandlerNull(const GIFReg* r);
|
||||||
|
@ -136,33 +130,67 @@ class GSState : public GSAlignedClass<16>
|
||||||
protected:
|
protected:
|
||||||
bool IsBadFrame(int& skip, int UserHacks_SkipDraw);
|
bool IsBadFrame(int& skip, int UserHacks_SkipDraw);
|
||||||
|
|
||||||
typedef void (GSState::*DrawingKickPtr)(bool skip);
|
typedef void (GSState::*VertexKickPtr)(bool skip);
|
||||||
|
|
||||||
DrawingKickPtr m_dk[8];
|
VertexKickPtr m_vk[8][2][2];
|
||||||
|
VertexKickPtr m_vkf;
|
||||||
|
|
||||||
template<class T> void InitVertexKick()
|
template<class T> void InitVertexKick()
|
||||||
{
|
{
|
||||||
m_dk[GS_POINTLIST] = (DrawingKickPtr)&T::DrawingKick<GS_POINTLIST>;
|
m_vk[GS_POINTLIST][0][0] = (VertexKickPtr)&T::VertexKick<GS_POINTLIST, 0, 0>;
|
||||||
m_dk[GS_LINELIST] = (DrawingKickPtr)&T::DrawingKick<GS_LINELIST>;
|
m_vk[GS_POINTLIST][0][1] = (VertexKickPtr)&T::VertexKick<GS_POINTLIST, 0, 0>;
|
||||||
m_dk[GS_LINESTRIP] = (DrawingKickPtr)&T::DrawingKick<GS_LINESTRIP>;
|
m_vk[GS_POINTLIST][1][0] = (VertexKickPtr)&T::VertexKick<GS_POINTLIST, 1, 0>;
|
||||||
m_dk[GS_TRIANGLELIST] = (DrawingKickPtr)&T::DrawingKick<GS_TRIANGLELIST>;
|
m_vk[GS_POINTLIST][1][1] = (VertexKickPtr)&T::VertexKick<GS_POINTLIST, 1, 1>;
|
||||||
m_dk[GS_TRIANGLESTRIP] = (DrawingKickPtr)&T::DrawingKick<GS_TRIANGLESTRIP>;
|
|
||||||
m_dk[GS_TRIANGLEFAN] = (DrawingKickPtr)&T::DrawingKick<GS_TRIANGLEFAN>;
|
m_vk[GS_LINELIST][0][0] = (VertexKickPtr)&T::VertexKick<GS_LINELIST, 0, 0>;
|
||||||
m_dk[GS_SPRITE] = (DrawingKickPtr)&T::DrawingKick<GS_SPRITE>;
|
m_vk[GS_LINELIST][0][1] = (VertexKickPtr)&T::VertexKick<GS_LINELIST, 0, 0>;
|
||||||
m_dk[GS_INVALID] = &GSState::DrawingKickNull;
|
m_vk[GS_LINELIST][1][0] = (VertexKickPtr)&T::VertexKick<GS_LINELIST, 1, 0>;
|
||||||
|
m_vk[GS_LINELIST][1][1] = (VertexKickPtr)&T::VertexKick<GS_LINELIST, 1, 1>;
|
||||||
|
|
||||||
|
m_vk[GS_LINESTRIP][0][0] = (VertexKickPtr)&T::VertexKick<GS_LINESTRIP, 0, 0>;
|
||||||
|
m_vk[GS_LINESTRIP][0][1] = (VertexKickPtr)&T::VertexKick<GS_LINESTRIP, 0, 0>;
|
||||||
|
m_vk[GS_LINESTRIP][1][0] = (VertexKickPtr)&T::VertexKick<GS_LINESTRIP, 1, 0>;
|
||||||
|
m_vk[GS_LINESTRIP][1][1] = (VertexKickPtr)&T::VertexKick<GS_LINESTRIP, 1, 1>;
|
||||||
|
|
||||||
|
m_vk[GS_TRIANGLELIST][0][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLELIST, 0, 0>;
|
||||||
|
m_vk[GS_TRIANGLELIST][0][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLELIST, 0, 0>;
|
||||||
|
m_vk[GS_TRIANGLELIST][1][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLELIST, 1, 0>;
|
||||||
|
m_vk[GS_TRIANGLELIST][1][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLELIST, 1, 1>;
|
||||||
|
|
||||||
|
m_vk[GS_TRIANGLESTRIP][0][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLESTRIP, 0, 0>;
|
||||||
|
m_vk[GS_TRIANGLESTRIP][0][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLESTRIP, 0, 0>;
|
||||||
|
m_vk[GS_TRIANGLESTRIP][1][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLESTRIP, 1, 0>;
|
||||||
|
m_vk[GS_TRIANGLESTRIP][1][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLESTRIP, 1, 1>;
|
||||||
|
|
||||||
|
m_vk[GS_TRIANGLEFAN][0][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLEFAN, 0, 0>;
|
||||||
|
m_vk[GS_TRIANGLEFAN][0][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLEFAN, 0, 0>;
|
||||||
|
m_vk[GS_TRIANGLEFAN][1][0] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLEFAN, 1, 0>;
|
||||||
|
m_vk[GS_TRIANGLEFAN][1][1] = (VertexKickPtr)&T::VertexKick<GS_TRIANGLEFAN, 1, 1>;
|
||||||
|
|
||||||
|
m_vk[GS_SPRITE][0][0] = (VertexKickPtr)&T::VertexKick<GS_SPRITE, 0, 0>;
|
||||||
|
m_vk[GS_SPRITE][0][1] = (VertexKickPtr)&T::VertexKick<GS_SPRITE, 0, 0>;
|
||||||
|
m_vk[GS_SPRITE][1][0] = (VertexKickPtr)&T::VertexKick<GS_SPRITE, 1, 0>;
|
||||||
|
m_vk[GS_SPRITE][1][1] = (VertexKickPtr)&T::VertexKick<GS_SPRITE, 1, 1>;
|
||||||
|
|
||||||
|
m_vk[GS_INVALID][0][0] = &GSState::VertexKickNull;
|
||||||
|
m_vk[GS_INVALID][0][1] = &GSState::VertexKickNull;
|
||||||
|
m_vk[GS_INVALID][1][0] = &GSState::VertexKickNull;
|
||||||
|
m_vk[GS_INVALID][1][1] = &GSState::VertexKickNull;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DrawingKickNull(bool skip)
|
void UpdateVertexKick()
|
||||||
|
{
|
||||||
|
m_vkf = m_vk[PRIM->PRIM][PRIM->TME][PRIM->FST];
|
||||||
|
}
|
||||||
|
|
||||||
|
void VertexKickNull(bool skip)
|
||||||
{
|
{
|
||||||
ASSERT(0);
|
ASSERT(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void DoVertexKick()=0;
|
void VertexKick(bool skip)
|
||||||
|
|
||||||
__fi void VertexKick(bool skip)
|
|
||||||
{
|
{
|
||||||
DoVertexKick();
|
(this->*m_vkf)(skip);
|
||||||
(this->*m_dk[PRIM->PRIM])(skip);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -221,6 +249,6 @@ public:
|
||||||
void SetFrameSkip(int skip);
|
void SetFrameSkip(int skip);
|
||||||
void SetRegsMem(uint8* basemem);
|
void SetRegsMem(uint8* basemem);
|
||||||
void SetIrqCallback(void (*irq)());
|
void SetIrqCallback(void (*irq)());
|
||||||
void SetMultithreaded(bool isMT=true);
|
void SetMultithreaded(bool mt = true);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -37,9 +37,12 @@ extern const uint8 clutTableT32I8[128];
|
||||||
extern const uint8 clutTableT32I4[16];
|
extern const uint8 clutTableT32I4[16];
|
||||||
extern const uint8 clutTableT16I8[32];
|
extern const uint8 clutTableT16I8[32];
|
||||||
extern const uint8 clutTableT16I4[16];
|
extern const uint8 clutTableT16I4[16];
|
||||||
struct D3D9Blend {
|
|
||||||
|
struct D3D9Blend
|
||||||
|
{
|
||||||
int bogus;
|
int bogus;
|
||||||
D3DBLENDOP op;
|
D3DBLENDOP op;
|
||||||
D3DBLEND src, dst;
|
D3DBLEND src, dst;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern const D3D9Blend blendMapD3D9[3*3*3*3];
|
extern const D3D9Blend blendMapD3D9[3*3*3*3];
|
||||||
|
|
|
@ -27,6 +27,6 @@ GSTexture::GSTexture()
|
||||||
, m_size(0, 0)
|
, m_size(0, 0)
|
||||||
, m_type(None)
|
, m_type(None)
|
||||||
, m_msaa(false)
|
, m_msaa(false)
|
||||||
, LikelyOffset (false)
|
, LikelyOffset(false)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
|
@ -836,11 +836,11 @@ GSTextureCache::Source::Source(GSRenderer* r)
|
||||||
{
|
{
|
||||||
memset(m_valid, 0, sizeof(m_valid));
|
memset(m_valid, 0, sizeof(m_valid));
|
||||||
|
|
||||||
m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 16);
|
m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 32);
|
||||||
|
|
||||||
memset(m_clut, 0, sizeof(m_clut));
|
memset(m_clut, 0, sizeof(m_clut));
|
||||||
|
|
||||||
m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 16);
|
m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32);
|
||||||
m_write.count = 0;
|
m_write.count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1082,7 +1082,7 @@ void GSTextureCache::Target::Update()
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
static uint8* buff = (uint8*)::_aligned_malloc(1024 * 1024 * 4, 16);
|
static uint8* buff = (uint8*)::_aligned_malloc(1024 * 1024 * 4, 32);
|
||||||
|
|
||||||
int pitch = ((w + 3) & ~3) * 4;
|
int pitch = ((w + 3) & ~3) * 4;
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ public:
|
||||||
FMT_8,
|
FMT_8,
|
||||||
};
|
};
|
||||||
|
|
||||||
class Surface : public GSAlignedClass<16>
|
class Surface : public GSAlignedClass<32>
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
GSRenderer* m_renderer;
|
GSRenderer* m_renderer;
|
||||||
|
|
|
@ -253,7 +253,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
||||||
|
|
||||||
if(m_buff == NULL)
|
if(m_buff == NULL)
|
||||||
{
|
{
|
||||||
m_buff = _aligned_malloc(tw * th * sizeof(uint32), 16);
|
m_buff = _aligned_malloc(tw * th * sizeof(uint32), 32);
|
||||||
|
|
||||||
if(m_buff == NULL)
|
if(m_buff == NULL)
|
||||||
{
|
{
|
||||||
|
|
|
@ -137,6 +137,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
||||||
}
|
}
|
||||||
|
|
||||||
VSSetShader(i->second.vs, m_vs_cb);
|
VSSetShader(i->second.vs, m_vs_cb);
|
||||||
|
|
||||||
IASetInputLayout(i->second.il);
|
IASetInputLayout(i->second.il);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -69,7 +69,7 @@ void GSDevice9::SetupIA(const void* vertices, int count, int prim)
|
||||||
|
|
||||||
void GSDevice9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
void GSDevice9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
||||||
{
|
{
|
||||||
hash_map< uint32, GSVertexShader9 >::const_iterator i = m_vs.find(sel);
|
hash_map<uint32, GSVertexShader9>::const_iterator i = m_vs.find(sel);
|
||||||
|
|
||||||
if(i == m_vs.end())
|
if(i == m_vs.end())
|
||||||
{
|
{
|
||||||
|
@ -110,6 +110,7 @@ void GSDevice9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
||||||
}
|
}
|
||||||
|
|
||||||
VSSetShader(i->second.vs, (const float*)cb, sizeof(*cb) / sizeof(GSVector4));
|
VSSetShader(i->second.vs, (const float*)cb, sizeof(*cb) / sizeof(GSVector4));
|
||||||
|
|
||||||
IASetInputLayout(i->second.il);
|
IASetInputLayout(i->second.il);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,26 +27,6 @@ const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
|
||||||
const GSVector4 GSVector4::m_x3f800000(_mm_castsi128_ps(_mm_set1_epi32(0x3f800000)));
|
const GSVector4 GSVector4::m_x3f800000(_mm_castsi128_ps(_mm_set1_epi32(0x3f800000)));
|
||||||
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
||||||
|
|
||||||
GSVector4i::GSVector4i(const GSVector4& v)
|
|
||||||
{
|
|
||||||
m = _mm_cvttps_epi32(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
GSVector4::GSVector4(const GSVector4i& v)
|
|
||||||
{
|
|
||||||
m = _mm_cvtepi32_ps(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
GSVector4i GSVector4i::cast(const GSVector4& v)
|
|
||||||
{
|
|
||||||
return GSVector4i(_mm_castps_si128(v.m));
|
|
||||||
}
|
|
||||||
|
|
||||||
GSVector4 GSVector4::cast(const GSVector4i& v)
|
|
||||||
{
|
|
||||||
return GSVector4(_mm_castsi128_ps(v.m));
|
|
||||||
}
|
|
||||||
|
|
||||||
GSVector4i GSVector4i::fit(int arx, int ary) const
|
GSVector4i GSVector4i::fit(int arx, int ary) const
|
||||||
{
|
{
|
||||||
GSVector4i r = *this;
|
GSVector4i r = *this;
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -28,7 +28,7 @@
|
||||||
|
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
|
|
||||||
__aligned16 struct GSVertex
|
__aligned32 struct GSVertex
|
||||||
{
|
{
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
|
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
|
|
||||||
__aligned16 union GSVertexHW9
|
__aligned32 union GSVertexHW9
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
|
@ -56,7 +56,7 @@ __aligned16 union GSVertexHW9
|
||||||
float GetQ() {return p.w;}
|
float GetQ() {return p.w;}
|
||||||
};
|
};
|
||||||
|
|
||||||
__aligned16 union GSVertexHW11
|
__aligned32 union GSVertexHW11
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
|
|
|
@ -31,7 +31,7 @@ public:
|
||||||
GSVertexList()
|
GSVertexList()
|
||||||
: m_count(0)
|
: m_count(0)
|
||||||
{
|
{
|
||||||
m_base = _aligned_malloc(sizeof(Vertex) * countof(m_v), 16);
|
m_base = _aligned_malloc(sizeof(Vertex) * countof(m_v), 32);
|
||||||
|
|
||||||
for(int i = 0; i < countof(m_v); i++)
|
for(int i = 0; i < countof(m_v); i++)
|
||||||
{
|
{
|
||||||
|
|
|
@ -23,12 +23,16 @@
|
||||||
|
|
||||||
#include "GSVector.h"
|
#include "GSVector.h"
|
||||||
|
|
||||||
__aligned16 union GSVertexSW
|
__aligned32 union GSVertexSW
|
||||||
{
|
{
|
||||||
struct {GSVector4 c, p, t;};
|
struct {GSVector4 c, p, t;};
|
||||||
struct {GSVector4 v[3];};
|
struct {GSVector4 v[3];};
|
||||||
struct {float f[12];};
|
struct {float f[12];};
|
||||||
|
|
||||||
|
#if _M_SSE >= 0x500
|
||||||
|
struct {GSVector8 cp, t_;};
|
||||||
|
#endif
|
||||||
|
|
||||||
GSVertexSW() {}
|
GSVertexSW() {}
|
||||||
GSVertexSW(const GSVertexSW& v) {*this = v;}
|
GSVertexSW(const GSVertexSW& v) {*this = v;}
|
||||||
|
|
||||||
|
@ -213,4 +217,3 @@ __forceinline GSVertexSW operator / (const GSVertexSW& v, float f)
|
||||||
v0.t = v.t / vf;
|
v0.t = v.t / vf;
|
||||||
return v0;
|
return v0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -120,8 +120,8 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc
|
||||||
|
|
||||||
using namespace Xbyak;
|
using namespace Xbyak;
|
||||||
|
|
||||||
GSVertexTrace::CGSW::CGSW(uint32 key, void* ptr, size_t maxsize)
|
GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
|
||||||
: CodeGenerator(maxsize, ptr)
|
: CodeGenerator(maxsize, code)
|
||||||
{
|
{
|
||||||
#if _M_AMD64
|
#if _M_AMD64
|
||||||
#error TODO
|
#error TODO
|
||||||
|
@ -161,10 +161,10 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* ptr, size_t maxsize)
|
||||||
static const float fmin = -FLT_MAX;
|
static const float fmin = -FLT_MAX;
|
||||||
static const float fmax = FLT_MAX;
|
static const float fmax = FLT_MAX;
|
||||||
|
|
||||||
movss(xmm0, xmmword[&fmax]);
|
movss(xmm0, ptr[&fmax]);
|
||||||
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
|
||||||
movss(xmm1, xmmword[&fmin]);
|
movss(xmm1, ptr[&fmin]);
|
||||||
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
|
||||||
if(color)
|
if(color)
|
||||||
|
@ -202,7 +202,7 @@ L("loop");
|
||||||
|
|
||||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||||
{
|
{
|
||||||
movaps(xmm1, xmmword[edx + 1 * sizeof(GSVertexSW) + 32]);
|
movaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + 32]);
|
||||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -213,7 +213,7 @@ L("loop");
|
||||||
// min.c = min.c.minv(v[i + j].c);
|
// min.c = min.c.minv(v[i + j].c);
|
||||||
// max.c = max.c.maxv(v[i + j].c);
|
// max.c = max.c.maxv(v[i + j].c);
|
||||||
|
|
||||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW)]);
|
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW)]);
|
||||||
|
|
||||||
minps(xmm2, xmm0);
|
minps(xmm2, xmm0);
|
||||||
maxps(xmm3, xmm0);
|
maxps(xmm3, xmm0);
|
||||||
|
@ -222,7 +222,7 @@ L("loop");
|
||||||
// min.p = min.p.minv(v[i + j].p);
|
// min.p = min.p.minv(v[i + j].p);
|
||||||
// max.p = max.p.maxv(v[i + j].p);
|
// max.p = max.p.maxv(v[i + j].p);
|
||||||
|
|
||||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 16]);
|
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + 16]);
|
||||||
|
|
||||||
minps(xmm4, xmm0);
|
minps(xmm4, xmm0);
|
||||||
maxps(xmm5, xmm0);
|
maxps(xmm5, xmm0);
|
||||||
|
@ -232,7 +232,7 @@ L("loop");
|
||||||
// min.t = min.t.minv(v[i + j].t);
|
// min.t = min.t.minv(v[i + j].t);
|
||||||
// max.t = max.t.maxv(v[i + j].t);
|
// max.t = max.t.maxv(v[i + j].t);
|
||||||
|
|
||||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 32]);
|
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + 32]);
|
||||||
|
|
||||||
if(!fst)
|
if(!fst)
|
||||||
{
|
{
|
||||||
|
@ -265,27 +265,27 @@ L("loop");
|
||||||
{
|
{
|
||||||
cvttps2dq(xmm2, xmm2);
|
cvttps2dq(xmm2, xmm2);
|
||||||
psrld(xmm2, 7);
|
psrld(xmm2, 7);
|
||||||
movaps(xmmword[eax], xmm2);
|
movaps(ptr[eax], xmm2);
|
||||||
|
|
||||||
cvttps2dq(xmm3, xmm3);
|
cvttps2dq(xmm3, xmm3);
|
||||||
psrld(xmm3, 7);
|
psrld(xmm3, 7);
|
||||||
movaps(xmmword[edx], xmm3);
|
movaps(ptr[edx], xmm3);
|
||||||
}
|
}
|
||||||
|
|
||||||
movaps(xmmword[eax + 16], xmm4);
|
movaps(ptr[eax + 16], xmm4);
|
||||||
movaps(xmmword[edx + 16], xmm5);
|
movaps(ptr[edx + 16], xmm5);
|
||||||
|
|
||||||
if(tme)
|
if(tme)
|
||||||
{
|
{
|
||||||
movaps(xmmword[eax + 32], xmm6);
|
movaps(ptr[eax + 32], xmm6);
|
||||||
movaps(xmmword[edx + 32], xmm7);
|
movaps(ptr[edx + 32], xmm7);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
|
||||||
GSVertexTrace::CGHW9::CGHW9(uint32 key, void* ptr, size_t maxsize)
|
GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
|
||||||
: CodeGenerator(maxsize, ptr)
|
: CodeGenerator(maxsize, code)
|
||||||
{
|
{
|
||||||
#if _M_AMD64
|
#if _M_AMD64
|
||||||
#error TODO
|
#error TODO
|
||||||
|
@ -327,10 +327,10 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* ptr, size_t maxsize)
|
||||||
static const float fmin = -FLT_MAX;
|
static const float fmin = -FLT_MAX;
|
||||||
static const float fmax = FLT_MAX;
|
static const float fmax = FLT_MAX;
|
||||||
|
|
||||||
movss(xmm0, xmmword[&fmax]);
|
movss(xmm0, ptr[&fmax]);
|
||||||
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
|
||||||
movss(xmm1, xmmword[&fmin]);
|
movss(xmm1, ptr[&fmin]);
|
||||||
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
|
||||||
if(color)
|
if(color)
|
||||||
|
@ -368,7 +368,7 @@ L("loop");
|
||||||
|
|
||||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||||
{
|
{
|
||||||
movaps(xmm1, xmmword[edx + 5 * sizeof(GSVertexHW9) + 16]);
|
movaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + 16]);
|
||||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -377,7 +377,7 @@ L("loop");
|
||||||
// min.p = min.p.minv(v[i + j].p);
|
// min.p = min.p.minv(v[i + j].p);
|
||||||
// max.p = max.p.maxv(v[i + j].p);
|
// max.p = max.p.maxv(v[i + j].p);
|
||||||
|
|
||||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW9) + 16]);
|
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + 16]);
|
||||||
|
|
||||||
minps(xmm4, xmm0);
|
minps(xmm4, xmm0);
|
||||||
maxps(xmm5, xmm0);
|
maxps(xmm5, xmm0);
|
||||||
|
@ -390,7 +390,7 @@ L("loop");
|
||||||
|
|
||||||
if(color && (iip || j == n - 1) || tme)
|
if(color && (iip || j == n - 1) || tme)
|
||||||
{
|
{
|
||||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW9)]);
|
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9)]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(color && (iip || j == n - 1))
|
if(color && (iip || j == n - 1))
|
||||||
|
@ -455,15 +455,15 @@ L("loop");
|
||||||
punpcklwd(xmm3, xmm0);
|
punpcklwd(xmm3, xmm0);
|
||||||
}
|
}
|
||||||
|
|
||||||
movaps(xmmword[eax], xmm2);
|
movaps(ptr[eax], xmm2);
|
||||||
movaps(xmmword[edx], xmm3);
|
movaps(ptr[edx], xmm3);
|
||||||
}
|
}
|
||||||
|
|
||||||
// m_min.p = pmin;
|
// m_min.p = pmin;
|
||||||
// m_max.p = pmax;
|
// m_max.p = pmax;
|
||||||
|
|
||||||
movaps(xmmword[eax + 16], xmm4);
|
movaps(ptr[eax + 16], xmm4);
|
||||||
movaps(xmmword[edx + 16], xmm5);
|
movaps(ptr[edx + 16], xmm5);
|
||||||
|
|
||||||
if(tme)
|
if(tme)
|
||||||
{
|
{
|
||||||
|
@ -473,15 +473,15 @@ L("loop");
|
||||||
shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||||
shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||||
|
|
||||||
movaps(xmmword[eax + 32], xmm6);
|
movaps(ptr[eax + 32], xmm6);
|
||||||
movaps(xmmword[edx + 32], xmm7);
|
movaps(ptr[edx + 32], xmm7);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
|
||||||
GSVertexTrace::CGHW11::CGHW11(uint32 key, void* ptr, size_t maxsize)
|
GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize)
|
||||||
: CodeGenerator(maxsize, ptr)
|
: CodeGenerator(maxsize, code)
|
||||||
{
|
{
|
||||||
#if _M_AMD64
|
#if _M_AMD64
|
||||||
#error TODO
|
#error TODO
|
||||||
|
@ -521,10 +521,10 @@ GSVertexTrace::CGHW11::CGHW11(uint32 key, void* ptr, size_t maxsize)
|
||||||
static const float fmin = -FLT_MAX;
|
static const float fmin = -FLT_MAX;
|
||||||
static const float fmax = FLT_MAX;
|
static const float fmax = FLT_MAX;
|
||||||
|
|
||||||
movss(xmm0, xmmword[&fmax]);
|
movss(xmm0, ptr[&fmax]);
|
||||||
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
|
||||||
movss(xmm1, xmmword[&fmin]);
|
movss(xmm1, ptr[&fmin]);
|
||||||
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
|
||||||
if(color)
|
if(color)
|
||||||
|
@ -564,7 +564,7 @@ L("loop");
|
||||||
{
|
{
|
||||||
if(color && (iip || j == n - 1) || tme)
|
if(color && (iip || j == n - 1) || tme)
|
||||||
{
|
{
|
||||||
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW11)]);
|
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(color && (iip || j == n - 1))
|
if(color && (iip || j == n - 1))
|
||||||
|
@ -593,7 +593,7 @@ L("loop");
|
||||||
maxps(xmm7, xmm0);
|
maxps(xmm7, xmm0);
|
||||||
}
|
}
|
||||||
|
|
||||||
movdqa(xmm0, xmmword[edx + j * sizeof(GSVertexHW11) + 16]);
|
movdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]);
|
||||||
|
|
||||||
if(m_cpu.has(util::Cpu::tSSE41))
|
if(m_cpu.has(util::Cpu::tSSE41))
|
||||||
{
|
{
|
||||||
|
@ -648,8 +648,8 @@ L("loop");
|
||||||
punpcklwd(xmm3, xmm0);
|
punpcklwd(xmm3, xmm0);
|
||||||
}
|
}
|
||||||
|
|
||||||
movaps(xmmword[eax], xmm2);
|
movaps(ptr[eax], xmm2);
|
||||||
movaps(xmmword[edx], xmm3);
|
movaps(ptr[edx], xmm3);
|
||||||
}
|
}
|
||||||
|
|
||||||
// m_min.p = pmin.xyww();
|
// m_min.p = pmin.xyww();
|
||||||
|
@ -658,16 +658,16 @@ L("loop");
|
||||||
shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||||
shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||||
|
|
||||||
movaps(xmmword[eax + 16], xmm4);
|
movaps(ptr[eax + 16], xmm4);
|
||||||
movaps(xmmword[edx + 16], xmm5);
|
movaps(ptr[edx + 16], xmm5);
|
||||||
|
|
||||||
if(tme)
|
if(tme)
|
||||||
{
|
{
|
||||||
// m_min.t = tmin;
|
// m_min.t = tmin;
|
||||||
// m_max.t = tmax;
|
// m_max.t = tmax;
|
||||||
|
|
||||||
movaps(xmmword[eax + 32], xmm6);
|
movaps(ptr[eax + 32], xmm6);
|
||||||
movaps(xmmword[edx + 32], xmm7);
|
movaps(ptr[edx + 32], xmm7);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
|
|
||||||
class GSState;
|
class GSState;
|
||||||
|
|
||||||
__aligned16 class GSVertexTrace
|
__aligned32 class GSVertexTrace
|
||||||
{
|
{
|
||||||
struct Vertex {GSVector4i c; GSVector4 p, t;};
|
struct Vertex {GSVector4i c; GSVector4 p, t;};
|
||||||
struct VertexAlpha {int min, max; bool valid;};
|
struct VertexAlpha {int min, max; bool valid;};
|
||||||
|
@ -41,14 +41,14 @@ __aligned16 class GSVertexTrace
|
||||||
class CGSW : public Xbyak::CodeGenerator
|
class CGSW : public Xbyak::CodeGenerator
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
CGSW(uint32 key, void* ptr, size_t maxsize);
|
CGSW(uint32 key, void* code, size_t maxsize);
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSVertexTraceMapSW : public GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr>
|
class GSVertexTraceMapSW : public GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
GSVertexTraceMapSW() : GSCodeGeneratorFunctionMap("VertexTraceSW") {}
|
GSVertexTraceMapSW() : GSCodeGeneratorFunctionMap("VertexTraceSW") {}
|
||||||
CGSW* Create(uint32 key, void* ptr, size_t maxsize) {return new CGSW(key, ptr, maxsize);}
|
CGSW* Create(uint32 key, void* code, size_t maxsize) {return new CGSW(key, code, maxsize);}
|
||||||
};
|
};
|
||||||
|
|
||||||
class CGHW9 : public Xbyak::CodeGenerator
|
class CGHW9 : public Xbyak::CodeGenerator
|
||||||
|
@ -63,7 +63,7 @@ __aligned16 class GSVertexTrace
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
GSVertexTraceMapHW9() : GSCodeGeneratorFunctionMap("VertexTraceHW9") {}
|
GSVertexTraceMapHW9() : GSCodeGeneratorFunctionMap("VertexTraceHW9") {}
|
||||||
CGHW9* Create(uint32 key, void* ptr, size_t maxsize) {return new CGHW9(key, ptr, maxsize);}
|
CGHW9* Create(uint32 key, void* code, size_t maxsize) {return new CGHW9(key, code, maxsize);}
|
||||||
};
|
};
|
||||||
|
|
||||||
class CGHW11 : public Xbyak::CodeGenerator
|
class CGHW11 : public Xbyak::CodeGenerator
|
||||||
|
@ -78,7 +78,7 @@ __aligned16 class GSVertexTrace
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
GSVertexTraceMapHW11() : GSCodeGeneratorFunctionMap("VertexTraceHW11") {}
|
GSVertexTraceMapHW11() : GSCodeGeneratorFunctionMap("VertexTraceHW11") {}
|
||||||
CGHW11* Create(uint32 key, void* ptr, size_t maxsize) {return new CGHW11(key, ptr, maxsize);}
|
CGHW11* Create(uint32 key, void* code, size_t maxsize) {return new CGHW11(key, code, maxsize);}
|
||||||
};
|
};
|
||||||
|
|
||||||
GSVertexTraceMapSW m_map_sw;
|
GSVertexTraceMapSW m_map_sw;
|
||||||
|
|
|
@ -174,6 +174,7 @@ GSVector4i GSWnd::GetClientRect()
|
||||||
|
|
||||||
// Returns FALSE if the window has no title, or if th window title is under the strict
|
// Returns FALSE if the window has no title, or if th window title is under the strict
|
||||||
// management of the emulator.
|
// management of the emulator.
|
||||||
|
|
||||||
bool GSWnd::SetWindowText(const char* title)
|
bool GSWnd::SetWindowText(const char* title)
|
||||||
{
|
{
|
||||||
if( !m_IsManaged ) return false;
|
if( !m_IsManaged ) return false;
|
||||||
|
|
|
@ -40,4 +40,4 @@ EXPORTS
|
||||||
GSgetLastTag
|
GSgetLastTag
|
||||||
GSReplay
|
GSReplay
|
||||||
GSBenchmark
|
GSBenchmark
|
||||||
GSgetTitleInfo2
|
GSgetTitleInfo2
|
||||||
|
|
|
@ -57,6 +57,7 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
// Let's take advantage of the work that's already been done on making things cross-platform by bringing this in.
|
// Let's take advantage of the work that's already been done on making things cross-platform by bringing this in.
|
||||||
|
|
||||||
#include "Pcsx2Defs.h"
|
#include "Pcsx2Defs.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -126,7 +127,7 @@ typedef signed long long int64;
|
||||||
|
|
||||||
#define D3DCOLORWRITEENABLE_RGBA (D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_ALPHA)
|
#define D3DCOLORWRITEENABLE_RGBA (D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_ALPHA)
|
||||||
|
|
||||||
#define USE_UPSCALE_HACKS //Hacks intended to fix upscaling / rendering glitches in HW renderers
|
#define USE_UPSCALE_HACKS // Hacks intended to fix upscaling / rendering glitches in HW renderers
|
||||||
|
|
||||||
// dxsdk beta missing these:
|
// dxsdk beta missing these:
|
||||||
#define D3D11_SHADER_MACRO D3D10_SHADER_MACRO
|
#define D3D11_SHADER_MACRO D3D10_SHADER_MACRO
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
#ifndef XBYAK_H_
|
#ifndef XBYAK_XBYAK_H_
|
||||||
#define XBYAK_H_
|
#define XBYAK_XBYAK_H_
|
||||||
/*!
|
/*!
|
||||||
@file xbyak.h
|
@file xbyak.h
|
||||||
@brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
|
@brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
|
||||||
@author herumi
|
@author herumi
|
||||||
@version $Revision: 1.157 $
|
@version $Revision: 1.238 $
|
||||||
@url http://homepage1.nifty.com/herumi/soft/xbyak.html
|
@url http://homepage1.nifty.com/herumi/soft/xbyak.html
|
||||||
@date $Date: 2008/12/30 04:53:11 $
|
@date $Date: 2011/02/04 03:46:09 $
|
||||||
@note modified new BSD license
|
@note modified new BSD license
|
||||||
http://www.opensource.org/licenses/bsd-license.php
|
http://www.opensource.org/licenses/bsd-license.php
|
||||||
*/
|
*/
|
||||||
|
@ -15,9 +15,12 @@
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#ifdef __GNUC__
|
#include <algorithm>
|
||||||
#include <unistd.h>
|
#ifdef _WIN32
|
||||||
#include <sys/mman.h>
|
#include <windows.h>
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __x86_64__
|
#ifdef __x86_64__
|
||||||
|
@ -45,13 +48,6 @@
|
||||||
#pragma warning(disable : 4127) /* condition is constant(for "if" trick) */
|
#pragma warning(disable : 4127) /* condition is constant(for "if" trick) */
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#include <windows.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef NUM_OF_ARRAY
|
|
||||||
// template<class T, int N>
|
|
||||||
// size_t num_of_array(const T (&)[N]) { return N; }
|
|
||||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Xbyak {
|
namespace Xbyak {
|
||||||
|
@ -59,29 +55,35 @@ namespace Xbyak {
|
||||||
#include "xbyak_bin2hex.h"
|
#include "xbyak_bin2hex.h"
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DEFAULT_MAX_CODE_SIZE = 2048,
|
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||||
VERSION = 0x2070, /* 0xABCD = A.BC(D) */
|
VERSION = 0x2990, /* 0xABCD = A.BC(D) */
|
||||||
};
|
};
|
||||||
/*
|
/*
|
||||||
#ifndef MIE_DEFINED_UINT32
|
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||||
#define MIE_DEFINED_UINT32
|
#define MIE_INTEGER_TYPE_DEFINED
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
typedef unsigned __int64 uint64;
|
typedef unsigned __int64 uint64;
|
||||||
#else
|
typedef __int64 sint64;
|
||||||
typedef unsigned long long uint64;
|
#else
|
||||||
#endif
|
typedef unsigned long long uint64;
|
||||||
typedef unsigned int uint32;
|
typedef long long sint64;
|
||||||
typedef unsigned short uint16;
|
#endif
|
||||||
typedef unsigned char uint8;
|
typedef unsigned int uint32;
|
||||||
#ifndef MIE_ALIGN
|
typedef unsigned short uint16;
|
||||||
#ifdef _MSC_VER
|
typedef unsigned char uint8;
|
||||||
#define MIE_ALIGN(x) __declspec(align(x))
|
|
||||||
#else
|
|
||||||
#define MIE_ALIGN(x) __attribute__((aligned(x)))
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
*/
|
*/
|
||||||
|
#ifndef MIE_ALIGN
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define MIE_ALIGN(x) __declspec(align(x))
|
||||||
|
#else
|
||||||
|
#define MIE_ALIGN(x) __attribute__((aligned(x)))
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#ifndef MIE_PACK // for shufps
|
||||||
|
#define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w))
|
||||||
|
#endif
|
||||||
|
|
||||||
enum Error {
|
enum Error {
|
||||||
ERR_NONE = 0,
|
ERR_NONE = 0,
|
||||||
ERR_BAD_ADDRESSING,
|
ERR_BAD_ADDRESSING,
|
||||||
|
@ -101,6 +103,10 @@ enum Error {
|
||||||
ERR_CANT_USE_64BIT_DISP,
|
ERR_CANT_USE_64BIT_DISP,
|
||||||
ERR_OFFSET_IS_TOO_BIG,
|
ERR_OFFSET_IS_TOO_BIG,
|
||||||
ERR_MEM_SIZE_IS_NOT_SPECIFIED,
|
ERR_MEM_SIZE_IS_NOT_SPECIFIED,
|
||||||
|
ERR_BAD_MEM_SIZE,
|
||||||
|
ERR_BAD_ST_COMBINATION,
|
||||||
|
ERR_OVER_LOCAL_LABEL,
|
||||||
|
ERR_UNDER_LOCAL_LABEL,
|
||||||
ERR_INTERNAL
|
ERR_INTERNAL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -125,6 +131,10 @@ static inline const char *ConvertErrorToString(Error err)
|
||||||
"can't use 64bit disp(use (void*))",
|
"can't use 64bit disp(use (void*))",
|
||||||
"offset is too big",
|
"offset is too big",
|
||||||
"MEM size is not specified",
|
"MEM size is not specified",
|
||||||
|
"bad mem size",
|
||||||
|
"bad st combination",
|
||||||
|
"over local label",
|
||||||
|
"under local label",
|
||||||
"internal error",
|
"internal error",
|
||||||
};
|
};
|
||||||
if (err < 0 || err > ERR_INTERNAL) return 0;
|
if (err < 0 || err > ERR_INTERNAL) return 0;
|
||||||
|
@ -135,7 +145,7 @@ namespace inner {
|
||||||
|
|
||||||
enum { debug = 1 };
|
enum { debug = 1 };
|
||||||
|
|
||||||
static inline uint32 GetPtrDist(const void *p1, const void *p2 = 0)
|
static inline uint32 GetPtrDist(const void *p1, const void *p2)
|
||||||
{
|
{
|
||||||
uint64 diff = static_cast<const char *>(p1) - static_cast<const char *>(p2);
|
uint64 diff = static_cast<const char *>(p1) - static_cast<const char *>(p2);
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
|
@ -145,6 +155,7 @@ static inline uint32 GetPtrDist(const void *p1, const void *p2 = 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; }
|
static inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; }
|
||||||
|
static inline bool IsInInt32(uint64 x) { return 0xFFFFFFFF80000000ULL <= x || x <= 0x7FFFFFFFU; }
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -163,7 +174,8 @@ public:
|
||||||
REG = 1 << 3,
|
REG = 1 << 3,
|
||||||
MMX = 1 << 4,
|
MMX = 1 << 4,
|
||||||
XMM = 1 << 5,
|
XMM = 1 << 5,
|
||||||
FPU = 1 << 6
|
FPU = 1 << 6,
|
||||||
|
YMM = 1 << 7
|
||||||
};
|
};
|
||||||
enum Code {
|
enum Code {
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
|
@ -191,10 +203,11 @@ public:
|
||||||
bool isNone() const { return kind_ == 0; }
|
bool isNone() const { return kind_ == 0; }
|
||||||
bool isMMX() const { return is(MMX); }
|
bool isMMX() const { return is(MMX); }
|
||||||
bool isXMM() const { return is(XMM); }
|
bool isXMM() const { return is(XMM); }
|
||||||
|
bool isYMM() const { return is(YMM); }
|
||||||
bool isREG(int bit = 0) const { return is(REG, bit); }
|
bool isREG(int bit = 0) const { return is(REG, bit); }
|
||||||
bool isMEM(int bit = 0) const { return is(MEM, bit); }
|
bool isMEM(int bit = 0) const { return is(MEM, bit); }
|
||||||
|
bool isFPU() const { return is(FPU); }
|
||||||
bool isExt8bit() const { return ext8bit_ != 0; }
|
bool isExt8bit() const { return ext8bit_ != 0; }
|
||||||
Operand changeBit(int bit) const { return Operand(idx_, static_cast<Kind>(kind_), bit, ext8bit_); }
|
|
||||||
// any bit is accetable if bit == 0
|
// any bit is accetable if bit == 0
|
||||||
bool is(int kind, uint32 bit = 0) const
|
bool is(int kind, uint32 bit = 0) const
|
||||||
{
|
{
|
||||||
|
@ -216,12 +229,18 @@ public:
|
||||||
{ "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
|
{ "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
|
||||||
};
|
};
|
||||||
return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx_];
|
return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx_];
|
||||||
} else if (isMMX()) {
|
} else if (isYMM()) {
|
||||||
static const char tbl[8][4] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" };
|
static const char tbl[16][5] = { "ym0", "ym1", "ym2", "ym3", "ym4", "ym5", "ym6", "ym7", "ym8", "ym9", "ym10", "ym11", "ym12", "ym13", "ym14", "ym15" };
|
||||||
return tbl[idx_];
|
return tbl[idx_];
|
||||||
} else if (isXMM()) {
|
} else if (isXMM()) {
|
||||||
static const char tbl[16][5] = { "xm0", "xm1", "xm2", "xm3", "xm4", "xm5", "xm6", "xm7", "xm8", "xm9", "xm10", "xm11", "xm12", "xm13", "xm14", "xm15" };
|
static const char tbl[16][5] = { "xm0", "xm1", "xm2", "xm3", "xm4", "xm5", "xm6", "xm7", "xm8", "xm9", "xm10", "xm11", "xm12", "xm13", "xm14", "xm15" };
|
||||||
return tbl[idx_];
|
return tbl[idx_];
|
||||||
|
} else if (isMMX()) {
|
||||||
|
static const char tbl[8][4] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" };
|
||||||
|
return tbl[idx_];
|
||||||
|
} else if (isFPU()) {
|
||||||
|
static const char tbl[8][4] = { "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7" };
|
||||||
|
return tbl[idx_];
|
||||||
}
|
}
|
||||||
throw ERR_INTERNAL;
|
throw ERR_INTERNAL;
|
||||||
}
|
}
|
||||||
|
@ -229,14 +248,15 @@ public:
|
||||||
|
|
||||||
class Reg : public Operand {
|
class Reg : public Operand {
|
||||||
void operator=(const Reg&);
|
void operator=(const Reg&);
|
||||||
|
bool hasRex() const { return isExt8bit() | isREG(64) | isExtIdx(); }
|
||||||
public:
|
public:
|
||||||
Reg() { }
|
Reg() { }
|
||||||
Reg(int idx, Kind kind, int bit = 0, int ext8bit = 0) : Operand(idx, kind, bit, ext8bit) { }
|
Reg(int idx, Kind kind, int bit = 0, int ext8bit = 0) : Operand(idx, kind, bit, ext8bit) { }
|
||||||
// reg = this
|
Reg changeBit(int bit) const { return Reg(getIdx(), getKind(), bit, isExt8bit()); }
|
||||||
uint8 getRex(const Reg& index = Reg(), const Reg& base = Reg()) const
|
bool isExtIdx() const { return getIdx() > 7; }
|
||||||
|
uint8 getRex(const Reg& base = Reg()) const
|
||||||
{
|
{
|
||||||
if ((!isExt8bit() && !index.isExt8bit() && !base.isExt8bit()) && (getIdx() | index.getIdx() | base.getIdx()) < 8) return 0;
|
return (hasRex() || base.hasRex()) ? uint8(0x40 | ((isREG(64) | base.isREG(64)) ? 8 : 0) | (isExtIdx() ? 4 : 0)| (base.isExtIdx() ? 1 : 0)) : 0;
|
||||||
return uint8(0x40 | ((getIdx() >> 3) << 2)| ((index.getIdx() >> 3) << 1) | (base.getIdx() >> 3));
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -261,7 +281,19 @@ public:
|
||||||
class Xmm : public Mmx {
|
class Xmm : public Mmx {
|
||||||
void operator=(const Xmm&);
|
void operator=(const Xmm&);
|
||||||
public:
|
public:
|
||||||
explicit Xmm(int idx) : Mmx(idx, Operand::XMM, 128) { }
|
explicit Xmm(int idx, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
class Ymm : public Xmm {
|
||||||
|
void operator=(const Ymm&);
|
||||||
|
public:
|
||||||
|
explicit Ymm(int idx) : Xmm(idx, Operand::YMM, 256) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
class Fpu : public Reg {
|
||||||
|
void operator=(const Fpu&);
|
||||||
|
public:
|
||||||
|
explicit Fpu(int idx) : Reg(idx, Operand::FPU, 32) { }
|
||||||
};
|
};
|
||||||
|
|
||||||
// register for addressing(32bit or 64bit)
|
// register for addressing(32bit or 64bit)
|
||||||
|
@ -307,7 +339,7 @@ private:
|
||||||
{
|
{
|
||||||
return operator+(r, -static_cast<int>(disp));
|
return operator+(r, -static_cast<int>(disp));
|
||||||
}
|
}
|
||||||
void operator=(const Reg32e&); // don't call
|
void operator=(const Reg32e&);
|
||||||
public:
|
public:
|
||||||
explicit Reg32e(int idx, int bit)
|
explicit Reg32e(int idx, int bit)
|
||||||
: Reg(idx, REG, bit)
|
: Reg(idx, REG, bit)
|
||||||
|
@ -362,7 +394,7 @@ struct RegRip {
|
||||||
|
|
||||||
class CodeArray {
|
class CodeArray {
|
||||||
enum {
|
enum {
|
||||||
ALIGN_SIZE = 16,
|
ALIGN_PAGE_SIZE = 4096,
|
||||||
MAX_FIXED_BUF_SIZE = 8
|
MAX_FIXED_BUF_SIZE = 8
|
||||||
};
|
};
|
||||||
enum Type {
|
enum Type {
|
||||||
|
@ -381,13 +413,12 @@ protected:
|
||||||
public:
|
public:
|
||||||
CodeArray(size_t maxSize = MAX_FIXED_BUF_SIZE, void *userPtr = 0)
|
CodeArray(size_t maxSize = MAX_FIXED_BUF_SIZE, void *userPtr = 0)
|
||||||
: type_(userPtr ? USER_BUF : maxSize <= MAX_FIXED_BUF_SIZE ? FIXED_BUF : ALLOC_BUF)
|
: type_(userPtr ? USER_BUF : maxSize <= MAX_FIXED_BUF_SIZE ? FIXED_BUF : ALLOC_BUF)
|
||||||
, allocPtr_(type_ == ALLOC_BUF ? new uint8[maxSize + ALIGN_SIZE] : 0)
|
, allocPtr_(type_ == ALLOC_BUF ? new uint8[maxSize + ALIGN_PAGE_SIZE] : 0)
|
||||||
, maxSize_(maxSize)
|
, maxSize_(maxSize)
|
||||||
, top_(type_ == ALLOC_BUF ? getAlignedAddress(allocPtr_) : type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : buf_)
|
, top_(type_ == ALLOC_BUF ? getAlignedAddress(allocPtr_, ALIGN_PAGE_SIZE) : type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : buf_)
|
||||||
, size_(0)
|
, size_(0)
|
||||||
{
|
{
|
||||||
if (type_ == ALLOC_BUF && !protect(top_, maxSize, true)) {
|
if (type_ == ALLOC_BUF && !protect(top_, maxSize, true)) {
|
||||||
// fprintf(stderr, "can't protect (addr=%p, size=%u, canExec=%d)\n", addr, size, canExec);
|
|
||||||
throw ERR_CANT_PROTECT;
|
throw ERR_CANT_PROTECT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -452,19 +483,19 @@ public:
|
||||||
/*
|
/*
|
||||||
@param data [in] address of jmp data
|
@param data [in] address of jmp data
|
||||||
@param disp [in] offset from the next of jmp
|
@param disp [in] offset from the next of jmp
|
||||||
@param isShort [in] true if short jmp
|
@param size [in] write size(1, 2, 4, 8)
|
||||||
*/
|
*/
|
||||||
void rewrite(uint8 *data, uint32 disp, bool isShort)
|
void rewrite(uint8 *data, uint64 disp, size_t size)
|
||||||
{
|
{
|
||||||
if (isShort) {
|
if (size != 1 && size != 2 && size != 4 && size != 8) throw ERR_BAD_PARAMETER;
|
||||||
data[0] = static_cast<uint8>(disp);
|
for (size_t i = 0; i < size; i++) {
|
||||||
} else {
|
data[i] = static_cast<uint8>(disp >> (i * 8));
|
||||||
data[0] = static_cast<uint8>(disp);
|
|
||||||
data[1] = static_cast<uint8>(disp >> 8);
|
|
||||||
data[2] = static_cast<uint8>(disp >> 16);
|
|
||||||
data[3] = static_cast<uint8>(disp >> 24);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void updateRegField(uint8 regIdx) const
|
||||||
|
{
|
||||||
|
*top_ = (*top_ & B11000111) | ((regIdx << 3) & B00111000);
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
change exec permission of memory
|
change exec permission of memory
|
||||||
@param addr [in] buffer address
|
@param addr [in] buffer address
|
||||||
|
@ -474,15 +505,15 @@ public:
|
||||||
*/
|
*/
|
||||||
static inline bool protect(const void *addr, size_t size, bool canExec)
|
static inline bool protect(const void *addr, size_t size, bool canExec)
|
||||||
{
|
{
|
||||||
#ifdef __GNUC__
|
#if defined(_WIN32)
|
||||||
|
DWORD oldProtect;
|
||||||
|
return VirtualProtect(const_cast<void*>(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0;
|
||||||
|
#elif defined(__GNUC__)
|
||||||
size_t pageSize = sysconf(_SC_PAGESIZE);
|
size_t pageSize = sysconf(_SC_PAGESIZE);
|
||||||
size_t iaddr = reinterpret_cast<size_t>(addr);
|
size_t iaddr = reinterpret_cast<size_t>(addr);
|
||||||
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
||||||
int mode = PROT_READ | PROT_WRITE | (canExec ? PROT_EXEC : 0);
|
int mode = PROT_READ | PROT_WRITE | (canExec ? PROT_EXEC : 0);
|
||||||
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
||||||
#elif defined(_WIN32)
|
|
||||||
DWORD oldProtect;
|
|
||||||
return VirtualProtect(const_cast<void*>(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0;
|
|
||||||
#else
|
#else
|
||||||
return true;
|
return true;
|
||||||
#endif
|
#endif
|
||||||
|
@ -493,7 +524,7 @@ public:
|
||||||
@param alingedSize [in] power of two
|
@param alingedSize [in] power of two
|
||||||
@return aligned addr by alingedSize
|
@return aligned addr by alingedSize
|
||||||
*/
|
*/
|
||||||
static inline uint8 *getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE)
|
static inline uint8 *getAlignedAddress(uint8 *addr, size_t alignedSize = 16)
|
||||||
{
|
{
|
||||||
return reinterpret_cast<uint8*>((reinterpret_cast<size_t>(addr) + alignedSize - 1) & ~(alignedSize - static_cast<size_t>(1)));
|
return reinterpret_cast<uint8*>((reinterpret_cast<size_t>(addr) + alignedSize - 1) & ~(alignedSize - static_cast<size_t>(1)));
|
||||||
}
|
}
|
||||||
|
@ -521,11 +552,7 @@ public:
|
||||||
uint64 getDisp() const { return disp_; }
|
uint64 getDisp() const { return disp_; }
|
||||||
uint8 getRex() const { return rex_; }
|
uint8 getRex() const { return rex_; }
|
||||||
bool is64bitDisp() const { return is64bitDisp_; } // for moffset
|
bool is64bitDisp() const { return is64bitDisp_; } // for moffset
|
||||||
#ifdef XBYAK64
|
|
||||||
void setRex(uint8 rex) { rex_ = rex; }
|
void setRex(uint8 rex) { rex_ = rex; }
|
||||||
#else
|
|
||||||
void setRex(uint8) { }
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class AddressFrame {
|
class AddressFrame {
|
||||||
|
@ -536,7 +563,11 @@ public:
|
||||||
explicit AddressFrame(uint32 bit) : bit_(bit) { }
|
explicit AddressFrame(uint32 bit) : bit_(bit) { }
|
||||||
Address operator[](const void *disp) const
|
Address operator[](const void *disp) const
|
||||||
{
|
{
|
||||||
Reg32e r(Reg(), Reg(), 0, inner::GetPtrDist(disp));
|
size_t adr = reinterpret_cast<size_t>(disp);
|
||||||
|
#ifdef XBYAK64
|
||||||
|
if (adr > 0xFFFFFFFFU) throw ERR_OFFSET_IS_TOO_BIG;
|
||||||
|
#endif
|
||||||
|
Reg32e r(Reg(), Reg(), 0, static_cast<uint32>(adr));
|
||||||
return operator[](r);
|
return operator[](r);
|
||||||
}
|
}
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
|
@ -587,7 +618,8 @@ public:
|
||||||
} else if (mod == mod10 || (mod == mod00 && r.isNone())) {
|
} else if (mod == mod10 || (mod == mod00 && r.isNone())) {
|
||||||
frame.dd(r.disp_);
|
frame.dd(r.disp_);
|
||||||
}
|
}
|
||||||
frame.setRex(Reg().getRex(r.index_, r));
|
uint8 rex = ((r.getIdx() | r.index_.getIdx()) < 8) ? 0 : uint8(0x40 | ((r.index_.getIdx() >> 3) << 1) | (r.getIdx() >> 3));
|
||||||
|
frame.setRex(rex);
|
||||||
return frame;
|
return frame;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -600,6 +632,12 @@ struct JmpLabel {
|
||||||
class Label {
|
class Label {
|
||||||
CodeArray *base_;
|
CodeArray *base_;
|
||||||
int anonymousCount_; // for @@, @f, @b
|
int anonymousCount_; // for @@, @f, @b
|
||||||
|
enum {
|
||||||
|
maxStack = 10
|
||||||
|
};
|
||||||
|
int stack_[maxStack];
|
||||||
|
int stackPos_;
|
||||||
|
int usedCount_;
|
||||||
int localCount_; // for .***
|
int localCount_; // for .***
|
||||||
typedef std::map<const std::string, const uint8*> DefinedList;
|
typedef std::map<const std::string, const uint8*> DefinedList;
|
||||||
typedef std::multimap<const std::string, const JmpLabel> UndefinedList;
|
typedef std::multimap<const std::string, const JmpLabel> UndefinedList;
|
||||||
|
@ -628,15 +666,22 @@ public:
|
||||||
Label()
|
Label()
|
||||||
: base_(0)
|
: base_(0)
|
||||||
, anonymousCount_(0)
|
, anonymousCount_(0)
|
||||||
|
, stackPos_(1)
|
||||||
|
, usedCount_(0)
|
||||||
, localCount_(0)
|
, localCount_(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
void incLocalCount() { localCount_++; }
|
void enterLocal()
|
||||||
void decLocalCount() { localCount_--; }
|
|
||||||
void set(CodeArray *base)
|
|
||||||
{
|
{
|
||||||
base_ = base;
|
if (stackPos_ == maxStack) throw ERR_OVER_LOCAL_LABEL;
|
||||||
|
localCount_ = stack_[stackPos_++] = ++usedCount_;
|
||||||
}
|
}
|
||||||
|
void leaveLocal()
|
||||||
|
{
|
||||||
|
if (stackPos_ == 1) throw ERR_UNDER_LOCAL_LABEL;
|
||||||
|
localCount_ = stack_[--stackPos_ - 1];
|
||||||
|
}
|
||||||
|
void set(CodeArray *base) { base_ = base; }
|
||||||
void define(const char *label, const uint8 *address)
|
void define(const char *label, const uint8 *address)
|
||||||
{
|
{
|
||||||
std::string newLabel(label);
|
std::string newLabel(label);
|
||||||
|
@ -657,8 +702,9 @@ public:
|
||||||
const JmpLabel *jmp = &itr->second;
|
const JmpLabel *jmp = &itr->second;
|
||||||
uint32 disp = inner::GetPtrDist(address, jmp->endOfJmp);
|
uint32 disp = inner::GetPtrDist(address, jmp->endOfJmp);
|
||||||
if (jmp->isShort && !inner::IsInDisp8(disp)) throw ERR_LABEL_IS_TOO_FAR;
|
if (jmp->isShort && !inner::IsInDisp8(disp)) throw ERR_LABEL_IS_TOO_FAR;
|
||||||
uint8 *data = jmp->endOfJmp - (jmp->isShort ? 1 : 4);
|
size_t jmpSize = jmp->isShort ? 1 : 4;
|
||||||
base_->rewrite(data, disp, jmp->isShort);
|
uint8 *data = jmp->endOfJmp - jmpSize;
|
||||||
|
base_->rewrite(data, disp, jmpSize);
|
||||||
undefinedList_.erase(itr);
|
undefinedList_.erase(itr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -689,22 +735,22 @@ public:
|
||||||
static inline std::string toStr(int num)
|
static inline std::string toStr(int num)
|
||||||
{
|
{
|
||||||
char buf[16];
|
char buf[16];
|
||||||
static const char fmt[] = ".%08x";
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#if _MSC_VER < 1400
|
#if _MSC_VER < 1400
|
||||||
_snprintf(buf, sizeof(buf), fmt, num);
|
_snprintf
|
||||||
#else
|
#else
|
||||||
_snprintf_s(buf, sizeof(buf), fmt, num);
|
_snprintf_s
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
snprintf(buf, sizeof(buf), fmt, num);
|
snprintf
|
||||||
#endif
|
#endif
|
||||||
|
(buf, sizeof(buf), ".%08x", num);
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class CodeGenerator : public CodeArray {
|
class CodeGenerator : public CodeArray {
|
||||||
protected:
|
public:
|
||||||
enum LabelType {
|
enum LabelType {
|
||||||
T_SHORT,
|
T_SHORT,
|
||||||
T_NEAR,
|
T_NEAR,
|
||||||
|
@ -747,35 +793,43 @@ private:
|
||||||
{
|
{
|
||||||
return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM());
|
return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM());
|
||||||
}
|
}
|
||||||
void if16bit(const Operand& reg1, const Operand& reg2)
|
|
||||||
{
|
|
||||||
// except movsx(16bit, 32/64bit)
|
|
||||||
if ((reg1.isBit(16) && !reg2.isBit(i32e)) || (reg2.isBit(16) && !reg1.isBit(i32e))) db(0x66);
|
|
||||||
}
|
|
||||||
void rexAddr(const Address& addr, const Reg& reg = Reg())
|
|
||||||
{
|
|
||||||
#ifdef XBYAK64
|
|
||||||
if (addr.is32bit_) db(0x67);
|
|
||||||
#endif
|
|
||||||
if16bit(reg, addr);
|
|
||||||
uint32 rex = addr.getRex() | reg.getRex();
|
|
||||||
if (reg.isREG(64)) rex |= 0x48;
|
|
||||||
if (rex) db(rex);
|
|
||||||
}
|
|
||||||
void rex(const Operand& op1, const Operand& op2 = Operand())
|
void rex(const Operand& op1, const Operand& op2 = Operand())
|
||||||
{
|
{
|
||||||
if (op1.isMEM()) {
|
uint8 rex = 0;
|
||||||
rexAddr(static_cast<const Address&>(op1), static_cast<const Reg&>(op2));
|
const Operand *p1 = &op1, *p2 = &op2;
|
||||||
} else if (op2.isMEM()) {
|
if (p1->isMEM()) std::swap(p1, p2);
|
||||||
rexAddr(static_cast<const Address&>(op2), static_cast<const Reg&>(op1));
|
if (p1->isMEM()) throw ERR_BAD_COMBINATION;
|
||||||
|
if (p2->isMEM()) {
|
||||||
|
const Address& addr = static_cast<const Address&>(*p2);
|
||||||
|
if (BIT == 64 && addr.is32bit_) db(0x67);
|
||||||
|
rex = addr.getRex() | static_cast<const Reg&>(*p1).getRex();
|
||||||
} else {
|
} else {
|
||||||
const Reg& reg1 = static_cast<const Reg&>(op1);
|
|
||||||
const Reg& reg2 = static_cast<const Reg&>(op2);
|
|
||||||
// ModRM(reg, base);
|
// ModRM(reg, base);
|
||||||
if16bit(reg1, reg2);
|
rex = static_cast<const Reg&>(op2).getRex(static_cast<const Reg&>(op1));
|
||||||
uint8 rex = reg2.getRex(Reg(), reg1);
|
}
|
||||||
if (reg1.isREG(64) || reg2.isREG(64)) rex |= 0x48;
|
// except movsx(16bit, 32/64bit)
|
||||||
if (rex) db(rex);
|
if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66);
|
||||||
|
if (rex) db(rex);
|
||||||
|
}
|
||||||
|
enum AVXtype {
|
||||||
|
PP_NONE = 1 << 0,
|
||||||
|
PP_66 = 1 << 1,
|
||||||
|
PP_F3 = 1 << 2,
|
||||||
|
PP_F2 = 1 << 3,
|
||||||
|
MM_RESERVED = 1 << 4,
|
||||||
|
MM_0F = 1 << 5,
|
||||||
|
MM_0F38 = 1 << 6,
|
||||||
|
MM_0F3A = 1 << 7
|
||||||
|
};
|
||||||
|
void vex(bool r, int idx, bool is256, int type, bool x = false, bool b = false, int w = 1)
|
||||||
|
{
|
||||||
|
uint32 pp = (type & PP_66) ? 1 : (type & PP_F3) ? 2 : (type & PP_F2) ? 3 : 0;
|
||||||
|
uint32 vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
|
||||||
|
if (!b && !x && !w && (type & MM_0F)) {
|
||||||
|
db(0xC5); db((r ? 0 : 0x80) | vvvv);
|
||||||
|
} else {
|
||||||
|
uint32 mmmm = (type & MM_0F) ? 1 : (type & MM_0F38) ? 2 : (type & MM_0F3A) ? 3 : 0;
|
||||||
|
db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Label label_;
|
Label label_;
|
||||||
|
@ -792,10 +846,8 @@ private:
|
||||||
if (addr.is64bitDisp()) throw ERR_CANT_USE_64BIT_DISP;
|
if (addr.is64bitDisp()) throw ERR_CANT_USE_64BIT_DISP;
|
||||||
rex(addr, reg);
|
rex(addr, reg);
|
||||||
db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
|
db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
|
||||||
uint8 t = *addr.getCode();
|
addr.updateRegField(static_cast<uint8>(reg.getIdx()));
|
||||||
assert((t & ~0xC7) == 0); /* 0b11000111 */
|
db(addr.getCode(), static_cast<int>(addr.getSize()));
|
||||||
db(t | ((reg.getIdx() & 7) << 3)); // update reg field
|
|
||||||
db(addr.getCode() + 1, static_cast<int>(addr.getSize()) - 1);
|
|
||||||
}
|
}
|
||||||
void opJmp(const char *label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
|
void opJmp(const char *label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
|
||||||
{
|
{
|
||||||
|
@ -835,13 +887,13 @@ private:
|
||||||
if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) {
|
if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) {
|
||||||
db(shortCode);
|
db(shortCode);
|
||||||
db(0);
|
db(0);
|
||||||
rewrite(top + shortHeaderSize, disp - shortJmpSize, true);
|
rewrite(top + shortHeaderSize, disp - shortJmpSize, 1);
|
||||||
} else {
|
} else {
|
||||||
if (type == T_SHORT) throw ERR_LABEL_IS_TOO_FAR;
|
if (type == T_SHORT) throw ERR_LABEL_IS_TOO_FAR;
|
||||||
if (longPref) db(longPref);
|
if (longPref) db(longPref);
|
||||||
db(longCode);
|
db(longCode);
|
||||||
dd(0);
|
dd(0);
|
||||||
rewrite(top + longHeaderSize, disp - longJmpSize, false);
|
rewrite(top + longHeaderSize, disp - longJmpSize, 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* preCode is for SSSE3/SSE4 */
|
/* preCode is for SSSE3/SSE4 */
|
||||||
|
@ -864,8 +916,7 @@ private:
|
||||||
}
|
}
|
||||||
void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE)
|
void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE)
|
||||||
{
|
{
|
||||||
pref = mmx.isXMM() ? pref : NONE;
|
opGen(mmx, op, code, mmx.isXMM() ? pref : NONE, isXMMorMMX_MEM, imm8, preCode);
|
||||||
opGen(mmx, op, code, pref, isXMMorMMX_MEM, imm8, preCode);
|
|
||||||
}
|
}
|
||||||
void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
|
void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
|
||||||
{
|
{
|
||||||
|
@ -887,14 +938,14 @@ private:
|
||||||
opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, B00111010);
|
opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, B00111010);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void opR_ModM(const Operand& op, int bit, uint8 mod, int ext, int code0, int code1 = NONE, int code2 = NONE)
|
void opR_ModM(const Operand& op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false)
|
||||||
{
|
{
|
||||||
|
int opBit = op.getBit();
|
||||||
|
if (disableRex && opBit == 64) opBit = 32;
|
||||||
if (op.isREG(bit)) {
|
if (op.isREG(bit)) {
|
||||||
rex(op);
|
opModR(Reg(ext, Operand::REG, opBit), static_cast<const Reg&>(op).changeBit(opBit), code0, code1, code2);
|
||||||
db(code0 | (op.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
|
|
||||||
db(getModRM(mod, ext, op.getIdx()));
|
|
||||||
} else if (op.isMEM()) {
|
} else if (op.isMEM()) {
|
||||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code0, code1, code2);
|
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, opBit), code0, code1, code2);
|
||||||
} else {
|
} else {
|
||||||
throw ERR_BAD_COMBINATION;
|
throw ERR_BAD_COMBINATION;
|
||||||
}
|
}
|
||||||
|
@ -902,13 +953,13 @@ private:
|
||||||
void opShift(const Operand& op, int imm, int ext)
|
void opShift(const Operand& op, int imm, int ext)
|
||||||
{
|
{
|
||||||
verifyMemHasSize(op);
|
verifyMemHasSize(op);
|
||||||
opR_ModM(op, 0, 3, ext, (B11000000 | ((imm == 1 ? 1 : 0) << 4)));
|
opR_ModM(op, 0, ext, (B11000000 | ((imm == 1 ? 1 : 0) << 4)));
|
||||||
if (imm != 1) db(imm);
|
if (imm != 1) db(imm);
|
||||||
}
|
}
|
||||||
void opShift(const Operand& op, const Reg8& cl, int ext)
|
void opShift(const Operand& op, const Reg8& cl, int ext)
|
||||||
{
|
{
|
||||||
if (cl.getIdx() != Operand::CL) throw ERR_BAD_COMBINATION;
|
if (cl.getIdx() != Operand::CL) throw ERR_BAD_COMBINATION;
|
||||||
opR_ModM(op, 0, 3, ext, B11010010);
|
opR_ModM(op, 0, ext, B11010010);
|
||||||
}
|
}
|
||||||
void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE)
|
void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE)
|
||||||
{
|
{
|
||||||
|
@ -941,20 +992,19 @@ private:
|
||||||
verifyMemHasSize(op);
|
verifyMemHasSize(op);
|
||||||
uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
|
uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
|
||||||
if (op.getBit() < immBit) throw ERR_IMM_IS_TOO_BIG;
|
if (op.getBit() < immBit) throw ERR_IMM_IS_TOO_BIG;
|
||||||
if (op.isREG()) {
|
if (op.isREG(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
|
||||||
if (immBit == 16 && op.isBit(32)) immBit = 32; /* don't use MEM16 if 32bit mode */
|
|
||||||
}
|
|
||||||
if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al
|
if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al
|
||||||
rex(op);
|
rex(op);
|
||||||
db(code | 4 | (immBit == 8 ? 0 : 1));
|
db(code | 4 | (immBit == 8 ? 0 : 1));
|
||||||
} else {
|
} else {
|
||||||
int tmp = (op.getBit() > immBit && 32 > immBit) ? 2 : 0;
|
int tmp = immBit < (std::min)(op.getBit(), 32U) ? 2 : 0;
|
||||||
opR_ModM(op, 0, 3, ext, B10000000 | tmp);
|
opR_ModM(op, 0, ext, B10000000 | tmp);
|
||||||
}
|
}
|
||||||
db(imm, immBit / 8);
|
db(imm, immBit / 8);
|
||||||
}
|
}
|
||||||
void opIncDec(const Operand& op, int code, int ext)
|
void opIncDec(const Operand& op, int code, int ext)
|
||||||
{
|
{
|
||||||
|
verifyMemHasSize(op);
|
||||||
#ifndef XBYAK64
|
#ifndef XBYAK64
|
||||||
if (op.isREG() && !op.isBit(8)) {
|
if (op.isREG() && !op.isBit(8)) {
|
||||||
rex(op); db(code | op.getIdx());
|
rex(op); db(code | op.getIdx());
|
||||||
|
@ -964,21 +1014,15 @@ private:
|
||||||
code = B11111110;
|
code = B11111110;
|
||||||
if (op.isREG()) {
|
if (op.isREG()) {
|
||||||
opModR(Reg(ext, Operand::REG, op.getBit()), static_cast<const Reg&>(op), code);
|
opModR(Reg(ext, Operand::REG, op.getBit()), static_cast<const Reg&>(op), code);
|
||||||
} else if (op.isMEM() && op.getBit() > 0) {
|
|
||||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code);
|
|
||||||
} else {
|
} else {
|
||||||
throw ERR_BAD_COMBINATION;
|
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void opPushPop(const Operand& op, int code, int ext, int alt)
|
void opPushPop(const Operand& op, int code, int ext, int alt)
|
||||||
{
|
{
|
||||||
if (op.isREG()) {
|
if (op.isREG()) {
|
||||||
#ifdef XBYAK64
|
|
||||||
if (op.isBit(16)) db(0x66);
|
if (op.isBit(16)) db(0x66);
|
||||||
if (static_cast<const Reg&>(op).getIdx() >= 8) db(0x41);
|
if (static_cast<const Reg&>(op).getIdx() >= 8) db(0x41);
|
||||||
#else
|
|
||||||
rex(op);
|
|
||||||
#endif
|
|
||||||
db(alt | (op.getIdx() & 7));
|
db(alt | (op.getIdx() & 7));
|
||||||
} else if (op.isMEM()) {
|
} else if (op.isMEM()) {
|
||||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code);
|
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code);
|
||||||
|
@ -990,16 +1034,51 @@ private:
|
||||||
{
|
{
|
||||||
if (op.isMEM() && op.getBit() == 0) throw ERR_MEM_SIZE_IS_NOT_SPECIFIED;
|
if (op.isMEM() && op.getBit() == 0) throw ERR_MEM_SIZE_IS_NOT_SPECIFIED;
|
||||||
}
|
}
|
||||||
protected:
|
void opMovxx(const Reg& reg, const Operand& op, uint8 code)
|
||||||
|
{
|
||||||
|
int w = op.isBit(16);
|
||||||
|
bool cond = reg.isREG() && (reg.getBit() > op.getBit());
|
||||||
|
opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
|
||||||
|
}
|
||||||
|
void opFpuMem(const Address& addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext)
|
||||||
|
{
|
||||||
|
if (addr.is64bitDisp()) throw ERR_CANT_USE_64BIT_DISP;
|
||||||
|
uint8 code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
|
||||||
|
if (!code) throw ERR_BAD_MEM_SIZE;
|
||||||
|
if (m64ext && addr.isBit(64)) ext = m64ext;
|
||||||
|
|
||||||
|
rex(addr, st0);
|
||||||
|
db(code);
|
||||||
|
addr.updateRegField(ext);
|
||||||
|
db(addr.getCode(), static_cast<int>(addr.getSize()));
|
||||||
|
}
|
||||||
|
// like yasm not nasm
|
||||||
|
// use code1 if reg1 == st0
|
||||||
|
// use code2 if reg1 != st0 && reg2 == st0
|
||||||
|
void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32 code1, uint32 code2)
|
||||||
|
{
|
||||||
|
uint32 code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0;
|
||||||
|
if (!code) throw ERR_BAD_ST_COMBINATION;
|
||||||
|
db(uint8(code >> 8));
|
||||||
|
db(uint8(code | (reg1.getIdx() | reg2.getIdx())));
|
||||||
|
}
|
||||||
|
void opFpu(const Fpu& reg, uint8 code1, uint8 code2)
|
||||||
|
{
|
||||||
|
db(code1); db(code2 | reg.getIdx());
|
||||||
|
}
|
||||||
|
public:
|
||||||
unsigned int getVersion() const { return VERSION; }
|
unsigned int getVersion() const { return VERSION; }
|
||||||
using CodeArray::db;
|
using CodeArray::db;
|
||||||
const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
|
const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
|
||||||
const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
||||||
|
const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
|
||||||
const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7;
|
const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7;
|
||||||
|
const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7;
|
||||||
const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
|
const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
|
||||||
const Reg16 ax, cx, dx, bx, sp, bp, si, di;
|
const Reg16 ax, cx, dx, bx, sp, bp, si, di;
|
||||||
const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
|
const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
|
||||||
const AddressFrame ptr, byte, word, dword, qword, xmmword;
|
const AddressFrame ptr, byte, word, dword, qword;
|
||||||
|
const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
|
const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
|
||||||
const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
|
const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
|
||||||
|
@ -1007,7 +1086,9 @@ protected:
|
||||||
const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b;
|
const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b;
|
||||||
const Reg8 spl, bpl, sil, dil;
|
const Reg8 spl, bpl, sil, dil;
|
||||||
const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
|
const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
|
||||||
const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15;
|
const Ymm ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
|
||||||
|
const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience
|
||||||
|
const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15;
|
||||||
const RegRip rip;
|
const RegRip rip;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1015,8 +1096,8 @@ protected:
|
||||||
{
|
{
|
||||||
label_.define(label, getCurr());
|
label_.define(label, getCurr());
|
||||||
}
|
}
|
||||||
void inLocalLabel() { label_.incLocalCount(); }
|
void inLocalLabel() { label_.enterLocal(); }
|
||||||
void outLocalLabel() { label_.decLocalCount(); }
|
void outLocalLabel() { label_.leaveLocal(); }
|
||||||
void jmp(const char *label, LabelType type = T_AUTO)
|
void jmp(const char *label, LabelType type = T_AUTO)
|
||||||
{
|
{
|
||||||
opJmp(label, type, B11101011, B11101001, 0);
|
opJmp(label, type, B11101011, B11101001, 0);
|
||||||
|
@ -1027,7 +1108,11 @@ protected:
|
||||||
}
|
}
|
||||||
void jmp(const Operand& op)
|
void jmp(const Operand& op)
|
||||||
{
|
{
|
||||||
opR_ModM(op, i32e, 3, 4, 0xFF);
|
opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true);
|
||||||
|
}
|
||||||
|
void call(const Operand& op)
|
||||||
|
{
|
||||||
|
opR_ModM(op, 16 | i32e, 2, 0xFF, NONE, NONE, true);
|
||||||
}
|
}
|
||||||
// (REG|MEM, REG)
|
// (REG|MEM, REG)
|
||||||
void test(const Operand& op, const Reg& reg)
|
void test(const Operand& op, const Reg& reg)
|
||||||
|
@ -1042,10 +1127,9 @@ protected:
|
||||||
rex(op);
|
rex(op);
|
||||||
db(B10101000 | (op.isBit(8) ? 0 : 1));
|
db(B10101000 | (op.isBit(8) ? 0 : 1));
|
||||||
} else {
|
} else {
|
||||||
opR_ModM(op, 0, 3, 0, B11110110);
|
opR_ModM(op, 0, 0, B11110110);
|
||||||
}
|
}
|
||||||
int size = op.getBit() / 8; if (size > 4) size = 4;
|
db(imm, (std::min)(op.getBit() / 8, 4U));
|
||||||
db(imm, size);
|
|
||||||
}
|
}
|
||||||
void ret(int imm = 0)
|
void ret(int imm = 0)
|
||||||
{
|
{
|
||||||
|
@ -1134,24 +1218,39 @@ protected:
|
||||||
opRM_RM(reg1, reg2, B10001000);
|
opRM_RM(reg1, reg2, B10001000);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void mov(const Operand& op, uint64 imm)
|
void mov(const Operand& op,
|
||||||
|
#ifdef XBYAK64
|
||||||
|
uint64
|
||||||
|
#else
|
||||||
|
uint32
|
||||||
|
#endif
|
||||||
|
imm)
|
||||||
{
|
{
|
||||||
verifyMemHasSize(op);
|
verifyMemHasSize(op);
|
||||||
if (op.isREG()) {
|
if (op.isREG()) {
|
||||||
int w = op.isBit(8) ? 0 : 1;
|
rex(op);
|
||||||
rex(op); db(B10110000 | (w << 3) | (op.getIdx() & 7));
|
int code, size;
|
||||||
|
#ifdef XBYAK64
|
||||||
|
if (op.isBit(64) && inner::IsInInt32(imm)) {
|
||||||
|
db(B11000111);
|
||||||
|
code = B11000000;
|
||||||
|
size = 4;
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
code = B10110000 | ((op.isBit(8) ? 0 : 1) << 3);
|
||||||
|
size = op.getBit() / 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
db(code | (op.getIdx() & 7));
|
||||||
|
db(imm, size);
|
||||||
} else if (op.isMEM()) {
|
} else if (op.isMEM()) {
|
||||||
opModM(static_cast<const Address&>(op), Reg(0, Operand::REG, op.getBit()), B11000110);
|
opModM(static_cast<const Address&>(op), Reg(0, Operand::REG, op.getBit()), B11000110);
|
||||||
|
int size = op.getBit() / 8; if (size > 4) size = 4;
|
||||||
|
db(static_cast<uint32>(imm), size);
|
||||||
} else {
|
} else {
|
||||||
throw ERR_BAD_COMBINATION;
|
throw ERR_BAD_COMBINATION;
|
||||||
}
|
}
|
||||||
db(imm, op.getBit() / 8);
|
|
||||||
}
|
|
||||||
void opMovxx(const Reg& reg, const Operand& op, uint8 code)
|
|
||||||
{
|
|
||||||
int w = op.isBit(16);
|
|
||||||
bool cond = reg.isREG() && (reg.getBit() > op.getBit());
|
|
||||||
opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
|
|
||||||
}
|
}
|
||||||
void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, B11000111); }
|
void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, B11000111); }
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
|
@ -1180,20 +1279,17 @@ protected:
|
||||||
}
|
}
|
||||||
void call(const char *label)
|
void call(const char *label)
|
||||||
{
|
{
|
||||||
opJmp(label, T_NEAR, 0, B10011010, 0);
|
opJmp(label, T_NEAR, 0, B11101000, 0);
|
||||||
}
|
}
|
||||||
void call(const void *addr)
|
void call(const void *addr)
|
||||||
{
|
{
|
||||||
opJmp(addr, T_NEAR, 0, B11101000, 0);
|
opJmp(addr, T_NEAR, 0, B11101000, 0);
|
||||||
}
|
}
|
||||||
void call(const Operand& op)
|
|
||||||
{
|
|
||||||
opR_ModM(op, 16 | i32e, 3, 2, B11111111);
|
|
||||||
}
|
|
||||||
// special case
|
// special case
|
||||||
void movd(const Address& addr, const Mmx& mmx)
|
void movd(const Address& addr, const Mmx& mmx)
|
||||||
{
|
{
|
||||||
opModM(addr, Reg(mmx.getIdx(), Operand::REG, mmx.getBit() / 8), 0x0F, B01111110);
|
if (mmx.isXMM()) db(0x66);
|
||||||
|
opModM(addr, mmx, 0x0F, B01111110);
|
||||||
}
|
}
|
||||||
void movd(const Reg32& reg, const Mmx& mmx)
|
void movd(const Reg32& reg, const Mmx& mmx)
|
||||||
{
|
{
|
||||||
|
@ -1202,8 +1298,8 @@ protected:
|
||||||
}
|
}
|
||||||
void movd(const Mmx& mmx, const Address& addr)
|
void movd(const Mmx& mmx, const Address& addr)
|
||||||
{
|
{
|
||||||
ASSERT(!addr.isBit(32)); // don't use dword ptr, bogus, won't output 0x66 for xmm dest op
|
if (mmx.isXMM()) db(0x66);
|
||||||
opModM(addr, Reg(mmx.getIdx(), Operand::REG, mmx.getBit() / 8), 0x0F, B01101110);
|
opModM(addr, mmx, 0x0F, B01101110);
|
||||||
}
|
}
|
||||||
void movd(const Mmx& mmx, const Reg32& reg)
|
void movd(const Mmx& mmx, const Reg32& reg)
|
||||||
{
|
{
|
||||||
|
@ -1225,8 +1321,31 @@ protected:
|
||||||
}
|
}
|
||||||
void movq(const Address& addr, const Mmx& mmx)
|
void movq(const Address& addr, const Mmx& mmx)
|
||||||
{
|
{
|
||||||
opModM(addr, Reg(mmx.getIdx(), Operand::REG, mmx.getBit() / 8), 0x0F, mmx.isXMM() ? B11010110 : B01111111);
|
if (mmx.isXMM()) db(0x66);
|
||||||
|
opModM(addr, mmx, 0x0F, mmx.isXMM() ? B11010110 : B01111111);
|
||||||
}
|
}
|
||||||
|
#ifdef XBYAK64
|
||||||
|
void movq(const Reg64& reg, const Mmx& mmx)
|
||||||
|
{
|
||||||
|
if (mmx.isXMM()) db(0x66);
|
||||||
|
opModR(mmx, reg, 0x0F, B01111110);
|
||||||
|
}
|
||||||
|
void movq(const Mmx& mmx, const Reg64& reg)
|
||||||
|
{
|
||||||
|
if (mmx.isXMM()) db(0x66);
|
||||||
|
opModR(mmx, reg, 0x0F, B01101110);
|
||||||
|
}
|
||||||
|
void pextrq(const Operand& op, const Xmm& xmm, uint8 imm)
|
||||||
|
{
|
||||||
|
if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION;
|
||||||
|
opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, B00111010); // force to 64bit
|
||||||
|
}
|
||||||
|
void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm)
|
||||||
|
{
|
||||||
|
if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION;
|
||||||
|
opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, B00111010); // force to 64bit
|
||||||
|
}
|
||||||
|
#endif
|
||||||
// MMX2 : pextrw : reg, mmx/xmm, imm
|
// MMX2 : pextrw : reg, mmx/xmm, imm
|
||||||
// SSE4 : pextrw, pextrb, pextrd, extractps : reg/mem, mmx/xmm, imm
|
// SSE4 : pextrw, pextrb, pextrd, extractps : reg/mem, mmx/xmm, imm
|
||||||
void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); }
|
void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); }
|
||||||
|
@ -1270,7 +1389,7 @@ protected:
|
||||||
bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
|
bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
|
||||||
if (!is16bit && !(reg.isREG(i32e) && (op.isREG(i32e) || op.isMEM()))) throw ERR_BAD_COMBINATION;
|
if (!is16bit && !(reg.isREG(i32e) && (op.isREG(i32e) || op.isMEM()))) throw ERR_BAD_COMBINATION;
|
||||||
if (is16bit) db(0x66);
|
if (is16bit) db(0x66);
|
||||||
db(0xF3); opModRM(Reg(reg.getIdx(), Operand::REG, i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, 0x0F, 0xB8);
|
db(0xF3); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, 0x0F, 0xB8);
|
||||||
}
|
}
|
||||||
void crc32(const Reg32e& reg, const Operand& op)
|
void crc32(const Reg32e& reg, const Operand& op)
|
||||||
{
|
{
|
||||||
|
@ -1278,17 +1397,86 @@ protected:
|
||||||
db(0xF2);
|
db(0xF2);
|
||||||
opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1));
|
opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1));
|
||||||
}
|
}
|
||||||
public:
|
void vextractps(const Operand& op, const Xmm& xmm, uint8 imm)
|
||||||
|
{
|
||||||
|
if (!(op.isREG(32) || op.isMEM()) || xmm.isYMM()) throw ERR_BAD_COMBINATION;
|
||||||
|
opAVX_X_XM_IMM(xmm, cvtReg(op, op.isREG(), Operand::XMM), MM_0F3A | PP_66, 0x17, false, 0, imm);
|
||||||
|
}
|
||||||
|
// support (x, x, x/m), (y, y, y/m)
|
||||||
|
void opAVX_X_X_XM(const Xmm& xm1, const Operand& op1, const Operand& op2, int type, int code0, bool supportYMM, int w = -1)
|
||||||
|
{
|
||||||
|
const Xmm *xm2;
|
||||||
|
const Operand *op;
|
||||||
|
if (op2.isNone()) {
|
||||||
|
xm2 = &xm1;
|
||||||
|
op = &op1;
|
||||||
|
} else {
|
||||||
|
if (!(op1.isXMM() || (supportYMM && op1.isYMM()))) throw ERR_BAD_COMBINATION;
|
||||||
|
xm2 = static_cast<const Xmm*>(&op1);
|
||||||
|
op = &op2;
|
||||||
|
}
|
||||||
|
// (xm1, xm2, op)
|
||||||
|
if (!((xm1.isXMM() && xm2->isXMM()) || (supportYMM && xm1.isYMM() && xm2->isYMM()))) throw ERR_BAD_COMBINATION;
|
||||||
|
bool x, b;
|
||||||
|
if (op->isMEM()) {
|
||||||
|
const Address& addr = *static_cast<const Address*>(op);
|
||||||
|
uint8 rex = addr.getRex();
|
||||||
|
x = (rex & 2) != 0;
|
||||||
|
b = (rex & 1) != 0;
|
||||||
|
if (BIT == 64 && addr.is32bit_) db(0x67);
|
||||||
|
if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0;
|
||||||
|
} else {
|
||||||
|
x = false;
|
||||||
|
b = static_cast<const Reg*>(op)->isExtIdx();
|
||||||
|
}
|
||||||
|
if (w == -1) w = 0;
|
||||||
|
vex(xm1.isExtIdx(), xm2->getIdx(), xm1.isYMM(), type, x, b, w);
|
||||||
|
db(code0);
|
||||||
|
if (op->isMEM()) {
|
||||||
|
const Address& addr = *static_cast<const Address*>(op);
|
||||||
|
addr.updateRegField(static_cast<uint8>(xm1.getIdx()));
|
||||||
|
db(addr.getCode(), static_cast<int>(addr.getSize()));
|
||||||
|
} else {
|
||||||
|
db(getModRM(3, xm1.getIdx(), op->getIdx()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return op
|
||||||
|
const Operand& cvtReg(const Operand& op, bool cvt, Operand::Kind kind) const
|
||||||
|
{
|
||||||
|
if (!cvt) return op;
|
||||||
|
static const Xmm* xmTbl[] = {
|
||||||
|
&xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7,
|
||||||
|
#ifdef XBYAK64
|
||||||
|
&xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
static const Ymm* ymTbl[] = {
|
||||||
|
&ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7,
|
||||||
|
#ifdef XBYAK64
|
||||||
|
&ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
return (kind == Operand::XMM) ? *xmTbl[op.getIdx()] : *ymTbl[op.getIdx()];
|
||||||
|
}
|
||||||
|
// support (x, x/m, imm), (y, y/m, imm)
|
||||||
|
void opAVX_X_XM_IMM(const Xmm& xmm, const Operand& op, int type, int code, bool supportYMM, int w = -1, int imm = NONE)
|
||||||
|
{
|
||||||
|
opAVX_X_X_XM(xmm, xmm.isXMM() ? xm0 : ym0, op, type, code, supportYMM, w); if (imm != NONE) db((uint8)imm);
|
||||||
|
}
|
||||||
enum { NONE = 256 };
|
enum { NONE = 256 };
|
||||||
|
public:
|
||||||
CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0)
|
CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0)
|
||||||
: CodeArray(maxSize, userPtr)
|
: CodeArray(maxSize, userPtr)
|
||||||
, mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7)
|
, mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7)
|
||||||
, xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7)
|
, xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7)
|
||||||
|
, ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7)
|
||||||
, xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7) // for my convenience
|
, xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7) // for my convenience
|
||||||
|
, ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7) // for my convenience
|
||||||
, eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI)
|
, eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI)
|
||||||
, ax(Operand::EAX), cx(Operand::ECX), dx(Operand::EDX), bx(Operand::EBX), sp(Operand::ESP), bp(Operand::EBP), si(Operand::ESI), di(Operand::EDI)
|
, ax(Operand::EAX), cx(Operand::ECX), dx(Operand::EDX), bx(Operand::EBX), sp(Operand::ESP), bp(Operand::EBP), si(Operand::ESI), di(Operand::EDI)
|
||||||
, al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH)
|
, al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH)
|
||||||
, ptr(0), byte(8), word(16), dword(32), qword(64), xmmword(128)
|
, ptr(0), byte(8), word(16), dword(32), qword(64)
|
||||||
|
, st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7)
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
, rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15)
|
, rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15)
|
||||||
, r8d(Operand::R8D), r9d(Operand::R9D), r10d(Operand::R10D), r11d(Operand::R11D), r12d(Operand::R12D), r13d(Operand::R13D), r14d(Operand::R14D), r15d(Operand::R15D)
|
, r8d(Operand::R8D), r9d(Operand::R9D), r10d(Operand::R10D), r11d(Operand::R11D), r12d(Operand::R12D), r13d(Operand::R13D), r14d(Operand::R14D), r15d(Operand::R15D)
|
||||||
|
@ -1296,7 +1484,9 @@ public:
|
||||||
, r8b(Operand::R8B), r9b(Operand::R9B), r10b(Operand::R10B), r11b(Operand::R11B), r12b(Operand::R12B), r13b(Operand::R13B), r14b(Operand::R14B), r15b(Operand::R15B)
|
, r8b(Operand::R8B), r9b(Operand::R9B), r10b(Operand::R10B), r11b(Operand::R11B), r12b(Operand::R12B), r13b(Operand::R13B), r14b(Operand::R14B), r15b(Operand::R15B)
|
||||||
, spl(Operand::SPL, 1), bpl(Operand::BPL, 1), sil(Operand::SIL, 1), dil(Operand::DIL, 1)
|
, spl(Operand::SPL, 1), bpl(Operand::BPL, 1), sil(Operand::SIL, 1), dil(Operand::DIL, 1)
|
||||||
, xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15)
|
, xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15)
|
||||||
|
, ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15)
|
||||||
, xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15) // for my convenience
|
, xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15) // for my convenience
|
||||||
|
, ym8(ymm8), ym9(ymm9), ym10(ymm10), ym11(ymm11), ym12(ymm12), ym13(ymm13), ym14(ymm14), ym15(ymm15) // for my convenience
|
||||||
, rip()
|
, rip()
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
@ -1309,7 +1499,7 @@ public:
|
||||||
// if (hasUndefinedLabel()) throw ERR_LABEL_IS_NOT_FOUND;
|
// if (hasUndefinedLabel()) throw ERR_LABEL_IS_NOT_FOUND;
|
||||||
return top_;
|
return top_;
|
||||||
}
|
}
|
||||||
#ifdef TEST_NM
|
#ifdef XBYAK_TEST
|
||||||
void dump(bool doClear = true)
|
void dump(bool doClear = true)
|
||||||
{
|
{
|
||||||
CodeArray::dump();
|
CodeArray::dump();
|
||||||
|
@ -1322,7 +1512,7 @@ public:
|
||||||
void align(int x = 16)
|
void align(int x = 16)
|
||||||
{
|
{
|
||||||
if (x != 4 && x != 8 && x != 16 && x != 32) throw ERR_BAD_ALIGN;
|
if (x != 4 && x != 8 && x != 16 && x != 32) throw ERR_BAD_ALIGN;
|
||||||
while (inner::GetPtrDist(getCurr()) % x) {
|
while (size_t(getCurr()) % x) {
|
||||||
nop();
|
nop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1335,4 +1525,4 @@ public:
|
||||||
|
|
||||||
} // end of namespace
|
} // end of namespace
|
||||||
|
|
||||||
#endif // XBYAK_H_
|
#endif // XBYAK_XBYAK_H_
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
const char *getVersionString() const { return "2.07"; }
|
const char *getVersionString() const { return "2.99"; }
|
||||||
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
|
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
|
||||||
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
|
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
|
||||||
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
|
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
|
||||||
|
@ -184,88 +184,94 @@ void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0
|
||||||
void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x66); }
|
void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x66); }
|
||||||
void cmovo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 0); }
|
void cmovo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 0); }
|
||||||
void jo(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); }
|
void jo(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); }
|
||||||
void seto(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 0); }
|
void seto(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 0); }
|
||||||
void cmovno(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 1); }
|
void cmovno(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 1); }
|
||||||
void jno(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); }
|
void jno(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); }
|
||||||
void setno(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 1); }
|
void setno(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 1); }
|
||||||
void cmovb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
|
void cmovb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
|
||||||
void jb(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
|
void jb(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
|
||||||
void setb(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 2); }
|
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); }
|
||||||
|
void cmovc(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
|
||||||
|
void jc(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
|
||||||
|
void setc(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); }
|
||||||
void cmovnae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
|
void cmovnae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
|
||||||
void jnae(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
|
void jnae(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
|
||||||
void setnae(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 2); }
|
void setnae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); }
|
||||||
void cmovnb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
|
void cmovnb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
|
||||||
void jnb(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
|
void jnb(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
|
||||||
void setnb(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 3); }
|
void setnb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); }
|
||||||
void cmovae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
|
void cmovae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
|
||||||
void jae(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
|
void jae(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
|
||||||
void setae(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 3); }
|
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); }
|
||||||
|
void cmovnc(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
|
||||||
|
void jnc(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
|
||||||
|
void setnc(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); }
|
||||||
void cmove(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); }
|
void cmove(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); }
|
||||||
void je(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
|
void je(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
|
||||||
void sete(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 4); }
|
void sete(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 4); }
|
||||||
void cmovz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); }
|
void cmovz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); }
|
||||||
void jz(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
|
void jz(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
|
||||||
void setz(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 4); }
|
void setz(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 4); }
|
||||||
void cmovne(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); }
|
void cmovne(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); }
|
||||||
void jne(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
|
void jne(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
|
||||||
void setne(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 5); }
|
void setne(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 5); }
|
||||||
void cmovnz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); }
|
void cmovnz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); }
|
||||||
void jnz(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
|
void jnz(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
|
||||||
void setnz(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 5); }
|
void setnz(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 5); }
|
||||||
void cmovbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); }
|
void cmovbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); }
|
||||||
void jbe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
|
void jbe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
|
||||||
void setbe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 6); }
|
void setbe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 6); }
|
||||||
void cmovna(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); }
|
void cmovna(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); }
|
||||||
void jna(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
|
void jna(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
|
||||||
void setna(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 6); }
|
void setna(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 6); }
|
||||||
void cmovnbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); }
|
void cmovnbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); }
|
||||||
void jnbe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
|
void jnbe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
|
||||||
void setnbe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 7); }
|
void setnbe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 7); }
|
||||||
void cmova(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); }
|
void cmova(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); }
|
||||||
void ja(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
|
void ja(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
|
||||||
void seta(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 7); }
|
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 7); }
|
||||||
void cmovs(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 8); }
|
void cmovs(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 8); }
|
||||||
void js(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); }
|
void js(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); }
|
||||||
void sets(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 8); }
|
void sets(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 8); }
|
||||||
void cmovns(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 9); }
|
void cmovns(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 9); }
|
||||||
void jns(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); }
|
void jns(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); }
|
||||||
void setns(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 9); }
|
void setns(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 9); }
|
||||||
void cmovp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); }
|
void cmovp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); }
|
||||||
void jp(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
|
void jp(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
|
||||||
void setp(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 10); }
|
void setp(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 10); }
|
||||||
void cmovpe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); }
|
void cmovpe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); }
|
||||||
void jpe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
|
void jpe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
|
||||||
void setpe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 10); }
|
void setpe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 10); }
|
||||||
void cmovnp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); }
|
void cmovnp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); }
|
||||||
void jnp(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
|
void jnp(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
|
||||||
void setnp(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 11); }
|
void setnp(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 11); }
|
||||||
void cmovpo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); }
|
void cmovpo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); }
|
||||||
void jpo(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
|
void jpo(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
|
||||||
void setpo(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 11); }
|
void setpo(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 11); }
|
||||||
void cmovl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); }
|
void cmovl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); }
|
||||||
void jl(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
|
void jl(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
|
||||||
void setl(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 12); }
|
void setl(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 12); }
|
||||||
void cmovnge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); }
|
void cmovnge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); }
|
||||||
void jnge(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
|
void jnge(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
|
||||||
void setnge(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 12); }
|
void setnge(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 12); }
|
||||||
void cmovnl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); }
|
void cmovnl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); }
|
||||||
void jnl(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
|
void jnl(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
|
||||||
void setnl(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 13); }
|
void setnl(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 13); }
|
||||||
void cmovge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); }
|
void cmovge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); }
|
||||||
void jge(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
|
void jge(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
|
||||||
void setge(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 13); }
|
void setge(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 13); }
|
||||||
void cmovle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); }
|
void cmovle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); }
|
||||||
void jle(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
|
void jle(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
|
||||||
void setle(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 14); }
|
void setle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 14); }
|
||||||
void cmovng(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); }
|
void cmovng(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); }
|
||||||
void jng(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
|
void jng(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
|
||||||
void setng(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 14); }
|
void setng(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 14); }
|
||||||
void cmovnle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
|
void cmovnle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
|
||||||
void jnle(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
|
void jnle(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
|
||||||
void setnle(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 15); }
|
void setnle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); }
|
||||||
void cmovg(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
|
void cmovg(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
|
||||||
void jg(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
|
void jg(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
|
||||||
void setg(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 15); }
|
void setg(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); }
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
void cdqe() { db(0x48); db(0x98); }
|
void cdqe() { db(0x48); db(0x98); }
|
||||||
#else
|
#else
|
||||||
|
@ -308,12 +314,57 @@ void mwait() { db(0x0F); db(0x01); db(0xC9); }
|
||||||
void rdmsr() { db(0x0F); db(0x32); }
|
void rdmsr() { db(0x0F); db(0x32); }
|
||||||
void rdpmc() { db(0x0F); db(0x33); }
|
void rdpmc() { db(0x0F); db(0x33); }
|
||||||
void rdtsc() { db(0x0F); db(0x31); }
|
void rdtsc() { db(0x0F); db(0x31); }
|
||||||
|
void rdtscp() { db(0x0F); db(0x01); db(0xF9); }
|
||||||
void wait() { db(0x9B); }
|
void wait() { db(0x9B); }
|
||||||
void wbinvd() { db(0x0F); db(0x09); }
|
void wbinvd() { db(0x0F); db(0x09); }
|
||||||
void wrmsr() { db(0x0F); db(0x30); }
|
void wrmsr() { db(0x0F); db(0x30); }
|
||||||
void xlatb() { db(0xD7); }
|
void xlatb() { db(0xD7); }
|
||||||
void popf() { db(0x9D); }
|
void popf() { db(0x9D); }
|
||||||
void pushf() { db(0x9C); }
|
void pushf() { db(0x9C); }
|
||||||
|
void vzeroall() { db(0xC5); db(0xFC); db(0x77); }
|
||||||
|
void vzeroupper() { db(0xC5); db(0xF8); db(0x77); }
|
||||||
|
void xgetbv() { db(0x0F); db(0x01); db(0xD0); }
|
||||||
|
void f2xm1() { db(0xD9); db(0xF0); }
|
||||||
|
void fabs() { db(0xD9); db(0xE1); }
|
||||||
|
void faddp() { db(0xDE); db(0xC1); }
|
||||||
|
void fchs() { db(0xD9); db(0xE0); }
|
||||||
|
void fcom() { db(0xD8); db(0xD1); }
|
||||||
|
void fcomp() { db(0xD8); db(0xD9); }
|
||||||
|
void fcompp() { db(0xDE); db(0xD9); }
|
||||||
|
void fcos() { db(0xD9); db(0xFF); }
|
||||||
|
void fdecstp() { db(0xD9); db(0xF6); }
|
||||||
|
void fdivp() { db(0xDE); db(0xF9); }
|
||||||
|
void fdivrp() { db(0xDE); db(0xF1); }
|
||||||
|
void fincstp() { db(0xD9); db(0xF7); }
|
||||||
|
void fld1() { db(0xD9); db(0xE8); }
|
||||||
|
void fldl2t() { db(0xD9); db(0xE9); }
|
||||||
|
void fldl2e() { db(0xD9); db(0xEA); }
|
||||||
|
void fldpi() { db(0xD9); db(0xEB); }
|
||||||
|
void fldlg2() { db(0xD9); db(0xEC); }
|
||||||
|
void fldln2() { db(0xD9); db(0xED); }
|
||||||
|
void fldz() { db(0xD9); db(0xEE); }
|
||||||
|
void fmulp() { db(0xDE); db(0xC9); }
|
||||||
|
void fnop() { db(0xD9); db(0xD0); }
|
||||||
|
void fpatan() { db(0xD9); db(0xF3); }
|
||||||
|
void fprem() { db(0xD9); db(0xF8); }
|
||||||
|
void fprem1() { db(0xD9); db(0xF5); }
|
||||||
|
void fptan() { db(0xD9); db(0xF2); }
|
||||||
|
void frndint() { db(0xD9); db(0xFC); }
|
||||||
|
void fscale() { db(0xD9); db(0xFD); }
|
||||||
|
void fsin() { db(0xD9); db(0xFE); }
|
||||||
|
void fsincos() { db(0xD9); db(0xFB); }
|
||||||
|
void fsqrt() { db(0xD9); db(0xFA); }
|
||||||
|
void fsubp() { db(0xDE); db(0xE9); }
|
||||||
|
void fsubrp() { db(0xDE); db(0xE1); }
|
||||||
|
void ftst() { db(0xD9); db(0xE4); }
|
||||||
|
void fucom() { db(0xDD); db(0xE1); }
|
||||||
|
void fucomp() { db(0xDD); db(0xE9); }
|
||||||
|
void fucompp() { db(0xDA); db(0xE9); }
|
||||||
|
void fxam() { db(0xD9); db(0xE5); }
|
||||||
|
void fxch() { db(0xD9); db(0xC9); }
|
||||||
|
void fxtract() { db(0xD9); db(0xF4); }
|
||||||
|
void fyl2x() { db(0xD9); db(0xF1); }
|
||||||
|
void fyl2xp1() { db(0xD9); db(0xF9); }
|
||||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||||
void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); }
|
void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); }
|
||||||
|
@ -332,12 +383,12 @@ void xor(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); }
|
||||||
void xor(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); }
|
void xor(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); }
|
||||||
void dec(const Operand& op) { opIncDec(op, 0x48, 1); }
|
void dec(const Operand& op) { opIncDec(op, 0x48, 1); }
|
||||||
void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
|
void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
|
||||||
void div(const Operand& op) { opR_ModM(op, 0, 3, 6, 0xF6); }
|
void div(const Operand& op) { opR_ModM(op, 0, 6, 0xF6); }
|
||||||
void idiv(const Operand& op) { opR_ModM(op, 0, 3, 7, 0xF6); }
|
void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
|
||||||
void imul(const Operand& op) { opR_ModM(op, 0, 3, 5, 0xF6); }
|
void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); }
|
||||||
void mul(const Operand& op) { opR_ModM(op, 0, 3, 4, 0xF6); }
|
void mul(const Operand& op) { opR_ModM(op, 0, 4, 0xF6); }
|
||||||
void neg(const Operand& op) { opR_ModM(op, 0, 3, 3, 0xF6); }
|
void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); }
|
||||||
void not(const Operand& op) { opR_ModM(op, 0, 3, 2, 0xF6); }
|
void not(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); }
|
||||||
void rcl(const Operand& op, int imm) { opShift(op, imm, 2); }
|
void rcl(const Operand& op, int imm) { opShift(op, imm, 2); }
|
||||||
void rcl(const Operand& op, const Reg8& cl) { opShift(op, cl, 2); }
|
void rcl(const Operand& op, const Reg8& cl) { opShift(op, cl, 2); }
|
||||||
void rcr(const Operand& op, int imm) { opShift(op, imm, 3); }
|
void rcr(const Operand& op, int imm) { opShift(op, imm, 3); }
|
||||||
|
@ -360,52 +411,57 @@ void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0
|
||||||
void shrd(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xAC, &cl); }
|
void shrd(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xAC, &cl); }
|
||||||
void bsf(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); }
|
void bsf(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); }
|
||||||
void bsr(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); }
|
void bsr(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); }
|
||||||
void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, 256, 0x38); }
|
void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); }
|
||||||
void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, 256, 0x38); }
|
void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); }
|
||||||
void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, 256, 0x38); }
|
void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); }
|
||||||
void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, 256, 0x38); }
|
void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, NONE, 0x38); }
|
||||||
void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, 256, 0x38); }
|
void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, NONE, 0x38); }
|
||||||
void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, 256, 0x38); }
|
void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, NONE, 0x38); }
|
||||||
void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, 256, 0x38); }
|
void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, NONE, 0x38); }
|
||||||
void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, 256, 0x38); }
|
void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, NONE, 0x38); }
|
||||||
void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, 256, 0x38); }
|
void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, NONE, 0x38); }
|
||||||
void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, 256, 0x38); }
|
void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, NONE, 0x38); }
|
||||||
void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, 0x66, 256, 0x38); }
|
void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, 0x66, NONE, 0x38); }
|
||||||
void pmulhrsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0B, 0x66, 256, 0x38); }
|
void pmulhrsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0B, 0x66, NONE, 0x38); }
|
||||||
void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, 256, 0x38); }
|
void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); }
|
||||||
void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, 256, 0x38); }
|
void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); }
|
||||||
void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, 256, 0x38); }
|
void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); }
|
||||||
void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8>(imm), 0x3a); }
|
void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8>(imm), 0x3a); }
|
||||||
void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void packusdw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2B, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void packusdw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2B, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void ptest(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x17, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void ptest(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x17, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovsxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x20, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovsxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x20, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovsxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovsxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovsxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x22, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovsxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x22, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovsxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x23, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovsxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x23, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovsxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x24, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovsxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x24, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovsxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x25, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovsxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x25, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovzxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x30, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovzxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x30, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovzxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x31, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovzxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x31, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovzxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x32, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovzxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x32, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovzxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x33, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovzxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x33, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovzxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x34, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovzxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x34, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmovzxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x35, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmovzxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x35, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pminsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x38, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pminsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x38, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pminsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x39, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pminsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x39, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pminuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3A, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pminuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3A, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pminud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3B, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pminud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3B, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmaxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3D, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmaxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3D, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmaxuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3E, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmaxuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3E, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmaxud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3F, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmaxud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3F, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmuldq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x28, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmuldq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x28, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pmulld(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pmulld(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
|
void aesdec(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDE, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
|
void aesdeclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
|
void aesenc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDC, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
|
void aesenclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDD, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
|
void aesimc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDB, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||||
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||||
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||||
|
@ -420,6 +476,8 @@ void pcmpestrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x60
|
||||||
void pcmpestri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
void pcmpestri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||||
void pcmpistrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
void pcmpistrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||||
void pcmpistri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
void pcmpistri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||||
|
void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||||
|
void aeskeygenassist(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||||
void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
|
void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
|
||||||
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
|
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
|
||||||
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
||||||
|
@ -427,3 +485,540 @@ void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getId
|
||||||
void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); }
|
void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); }
|
||||||
void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); }
|
void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); }
|
||||||
void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); }
|
void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); }
|
||||||
|
void fadd(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 0, 0); }
|
||||||
|
void fiadd(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 0, 0); }
|
||||||
|
void fcom(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 2, 0); }
|
||||||
|
void fcomp(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 3, 0); }
|
||||||
|
void fdiv(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 6, 0); }
|
||||||
|
void fidiv(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 6, 0); }
|
||||||
|
void fdivr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 7, 0); }
|
||||||
|
void fidivr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 7, 0); }
|
||||||
|
void ficom(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 2, 0); }
|
||||||
|
void ficomp(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 3, 0); }
|
||||||
|
void fild(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 0, 5); }
|
||||||
|
void fist(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0x00, 2, 0); }
|
||||||
|
void fistp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 3, 7); }
|
||||||
|
void fisttp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDD, 1, 0); }
|
||||||
|
void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); }
|
||||||
|
void fmul(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 1, 0); }
|
||||||
|
void fimul(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 1, 0); }
|
||||||
|
void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); }
|
||||||
|
void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
|
||||||
|
void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
|
||||||
|
void fisub(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 4, 0); }
|
||||||
|
void fsubr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 5, 0); }
|
||||||
|
void fisubr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 5, 0); }
|
||||||
|
void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC0); }
|
||||||
|
void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
|
||||||
|
void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); }
|
||||||
|
void fcmove(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC8, 0x00C8); }
|
||||||
|
void fcmovbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD0, 0x00D0); }
|
||||||
|
void fcmovu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD8, 0x00D8); }
|
||||||
|
void fcmovnb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC0, 0x00C0); }
|
||||||
|
void fcmovne(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC8, 0x00C8); }
|
||||||
|
void fcmovnbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD0, 0x00D0); }
|
||||||
|
void fcmovnu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD8, 0x00D8); }
|
||||||
|
void fcomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBF0, 0x00F0); }
|
||||||
|
void fcomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFF0, 0x00F0); }
|
||||||
|
void fucomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBE8, 0x00E8); }
|
||||||
|
void fucomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFE8, 0x00E8); }
|
||||||
|
void fdiv(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F0, 0xDCF8); }
|
||||||
|
void fdivp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF8); }
|
||||||
|
void fdivr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F8, 0xDCF0); }
|
||||||
|
void fdivrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF0); }
|
||||||
|
void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC8); }
|
||||||
|
void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); }
|
||||||
|
void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
|
||||||
|
void fsubp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE8); }
|
||||||
|
void fsubr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E8, 0xDCE0); }
|
||||||
|
void fsubrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE0); }
|
||||||
|
void fcom(const Fpu& reg) { opFpu(reg, 0xD8, 0xD0); }
|
||||||
|
void fcomp(const Fpu& reg) { opFpu(reg, 0xD8, 0xD8); }
|
||||||
|
void ffree(const Fpu& reg) { opFpu(reg, 0xDD, 0xC0); }
|
||||||
|
void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); }
|
||||||
|
void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); }
|
||||||
|
void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
|
||||||
|
void fucom(const Fpu& reg) { opFpu(reg, 0xDD, 0xE0); }
|
||||||
|
void fucomp(const Fpu& reg) { opFpu(reg, 0xDD, 0xE8); }
|
||||||
|
void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
|
||||||
|
void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x58, true); }
|
||||||
|
void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x58, true); }
|
||||||
|
void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x58, false); }
|
||||||
|
void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x58, false); }
|
||||||
|
void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5C, true); }
|
||||||
|
void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5C, true); }
|
||||||
|
void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5C, false); }
|
||||||
|
void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5C, false); }
|
||||||
|
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x59, true); }
|
||||||
|
void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x59, true); }
|
||||||
|
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x59, false); }
|
||||||
|
void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x59, false); }
|
||||||
|
void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5E, true); }
|
||||||
|
void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5E, true); }
|
||||||
|
void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5E, false); }
|
||||||
|
void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5E, false); }
|
||||||
|
void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5F, true); }
|
||||||
|
void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5F, true); }
|
||||||
|
void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5F, false); }
|
||||||
|
void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5F, false); }
|
||||||
|
void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5D, true); }
|
||||||
|
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5D, true); }
|
||||||
|
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5D, false); }
|
||||||
|
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5D, false); }
|
||||||
|
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x54, true); }
|
||||||
|
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x54, true); }
|
||||||
|
void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x55, true); }
|
||||||
|
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x55, true); }
|
||||||
|
// vorpd: hands its operands to opAVX_X_X_XM with type MM_0F | PP_66 and code 0x56 (op2 defaults to Operand()).
void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, op1, op2, vexType, 0x56, true);
}
|
||||||
|
// vorps: hands its operands to opAVX_X_X_XM with type MM_0F and code 0x56 (op2 defaults to Operand()).
void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	const int vexType = MM_0F;
	opAVX_X_X_XM(xmm, op1, op2, vexType, 0x56, true);
}
|
||||||
|
// vxorpd: hands its operands to opAVX_X_X_XM with type MM_0F | PP_66 and code 0x57 (op2 defaults to Operand()).
void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, op1, op2, vexType, 0x57, true);
}
|
||||||
|
// vxorps: hands its operands to opAVX_X_X_XM with type MM_0F and code 0x57 (op2 defaults to Operand()).
void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	const int vexType = MM_0F;
	opAVX_X_X_XM(xmm, op1, op2, vexType, 0x57, true);
}
|
||||||
|
// vblendpd, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x0D; imm is then passed to db().
void vblendpd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x0D, true, 0);
	db(imm);
}
|
||||||
|
// vblendpd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x0D); imm is then passed to db().
void vblendpd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x0D, true, 0);
	db(imm);
}
|
||||||
|
// vblendps, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x0C; imm is then passed to db().
void vblendps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x0C, true, 0);
	db(imm);
}
|
||||||
|
// vblendps, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x0C); imm is then passed to db().
void vblendps(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x0C, true, 0);
	db(imm);
}
|
||||||
|
// vdppd, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x41; imm is then passed to db().
void vdppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x41, false, 0);
	db(imm);
}
|
||||||
|
// vdppd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x41); imm is then passed to db().
void vdppd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x41, false, 0);
	db(imm);
}
|
||||||
|
// vdpps, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x40; imm is then passed to db().
void vdpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x40, true, 0);
	db(imm);
}
|
||||||
|
// vdpps, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x40); imm is then passed to db().
void vdpps(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x40, true, 0);
	db(imm);
}
|
||||||
|
// vmpsadbw, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x42; imm is then passed to db().
void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x42, false, 0);
	db(imm);
}
|
||||||
|
// vmpsadbw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x42); imm is then passed to db().
void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x42, false, 0);
	db(imm);
}
|
||||||
|
// vpblendw, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x0E; imm is then passed to db().
void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x0E, false, 0);
	db(imm);
}
|
||||||
|
// vpblendw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x0E); imm is then passed to db().
void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x0E, false, 0);
	db(imm);
}
|
||||||
|
// vroundsd, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x0B; imm is then passed to db().
void vroundsd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x0B, false, 0);
	db(imm);
}
|
||||||
|
// vroundsd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x0B); imm is then passed to db().
void vroundsd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x0B, false, 0);
	db(imm);
}
|
||||||
|
// vroundss, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x0A; imm is then passed to db().
void vroundss(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x0A, false, 0);
	db(imm);
}
|
||||||
|
// vroundss, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x0A); imm is then passed to db().
void vroundss(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x0A, false, 0);
	db(imm);
}
|
||||||
|
// vpclmulqdq, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x44; imm is then passed to db().
void vpclmulqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x44, false, 0);
	db(imm);
}
|
||||||
|
// vpclmulqdq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x44); imm is then passed to db().
void vpclmulqdq(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x44, false, 0);
	db(imm);
}
|
||||||
|
// vpermilps, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x0C.
void vpermilps(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x0C, true, 0);
}
|
||||||
|
// vpermilpd, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x0D.
void vpermilpd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x0D, true, 0);
}
|
||||||
|
// vcmppd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66, code 0xC2; imm is then passed to db().
void vcmppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xC2, true, -1);
	db(imm);
}
|
||||||
|
// vcmppd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xC2); imm is then passed to db().
void vcmppd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xC2, true, -1);
	db(imm);
}
|
||||||
|
// vcmpps, three-operand form: opAVX_X_X_XM with type MM_0F, code 0xC2; imm is then passed to db().
void vcmpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xC2, true, -1);
	db(imm);
}
|
||||||
|
// vcmpps, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xC2); imm is then passed to db().
void vcmpps(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xC2, true, -1);
	db(imm);
}
|
||||||
|
// vcmpsd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_F2, code 0xC2; imm is then passed to db().
void vcmpsd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F | PP_F2;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xC2, false, -1);
	db(imm);
}
|
||||||
|
// vcmpsd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xC2); imm is then passed to db().
void vcmpsd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F | PP_F2;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xC2, false, -1);
	db(imm);
}
|
||||||
|
// vcmpss, three-operand form: opAVX_X_X_XM with type MM_0F | PP_F3, code 0xC2; imm is then passed to db().
void vcmpss(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F | PP_F3;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xC2, false, -1);
	db(imm);
}
|
||||||
|
// vcmpss, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xC2); imm is then passed to db().
void vcmpss(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F | PP_F3;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xC2, false, -1);
	db(imm);
}
|
||||||
|
// vcvtsd2ss, three-operand form: opAVX_X_X_XM with type MM_0F | PP_F2 and code 0x5A.
void vcvtsd2ss(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_F2;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x5A, false, -1);
}
|
||||||
|
// vcvtsd2ss, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x5A).
void vcvtsd2ss(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_F2;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x5A, false, -1);
}
|
||||||
|
// vcvtss2sd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_F3 and code 0x5A.
void vcvtss2sd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_F3;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x5A, false, -1);
}
|
||||||
|
// vcvtss2sd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x5A).
void vcvtss2sd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_F3;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x5A, false, -1);
}
|
||||||
|
// vinsertps, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x21; imm is then passed to db().
void vinsertps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x21, false, 0);
	db(imm);
}
|
||||||
|
// vinsertps, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x21); imm is then passed to db().
void vinsertps(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x21, false, 0);
	db(imm);
}
|
||||||
|
// vpacksswb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x63.
void vpacksswb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x63, false, -1);
}
|
||||||
|
// vpacksswb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x63).
void vpacksswb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x63, false, -1);
}
|
||||||
|
// vpackssdw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x6B.
void vpackssdw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x6B, false, -1);
}
|
||||||
|
// vpackssdw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x6B).
void vpackssdw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x6B, false, -1);
}
|
||||||
|
// vpackuswb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x67.
void vpackuswb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x67, false, -1);
}
|
||||||
|
// vpackuswb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x67).
void vpackuswb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x67, false, -1);
}
|
||||||
|
// vpackusdw, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x2B.
void vpackusdw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x2B, false, -1);
}
|
||||||
|
// vpackusdw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x2B).
void vpackusdw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x2B, false, -1);
}
|
||||||
|
// vpaddb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xFC.
void vpaddb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xFC, false, -1);
}
|
||||||
|
// vpaddb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xFC).
void vpaddb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xFC, false, -1);
}
|
||||||
|
// vpaddw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xFD.
void vpaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xFD, false, -1);
}
|
||||||
|
// vpaddw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xFD).
void vpaddw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xFD, false, -1);
}
|
||||||
|
// vpaddd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xFE.
void vpaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xFE, false, -1);
}
|
||||||
|
// vpaddd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xFE).
void vpaddd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xFE, false, -1);
}
|
||||||
|
// vpaddq, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xD4.
void vpaddq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xD4, false, -1);
}
|
||||||
|
// vpaddq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xD4).
void vpaddq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xD4, false, -1);
}
|
||||||
|
// vpaddsb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xEC.
void vpaddsb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xEC, false, -1);
}
|
||||||
|
// vpaddsb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xEC).
void vpaddsb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xEC, false, -1);
}
|
||||||
|
// vpaddsw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xED.
void vpaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xED, false, -1);
}
|
||||||
|
// vpaddsw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xED).
void vpaddsw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xED, false, -1);
}
|
||||||
|
// vpaddusb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xDC.
void vpaddusb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xDC, false, -1);
}
|
||||||
|
// vpaddusb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xDC).
void vpaddusb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xDC, false, -1);
}
|
||||||
|
// vpaddusw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xDD.
void vpaddusw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xDD, false, -1);
}
|
||||||
|
// vpaddusw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xDD).
void vpaddusw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xDD, false, -1);
}
|
||||||
|
// vpalignr, three-operand form: opAVX_X_X_XM with type MM_0F3A | PP_66, code 0x0F; imm is then passed to db().
void vpalignr(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x0F, false, -1);
	db(imm);
}
|
||||||
|
// vpalignr, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x0F); imm is then passed to db().
void vpalignr(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x0F, false, -1);
	db(imm);
}
|
||||||
|
// vpand, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xDB.
void vpand(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xDB, false, -1);
}
|
||||||
|
// vpand, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xDB).
void vpand(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xDB, false, -1);
}
|
||||||
|
// vpandn, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xDF.
void vpandn(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xDF, false, -1);
}
|
||||||
|
// vpandn, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xDF).
void vpandn(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xDF, false, -1);
}
|
||||||
|
// vpavgb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xE0.
void vpavgb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xE0, false, -1);
}
|
||||||
|
// vpavgb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xE0).
void vpavgb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xE0, false, -1);
}
|
||||||
|
// vpavgw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xE3.
void vpavgw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xE3, false, -1);
}
|
||||||
|
// vpavgw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xE3).
void vpavgw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xE3, false, -1);
}
|
||||||
|
// vpcmpeqb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x74.
void vpcmpeqb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x74, false, -1);
}
|
||||||
|
// vpcmpeqb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x74).
void vpcmpeqb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x74, false, -1);
}
|
||||||
|
// vpcmpeqw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x75.
void vpcmpeqw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x75, false, -1);
}
|
||||||
|
// vpcmpeqw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x75).
void vpcmpeqw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x75, false, -1);
}
|
||||||
|
// vpcmpeqd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x76.
void vpcmpeqd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x76, false, -1);
}
|
||||||
|
// vpcmpeqd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x76).
void vpcmpeqd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x76, false, -1);
}
|
||||||
|
// vpcmpeqq, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x29.
void vpcmpeqq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x29, false, -1);
}
|
||||||
|
// vpcmpeqq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x29).
void vpcmpeqq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x29, false, -1);
}
|
||||||
|
// vpcmpgtb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x64.
void vpcmpgtb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x64, false, -1);
}
|
||||||
|
// vpcmpgtb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x64).
void vpcmpgtb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x64, false, -1);
}
|
||||||
|
// vpcmpgtw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x65.
void vpcmpgtw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x65, false, -1);
}
|
||||||
|
// vpcmpgtw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x65).
void vpcmpgtw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x65, false, -1);
}
|
||||||
|
// vpcmpgtd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x66.
void vpcmpgtd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x66, false, -1);
}
|
||||||
|
// vpcmpgtd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x66).
void vpcmpgtd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x66, false, -1);
}
|
||||||
|
// vpcmpgtq, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x37.
void vpcmpgtq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x37, false, -1);
}
|
||||||
|
// vpcmpgtq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x37).
void vpcmpgtq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x37, false, -1);
}
|
||||||
|
// vphaddw, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x01.
void vphaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x01, false, -1);
}
|
||||||
|
// vphaddw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x01).
void vphaddw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x01, false, -1);
}
|
||||||
|
// vphaddd, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x02.
void vphaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x02, false, -1);
}
|
||||||
|
// vphaddd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x02).
void vphaddd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x02, false, -1);
}
|
||||||
|
// vphaddsw, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x03.
void vphaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x03, false, -1);
}
|
||||||
|
// vphaddsw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x03).
void vphaddsw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x03, false, -1);
}
|
||||||
|
// vphsubw, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x05.
void vphsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x05, false, -1);
}
|
||||||
|
// vphsubw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x05).
void vphsubw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x05, false, -1);
}
|
||||||
|
// vphsubd, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x06.
void vphsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x06, false, -1);
}
|
||||||
|
// vphsubd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x06).
void vphsubd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x06, false, -1);
}
|
||||||
|
// vphsubsw, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x07.
void vphsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x07, false, -1);
}
|
||||||
|
// vphsubsw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x07).
void vphsubsw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x07, false, -1);
}
|
||||||
|
// vpmaddwd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xF5.
void vpmaddwd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xF5, false, -1);
}
|
||||||
|
// vpmaddwd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xF5).
void vpmaddwd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xF5, false, -1);
}
|
||||||
|
// vpmaddubsw, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x04.
void vpmaddubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x04, false, -1);
}
|
||||||
|
// vpmaddubsw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x04).
void vpmaddubsw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x04, false, -1);
}
|
||||||
|
// vpmaxsb, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x3C.
void vpmaxsb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x3C, false, -1);
}
|
||||||
|
// vpmaxsb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x3C).
void vpmaxsb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x3C, false, -1);
}
|
||||||
|
// vpmaxsw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xEE.
void vpmaxsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xEE, false, -1);
}
|
||||||
|
// vpmaxsw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xEE).
void vpmaxsw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xEE, false, -1);
}
|
||||||
|
// vpmaxsd, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x3D.
void vpmaxsd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x3D, false, -1);
}
|
||||||
|
// vpmaxsd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x3D).
void vpmaxsd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x3D, false, -1);
}
|
||||||
|
// vpmaxub, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xDE.
void vpmaxub(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xDE, false, -1);
}
|
||||||
|
// vpmaxub, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xDE).
void vpmaxub(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xDE, false, -1);
}
|
||||||
|
// vpmaxuw, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x3E.
void vpmaxuw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x3E, false, -1);
}
|
||||||
|
// vpmaxuw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x3E).
void vpmaxuw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x3E, false, -1);
}
|
||||||
|
// vpmaxud, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x3F.
void vpmaxud(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x3F, false, -1);
}
|
||||||
|
// vpmaxud, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x3F).
void vpmaxud(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x3F, false, -1);
}
|
||||||
|
// vpminsb, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x38.
void vpminsb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x38, false, -1);
}
|
||||||
|
// vpminsb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x38).
void vpminsb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x38, false, -1);
}
|
||||||
|
// vpminsw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xEA.
void vpminsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xEA, false, -1);
}
|
||||||
|
// vpminsw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xEA).
void vpminsw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xEA, false, -1);
}
|
||||||
|
// vpminsd, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x39.
void vpminsd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x39, false, -1);
}
|
||||||
|
// vpminsd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x39).
void vpminsd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x39, false, -1);
}
|
||||||
|
// vpminub, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xDA.
void vpminub(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xDA, false, -1);
}
|
||||||
|
// vpminub, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xDA).
void vpminub(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xDA, false, -1);
}
|
||||||
|
// vpminuw, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x3A.
void vpminuw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x3A, false, -1);
}
|
||||||
|
// vpminuw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x3A).
void vpminuw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x3A, false, -1);
}
|
||||||
|
// vpminud, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x3B.
void vpminud(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x3B, false, -1);
}
|
||||||
|
// vpminud, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x3B).
void vpminud(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x3B, false, -1);
}
|
||||||
|
// vpmulhuw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xE4.
void vpmulhuw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xE4, false, -1);
}
|
||||||
|
// vpmulhuw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xE4).
void vpmulhuw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xE4, false, -1);
}
|
||||||
|
// vpmulhrsw, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x0B.
void vpmulhrsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x0B, false, -1);
}
|
||||||
|
// vpmulhrsw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x0B).
void vpmulhrsw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x0B, false, -1);
}
|
||||||
|
// vpmulhw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xE5.
void vpmulhw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xE5, false, -1);
}
|
||||||
|
// vpmulhw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xE5).
void vpmulhw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xE5, false, -1);
}
|
||||||
|
// vpmullw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xD5.
void vpmullw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xD5, false, -1);
}
|
||||||
|
// vpmullw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xD5).
void vpmullw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xD5, false, -1);
}
|
||||||
|
// vpmulld, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x40.
void vpmulld(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x40, false, -1);
}
|
||||||
|
// vpmulld, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x40).
void vpmulld(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x40, false, -1);
}
|
||||||
|
// vpmuludq, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xF4.
void vpmuludq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xF4, false, -1);
}
|
||||||
|
// vpmuludq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xF4).
void vpmuludq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xF4, false, -1);
}
|
||||||
|
// vpmuldq, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x28.
void vpmuldq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x28, false, -1);
}
|
||||||
|
// vpmuldq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x28).
void vpmuldq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x28, false, -1);
}
|
||||||
|
// vpor, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xEB.
void vpor(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xEB, false, -1);
}
|
||||||
|
// vpor, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xEB).
void vpor(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xEB, false, -1);
}
|
||||||
|
// vpsadbw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xF6.
void vpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xF6, false, -1);
}
|
||||||
|
// vpsadbw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xF6).
void vpsadbw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xF6, false, -1);
}
|
||||||
|
// vpshufb, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x00.
void vpshufb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x00, false, -1);
}
|
||||||
|
// vpsignb, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x08.
void vpsignb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x08, false, -1);
}
|
||||||
|
// vpsignb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x08).
void vpsignb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x08, false, -1);
}
|
||||||
|
// vpsignw, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x09.
void vpsignw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x09, false, -1);
}
|
||||||
|
// vpsignw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x09).
void vpsignw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x09, false, -1);
}
|
||||||
|
// vpsignd, three-operand form: opAVX_X_X_XM with type MM_0F38 | PP_66 and code 0x0A.
void vpsignd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x0A, false, -1);
}
|
||||||
|
// vpsignd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x0A).
void vpsignd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x0A, false, -1);
}
|
||||||
|
// vpsllw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xF1.
void vpsllw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xF1, false, -1);
}
|
||||||
|
// vpsllw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xF1).
void vpsllw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xF1, false, -1);
}
|
||||||
|
// vpslld, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xF2.
void vpslld(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xF2, false, -1);
}
|
||||||
|
// vpslld, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xF2).
void vpslld(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xF2, false, -1);
}
|
||||||
|
// vpsllq, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xF3.
void vpsllq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xF3, false, -1);
}
|
||||||
|
// vpsllq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xF3).
void vpsllq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xF3, false, -1);
}
|
||||||
|
// vpsraw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xE1.
void vpsraw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xE1, false, -1);
}
|
||||||
|
// vpsraw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xE1).
void vpsraw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xE1, false, -1);
}
|
||||||
|
// vpsrad, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xE2.
void vpsrad(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xE2, false, -1);
}
|
||||||
|
// vpsrad, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xE2).
void vpsrad(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xE2, false, -1);
}
|
||||||
|
// vpsrlw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xD1.
void vpsrlw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xD1, false, -1);
}
|
||||||
|
// vpsrlw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xD1).
void vpsrlw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xD1, false, -1);
}
|
||||||
|
// vpsrld, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xD2.
void vpsrld(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xD2, false, -1);
}
|
||||||
|
// vpsrld, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xD2).
void vpsrld(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xD2, false, -1);
}
|
||||||
|
// vpsrlq, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xD3.
void vpsrlq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xD3, false, -1);
}
|
||||||
|
// vpsrlq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xD3).
void vpsrlq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xD3, false, -1);
}
|
||||||
|
// vpsubb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xF8.
void vpsubb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xF8, false, -1);
}
|
||||||
|
// vpsubb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xF8).
void vpsubb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xF8, false, -1);
}
|
||||||
|
// vpsubw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xF9.
void vpsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xF9, false, -1);
}
|
||||||
|
// vpsubw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xF9).
void vpsubw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xF9, false, -1);
}
|
||||||
|
// vpsubd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xFA.
void vpsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xFA, false, -1);
}
|
||||||
|
// vpsubd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xFA).
void vpsubd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xFA, false, -1);
}
|
||||||
|
// vpsubq, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xFB.
void vpsubq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xFB, false, -1);
}
|
||||||
|
// vpsubq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xFB).
void vpsubq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xFB, false, -1);
}
|
||||||
|
// vpsubsb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xE8.
void vpsubsb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xE8, false, -1);
}
|
||||||
|
// vpsubsb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xE8).
void vpsubsb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xE8, false, -1);
}
|
||||||
|
// vpsubsw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xE9.
void vpsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xE9, false, -1);
}
|
||||||
|
// vpsubsw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xE9).
void vpsubsw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xE9, false, -1);
}
|
||||||
|
// vpsubusb, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xD8.
void vpsubusb(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xD8, false, -1);
}
|
||||||
|
// vpsubusb, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xD8).
void vpsubusb(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xD8, false, -1);
}
|
||||||
|
// vpsubusw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xD9.
void vpsubusw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xD9, false, -1);
}
|
||||||
|
// vpsubusw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xD9).
void vpsubusw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xD9, false, -1);
}
|
||||||
|
// vpunpckhbw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x68.
void vpunpckhbw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x68, false, -1);
}
|
||||||
|
// vpunpckhbw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x68).
void vpunpckhbw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x68, false, -1);
}
|
||||||
|
// vpunpckhwd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x69.
void vpunpckhwd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x69, false, -1);
}
|
||||||
|
// vpunpckhwd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x69).
void vpunpckhwd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x69, false, -1);
}
|
||||||
|
// vpunpckhdq, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x6A.
void vpunpckhdq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x6A, false, -1);
}
|
||||||
|
// vpunpckhdq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x6A).
void vpunpckhdq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x6A, false, -1);
}
|
||||||
|
// vpunpckhqdq, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x6D.
void vpunpckhqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x6D, false, -1);
}
|
||||||
|
// vpunpckhqdq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x6D).
void vpunpckhqdq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x6D, false, -1);
}
|
||||||
|
// vpunpcklbw, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x60.
void vpunpcklbw(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x60, false, -1);
}
|
||||||
|
// vpunpcklbw, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x60).
void vpunpcklbw(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x60, false, -1);
}
|
||||||
|
// vpunpcklwd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x61.
void vpunpcklwd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x61, false, -1);
}
|
||||||
|
// vpunpcklwd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x61).
void vpunpcklwd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x61, false, -1);
}
|
||||||
|
// vpunpckldq, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x62.
void vpunpckldq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x62, false, -1);
}
|
||||||
|
// vpunpckldq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x62).
void vpunpckldq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x62, false, -1);
}
|
||||||
|
// vpunpcklqdq, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x6C.
void vpunpcklqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x6C, false, -1);
}
|
||||||
|
// vpunpcklqdq, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x6C).
void vpunpcklqdq(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x6C, false, -1);
}
|
||||||
|
// vpxor, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0xEF.
void vpxor(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xEF, false, -1);
}
|
||||||
|
// vpxor, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xEF).
void vpxor(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xEF, false, -1);
}
|
||||||
|
// vrcpss, three-operand form: opAVX_X_X_XM with type MM_0F | PP_F3 and code 0x53.
void vrcpss(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_F3;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x53, false, -1);
}
|
||||||
|
// vrcpss, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x53).
void vrcpss(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_F3;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x53, false, -1);
}
|
||||||
|
// vrsqrtss, three-operand form: opAVX_X_X_XM with type MM_0F | PP_F3 and code 0x52.
void vrsqrtss(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_F3;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x52, false, -1);
}
|
||||||
|
// vrsqrtss, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x52).
void vrsqrtss(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_F3;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x52, false, -1);
}
|
||||||
|
// vshufpd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66, code 0xC6; imm is then passed to db().
void vshufpd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xC6, true, -1);
	db(imm);
}
|
||||||
|
// vshufpd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xC6); imm is then passed to db().
void vshufpd(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xC6, true, -1);
	db(imm);
}
|
||||||
|
// vshufps, three-operand form: opAVX_X_X_XM with type MM_0F, code 0xC6; imm is then passed to db().
void vshufps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0xC6, true, -1);
	db(imm);
}
|
||||||
|
// vshufps, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0xC6); imm is then passed to db().
void vshufps(const Xmm& xmm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0xC6, true, -1);
	db(imm);
}
|
||||||
|
// vsqrtsd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_F2 and code 0x51.
void vsqrtsd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_F2;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x51, false, -1);
}
|
||||||
|
// vsqrtsd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x51).
void vsqrtsd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_F2;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x51, false, -1);
}
|
||||||
|
// vsqrtss, three-operand form: opAVX_X_X_XM with type MM_0F | PP_F3 and code 0x51.
void vsqrtss(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_F3;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x51, false, -1);
}
|
||||||
|
// vsqrtss, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x51).
void vsqrtss(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_F3;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x51, false, -1);
}
|
||||||
|
// vunpckhpd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x15.
void vunpckhpd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x15, true, -1);
}
|
||||||
|
// vunpckhpd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x15).
void vunpckhpd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x15, true, -1);
}
|
||||||
|
// vunpckhps, three-operand form: opAVX_X_X_XM with type MM_0F and code 0x15.
void vunpckhps(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x15, true, -1);
}
|
||||||
|
// vunpckhps, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x15).
void vunpckhps(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x15, true, -1);
}
|
||||||
|
// vunpcklpd, three-operand form: opAVX_X_X_XM with type MM_0F | PP_66 and code 0x14.
void vunpcklpd(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x14, true, -1);
}
|
||||||
|
// vunpcklpd, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x14).
void vunpcklpd(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x14, true, -1);
}
|
||||||
|
// vunpcklps, three-operand form: opAVX_X_X_XM with type MM_0F and code 0x14.
void vunpcklps(const Xmm& xm1, const Xmm& xm2, const Operand& op)
{
	const int vexType = MM_0F;
	opAVX_X_X_XM(xm1, xm2, op, vexType, 0x14, true, -1);
}
|
||||||
|
// vunpcklps, two-operand form: xmm serves as both register arguments of opAVX_X_X_XM (code 0x14).
void vunpcklps(const Xmm& xmm, const Operand& op)
{
	const int vexType = MM_0F;
	opAVX_X_X_XM(xmm, xmm, op, vexType, 0x14, true, -1);
}
|
||||||
|
// vaeskeygenassist: opAVX_X_XM_IMM with type MM_0F3A | PP_66 and code 0xDF, forwarding imm.
void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0xDF, false, 0, imm);
}
|
||||||
|
// vroundpd: opAVX_X_XM_IMM with type MM_0F3A | PP_66 and code 0x09, forwarding imm.
void vroundpd(const Xmm& xm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x09, true, 0, imm);
}
|
||||||
|
// vroundps: opAVX_X_XM_IMM with type MM_0F3A | PP_66 and code 0x08, forwarding imm.
void vroundps(const Xmm& xm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x08, true, 0, imm);
}
|
||||||
|
// vpermilpd, immediate form: opAVX_X_XM_IMM with type MM_0F3A | PP_66 and code 0x05, forwarding imm.
void vpermilpd(const Xmm& xm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x05, true, 0, imm);
}
|
||||||
|
// vpermilps, immediate form: opAVX_X_XM_IMM with type MM_0F3A | PP_66 and code 0x04, forwarding imm.
void vpermilps(const Xmm& xm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x04, true, 0, imm);
}
|
||||||
|
// vpcmpestri: opAVX_X_XM_IMM with type MM_0F3A | PP_66 and code 0x61, forwarding imm.
void vpcmpestri(const Xmm& xm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x61, false, 0, imm);
}
|
||||||
|
// vpcmpestrm: opAVX_X_XM_IMM with type MM_0F3A | PP_66 and code 0x60, forwarding imm.
void vpcmpestrm(const Xmm& xm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x60, false, 0, imm);
}
|
||||||
|
// vpcmpistri: opAVX_X_XM_IMM with type MM_0F3A | PP_66 and code 0x63, forwarding imm.
void vpcmpistri(const Xmm& xm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x63, false, 0, imm);
}
|
||||||
|
// vpcmpistrm: opAVX_X_XM_IMM with type MM_0F3A | PP_66 and code 0x62, forwarding imm.
void vpcmpistrm(const Xmm& xm, const Operand& op, uint8 imm)
{
	const int vexType = MM_0F3A | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x62, false, 0, imm);
}
|
||||||
|
// vtestps: opAVX_X_XM_IMM with type MM_0F38 | PP_66 and code 0x0E; no immediate argument is supplied.
void vtestps(const Xmm& xm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x0E, true, 0);
}
|
||||||
|
// vtestpd: opAVX_X_XM_IMM with type MM_0F38 | PP_66 and code 0x0F; no immediate argument is supplied.
void vtestpd(const Xmm& xm, const Operand& op)
{
	const int vexType = MM_0F38 | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x0F, true, 0);
}
|
||||||
|
// vcomisd: opAVX_X_XM_IMM with type MM_0F | PP_66 and code 0x2F; no immediate argument is supplied.
void vcomisd(const Xmm& xm, const Operand& op)
{
	const int vexType = MM_0F | PP_66;
	opAVX_X_XM_IMM(xm, op, vexType, 0x2F, false, -1);
}
|
||||||
|
// vcomiss: opAVX_X_XM_IMM with type MM_0F and code 0x2F; no immediate argument is supplied.
void vcomiss(const Xmm& xm, const Operand& op)
{
	const int vexType = MM_0F;
	opAVX_X_XM_IMM(xm, op, vexType, 0x2F, false, -1);
}
|
||||||
|
// vcvtdq2ps: opAVX_X_XM_IMM with type MM_0F and code 0x5B; no immediate argument is supplied.
void vcvtdq2ps(const Xmm& xm, const Operand& op)
{
	const int vexType = MM_0F;
	opAVX_X_XM_IMM(xm, op, vexType, 0x5B, true, -1);
}
|
||||||
|
void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x5B, true, -1); }
|
||||||
|
void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x5B, true, -1); }
|
||||||
|
void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x28, true, -1); }
|
||||||
|
void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x28, true, -1); }
|
||||||
|
void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x12, true, -1); }
|
||||||
|
void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x6F, true, -1); }
|
||||||
|
void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x6F, true, -1); }
|
||||||
|
void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x16, true, -1); }
|
||||||
|
void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x12, true, -1); }
|
||||||
|
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x10, true, -1); }
|
||||||
|
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x10, true, -1); }
|
||||||
|
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, false, -1); }
|
||||||
|
void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, false, -1); }
|
||||||
|
void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, false, -1); }
|
||||||
|
void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x41, false, -1); }
|
||||||
|
void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, false, -1); }
|
||||||
|
void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, false, -1); }
|
||||||
|
void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, false, -1); }
|
||||||
|
void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, false, -1); }
|
||||||
|
void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x24, false, -1); }
|
||||||
|
void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x25, false, -1); }
|
||||||
|
void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x30, false, -1); }
|
||||||
|
void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x31, false, -1); }
|
||||||
|
void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x32, false, -1); }
|
||||||
|
void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x33, false, -1); }
|
||||||
|
void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x34, false, -1); }
|
||||||
|
void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x35, false, -1); }
|
||||||
|
void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, false, -1, imm); }
|
||||||
|
void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, false, -1, imm); }
|
||||||
|
void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, false, -1, imm); }
|
||||||
|
void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, false, -1); }
|
||||||
|
void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x53, true, -1); }
|
||||||
|
void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x52, true, -1); }
|
||||||
|
void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x51, true, -1); }
|
||||||
|
void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x51, true, -1); }
|
||||||
|
void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x2E, false, -1); }
|
||||||
|
void vucomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x2E, false, -1); }
|
||||||
|
void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x29, true, -1); }
|
||||||
|
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F, 0x29, true, -1); }
|
||||||
|
void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x7F, true, -1); }
|
||||||
|
void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_F3, 0x7F, true, -1); }
|
||||||
|
void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x11, true, -1); }
|
||||||
|
void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F, 0x11, true, -1); }
|
||||||
|
void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0xD0, true, -1); }
|
||||||
|
void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0xD0, true, -1); }
|
||||||
|
void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x7C, true, -1); }
|
||||||
|
void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x7C, true, -1); }
|
||||||
|
void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x7D, true, -1); }
|
||||||
|
void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x7D, true, -1); }
|
||||||
|
void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDC, false, 0); }
|
||||||
|
void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDD, false, 0); }
|
||||||
|
void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDE, false, 0); }
|
||||||
|
void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDF, false, 0); }
|
||||||
|
void vmaskmovps(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x2C, true, 0); }
|
||||||
|
// vmaskmovps, store form (memory operand first, opcode 0x2E).
// The encoder is deliberately called with xm2 before xm1, i.e. in the reverse
// of the order the two registers appear in this signature.
void vmaskmovps(const Address& addr, const Xmm& xm1, const Xmm& xm2)
{
	opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x2E, true, 0);
}
|
||||||
|
void vmaskmovpd(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x2D, true, 0); }
|
||||||
|
// vmaskmovpd, store form (memory operand first, opcode 0x2F).
// As with the single-precision store, xm2 is passed to the encoder ahead of xm1.
void vmaskmovpd(const Address& addr, const Xmm& xm1, const Xmm& xm2)
{
	opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x2F, true, 0);
}
|
||||||
|
// vmovhpd, load form (opcode 0x16).
// op2 is optional; when it is given it has to be a memory operand, otherwise
// ERR_BAD_COMBINATION is thrown before anything is emitted.
void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand())
{
	const bool op2Acceptable = op2.isNone() || op2.isMEM();
	if (!op2Acceptable) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x16, false);
}
|
||||||
|
// vmovhpd, store form (opcode 0x17).
// Only two operands exist in this form, so the fixed register xm0 is passed
// for the encoder's middle register argument.
void vmovhpd(const Address& addr, const Xmm& x)
{
	opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x17, false);
}
|
||||||
|
void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F, 0x16, false); }
|
||||||
|
void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F, 0x17, false); }
|
||||||
|
void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x12, false); }
|
||||||
|
void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x13, false); }
|
||||||
|
void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F, 0x12, false); }
|
||||||
|
void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F, 0x13, false); }
|
||||||
|
void vfmadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x98, true, 1); }
|
||||||
|
void vfmadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA8, true, 1); }
|
||||||
|
void vfmadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB8, true, 1); }
|
||||||
|
void vfmadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x98, true, 0); }
|
||||||
|
void vfmadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA8, true, 0); }
|
||||||
|
void vfmadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB8, true, 0); }
|
||||||
|
void vfmadd132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x99, false, 1); }
|
||||||
|
void vfmadd213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA9, false, 1); }
|
||||||
|
void vfmadd231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB9, false, 1); }
|
||||||
|
void vfmadd132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x99, false, 0); }
|
||||||
|
void vfmadd213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA9, false, 0); }
|
||||||
|
void vfmadd231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB9, false, 0); }
|
||||||
|
void vfmaddsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x96, true, 1); }
|
||||||
|
void vfmaddsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA6, true, 1); }
|
||||||
|
void vfmaddsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB6, true, 1); }
|
||||||
|
void vfmaddsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x96, true, 0); }
|
||||||
|
void vfmaddsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA6, true, 0); }
|
||||||
|
void vfmaddsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB6, true, 0); }
|
||||||
|
void vfmsubadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x97, true, 1); }
|
||||||
|
void vfmsubadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA7, true, 1); }
|
||||||
|
void vfmsubadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB7, true, 1); }
|
||||||
|
void vfmsubadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x97, true, 0); }
|
||||||
|
void vfmsubadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA7, true, 0); }
|
||||||
|
void vfmsubadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB7, true, 0); }
|
||||||
|
void vfmsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9A, true, 1); }
|
||||||
|
void vfmsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAA, true, 1); }
|
||||||
|
void vfmsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBA, true, 1); }
|
||||||
|
void vfmsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9A, true, 0); }
|
||||||
|
void vfmsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAA, true, 0); }
|
||||||
|
void vfmsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBA, true, 0); }
|
||||||
|
void vfmsub132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9B, false, 1); }
|
||||||
|
void vfmsub213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAB, false, 1); }
|
||||||
|
void vfmsub231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBB, false, 1); }
|
||||||
|
void vfmsub132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9B, false, 0); }
|
||||||
|
void vfmsub213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAB, false, 0); }
|
||||||
|
void vfmsub231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBB, false, 0); }
|
||||||
|
void vfnmadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9C, true, 1); }
|
||||||
|
void vfnmadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAC, true, 1); }
|
||||||
|
void vfnmadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBC, true, 1); }
|
||||||
|
void vfnmadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9C, true, 0); }
|
||||||
|
void vfnmadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAC, true, 0); }
|
||||||
|
void vfnmadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBC, true, 0); }
|
||||||
|
void vfnmadd132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9D, false, 1); }
|
||||||
|
void vfnmadd213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAD, false, 1); }
|
||||||
|
void vfnmadd231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBD, false, 1); }
|
||||||
|
void vfnmadd132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9D, false, 0); }
|
||||||
|
void vfnmadd213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAD, false, 0); }
|
||||||
|
void vfnmadd231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBD, false, 0); }
|
||||||
|
void vfnmsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9E, true, 1); }
|
||||||
|
void vfnmsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAE, true, 1); }
|
||||||
|
void vfnmsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBE, true, 1); }
|
||||||
|
void vfnmsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9E, true, 0); }
|
||||||
|
void vfnmsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAE, true, 0); }
|
||||||
|
void vfnmsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBE, true, 0); }
|
||||||
|
void vfnmsub132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9F, false, 1); }
|
||||||
|
void vfnmsub213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAF, false, 1); }
|
||||||
|
void vfnmsub231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBF, false, 1); }
|
||||||
|
void vfnmsub132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9F, false, 0); }
|
||||||
|
void vfnmsub213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAF, false, 0); }
|
||||||
|
void vfnmsub231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBF, false, 0); }
|
||||||
|
void vaesimc(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0xDB, false, 0); }
|
||||||
|
void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x1A, true, 0); }
|
||||||
|
void vbroadcastsd(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x19, true, 0); }
|
||||||
|
void vbroadcastss(const Xmm& x, const Address& addr) { opAVX_X_XM_IMM(x, addr, MM_0F38 | PP_66, 0x18, true, 0); }
|
||||||
|
// vextractf128 (0F3A opcode 0x19, followed by imm).
// When op is an XMM register it is run through cvtReg with Operand::YMM before
// encoding (presumably so both register arguments have the same width --
// confirm against cvtReg); any other operand is passed with the convert flag off.
void vextractf128(const Operand& op, const Ymm& y, uint8 imm)
{
	const bool opIsXmm = op.isXMM();
	opAVX_X_XM_IMM(y, cvtReg(op, opIsXmm, Operand::YMM), MM_0F3A | PP_66, 0x19, true, 0, imm);
}
|
||||||
|
// vinsertf128 (0F3A opcode 0x18). The instruction bytes are produced by the
// shared encoder; the immediate is appended afterwards as one extra byte.
// An XMM source operand goes through cvtReg with Operand::YMM first.
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm)
{
	const bool opIsXmm = op.isXMM();
	opAVX_X_X_XM(y1, y2, cvtReg(op, opIsXmm, Operand::YMM), MM_0F3A | PP_66, 0x18, true, 0);
	db(imm);
}
|
||||||
|
// vperm2f128 (0F3A opcode 0x06): encode the three operands, then append the
// immediate control byte.
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm)
{
	opAVX_X_X_XM(y1, y2, op, MM_0F3A | PP_66, 0x06, true, 0);
	db(imm);
}
|
||||||
|
// vlddqu (F2 0F F0): load from addr into x.
// The middle register argument is a fixed register whose kind follows x:
// xm0 when x is an XMM register, ym0 otherwise.
void vlddqu(const Xmm& x, const Address& addr)
{
	const bool is128 = x.isXMM();
	opAVX_X_X_XM(x, is128 ? xm0 : ym0, addr, MM_0F | PP_F2, 0xF0, true, 0);
}
|
||||||
|
// vldmxcsr (0F AE with a memory operand).
// The instruction has no register operands; xm2 is passed in the first
// register slot (presumably to supply the /2 opcode-extension digit --
// confirm against the encoder) and xm0 in the middle slot.
void vldmxcsr(const Address& addr)
{
	opAVX_X_X_XM(xm2, xm0, addr, MM_0F, 0xAE, false, -1);
}
|
||||||
|
// vstmxcsr (0F AE with a memory operand).
// Shares its opcode byte with vldmxcsr; here xm3 occupies the first register
// slot (presumably the /3 opcode-extension digit -- confirm against the encoder).
void vstmxcsr(const Address& addr)
{
	opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1);
}
|
||||||
|
// vmaskmovdqu (66 0F F7), register-register only. xm0 is passed for the
// encoder's middle register argument because the instruction takes just two
// operands.
void vmaskmovdqu(const Xmm& x1, const Xmm& x2)
{
	opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1);
}
|
||||||
|
// vpextrb (66 0F3A 14 ib).
// op must be either an i32e general-purpose register or memory, otherwise
// ERR_BAD_COMBINATION is thrown. A register operand is re-typed through cvtReg
// as Operand::XMM so it can be handed to the XMM-oriented encoder; the
// immediate is appended as a trailing byte.
void vpextrb(const Operand& op, const Xmm& x, uint8 imm)
{
	const bool toMemory = op.isMEM();
	if (!(op.isREG(i32e) || toMemory)) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x, xm0, cvtReg(op, !toMemory, Operand::XMM), MM_0F3A | PP_66, 0x14, false);
	db(imm);
}
|
||||||
|
// vpextrw, register-destination form (66 0F C5 /r ib).
// For opcode 0xC5 the general-purpose destination is encoded in ModRM.reg and
// the XMM source in ModRM.r/m, so r (re-typed as an Xmm by index) is passed in
// the first encoder slot and x in the last -- the same layout vpmovmskb uses
// for its general-purpose destination. The earlier code passed the two the
// other way round, which emitted the instruction with the source and
// destination register indices exchanged.
void vpextrw(const Reg& r, const Xmm& x, uint8 imm)
{
	opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false);
	db(imm); // word selector
}
|
||||||
|
// vpextrw, memory-destination form (66 0F3A 15 ib): x is the register
// argument, addr the memory operand, and imm is appended as a trailing byte.
void vpextrw(const Address& addr, const Xmm& x, uint8 imm)
{
	opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false);
	db(imm);
}
|
||||||
|
// vpextrd (66 0F3A 16 ib, W argument 0).
// op must be a 32-bit register or memory; anything else raises
// ERR_BAD_COMBINATION. Register operands are re-typed via cvtReg as
// Operand::XMM before being passed to the encoder.
void vpextrd(const Operand& op, const Xmm& x, uint8 imm)
{
	const bool toMemory = op.isMEM();
	if (!(op.isREG(32) || toMemory)) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x, xm0, cvtReg(op, !toMemory, Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0);
	db(imm);
}
|
||||||
|
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }
|
||||||
|
void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }
|
||||||
|
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F | PP_66, 0xC4, false); db(imm); }
|
||||||
|
void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F | PP_66, 0xC4, false); db(imm); }
|
||||||
|
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 0); db(imm); }
|
||||||
|
void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 0); db(imm); }
|
||||||
|
void vpmovmskb(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xD7, false); }
|
||||||
|
// vpslldq, three-operand form (66 0F 73 ib).
// xm7 is a fixed first argument (presumably the /7 opcode-extension digit --
// confirm against the encoder); x1 and x2 follow, and the shift count is
// appended as the immediate byte.
void vpslldq(const Xmm& x1, const Xmm& x2, uint8 imm)
{
	opAVX_X_X_XM(xm7, x1, x2, MM_0F | PP_66, 0x73, false);
	db(imm);
}
|
||||||
|
// vpslldq, two-operand shorthand: x is used for both register operands of the
// three-operand encoding; xm7 stays fixed in the first slot and imm is
// appended as the trailing byte.
void vpslldq(const Xmm& x, uint8 imm)
{
	opAVX_X_X_XM(xm7, x, x, MM_0F | PP_66, 0x73, false);
	db(imm);
}
|
||||||
|
// vpsrldq, three-operand form (66 0F 73 ib).
// Same opcode byte as vpslldq; the fixed first argument is xm3 here
// (presumably the /3 opcode-extension digit -- confirm against the encoder).
void vpsrldq(const Xmm& x1, const Xmm& x2, uint8 imm)
{
	opAVX_X_X_XM(xm3, x1, x2, MM_0F | PP_66, 0x73, false);
	db(imm);
}
|
||||||
|
void vpsrldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm3, x, x, MM_0F | PP_66, 0x73, false); db(imm); }
|
||||||
|
void vpsllw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm6, x1, x2, MM_0F | PP_66, 0x71, false); db(imm); }
|
||||||
|
void vpsllw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm6, x, x, MM_0F | PP_66, 0x71, false); db(imm); }
|
||||||
|
void vpslld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm6, x1, x2, MM_0F | PP_66, 0x72, false); db(imm); }
|
||||||
|
void vpslld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm6, x, x, MM_0F | PP_66, 0x72, false); db(imm); }
|
||||||
|
void vpsllq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm6, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); }
|
||||||
|
void vpsllq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm6, x, x, MM_0F | PP_66, 0x73, false); db(imm); }
|
||||||
|
void vpsraw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm4, x1, x2, MM_0F | PP_66, 0x71, false); db(imm); }
|
||||||
|
void vpsraw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm4, x, x, MM_0F | PP_66, 0x71, false); db(imm); }
|
||||||
|
void vpsrad(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm4, x1, x2, MM_0F | PP_66, 0x72, false); db(imm); }
|
||||||
|
void vpsrad(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm4, x, x, MM_0F | PP_66, 0x72, false); db(imm); }
|
||||||
|
void vpsrlw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm2, x1, x2, MM_0F | PP_66, 0x71, false); db(imm); }
|
||||||
|
void vpsrlw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm2, x, x, MM_0F | PP_66, 0x71, false); db(imm); }
|
||||||
|
void vpsrld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm2, x1, x2, MM_0F | PP_66, 0x72, false); db(imm); }
|
||||||
|
void vpsrld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm2, x, x, MM_0F | PP_66, 0x72, false); db(imm); }
|
||||||
|
void vpsrlq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm2, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); }
|
||||||
|
void vpsrlq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm2, x, x, MM_0F | PP_66, 0x73, false); db(imm); }
|
||||||
|
// vblendvpd, four-operand form (66 0F3A 4B).
// x1, x2 and op are encoded by the shared encoder; the fourth register x4 is
// carried in the trailing byte, with its index placed in the upper four bits.
void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4)
{
	opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4B, true);
	db(x4.getIdx() << 4);
}
|
||||||
|
// vblendvpd, three-operand shorthand: x1 is used for both the first and second
// register arguments. x4's index is emitted in the upper four bits of the
// trailing byte.
void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4)
{
	opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4B, true);
	db(x4.getIdx() << 4);
}
|
||||||
|
void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4A, true); db(x4.getIdx() << 4); }
|
||||||
|
void vblendvps(const Xmm& x1, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4A, true); db(x4.getIdx() << 4); }
|
||||||
|
void vpblendvb(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4C, false); db(x4.getIdx() << 4); }
|
||||||
|
void vpblendvb(const Xmm& x1, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4C, false); db(x4.getIdx() << 4); }
|
||||||
|
void vmovd(const Xmm& x, const Reg32& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 0); }
|
||||||
|
void vmovd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x6E, false, 0); }
|
||||||
|
void vmovd(const Reg32& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 0); }
|
||||||
|
void vmovd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x7E, false, 0); }
|
||||||
|
// vmovhlps (0F 12), register operands only.
// The optional third operand, when present, must be an XMM register;
// otherwise ERR_BAD_COMBINATION is thrown.
void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand())
{
	const bool opAcceptable = op.isNone() || op.isXMM();
	if (!opAcceptable) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x1, x2, op, MM_0F, 0x12, false);
}
|
||||||
|
void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F, 0x16, false); }
|
||||||
|
// vmovmskpd (66 0F 50).
// r must satisfy isBit(i32e), else ERR_BAD_COMBINATION. r is re-typed as an
// Xmm or Ymm (matching the kind of x) so it can sit in the encoder's first
// register slot; the fixed middle register is xm0 or ym0 accordingly.
void vmovmskpd(const Reg& r, const Xmm& x)
{
	if (!r.isBit(i32e)) throw ERR_BAD_COMBINATION;
	const bool is128 = x.isXMM();
	opAVX_X_X_XM(is128 ? Xmm(r.getIdx()) : Ymm(r.getIdx()), is128 ? xm0 : ym0, x, MM_0F | PP_66, 0x50, true, 0);
}
|
||||||
|
// vmovmskps (0F 50).
// Same operand handling as the double-precision variant: r must satisfy
// isBit(i32e) and is re-typed as Xmm/Ymm to match x before encoding.
void vmovmskps(const Reg& r, const Xmm& x)
{
	if (!r.isBit(i32e)) throw ERR_BAD_COMBINATION;
	const bool is128 = x.isXMM();
	opAVX_X_X_XM(is128 ? Xmm(r.getIdx()) : Ymm(r.getIdx()), is128 ? xm0 : ym0, x, MM_0F, 0x50, true, 0);
}
|
||||||
|
// vmovntdq (66 0F E7): store x to addr.
// The fixed middle register follows the kind of x (xm0 for XMM, ym0 otherwise).
void vmovntdq(const Address& addr, const Xmm& x)
{
	const bool is128 = x.isXMM();
	opAVX_X_X_XM(x, is128 ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true);
}
|
||||||
|
void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }
|
||||||
|
void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }
|
||||||
|
void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F38 | PP_66, 0x2A, false); }
|
||||||
|
// vmovsd, register form (F2 0F 10).
// The optional third operand must be an XMM register when supplied; any other
// operand kind raises ERR_BAD_COMBINATION. Memory forms are separate overloads.
void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand())
{
	const bool opAcceptable = op.isNone() || op.isXMM();
	if (!opAcceptable) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x10, false);
}
|
||||||
|
void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x10, false); }
|
||||||
|
void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x11, false); }
|
||||||
|
void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x10, false); }
|
||||||
|
void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x10, false); }
|
||||||
|
void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x11, false); }
|
||||||
|
void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2D, false, 0); }
|
||||||
|
void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 0); }
|
||||||
|
void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 0); }
|
||||||
|
void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 0); }
|
||||||
|
void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, cvtReg(op2, op2.isREG(), Operand::XMM), MM_0F | PP_F3, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); }
|
||||||
|
void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, cvtReg(op2, op2.isREG(), Operand::XMM), MM_0F | PP_F2, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); }
|
||||||
|
// vcvtps2pd (0F 5A).
// The source must be memory or an XMM register, else ERR_BAD_COMBINATION.
// A register source is re-typed through cvtReg to the same kind as the
// destination x (XMM or YMM), and the fixed middle register is xm0/ym0 to match.
void vcvtps2pd(const Xmm& x, const Operand& op)
{
	const bool srcIsMem = op.isMEM();
	if (!(srcIsMem || op.isXMM())) throw ERR_BAD_COMBINATION;
	const bool dest128 = x.isXMM();
	opAVX_X_X_XM(x, dest128 ? xm0 : ym0, cvtReg(op, !srcIsMem, dest128 ? Operand::XMM : Operand::YMM), MM_0F, 0x5A, true);
}
|
||||||
|
void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, cvtReg(op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM), MM_0F | PP_F3, 0xE6, true); }
|
||||||
|
// vcvtpd2ps (66 0F 5A).
// The destination may not be a YMM register (ERR_BAD_COMBINATION otherwise).
// When the source is a YMM register the destination is re-typed as a Ymm with
// the same index and ym0 is used as the fixed middle register; in all other
// cases x and xm0 are passed as they are.
void vcvtpd2ps(const Xmm& x, const Operand& op)
{
	if (x.isYMM()) throw ERR_BAD_COMBINATION;
	const bool src256 = op.isYMM();
	opAVX_X_X_XM(src256 ? Ymm(x.getIdx()) : x, src256 ? ym0 : xm0, op, MM_0F | PP_66, 0x5A, true);
}
|
||||||
|
void vcvtpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_F2, 0xE6, true); }
|
||||||
|
void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0xE6, true); }
|
||||||
|
#ifdef XBYAK64
|
||||||
|
void vmovq(const Xmm& x, const Reg64& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 1); }
|
||||||
|
// vmovq xmm, m64: load form, encoded with the F3 prefix and opcode 0x7E; the W
// argument is left at -1 and xm0 fills the middle register slot.
// NOTE(review): this overload is compiled only under the enclosing
// #ifdef XBYAK64, although nothing in this encoding looks specific to 64-bit
// mode -- confirm whether it should also be available in 32-bit builds.
void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); }
|
||||||
|
void vmovq(const Reg64& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 1); }
|
||||||
|
void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); }
|
||||||
|
void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_F3, 0x7E, false, -1); }
|
||||||
|
// vpextrq (66 0F3A 16 ib, W argument 1).
// op must be a 64-bit register or memory, otherwise ERR_BAD_COMBINATION.
// Register operands are re-typed via cvtReg as Operand::XMM for the encoder;
// imm follows as the trailing byte.
void vpextrq(const Operand& op, const Xmm& x, uint8 imm)
{
	const bool toMemory = op.isMEM();
	if (!(op.isREG(64) || toMemory)) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x, xm0, cvtReg(op, !toMemory, Operand::XMM), MM_0F3A | PP_66, 0x16, false, 1);
	db(imm);
}
|
||||||
|
// vpinsrq, three-operand form (66 0F3A 22 ib, W argument 1).
// op must be a 64-bit register or memory, otherwise ERR_BAD_COMBINATION.
// Register operands are re-typed via cvtReg as Operand::XMM for the encoder;
// imm follows as the trailing byte.
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm)
{
	const bool fromMemory = op.isMEM();
	if (!(op.isREG(64) || fromMemory)) throw ERR_BAD_COMBINATION;
	opAVX_X_X_XM(x1, x2, cvtReg(op, !fromMemory, Operand::XMM), MM_0F3A | PP_66, 0x22, false, 1);
	db(imm);
}
|
||||||
|
void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 1); db(imm); }
|
||||||
|
void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2D, false, 1); }
|
||||||
|
void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 1); }
|
||||||
|
void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 1); }
|
||||||
|
void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 1); }
|
||||||
|
#endif
|
||||||
|
|
|
@ -2,9 +2,10 @@
|
||||||
#define XBYAK_XBYAK_UTIL_H_
|
#define XBYAK_XBYAK_UTIL_H_
|
||||||
|
|
||||||
/**
|
/**
|
||||||
utility class for Xbyak
|
utility class and functions for Xbyak
|
||||||
@note this header is under construction
|
@note this header is UNDER CONSTRUCTION!
|
||||||
*/
|
*/
|
||||||
|
#include "xbyak/xbyak.h"
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
||||||
|
@ -29,10 +30,17 @@
|
||||||
#include <intrin.h> // for __cpuid
|
#include <intrin.h> // for __cpuid
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#if __GNUC_PREREQ(4, 3)
|
#ifndef __GNUC_PREREQ
|
||||||
|
// Fallback for C libraries that do not provide __GNUC_PREREQ: true only when the compiler
// reports a GNU-compatible version of at least major.minor. In #if context an undefined
// __GNUC__ / __GNUC_MINOR__ evaluates to 0, so non-GNU compilers get "false".
#define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
|
||||||
|
#endif
|
||||||
|
#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
|
||||||
#include <cpuid.h>
|
#include <cpuid.h>
|
||||||
#else
|
#else
|
||||||
#define __cpuid(eaxIn, a, b, c, d) __asm__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
|
#if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm'
|
||||||
|
// ebx cannot be named as an output in this configuration, so it is saved and restored around
// cpuid and the EBX result is handed back through esi ("=S"). The copy must read %ebx:
// copying %ebp would return the frame pointer instead of the cpuid result.
#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
|
||||||
|
#else
|
||||||
|
// Generic GNU-style fallback: runs cpuid with eaxIn in eax and stores eax/ebx/ecx/edx
// into a/b/c/d respectively.
#define __cpuid(eaxIn, a, b, c, d) \
	__asm__ __volatile__( \
		"cpuid\n" \
		: "=a"(a), "=b"(b), "=c"(c), "=d"(d) \
		: "0"(eaxIn))
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -43,6 +51,10 @@ namespace Xbyak { namespace util {
|
||||||
*/
|
*/
|
||||||
class Cpu {
|
class Cpu {
|
||||||
unsigned int type_;
|
unsigned int type_;
|
||||||
|
// Packs the four chars x[0..3] into one 32-bit value: x[0] lands in the lowest byte and
// x[3] in the highest.
// NOTE(review): despite the "BE" in the name, this byte order is the little-endian layout;
// the name looks like a misnomer -- confirm before relying on it.
// NOTE(review): each char is promoted to int before shifting, so the result is only the plain
// byte packing for values 0..127; the callers visible here pass ASCII literals ("ntel", "cAMD").
unsigned int get32bitAsBE(const char *x) const
|
||||||
|
{
|
||||||
|
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
||||||
|
}
|
||||||
public:
|
public:
|
||||||
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
||||||
{
|
{
|
||||||
|
@ -64,11 +76,17 @@ public:
|
||||||
tSSE41 = 1 << 7,
|
tSSE41 = 1 << 7,
|
||||||
tSSE42 = 1 << 8,
|
tSSE42 = 1 << 8,
|
||||||
tPOPCNT = 1 << 9,
|
tPOPCNT = 1 << 9,
|
||||||
|
tAESNI = 1 << 10,
|
||||||
|
tSSE5 = 1 << 11,
|
||||||
|
tOSXSACE = 1 << 12,
|
||||||
|
tPCLMULQDQ = 1 << 13,
|
||||||
|
tAVX = 1 << 14,
|
||||||
|
tFMA = 1 << 15,
|
||||||
|
|
||||||
t3DN = 1 << 16,
|
t3DN = 1 << 16,
|
||||||
tE3DN = 1 << 17,
|
tE3DN = 1 << 17,
|
||||||
tSSE4a = 1 << 18,
|
tSSE4a = 1 << 18,
|
||||||
tSSE5 = 1 << 11,
|
tRDTSCP = 1 << 19,
|
||||||
|
|
||||||
tINTEL = 1 << 24,
|
tINTEL = 1 << 24,
|
||||||
tAMD = 1 << 25
|
tAMD = 1 << 25
|
||||||
|
@ -80,28 +98,39 @@ public:
|
||||||
getCpuid(0, data);
|
getCpuid(0, data);
|
||||||
static const char intel[] = "ntel";
|
static const char intel[] = "ntel";
|
||||||
static const char amd[] = "cAMD";
|
static const char amd[] = "cAMD";
|
||||||
if (data[2] == *reinterpret_cast<const unsigned int*>(amd)) {
|
if (data[2] == get32bitAsBE(amd)) {
|
||||||
type_ |= tAMD;
|
type_ |= tAMD;
|
||||||
getCpuid(0x80000001, data);
|
getCpuid(0x80000001, data);
|
||||||
if (data[3] & (1 << 31)) type_ |= t3DN;
|
if (data[3] & (1U << 31)) type_ |= t3DN;
|
||||||
if (data[3] & (1 << 15)) type_ |= tCMOV;
|
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
||||||
if (data[3] & (1 << 30)) type_ |= tE3DN;
|
if (data[3] & (1U << 30)) type_ |= tE3DN;
|
||||||
if (data[3] & (1 << 22)) type_ |= tMMX2;
|
if (data[3] & (1U << 22)) type_ |= tMMX2;
|
||||||
|
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
|
||||||
}
|
}
|
||||||
if (data[2] == *reinterpret_cast<const unsigned int*>(intel)) {
|
if (data[2] == get32bitAsBE(intel)) {
|
||||||
type_ |= tINTEL;
|
type_ |= tINTEL;
|
||||||
|
getCpuid(0x80000001, data);
|
||||||
|
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
|
||||||
}
|
}
|
||||||
getCpuid(1, data);
|
getCpuid(1, data);
|
||||||
if (data[2] & (1 << 0)) type_ |= tSSE3;
|
if (data[2] & (1U << 0)) type_ |= tSSE3;
|
||||||
if (data[2] & (1 << 9)) type_ |= tSSSE3;
|
if (data[2] & (1U << 9)) type_ |= tSSSE3;
|
||||||
if (data[2] & (1 << 19)) type_ |= tSSE41;
|
if (data[2] & (1U << 19)) type_ |= tSSE41;
|
||||||
if (data[2] & (1 << 20)) type_ |= tSSE42;
|
if (data[2] & (1U << 20)) type_ |= tSSE42;
|
||||||
if (data[2] & (1 << 23)) type_ |= tPOPCNT;
|
if (data[2] & (1U << 23)) type_ |= tPOPCNT;
|
||||||
|
if (data[2] & (1U << 25)) type_ |= tAESNI;
|
||||||
if (data[3] & (1 << 15)) type_ |= tCMOV;
|
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||||
if (data[3] & (1 << 23)) type_ |= tMMX;
|
if (data[2] & (1U << 27)) type_ |= tOSXSACE;
|
||||||
if (data[3] & (1 << 25)) type_ |= tMMX2 | tSSE;
|
#if _M_SSE >= 0x500
|
||||||
if (data[3] & (1 << 26)) type_ |= tSSE2;
|
// QQQ
|
||||||
|
// should check XFEATURE_ENABLED_MASK[2:1] = '11b' by xgetbv
|
||||||
|
if (data[2] & (1U << 28)) type_ |= tAVX;
|
||||||
|
if (data[2] & (1U << 12)) type_ |= tFMA;
|
||||||
|
#endif
|
||||||
|
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
||||||
|
if (data[3] & (1U << 23)) type_ |= tMMX;
|
||||||
|
if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE;
|
||||||
|
if (data[3] & (1U << 26)) type_ |= tSSE2;
|
||||||
}
|
}
|
||||||
bool has(Type type) const
|
bool has(Type type) const
|
||||||
{
|
{
|
||||||
|
@ -109,6 +138,40 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Accumulates timestamp-counter ticks over begin()/end() pairs and counts how many
// intervals have been closed with end().
class Clock {
|
||||||
|
public:
|
||||||
|
// Returns the current value of the CPU timestamp counter as a 64-bit integer.
static inline uint64 getRdtsc()
|
||||||
|
{
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
// MSVC path: use the compiler intrinsic.
return __rdtsc();
|
||||||
|
#else
|
||||||
|
unsigned int eax, edx;
|
||||||
|
// rdtsc leaves the low half in eax and the high half in edx.
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||||
|
// Widen edx before shifting so the high half is not lost, then merge in the low half.
return ((uint64)edx << 32) | eax;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
// Starts with no accumulated ticks and no recorded intervals.
Clock()
|
||||||
|
: clock_(0)
|
||||||
|
, count_(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
// Opens an interval: subtracts the current timestamp so that the matching end(),
// which adds its own timestamp, leaves the elapsed ticks added to clock_.
// NOTE(review): presumably relies on uint64 being unsigned so the intermediate
// wraparound is well defined -- confirm against the uint64 typedef.
void begin()
|
||||||
|
{
|
||||||
|
clock_ -= getRdtsc();
|
||||||
|
}
|
||||||
|
// Closes an interval: adds the current timestamp and bumps the interval counter.
void end()
|
||||||
|
{
|
||||||
|
clock_ += getRdtsc();
|
||||||
|
count_++;
|
||||||
|
}
|
||||||
|
// Number of end() calls since construction or the last clear().
int getCount() const { return count_; }
|
||||||
|
// Current value of the tick accumulator.
uint64 getClock() const { return clock_; }
|
||||||
|
// Resets both the interval counter and the tick accumulator to zero.
void clear() { count_ = 0; clock_ = 0; }
|
||||||
|
private:
|
||||||
|
// Tick accumulator updated by begin() and end().
uint64 clock_;
|
||||||
|
// Number of intervals closed by end().
int count_;
|
||||||
|
};
|
||||||
|
|
||||||
#ifdef XBYAK32
|
#ifdef XBYAK32
|
||||||
|
|
||||||
namespace local {
|
namespace local {
|
||||||
|
@ -133,53 +196,47 @@ XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebp)
|
||||||
#undef XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG
|
#undef XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG
|
||||||
} // end of local
|
} // end of local
|
||||||
|
|
||||||
template<class Gen>
|
/**
|
||||||
struct EnableSetEip : public Gen {
|
get eip to out register
|
||||||
EnableSetEip(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0)
|
@note out is not esp
|
||||||
: Gen(maxSize, userPtr)
|
*/
|
||||||
{
|
template<class T>
|
||||||
}
|
void setEipTo(T *self, const Xbyak::Reg32& out)
|
||||||
/**
|
{
|
||||||
get pid to out register
|
|
||||||
@note out = eax or ecx or edx
|
|
||||||
*/
|
|
||||||
void setEipTo(const Xbyak::Reg32& out)
|
|
||||||
{
|
|
||||||
#if 0
|
#if 0
|
||||||
Gen::call(Gen::getCurr() + 5);
|
self->call("@f");
|
||||||
Gen::pop(out);
|
self->L("@@");
|
||||||
|
self->pop(out);
|
||||||
#else
|
#else
|
||||||
int idx = out.getIdx();
|
int idx = out.getIdx();
|
||||||
switch (idx) {
|
switch (idx) {
|
||||||
case Xbyak::Operand::EAX:
|
case Xbyak::Operand::EAX:
|
||||||
Gen::call((void*)local::set_eip_to_eax);
|
self->call((void*)local::set_eip_to_eax);
|
||||||
break;
|
break;
|
||||||
case Xbyak::Operand::ECX:
|
case Xbyak::Operand::ECX:
|
||||||
Gen::call((void*)local::set_eip_to_ecx);
|
self->call((void*)local::set_eip_to_ecx);
|
||||||
break;
|
break;
|
||||||
case Xbyak::Operand::EDX:
|
case Xbyak::Operand::EDX:
|
||||||
Gen::call((void*)local::set_eip_to_edx);
|
self->call((void*)local::set_eip_to_edx);
|
||||||
break;
|
break;
|
||||||
case Xbyak::Operand::EBX:
|
case Xbyak::Operand::EBX:
|
||||||
Gen::call((void*)local::set_eip_to_ebx);
|
self->call((void*)local::set_eip_to_ebx);
|
||||||
break;
|
break;
|
||||||
case Xbyak::Operand::ESI:
|
case Xbyak::Operand::ESI:
|
||||||
Gen::call((void*)local::set_eip_to_esi);
|
self->call((void*)local::set_eip_to_esi);
|
||||||
break;
|
break;
|
||||||
case Xbyak::Operand::EDI:
|
case Xbyak::Operand::EDI:
|
||||||
Gen::call((void*)local::set_eip_to_edi);
|
self->call((void*)local::set_eip_to_edi);
|
||||||
break;
|
break;
|
||||||
case Xbyak::Operand::EBP:
|
case Xbyak::Operand::EBP:
|
||||||
Gen::call((void*)local::set_eip_to_ebp);
|
self->call((void*)local::set_eip_to_ebp);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
};
|
#endif
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
} } // end of util
|
} } // end of util
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue