GSdx: vs2010 fix and minor changes

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5678 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11@gmail.com 2013-06-23 10:46:24 +00:00
parent d20bd4f86a
commit 20d99ae9fc
15 changed files with 187 additions and 103 deletions

View File

@ -76,9 +76,9 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
m_sp = m_sp_map[sel];
}
void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int actual, int total)
{
m_ds_map.UpdateStats(frame, ticks, pixels);
m_ds_map.UpdateStats(frame, ticks, actual, total);
}
#ifndef ENABLE_JIT_RASTERIZER

View File

@ -61,7 +61,7 @@ public:
// IDrawScanline
void BeginDraw(const GSRasterizerData* data);
void EndDraw(uint64 frame, uint64 ticks, int pixels);
void EndDraw(uint64 frame, uint64 ticks, int actual, int total);
#ifndef ENABLE_JIT_RASTERIZER

View File

@ -96,9 +96,9 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
m_sp = m_sp_map[sel];
}
void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int actual, int total)
{
m_ds_map.UpdateStats(frame, ticks, pixels);
m_ds_map.UpdateStats(frame, ticks, actual, total);
}
#ifndef ENABLE_JIT_RASTERIZER
@ -434,7 +434,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
skip = left & 7;
steps = pixels + skip - 8;
left -= skip;
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))];
test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[skip]) | GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]);
}
else
{
@ -1524,7 +1524,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
if(!sel.notest)
{
test = GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))];
test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]);
}
}

View File

@ -68,7 +68,7 @@ public:
// IDrawScanline
void BeginDraw(const GSRasterizerData* data);
void EndDraw(uint64 frame, uint64 ticks, int pixels);
void EndDraw(uint64 frame, uint64 ticks, int actual, int total);
void DrawRect(const GSVector4i& r, const GSVertexSW& v);

View File

@ -24,24 +24,24 @@
#if _M_SSE >= 0x501
const GSVector8i GSDrawScanlineCodeGenerator::m_test[16] =
__aligned(const uint8, 8) GSDrawScanlineCodeGenerator::m_test[16][8] =
{
GSVector8i::zero(),
GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
GSVector8i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector8i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff),
GSVector8i::zero(),
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
{0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
{0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
{0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
{0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff},
{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff},
{0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
};
const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] =

View File

@ -135,7 +135,7 @@ public:
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
#if _M_SSE >= 0x501
static const GSVector8i m_test[16];
static __aligned(const uint8, 8) m_test[16][8];
static const GSVector8 m_log2_coef[4];
#else
static const GSVector4i m_test[8];

View File

@ -2824,7 +2824,8 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
xor(dst, eax);
break;
case 2:
vpextrw(eax, src, i * 2);
if(i == 0) vmovd(eax, src);
else vpextrw(eax, src, i * 2);
mov(dst, ax);
break;
}

View File

@ -268,17 +268,16 @@ void GSDrawScanlineCodeGenerator::Init()
sub(ebx, edx);
// GSVector4i test = m_test[skip] | m_test[15 + (steps & (steps >> 31))];
shl(edx, 5);
vmovdqa(ymm7, ptr[edx + (size_t)&m_test[0]]);
mov(eax, ecx);
sar(eax, 31);
and(eax, ecx);
shl(eax, 5);
vpor(ymm7, ptr[eax + (size_t)&m_test[15]]);
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[0]]);
vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)&m_test[15]]);
vpor(ymm7, ymm0);
shl(edx, 5);
}
else
{
@ -592,9 +591,8 @@ void GSDrawScanlineCodeGenerator::Step()
mov(edx, ecx);
sar(edx, 31);
and(edx, ecx);
shl(edx, 5);
vmovdqa(ymm7, ptr[edx + (size_t)&m_test[15]]);
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[15]]);
}
}
@ -2843,7 +2841,8 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
xor(dst, eax);
break;
case 2:
vpextrw(eax, src, j * 2);
if(j == 0) vmovd(eax, src);
else vpextrw(eax, src, j * 2);
mov(dst, ax);
break;
}

View File

@ -2902,49 +2902,33 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
{
Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2];
#if _M_SSE >= 0x401
switch(psm)
{
case 0:
if(i == 0) movd(dst, src);
#if _M_SSE >= 0x401
else pextrd(dst, src, i);
break;
case 1:
if(i == 0) movd(eax, src);
else pextrd(eax, src, i);
xor(eax, dst);
and(eax, 0xffffff);
xor(dst, eax);
break;
case 2:
pextrw(eax, src, i * 2);
mov(dst, ax);
break;
}
#else
switch(psm)
{
case 0:
if(i == 0) movd(dst, src);
#else
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(dst, xmm0);}
#endif
break;
case 1:
if(i == 0) movd(eax, src);
#if _M_SSE >= 0x401
else pextrd(eax, src, i);
#else
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(eax, xmm0);}
#endif
xor(eax, dst);
and(eax, 0xffffff);
xor(dst, eax);
break;
case 2:
pextrw(eax, src, i * 2);
if(i == 0) movd(eax, src);
else pextrw(eax, src, i * 2);
mov(dst, ax);
break;
}
#endif
}
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)

View File

@ -32,7 +32,7 @@ protected:
struct ActivePtr
{
uint64 frame, frames;
uint64 ticks, pixels;
uint64 ticks, actual, total;
VALUE f;
};
@ -84,7 +84,7 @@ public:
return m_active->f;
}
void UpdateStats(uint64 frame, uint64 ticks, int pixels)
void UpdateStats(uint64 frame, uint64 ticks, int actual, int total)
{
if(m_active)
{
@ -95,7 +95,10 @@ public:
}
m_active->ticks += ticks;
m_active->pixels += pixels;
m_active->actual += actual;
m_active->total += total;
ASSERT(m_active->total >= m_active->actual);
}
}
@ -124,15 +127,15 @@ public:
if(p->frames > 0)
{
uint64 tpp = p->pixels > 0 ? p->ticks / p->pixels : 0;
uint64 tpp = p->actual > 0 ? p->ticks / p->actual : 0;
uint64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
uint64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
uint64 ppf = p->frames > 0 ? p->actual / p->frames : 0;
printf("[%014llx]%c %6.2f%% | %5.2f%% | f %4lld | p %10lld | tpp %4lld | tpf %9lld | ppf %7lld\n",
printf("[%014llx]%c %6.2f%% %5.2f%% f %4lld t %12lld p %12lld w %12lld tpp %4lld tpf %9lld ppf %9lld\n",
(uint64)key, m_map.find(key) == m_map.end() ? '*' : ' ',
(float)(tpf * 10000 / 50000000) / 100,
(float)(tpf * 10000 / 34000000) / 100,
(float)(tpf * 10000 / ttpf) / 100,
p->frames, p->pixels,
p->frames, p->ticks, p->actual, p->total - p->actual,
tpp, tpf, ppf);
}
}

View File

@ -37,8 +37,9 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
, m_id(id)
, m_threads(threads)
, m_perfmon(perfmon)
, m_pixels(0)
{
memset(&m_pixels, 0, sizeof(m_pixels));
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
m_edge.count = 0;
@ -110,11 +111,11 @@ void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data)
int GSRasterizer::GetPixels(bool reset)
{
int pixels = m_pixels;
int pixels = m_pixels.sum;
if(reset)
{
m_pixels = 0;
m_pixels.sum = 0;
}
return pixels;
@ -126,6 +127,9 @@ void GSRasterizer::Draw(GSRasterizerData* data)
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
m_pixels.actual = 0;
m_pixels.total = 0;
data->start = __rdtsc();
m_ds->BeginDraw(data);
@ -212,11 +216,13 @@ void GSRasterizer::Draw(GSRasterizerData* data)
_mm256_zeroupper();
#endif
data->pixels = m_pixels;
data->pixels = m_pixels.actual;
uint64 ticks = __rdtsc() - data->start;
m_ds->EndDraw(data->frame, ticks, m_pixels);
m_pixels.sum += m_pixels.actual;
m_ds->EndDraw(data->frame, ticks, m_pixels.actual, m_pixels.total);
}
template<bool scissor_test>
@ -234,11 +240,9 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
{
if(IsOneOfMyScanlines(p.y))
{
m_pixels++;
m_ds->SetupPrim(vertex, index, GSVertexSW::zero());
m_ds->DrawScanline(1, p.x, p.y, v);
DrawScanline(1, p.x, p.y, v);
}
}
}
@ -257,11 +261,9 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
{
if(IsOneOfMyScanlines(p.y))
{
m_pixels++;
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero());
m_ds->DrawScanline(1, p.x, p.y, v);
DrawScanline(1, p.x, p.y, v);
}
}
}
@ -321,15 +323,13 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
if(pixels > 0)
{
m_pixels += pixels;
GSVertexSW dscan = dv / dv.p.xxxx();
scan += dscan * (l - scan.p).xxxx();
m_ds->SetupPrim(vertex, index, dscan);
m_ds->DrawScanline(pixels, left, p.y, scan);
DrawScanline(pixels, left, p.y, scan);
}
}
}
@ -560,8 +560,6 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
scan.c = scan.c + dscan.c * prestep;
AddScanline(e++, pixels, left, top, scan);
//m_pixels += pixels; m_ds->DrawScanline(pixels, left, top, scan);
}
top++;
@ -605,7 +603,10 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
{
m_ds->DrawRect(r, scan);
m_pixels += r.width() * r.height();
int pixels = r.width() * r.height();
m_pixels.actual += pixels;
m_pixels.total += pixels;
}
else
{
@ -619,7 +620,10 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
m_ds->DrawRect(r, scan);
m_pixels += r.width() * r.height();
int pixels = r.width() * r.height();
m_pixels.actual += pixels;
m_pixels.total += pixels;
top = r.bottom + ((m_threads - 1) << THREAD_HEIGHT);
}
@ -651,9 +655,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
{
if(IsOneOfMyScanlines(r.top))
{
m_pixels += r.width();
m_ds->DrawScanline(r.width(), r.left, r.top, scan);
DrawScanline(r.width(), r.left, r.top, scan);
}
if(++r.top >= r.bottom) break;
@ -883,9 +885,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
int left = e->_pad.i32[1];
int top = e->_pad.i32[2];
m_pixels += pixels;
m_ds->DrawScanline(pixels, left, top, *e++);
DrawScanline(pixels, left, top, *e++);
}
while(e < ee);
}
@ -897,9 +897,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
int left = e->_pad.i32[1];
int top = e->_pad.i32[2];
m_pixels += pixels;
m_ds->DrawEdge(pixels, left, top, *e++);
DrawEdge(pixels, left, top, *e++);
}
while(e < ee);
}
@ -908,6 +906,33 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
}
}
// Group width used by the pixel statistics in GSRasterizer::DrawScanline and
// GSRasterizer::DrawEdge: 8 when _M_SSE >= 0x501, otherwise 4.
// NOTE(review): presumably this mirrors how many pixels the scanline drawing
// loop handles per iteration on each instruction set - confirm against the
// IDrawScanline implementations.
#if _M_SSE >= 0x501
#define PIXELS_PER_LOOP 8
#else
#define PIXELS_PER_LOOP 4
#endif
// Accounts the scanline in the per-draw pixel statistics and then forwards it
// unchanged to m_ds->DrawScanline().
//
// pixels - number of pixels the caller wants drawn on this scanline
// left   - x coordinate of the first pixel
// top    - y coordinate of the scanline
// scan   - interpolated vertex state at the first pixel
void GSRasterizer::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan)
{
	const int mask = PIXELS_PER_LOOP - 1;

	// Pixels that were actually requested.
	m_pixels.actual += pixels;

	// Pixels covered by whole PIXELS_PER_LOOP-wide groups: from `left` rounded
	// down to a group boundary up to `left + pixels` rounded up to one.
	// The start has to be rounded down with `left & ~mask`; subtracting only
	// the misalignment (`left & mask`) would make the total depend on the
	// absolute x position instead of on the span being drawn.
	const int group_begin = left & ~mask;
	const int group_end = (left + pixels + mask) & ~mask;

	m_pixels.total += group_end - group_begin;

	ASSERT(m_pixels.actual <= m_pixels.total);

	m_ds->DrawScanline(pixels, left, top, scan);
}
// Updates the per-draw pixel statistics for one edge call and then forwards
// the call unchanged to m_ds->DrawEdge().
void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
{
	// The counters do not look at the `pixels` argument here: every call adds
	// a fixed 1 to `actual` and PIXELS_PER_LOOP - 1 to `total`.
	m_pixels.total += PIXELS_PER_LOOP - 1;
	m_pixels.actual++;

	ASSERT(m_pixels.total >= m_pixels.actual);

	m_ds->DrawEdge(pixels, left, top, scan);
}
//
GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)

View File

@ -86,7 +86,7 @@ public:
virtual ~IDrawScanline() {}
virtual void BeginDraw(const GSRasterizerData* data) = 0;
virtual void EndDraw(uint64 frame, uint64 ticks, int pixels) = 0;
virtual void EndDraw(uint64 frame, uint64 ticks, int actual, int total) = 0;
#ifdef ENABLE_JIT_RASTERIZER
@ -134,7 +134,7 @@ protected:
GSVector4 m_fscissor_x;
GSVector4 m_fscissor_y;
struct {GSVertexSW* buff; int count;} m_edge;
int m_pixels;
struct {int sum, actual, total;} m_pixels;
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
@ -151,6 +151,9 @@ protected:
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge = false);
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
public:
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
virtual ~GSRasterizer();

View File

@ -2321,7 +2321,7 @@ void GSState::GrowVertexBuffer()
template<uint32 prim>
__forceinline void GSState::VertexKick(uint32 skip)
{
ASSERT(m_vertex.tail < m_vertex.maxcount);
ASSERT(m_vertex.tail < m_vertex.maxcount + 3);
size_t head = m_vertex.head;
size_t tail = m_vertex.tail;
@ -2340,7 +2340,7 @@ __forceinline void GSState::VertexKick(uint32 skip)
GSVector4i xy = v1.xxxx().sub16(m_ofxy);
#if _M_SSE >= 0x401
#if _M_SSE >= 0x501
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.blend32<2>(xy.sra16(4)));
#else
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.upl32(xy.sra16(4).yyyy()));
@ -2421,14 +2421,21 @@ __forceinline void GSState::VertexKick(uint32 skip)
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
// TODO: any way to do a 16-bit integer cross product?
// cross product is zero most of the time because either of the vertices are the same
/*
cross = GSVector4(v2.xyxyl().i16to32().sub32(v0.upl32(v1).i16to32())); // x20, y20, x21, y21
cross = cross * cross.wzwz(); // x20 * y21, y20 * x21
test |= GSVector4i::cast(cross == cross.yxwz());
*/
test = (test | v0 == v1) | (v1 == v2 | v0 == v2);
break;
case GS_TRIANGLEFAN:
/*
cross = GSVector4(v2.xyxyl().i16to32().sub32(v3.upl32(v1).i16to32())); // x23, y23, x21, y21
cross = cross * cross.wzwz(); // x23 * y21, y23 * x21
test |= GSVector4i::cast(cross == cross.yxwz());
*/
test = (test | v0 == v1) | (v1 == v2 | v0 == v2);
break;
}

View File

@ -1261,7 +1261,7 @@ public:
#endif
#if _M_SSE >= 0x401
#if _M_SSE >= 0x501
template<int i> __forceinline GSVector4i blend32(const GSVector4i& v) const
{
@ -3725,6 +3725,68 @@ public:
//
// Loads 16 bytes from p and sign-extends each 8-bit value to 16 bits.
// _mm_load_si128 is an aligned load, so p must be 16-byte aligned.
static __forceinline GSVector8i i8to16c(const void* p)
{
	const __m128i packed = _mm_load_si128((const __m128i*)p);

	return GSVector8i(_mm256_cvtepi8_epi16(packed));
}
// Loads 16 bytes from p and zero-extends each 8-bit value to 16 bits.
// _mm_load_si128 is an aligned load, so p must be 16-byte aligned.
static __forceinline GSVector8i u8to16c(const void* p)
{
	const __m128i packed = _mm_load_si128((const __m128i*)p);

	return GSVector8i(_mm256_cvtepu8_epi16(packed));
}
// Loads 8 bytes from p (64-bit load into the low half of an XMM value) and
// sign-extends each 8-bit value to 32 bits.
static __forceinline GSVector8i i8to32c(const void* p)
{
	const __m128i packed = _mm_loadl_epi64((const __m128i*)p);

	return GSVector8i(_mm256_cvtepi8_epi32(packed));
}
// Loads 8 bytes from p (64-bit load into the low half of an XMM value) and
// zero-extends each 8-bit value to 32 bits.
static __forceinline GSVector8i u8to32c(const void* p)
{
	const __m128i packed = _mm_loadl_epi64((const __m128i*)p);

	return GSVector8i(_mm256_cvtepu8_epi32(packed));
}
// Treats the 32-bit integer i as four packed 8-bit values and sign-extends
// each of them to 64 bits.
static __forceinline GSVector8i i8to64c(int i)
{
	const __m128i packed = _mm_cvtsi32_si128(i);

	return GSVector8i(_mm256_cvtepi8_epi64(packed));
}
// Treats the 32-bit integer i as four packed 8-bit values and zero-extends
// each of them to 64 bits.
static __forceinline GSVector8i u8to64c(int i)
{
	const __m128i packed = _mm_cvtsi32_si128(i);

	return GSVector8i(_mm256_cvtepu8_epi64(packed));
}
// Loads 16 bytes from p and sign-extends each 16-bit value to 32 bits.
// _mm_load_si128 is an aligned load, so p must be 16-byte aligned.
static __forceinline GSVector8i i16to32c(const void* p)
{
	const __m128i packed = _mm_load_si128((const __m128i*)p);

	return GSVector8i(_mm256_cvtepi16_epi32(packed));
}
// Loads 16 bytes from p and zero-extends each 16-bit value to 32 bits.
// _mm_load_si128 is an aligned load, so p must be 16-byte aligned.
static __forceinline GSVector8i u16to32c(const void* p)
{
	const __m128i packed = _mm_load_si128((const __m128i*)p);

	return GSVector8i(_mm256_cvtepu16_epi32(packed));
}
// Loads 8 bytes from p (64-bit load into the low half of an XMM value) and
// sign-extends each 16-bit value to 64 bits.
static __forceinline GSVector8i i16to64c(const void* p)
{
	const __m128i packed = _mm_loadl_epi64((const __m128i*)p);

	return GSVector8i(_mm256_cvtepi16_epi64(packed));
}
// Loads 8 bytes from p (64-bit load into the low half of an XMM value) and
// zero-extends each 16-bit value to 64 bits.
static __forceinline GSVector8i u16to64c(const void* p)
{
	const __m128i packed = _mm_loadl_epi64((const __m128i*)p);

	return GSVector8i(_mm256_cvtepu16_epi64(packed));
}
// Loads 16 bytes from p and sign-extends each 32-bit value to 64 bits.
// _mm_load_si128 is an aligned load, so p must be 16-byte aligned.
static __forceinline GSVector8i i32to64c(const void* p)
{
	const __m128i packed = _mm_load_si128((const __m128i*)p);

	return GSVector8i(_mm256_cvtepi32_epi64(packed));
}
// Loads 16 bytes from p and zero-extends each 32-bit value to 64 bits.
// _mm_load_si128 is an aligned load, so p must be 16-byte aligned.
static __forceinline GSVector8i u32to64c(const void* p)
{
	const __m128i packed = _mm_load_si128((const __m128i*)p);

	return GSVector8i(_mm256_cvtepu32_epi64(packed));
}
//
template<int i> __forceinline GSVector8i srl() const
{
return GSVector8i(_mm256_srli_si256(m, i));

View File

@ -903,7 +903,7 @@ void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F3
void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, true, -1, imm); }
void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, true, -1, imm); }
void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, true, -1, imm); }
void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, false, -1); }
void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, true, -1); }
void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x53, true, -1); }
void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x52, true, -1); }
void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x51, true, -1); }