mirror of https://github.com/PCSX2/pcsx2.git
GSdx: vs2010 fix and minor changes
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5678 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
d20bd4f86a
commit
20d99ae9fc
|
@ -76,9 +76,9 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
|
|||
m_sp = m_sp_map[sel];
|
||||
}
|
||||
|
||||
void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
|
||||
void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int actual, int total)
|
||||
{
|
||||
m_ds_map.UpdateStats(frame, ticks, pixels);
|
||||
m_ds_map.UpdateStats(frame, ticks, actual, total);
|
||||
}
|
||||
|
||||
#ifndef ENABLE_JIT_RASTERIZER
|
||||
|
|
|
@ -61,7 +61,7 @@ public:
|
|||
// IDrawScanline
|
||||
|
||||
void BeginDraw(const GSRasterizerData* data);
|
||||
void EndDraw(uint64 frame, uint64 ticks, int pixels);
|
||||
void EndDraw(uint64 frame, uint64 ticks, int actual, int total);
|
||||
|
||||
#ifndef ENABLE_JIT_RASTERIZER
|
||||
|
||||
|
|
|
@ -96,9 +96,9 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
|||
m_sp = m_sp_map[sel];
|
||||
}
|
||||
|
||||
void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
|
||||
void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int actual, int total)
|
||||
{
|
||||
m_ds_map.UpdateStats(frame, ticks, pixels);
|
||||
m_ds_map.UpdateStats(frame, ticks, actual, total);
|
||||
}
|
||||
|
||||
#ifndef ENABLE_JIT_RASTERIZER
|
||||
|
@ -434,7 +434,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
skip = left & 7;
|
||||
steps = pixels + skip - 8;
|
||||
left -= skip;
|
||||
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))];
|
||||
test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[skip]) | GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1524,7 +1524,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
if(!sel.notest)
|
||||
{
|
||||
test = GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))];
|
||||
test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -68,7 +68,7 @@ public:
|
|||
// IDrawScanline
|
||||
|
||||
void BeginDraw(const GSRasterizerData* data);
|
||||
void EndDraw(uint64 frame, uint64 ticks, int pixels);
|
||||
void EndDraw(uint64 frame, uint64 ticks, int actual, int total);
|
||||
|
||||
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
||||
|
|
|
@ -24,24 +24,24 @@
|
|||
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
const GSVector8i GSDrawScanlineCodeGenerator::m_test[16] =
|
||||
__aligned(const uint8, 8) GSDrawScanlineCodeGenerator::m_test[16][8] =
|
||||
{
|
||||
GSVector8i::zero(),
|
||||
GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector8i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector8i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff),
|
||||
GSVector8i::zero(),
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
||||
{0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
||||
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
||||
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
||||
{0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||
{0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||
{0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||
{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
};
|
||||
|
||||
const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
||||
|
|
|
@ -135,7 +135,7 @@ public:
|
|||
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
static const GSVector8i m_test[16];
|
||||
static __aligned(const uint8, 8) m_test[16][8];
|
||||
static const GSVector8 m_log2_coef[4];
|
||||
#else
|
||||
static const GSVector4i m_test[8];
|
||||
|
|
|
@ -2824,7 +2824,8 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
xor(dst, eax);
|
||||
break;
|
||||
case 2:
|
||||
vpextrw(eax, src, i * 2);
|
||||
if(i == 0) vmovd(eax, src);
|
||||
else vpextrw(eax, src, i * 2);
|
||||
mov(dst, ax);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -268,17 +268,16 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
sub(ebx, edx);
|
||||
|
||||
// GSVector4i test = m_test[skip] | m_test[15 + (steps & (steps >> 31))];
|
||||
|
||||
shl(edx, 5);
|
||||
|
||||
vmovdqa(ymm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
|
||||
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 5);
|
||||
|
||||
vpor(ymm7, ptr[eax + (size_t)&m_test[15]]);
|
||||
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[0]]);
|
||||
vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)&m_test[15]]);
|
||||
vpor(ymm7, ymm0);
|
||||
|
||||
shl(edx, 5);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -592,9 +591,8 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
mov(edx, ecx);
|
||||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
shl(edx, 5);
|
||||
|
||||
vmovdqa(ymm7, ptr[edx + (size_t)&m_test[15]]);
|
||||
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[15]]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2843,7 +2841,8 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
xor(dst, eax);
|
||||
break;
|
||||
case 2:
|
||||
vpextrw(eax, src, j * 2);
|
||||
if(j == 0) vmovd(eax, src);
|
||||
else vpextrw(eax, src, j * 2);
|
||||
mov(dst, ax);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -2902,49 +2902,33 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
{
|
||||
Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2];
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
switch(psm)
|
||||
{
|
||||
case 0:
|
||||
if(i == 0) movd(dst, src);
|
||||
#if _M_SSE >= 0x401
|
||||
else pextrd(dst, src, i);
|
||||
break;
|
||||
case 1:
|
||||
if(i == 0) movd(eax, src);
|
||||
else pextrd(eax, src, i);
|
||||
xor(eax, dst);
|
||||
and(eax, 0xffffff);
|
||||
xor(dst, eax);
|
||||
break;
|
||||
case 2:
|
||||
pextrw(eax, src, i * 2);
|
||||
mov(dst, ax);
|
||||
break;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
switch(psm)
|
||||
{
|
||||
case 0:
|
||||
if(i == 0) movd(dst, src);
|
||||
#else
|
||||
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(dst, xmm0);}
|
||||
#endif
|
||||
break;
|
||||
case 1:
|
||||
if(i == 0) movd(eax, src);
|
||||
#if _M_SSE >= 0x401
|
||||
else pextrd(eax, src, i);
|
||||
#else
|
||||
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(eax, xmm0);}
|
||||
#endif
|
||||
xor(eax, dst);
|
||||
and(eax, 0xffffff);
|
||||
xor(dst, eax);
|
||||
break;
|
||||
case 2:
|
||||
pextrw(eax, src, i * 2);
|
||||
if(i == 0) movd(eax, src);
|
||||
else pextrw(eax, src, i * 2);
|
||||
mov(dst, ax);
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||
|
|
|
@ -32,7 +32,7 @@ protected:
|
|||
struct ActivePtr
|
||||
{
|
||||
uint64 frame, frames;
|
||||
uint64 ticks, pixels;
|
||||
uint64 ticks, actual, total;
|
||||
VALUE f;
|
||||
};
|
||||
|
||||
|
@ -84,7 +84,7 @@ public:
|
|||
return m_active->f;
|
||||
}
|
||||
|
||||
void UpdateStats(uint64 frame, uint64 ticks, int pixels)
|
||||
void UpdateStats(uint64 frame, uint64 ticks, int actual, int total)
|
||||
{
|
||||
if(m_active)
|
||||
{
|
||||
|
@ -95,7 +95,10 @@ public:
|
|||
}
|
||||
|
||||
m_active->ticks += ticks;
|
||||
m_active->pixels += pixels;
|
||||
m_active->actual += actual;
|
||||
m_active->total += total;
|
||||
|
||||
ASSERT(m_active->total >= m_active->actual);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -124,15 +127,15 @@ public:
|
|||
|
||||
if(p->frames > 0)
|
||||
{
|
||||
uint64 tpp = p->pixels > 0 ? p->ticks / p->pixels : 0;
|
||||
uint64 tpp = p->actual > 0 ? p->ticks / p->actual : 0;
|
||||
uint64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
|
||||
uint64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
|
||||
uint64 ppf = p->frames > 0 ? p->actual / p->frames : 0;
|
||||
|
||||
printf("[%014llx]%c %6.2f%% | %5.2f%% | f %4lld | p %10lld | tpp %4lld | tpf %9lld | ppf %7lld\n",
|
||||
printf("[%014llx]%c %6.2f%% %5.2f%% f %4lld t %12lld p %12lld w %12lld tpp %4lld tpf %9lld ppf %9lld\n",
|
||||
(uint64)key, m_map.find(key) == m_map.end() ? '*' : ' ',
|
||||
(float)(tpf * 10000 / 50000000) / 100,
|
||||
(float)(tpf * 10000 / 34000000) / 100,
|
||||
(float)(tpf * 10000 / ttpf) / 100,
|
||||
p->frames, p->pixels,
|
||||
p->frames, p->ticks, p->actual, p->total - p->actual,
|
||||
tpp, tpf, ppf);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,8 +37,9 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
|
|||
, m_id(id)
|
||||
, m_threads(threads)
|
||||
, m_perfmon(perfmon)
|
||||
, m_pixels(0)
|
||||
{
|
||||
memset(&m_pixels, 0, sizeof(m_pixels));
|
||||
|
||||
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
|
||||
m_edge.count = 0;
|
||||
|
||||
|
@ -110,11 +111,11 @@ void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data)
|
|||
|
||||
int GSRasterizer::GetPixels(bool reset)
|
||||
{
|
||||
int pixels = m_pixels;
|
||||
int pixels = m_pixels.sum;
|
||||
|
||||
if(reset)
|
||||
{
|
||||
m_pixels = 0;
|
||||
m_pixels.sum = 0;
|
||||
}
|
||||
|
||||
return pixels;
|
||||
|
@ -126,6 +127,9 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
|
||||
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
|
||||
|
||||
m_pixels.actual = 0;
|
||||
m_pixels.total = 0;
|
||||
|
||||
data->start = __rdtsc();
|
||||
|
||||
m_ds->BeginDraw(data);
|
||||
|
@ -212,11 +216,13 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
_mm256_zeroupper();
|
||||
#endif
|
||||
|
||||
data->pixels = m_pixels;
|
||||
data->pixels = m_pixels.actual;
|
||||
|
||||
uint64 ticks = __rdtsc() - data->start;
|
||||
|
||||
m_ds->EndDraw(data->frame, ticks, m_pixels);
|
||||
m_pixels.sum += m_pixels.actual;
|
||||
|
||||
m_ds->EndDraw(data->frame, ticks, m_pixels.actual, m_pixels.total);
|
||||
}
|
||||
|
||||
template<bool scissor_test>
|
||||
|
@ -234,11 +240,9 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
|
|||
{
|
||||
if(IsOneOfMyScanlines(p.y))
|
||||
{
|
||||
m_pixels++;
|
||||
|
||||
m_ds->SetupPrim(vertex, index, GSVertexSW::zero());
|
||||
|
||||
m_ds->DrawScanline(1, p.x, p.y, v);
|
||||
DrawScanline(1, p.x, p.y, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -257,11 +261,9 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
|
|||
{
|
||||
if(IsOneOfMyScanlines(p.y))
|
||||
{
|
||||
m_pixels++;
|
||||
|
||||
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero());
|
||||
|
||||
m_ds->DrawScanline(1, p.x, p.y, v);
|
||||
DrawScanline(1, p.x, p.y, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -321,15 +323,13 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
|
|||
|
||||
if(pixels > 0)
|
||||
{
|
||||
m_pixels += pixels;
|
||||
|
||||
GSVertexSW dscan = dv / dv.p.xxxx();
|
||||
|
||||
scan += dscan * (l - scan.p).xxxx();
|
||||
|
||||
m_ds->SetupPrim(vertex, index, dscan);
|
||||
|
||||
m_ds->DrawScanline(pixels, left, p.y, scan);
|
||||
DrawScanline(pixels, left, p.y, scan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -560,8 +560,6 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
|
|||
scan.c = scan.c + dscan.c * prestep;
|
||||
|
||||
AddScanline(e++, pixels, left, top, scan);
|
||||
|
||||
//m_pixels += pixels; m_ds->DrawScanline(pixels, left, top, scan);
|
||||
}
|
||||
|
||||
top++;
|
||||
|
@ -605,7 +603,10 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
|
|||
{
|
||||
m_ds->DrawRect(r, scan);
|
||||
|
||||
m_pixels += r.width() * r.height();
|
||||
int pixels = r.width() * r.height();
|
||||
|
||||
m_pixels.actual += pixels;
|
||||
m_pixels.total += pixels;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -619,7 +620,10 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
|
|||
|
||||
m_ds->DrawRect(r, scan);
|
||||
|
||||
m_pixels += r.width() * r.height();
|
||||
int pixels = r.width() * r.height();
|
||||
|
||||
m_pixels.actual += pixels;
|
||||
m_pixels.total += pixels;
|
||||
|
||||
top = r.bottom + ((m_threads - 1) << THREAD_HEIGHT);
|
||||
}
|
||||
|
@ -651,9 +655,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
|
|||
{
|
||||
if(IsOneOfMyScanlines(r.top))
|
||||
{
|
||||
m_pixels += r.width();
|
||||
|
||||
m_ds->DrawScanline(r.width(), r.left, r.top, scan);
|
||||
DrawScanline(r.width(), r.left, r.top, scan);
|
||||
}
|
||||
|
||||
if(++r.top >= r.bottom) break;
|
||||
|
@ -883,9 +885,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
|
|||
int left = e->_pad.i32[1];
|
||||
int top = e->_pad.i32[2];
|
||||
|
||||
m_pixels += pixels;
|
||||
|
||||
m_ds->DrawScanline(pixels, left, top, *e++);
|
||||
DrawScanline(pixels, left, top, *e++);
|
||||
}
|
||||
while(e < ee);
|
||||
}
|
||||
|
@ -897,9 +897,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
|
|||
int left = e->_pad.i32[1];
|
||||
int top = e->_pad.i32[2];
|
||||
|
||||
m_pixels += pixels;
|
||||
|
||||
m_ds->DrawEdge(pixels, left, top, *e++);
|
||||
DrawEdge(pixels, left, top, *e++);
|
||||
}
|
||||
while(e < ee);
|
||||
}
|
||||
|
@ -908,6 +906,33 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
|
|||
}
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
#define PIXELS_PER_LOOP 8
|
||||
#else
|
||||
#define PIXELS_PER_LOOP 4
|
||||
#endif
|
||||
|
||||
void GSRasterizer::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan)
|
||||
{
|
||||
m_pixels.actual += pixels;
|
||||
m_pixels.total += ((left + pixels + (PIXELS_PER_LOOP - 1)) & ~(PIXELS_PER_LOOP - 1)) - (left & (PIXELS_PER_LOOP - 1));
|
||||
//m_pixels.total += ((left + pixels + (PIXELS_PER_LOOP - 1)) & ~(PIXELS_PER_LOOP - 1)) - left;
|
||||
|
||||
ASSERT(m_pixels.actual <= m_pixels.total);
|
||||
|
||||
m_ds->DrawScanline(pixels, left, top, scan);
|
||||
}
|
||||
|
||||
void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
|
||||
{
|
||||
m_pixels.actual += 1;
|
||||
m_pixels.total += PIXELS_PER_LOOP - 1;
|
||||
|
||||
ASSERT(m_pixels.actual <= m_pixels.total);
|
||||
|
||||
m_ds->DrawEdge(pixels, left, top, scan);
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
|
||||
|
|
|
@ -86,7 +86,7 @@ public:
|
|||
virtual ~IDrawScanline() {}
|
||||
|
||||
virtual void BeginDraw(const GSRasterizerData* data) = 0;
|
||||
virtual void EndDraw(uint64 frame, uint64 ticks, int pixels) = 0;
|
||||
virtual void EndDraw(uint64 frame, uint64 ticks, int actual, int total) = 0;
|
||||
|
||||
#ifdef ENABLE_JIT_RASTERIZER
|
||||
|
||||
|
@ -134,7 +134,7 @@ protected:
|
|||
GSVector4 m_fscissor_x;
|
||||
GSVector4 m_fscissor_y;
|
||||
struct {GSVertexSW* buff; int count;} m_edge;
|
||||
int m_pixels;
|
||||
struct {int sum, actual, total;} m_pixels;
|
||||
|
||||
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
|
||||
|
||||
|
@ -151,6 +151,9 @@ protected:
|
|||
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
|
||||
__forceinline void Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge = false);
|
||||
|
||||
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
|
||||
public:
|
||||
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
|
||||
virtual ~GSRasterizer();
|
||||
|
|
|
@ -2321,7 +2321,7 @@ void GSState::GrowVertexBuffer()
|
|||
template<uint32 prim>
|
||||
__forceinline void GSState::VertexKick(uint32 skip)
|
||||
{
|
||||
ASSERT(m_vertex.tail < m_vertex.maxcount);
|
||||
ASSERT(m_vertex.tail < m_vertex.maxcount + 3);
|
||||
|
||||
size_t head = m_vertex.head;
|
||||
size_t tail = m_vertex.tail;
|
||||
|
@ -2340,7 +2340,7 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
|
||||
GSVector4i xy = v1.xxxx().sub16(m_ofxy);
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
#if _M_SSE >= 0x501
|
||||
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.blend32<2>(xy.sra16(4)));
|
||||
#else
|
||||
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.upl32(xy.sra16(4).yyyy()));
|
||||
|
@ -2421,14 +2421,21 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
// TODO: any way to do a 16-bit integer cross product?
|
||||
// cross product is zero most of the time because either of the vertices are the same
|
||||
/*
|
||||
cross = GSVector4(v2.xyxyl().i16to32().sub32(v0.upl32(v1).i16to32())); // x20, y20, x21, y21
|
||||
cross = cross * cross.wzwz(); // x20 * y21, y20 * x21
|
||||
test |= GSVector4i::cast(cross == cross.yxwz());
|
||||
*/
|
||||
test = (test | v0 == v1) | (v1 == v2 | v0 == v2);
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
/*
|
||||
cross = GSVector4(v2.xyxyl().i16to32().sub32(v3.upl32(v1).i16to32())); // x23, y23, x21, y21
|
||||
cross = cross * cross.wzwz(); // x23 * y21, y23 * x21
|
||||
test |= GSVector4i::cast(cross == cross.yxwz());
|
||||
*/
|
||||
test = (test | v0 == v1) | (v1 == v2 | v0 == v2);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -1261,7 +1261,7 @@ public:
|
|||
|
||||
#endif
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
template<int i> __forceinline GSVector4i blend32(const GSVector4i& v) const
|
||||
{
|
||||
|
@ -3725,6 +3725,68 @@ public:
|
|||
|
||||
//
|
||||
|
||||
// Reads 16 signed bytes from p with an aligned 128-bit load (p must be
// 16-byte aligned) and sign-extends each of them to a 16-bit lane.
static __forceinline GSVector8i i8to16c(const void* p)
{
	const __m128i packed = _mm_load_si128(static_cast<const __m128i*>(p));

	return GSVector8i(_mm256_cvtepi8_epi16(packed));
}
|
||||
|
||||
// Reads 16 unsigned bytes from p with an aligned 128-bit load (p must be
// 16-byte aligned) and zero-extends each of them to a 16-bit lane.
static __forceinline GSVector8i u8to16c(const void* p)
{
	const __m128i packed = _mm_load_si128(static_cast<const __m128i*>(p));

	return GSVector8i(_mm256_cvtepu8_epi16(packed));
}
|
||||
|
||||
// Reads 8 signed bytes (the low 64 bits) from p and sign-extends each of
// them to a 32-bit lane.
static __forceinline GSVector8i i8to32c(const void* p)
{
	const __m128i packed = _mm_loadl_epi64(static_cast<const __m128i*>(p));

	return GSVector8i(_mm256_cvtepi8_epi32(packed));
}
|
||||
|
||||
// Reads 8 unsigned bytes (the low 64 bits) from p and zero-extends each of
// them to a 32-bit lane.
static __forceinline GSVector8i u8to32c(const void* p)
{
	const __m128i packed = _mm_loadl_epi64(static_cast<const __m128i*>(p));

	return GSVector8i(_mm256_cvtepu8_epi32(packed));
}
|
||||
|
||||
// Treats the 32-bit value i as 4 packed signed bytes and sign-extends each
// of them to a 64-bit lane.
static __forceinline GSVector8i i8to64c(int i)
{
	const __m128i packed = _mm_cvtsi32_si128(i);

	return GSVector8i(_mm256_cvtepi8_epi64(packed));
}
|
||||
|
||||
// Treats the 32-bit value i as 4 packed unsigned bytes and zero-extends each
// of them to a 64-bit lane.
static __forceinline GSVector8i u8to64c(int i)
{
	const __m128i packed = _mm_cvtsi32_si128(i);

	return GSVector8i(_mm256_cvtepu8_epi64(packed));
}
|
||||
|
||||
// Reads 8 signed 16-bit values from p with an aligned 128-bit load (p must
// be 16-byte aligned) and sign-extends each of them to a 32-bit lane.
static __forceinline GSVector8i i16to32c(const void* p)
{
	const __m128i packed = _mm_load_si128(static_cast<const __m128i*>(p));

	return GSVector8i(_mm256_cvtepi16_epi32(packed));
}
|
||||
|
||||
// Reads 8 unsigned 16-bit values from p with an aligned 128-bit load (p must
// be 16-byte aligned) and zero-extends each of them to a 32-bit lane.
static __forceinline GSVector8i u16to32c(const void* p)
{
	const __m128i packed = _mm_load_si128(static_cast<const __m128i*>(p));

	return GSVector8i(_mm256_cvtepu16_epi32(packed));
}
|
||||
|
||||
// Reads 4 signed 16-bit values (the low 64 bits) from p and sign-extends
// each of them to a 64-bit lane.
static __forceinline GSVector8i i16to64c(const void* p)
{
	const __m128i packed = _mm_loadl_epi64(static_cast<const __m128i*>(p));

	return GSVector8i(_mm256_cvtepi16_epi64(packed));
}
|
||||
|
||||
// Reads 4 unsigned 16-bit values (the low 64 bits) from p and zero-extends
// each of them to a 64-bit lane.
static __forceinline GSVector8i u16to64c(const void* p)
{
	const __m128i packed = _mm_loadl_epi64(static_cast<const __m128i*>(p));

	return GSVector8i(_mm256_cvtepu16_epi64(packed));
}
|
||||
|
||||
// Reads 4 signed 32-bit values from p with an aligned 128-bit load (p must
// be 16-byte aligned) and sign-extends each of them to a 64-bit lane.
static __forceinline GSVector8i i32to64c(const void* p)
{
	const __m128i packed = _mm_load_si128(static_cast<const __m128i*>(p));

	return GSVector8i(_mm256_cvtepi32_epi64(packed));
}
|
||||
|
||||
// Reads 4 unsigned 32-bit values from p with an aligned 128-bit load (p must
// be 16-byte aligned) and zero-extends each of them to a 64-bit lane.
static __forceinline GSVector8i u32to64c(const void* p)
{
	const __m128i packed = _mm_load_si128(static_cast<const __m128i*>(p));

	return GSVector8i(_mm256_cvtepu32_epi64(packed));
}
|
||||
|
||||
//
|
||||
|
||||
template<int i> __forceinline GSVector8i srl() const
|
||||
{
|
||||
return GSVector8i(_mm256_srli_si256(m, i));
|
||||
|
|
|
@ -903,7 +903,7 @@ void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F3
|
|||
void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, true, -1, imm); }
|
||||
void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, true, -1, imm); }
|
||||
void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, true, -1, imm); }
|
||||
void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, false, -1); }
|
||||
void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, true, -1); }
|
||||
void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x53, true, -1); }
|
||||
void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x52, true, -1); }
|
||||
void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x51, true, -1); }
|
||||
|
|
Loading…
Reference in New Issue