mirror of https://github.com/PCSX2/pcsx2.git
GSdx: vs2010 fix and minor changes
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5678 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
d20bd4f86a
commit
20d99ae9fc
|
@ -76,9 +76,9 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||||
m_sp = m_sp_map[sel];
|
m_sp = m_sp_map[sel];
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
|
void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int actual, int total)
|
||||||
{
|
{
|
||||||
m_ds_map.UpdateStats(frame, ticks, pixels);
|
m_ds_map.UpdateStats(frame, ticks, actual, total);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef ENABLE_JIT_RASTERIZER
|
#ifndef ENABLE_JIT_RASTERIZER
|
||||||
|
|
|
@ -61,7 +61,7 @@ public:
|
||||||
// IDrawScanline
|
// IDrawScanline
|
||||||
|
|
||||||
void BeginDraw(const GSRasterizerData* data);
|
void BeginDraw(const GSRasterizerData* data);
|
||||||
void EndDraw(uint64 frame, uint64 ticks, int pixels);
|
void EndDraw(uint64 frame, uint64 ticks, int actual, int total);
|
||||||
|
|
||||||
#ifndef ENABLE_JIT_RASTERIZER
|
#ifndef ENABLE_JIT_RASTERIZER
|
||||||
|
|
||||||
|
|
|
@ -96,9 +96,9 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||||
m_sp = m_sp_map[sel];
|
m_sp = m_sp_map[sel];
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
|
void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int actual, int total)
|
||||||
{
|
{
|
||||||
m_ds_map.UpdateStats(frame, ticks, pixels);
|
m_ds_map.UpdateStats(frame, ticks, actual, total);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef ENABLE_JIT_RASTERIZER
|
#ifndef ENABLE_JIT_RASTERIZER
|
||||||
|
@ -434,7 +434,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
skip = left & 7;
|
skip = left & 7;
|
||||||
steps = pixels + skip - 8;
|
steps = pixels + skip - 8;
|
||||||
left -= skip;
|
left -= skip;
|
||||||
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))];
|
test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[skip]) | GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1524,7 +1524,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
|
|
||||||
if(!sel.notest)
|
if(!sel.notest)
|
||||||
{
|
{
|
||||||
test = GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))];
|
test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -68,7 +68,7 @@ public:
|
||||||
// IDrawScanline
|
// IDrawScanline
|
||||||
|
|
||||||
void BeginDraw(const GSRasterizerData* data);
|
void BeginDraw(const GSRasterizerData* data);
|
||||||
void EndDraw(uint64 frame, uint64 ticks, int pixels);
|
void EndDraw(uint64 frame, uint64 ticks, int actual, int total);
|
||||||
|
|
||||||
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
||||||
|
|
||||||
|
|
|
@ -24,24 +24,24 @@
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
const GSVector8i GSDrawScanlineCodeGenerator::m_test[16] =
|
__aligned(const uint8, 8) GSDrawScanlineCodeGenerator::m_test[16][8] =
|
||||||
{
|
{
|
||||||
GSVector8i::zero(),
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
{0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
{0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
{0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
||||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
{0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
||||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
||||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
||||||
GSVector8i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
{0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||||
GSVector8i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
{0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
{0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff},
|
||||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff},
|
||||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff},
|
||||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff),
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff},
|
||||||
GSVector8i::zero(),
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
};
|
};
|
||||||
|
|
||||||
const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
||||||
|
|
|
@ -135,7 +135,7 @@ public:
|
||||||
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
static const GSVector8i m_test[16];
|
static __aligned(const uint8, 8) m_test[16][8];
|
||||||
static const GSVector8 m_log2_coef[4];
|
static const GSVector8 m_log2_coef[4];
|
||||||
#else
|
#else
|
||||||
static const GSVector4i m_test[8];
|
static const GSVector4i m_test[8];
|
||||||
|
|
|
@ -2824,7 +2824,8 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
||||||
xor(dst, eax);
|
xor(dst, eax);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
vpextrw(eax, src, i * 2);
|
if(i == 0) vmovd(eax, src);
|
||||||
|
else vpextrw(eax, src, i * 2);
|
||||||
mov(dst, ax);
|
mov(dst, ax);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -269,16 +269,15 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
|
|
||||||
// GSVector4i test = m_test[skip] | m_test[15 + (steps & (steps >> 31))];
|
// GSVector4i test = m_test[skip] | m_test[15 + (steps & (steps >> 31))];
|
||||||
|
|
||||||
shl(edx, 5);
|
|
||||||
|
|
||||||
vmovdqa(ymm7, ptr[edx + (size_t)&m_test[0]]);
|
|
||||||
|
|
||||||
mov(eax, ecx);
|
mov(eax, ecx);
|
||||||
sar(eax, 31);
|
sar(eax, 31);
|
||||||
and(eax, ecx);
|
and(eax, ecx);
|
||||||
shl(eax, 5);
|
|
||||||
|
|
||||||
vpor(ymm7, ptr[eax + (size_t)&m_test[15]]);
|
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[0]]);
|
||||||
|
vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)&m_test[15]]);
|
||||||
|
vpor(ymm7, ymm0);
|
||||||
|
|
||||||
|
shl(edx, 5);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -592,9 +591,8 @@ void GSDrawScanlineCodeGenerator::Step()
|
||||||
mov(edx, ecx);
|
mov(edx, ecx);
|
||||||
sar(edx, 31);
|
sar(edx, 31);
|
||||||
and(edx, ecx);
|
and(edx, ecx);
|
||||||
shl(edx, 5);
|
|
||||||
|
|
||||||
vmovdqa(ymm7, ptr[edx + (size_t)&m_test[15]]);
|
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[15]]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2843,7 +2841,8 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
||||||
xor(dst, eax);
|
xor(dst, eax);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
vpextrw(eax, src, j * 2);
|
if(j == 0) vmovd(eax, src);
|
||||||
|
else vpextrw(eax, src, j * 2);
|
||||||
mov(dst, ax);
|
mov(dst, ax);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2902,49 +2902,33 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
||||||
{
|
{
|
||||||
Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2];
|
Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2];
|
||||||
|
|
||||||
|
switch(psm)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
if(i == 0) movd(dst, src);
|
||||||
#if _M_SSE >= 0x401
|
#if _M_SSE >= 0x401
|
||||||
|
|
||||||
switch(psm)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
if(i == 0) movd(dst, src);
|
|
||||||
else pextrd(dst, src, i);
|
else pextrd(dst, src, i);
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
if(i == 0) movd(eax, src);
|
|
||||||
else pextrd(eax, src, i);
|
|
||||||
xor(eax, dst);
|
|
||||||
and(eax, 0xffffff);
|
|
||||||
xor(dst, eax);
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
pextrw(eax, src, i * 2);
|
|
||||||
mov(dst, ax);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
switch(psm)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
if(i == 0) movd(dst, src);
|
|
||||||
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(dst, xmm0);}
|
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(dst, xmm0);}
|
||||||
|
#endif
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
if(i == 0) movd(eax, src);
|
if(i == 0) movd(eax, src);
|
||||||
|
#if _M_SSE >= 0x401
|
||||||
|
else pextrd(eax, src, i);
|
||||||
|
#else
|
||||||
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(eax, xmm0);}
|
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(eax, xmm0);}
|
||||||
|
#endif
|
||||||
xor(eax, dst);
|
xor(eax, dst);
|
||||||
and(eax, 0xffffff);
|
and(eax, 0xffffff);
|
||||||
xor(dst, eax);
|
xor(dst, eax);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
pextrw(eax, src, i * 2);
|
if(i == 0) movd(eax, src);
|
||||||
|
else pextrw(eax, src, i * 2);
|
||||||
mov(dst, ax);
|
mov(dst, ax);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||||
|
|
|
@ -32,7 +32,7 @@ protected:
|
||||||
struct ActivePtr
|
struct ActivePtr
|
||||||
{
|
{
|
||||||
uint64 frame, frames;
|
uint64 frame, frames;
|
||||||
uint64 ticks, pixels;
|
uint64 ticks, actual, total;
|
||||||
VALUE f;
|
VALUE f;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -84,7 +84,7 @@ public:
|
||||||
return m_active->f;
|
return m_active->f;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateStats(uint64 frame, uint64 ticks, int pixels)
|
void UpdateStats(uint64 frame, uint64 ticks, int actual, int total)
|
||||||
{
|
{
|
||||||
if(m_active)
|
if(m_active)
|
||||||
{
|
{
|
||||||
|
@ -95,7 +95,10 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
m_active->ticks += ticks;
|
m_active->ticks += ticks;
|
||||||
m_active->pixels += pixels;
|
m_active->actual += actual;
|
||||||
|
m_active->total += total;
|
||||||
|
|
||||||
|
ASSERT(m_active->total >= m_active->actual);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,15 +127,15 @@ public:
|
||||||
|
|
||||||
if(p->frames > 0)
|
if(p->frames > 0)
|
||||||
{
|
{
|
||||||
uint64 tpp = p->pixels > 0 ? p->ticks / p->pixels : 0;
|
uint64 tpp = p->actual > 0 ? p->ticks / p->actual : 0;
|
||||||
uint64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
|
uint64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
|
||||||
uint64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
|
uint64 ppf = p->frames > 0 ? p->actual / p->frames : 0;
|
||||||
|
|
||||||
printf("[%014llx]%c %6.2f%% | %5.2f%% | f %4lld | p %10lld | tpp %4lld | tpf %9lld | ppf %7lld\n",
|
printf("[%014llx]%c %6.2f%% %5.2f%% f %4lld t %12lld p %12lld w %12lld tpp %4lld tpf %9lld ppf %9lld\n",
|
||||||
(uint64)key, m_map.find(key) == m_map.end() ? '*' : ' ',
|
(uint64)key, m_map.find(key) == m_map.end() ? '*' : ' ',
|
||||||
(float)(tpf * 10000 / 50000000) / 100,
|
(float)(tpf * 10000 / 34000000) / 100,
|
||||||
(float)(tpf * 10000 / ttpf) / 100,
|
(float)(tpf * 10000 / ttpf) / 100,
|
||||||
p->frames, p->pixels,
|
p->frames, p->ticks, p->actual, p->total - p->actual,
|
||||||
tpp, tpf, ppf);
|
tpp, tpf, ppf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,8 +37,9 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
|
||||||
, m_id(id)
|
, m_id(id)
|
||||||
, m_threads(threads)
|
, m_threads(threads)
|
||||||
, m_perfmon(perfmon)
|
, m_perfmon(perfmon)
|
||||||
, m_pixels(0)
|
|
||||||
{
|
{
|
||||||
|
memset(&m_pixels, 0, sizeof(m_pixels));
|
||||||
|
|
||||||
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
|
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
|
||||||
m_edge.count = 0;
|
m_edge.count = 0;
|
||||||
|
|
||||||
|
@ -110,11 +111,11 @@ void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data)
|
||||||
|
|
||||||
int GSRasterizer::GetPixels(bool reset)
|
int GSRasterizer::GetPixels(bool reset)
|
||||||
{
|
{
|
||||||
int pixels = m_pixels;
|
int pixels = m_pixels.sum;
|
||||||
|
|
||||||
if(reset)
|
if(reset)
|
||||||
{
|
{
|
||||||
m_pixels = 0;
|
m_pixels.sum = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return pixels;
|
return pixels;
|
||||||
|
@ -126,6 +127,9 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
||||||
|
|
||||||
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
|
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
|
||||||
|
|
||||||
|
m_pixels.actual = 0;
|
||||||
|
m_pixels.total = 0;
|
||||||
|
|
||||||
data->start = __rdtsc();
|
data->start = __rdtsc();
|
||||||
|
|
||||||
m_ds->BeginDraw(data);
|
m_ds->BeginDraw(data);
|
||||||
|
@ -212,11 +216,13 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
||||||
_mm256_zeroupper();
|
_mm256_zeroupper();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
data->pixels = m_pixels;
|
data->pixels = m_pixels.actual;
|
||||||
|
|
||||||
uint64 ticks = __rdtsc() - data->start;
|
uint64 ticks = __rdtsc() - data->start;
|
||||||
|
|
||||||
m_ds->EndDraw(data->frame, ticks, m_pixels);
|
m_pixels.sum += m_pixels.actual;
|
||||||
|
|
||||||
|
m_ds->EndDraw(data->frame, ticks, m_pixels.actual, m_pixels.total);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool scissor_test>
|
template<bool scissor_test>
|
||||||
|
@ -234,11 +240,9 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
|
||||||
{
|
{
|
||||||
if(IsOneOfMyScanlines(p.y))
|
if(IsOneOfMyScanlines(p.y))
|
||||||
{
|
{
|
||||||
m_pixels++;
|
|
||||||
|
|
||||||
m_ds->SetupPrim(vertex, index, GSVertexSW::zero());
|
m_ds->SetupPrim(vertex, index, GSVertexSW::zero());
|
||||||
|
|
||||||
m_ds->DrawScanline(1, p.x, p.y, v);
|
DrawScanline(1, p.x, p.y, v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -257,11 +261,9 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
|
||||||
{
|
{
|
||||||
if(IsOneOfMyScanlines(p.y))
|
if(IsOneOfMyScanlines(p.y))
|
||||||
{
|
{
|
||||||
m_pixels++;
|
|
||||||
|
|
||||||
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero());
|
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero());
|
||||||
|
|
||||||
m_ds->DrawScanline(1, p.x, p.y, v);
|
DrawScanline(1, p.x, p.y, v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -321,15 +323,13 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
|
||||||
|
|
||||||
if(pixels > 0)
|
if(pixels > 0)
|
||||||
{
|
{
|
||||||
m_pixels += pixels;
|
|
||||||
|
|
||||||
GSVertexSW dscan = dv / dv.p.xxxx();
|
GSVertexSW dscan = dv / dv.p.xxxx();
|
||||||
|
|
||||||
scan += dscan * (l - scan.p).xxxx();
|
scan += dscan * (l - scan.p).xxxx();
|
||||||
|
|
||||||
m_ds->SetupPrim(vertex, index, dscan);
|
m_ds->SetupPrim(vertex, index, dscan);
|
||||||
|
|
||||||
m_ds->DrawScanline(pixels, left, p.y, scan);
|
DrawScanline(pixels, left, p.y, scan);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -560,8 +560,6 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
|
||||||
scan.c = scan.c + dscan.c * prestep;
|
scan.c = scan.c + dscan.c * prestep;
|
||||||
|
|
||||||
AddScanline(e++, pixels, left, top, scan);
|
AddScanline(e++, pixels, left, top, scan);
|
||||||
|
|
||||||
//m_pixels += pixels; m_ds->DrawScanline(pixels, left, top, scan);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
top++;
|
top++;
|
||||||
|
@ -605,7 +603,10 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
|
||||||
{
|
{
|
||||||
m_ds->DrawRect(r, scan);
|
m_ds->DrawRect(r, scan);
|
||||||
|
|
||||||
m_pixels += r.width() * r.height();
|
int pixels = r.width() * r.height();
|
||||||
|
|
||||||
|
m_pixels.actual += pixels;
|
||||||
|
m_pixels.total += pixels;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -619,7 +620,10 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
|
||||||
|
|
||||||
m_ds->DrawRect(r, scan);
|
m_ds->DrawRect(r, scan);
|
||||||
|
|
||||||
m_pixels += r.width() * r.height();
|
int pixels = r.width() * r.height();
|
||||||
|
|
||||||
|
m_pixels.actual += pixels;
|
||||||
|
m_pixels.total += pixels;
|
||||||
|
|
||||||
top = r.bottom + ((m_threads - 1) << THREAD_HEIGHT);
|
top = r.bottom + ((m_threads - 1) << THREAD_HEIGHT);
|
||||||
}
|
}
|
||||||
|
@ -651,9 +655,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
|
||||||
{
|
{
|
||||||
if(IsOneOfMyScanlines(r.top))
|
if(IsOneOfMyScanlines(r.top))
|
||||||
{
|
{
|
||||||
m_pixels += r.width();
|
DrawScanline(r.width(), r.left, r.top, scan);
|
||||||
|
|
||||||
m_ds->DrawScanline(r.width(), r.left, r.top, scan);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(++r.top >= r.bottom) break;
|
if(++r.top >= r.bottom) break;
|
||||||
|
@ -883,9 +885,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
|
||||||
int left = e->_pad.i32[1];
|
int left = e->_pad.i32[1];
|
||||||
int top = e->_pad.i32[2];
|
int top = e->_pad.i32[2];
|
||||||
|
|
||||||
m_pixels += pixels;
|
DrawScanline(pixels, left, top, *e++);
|
||||||
|
|
||||||
m_ds->DrawScanline(pixels, left, top, *e++);
|
|
||||||
}
|
}
|
||||||
while(e < ee);
|
while(e < ee);
|
||||||
}
|
}
|
||||||
|
@ -897,9 +897,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
|
||||||
int left = e->_pad.i32[1];
|
int left = e->_pad.i32[1];
|
||||||
int top = e->_pad.i32[2];
|
int top = e->_pad.i32[2];
|
||||||
|
|
||||||
m_pixels += pixels;
|
DrawEdge(pixels, left, top, *e++);
|
||||||
|
|
||||||
m_ds->DrawEdge(pixels, left, top, *e++);
|
|
||||||
}
|
}
|
||||||
while(e < ee);
|
while(e < ee);
|
||||||
}
|
}
|
||||||
|
@ -908,6 +906,33 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
#define PIXELS_PER_LOOP 8
|
||||||
|
#else
|
||||||
|
#define PIXELS_PER_LOOP 4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void GSRasterizer::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan)
|
||||||
|
{
|
||||||
|
m_pixels.actual += pixels;
|
||||||
|
m_pixels.total += ((left + pixels + (PIXELS_PER_LOOP - 1)) & ~(PIXELS_PER_LOOP - 1)) - (left & (PIXELS_PER_LOOP - 1));
|
||||||
|
//m_pixels.total += ((left + pixels + (PIXELS_PER_LOOP - 1)) & ~(PIXELS_PER_LOOP - 1)) - left;
|
||||||
|
|
||||||
|
ASSERT(m_pixels.actual <= m_pixels.total);
|
||||||
|
|
||||||
|
m_ds->DrawScanline(pixels, left, top, scan);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
|
||||||
|
{
|
||||||
|
m_pixels.actual += 1;
|
||||||
|
m_pixels.total += PIXELS_PER_LOOP - 1;
|
||||||
|
|
||||||
|
ASSERT(m_pixels.actual <= m_pixels.total);
|
||||||
|
|
||||||
|
m_ds->DrawEdge(pixels, left, top, scan);
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
|
GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
|
||||||
|
|
|
@ -86,7 +86,7 @@ public:
|
||||||
virtual ~IDrawScanline() {}
|
virtual ~IDrawScanline() {}
|
||||||
|
|
||||||
virtual void BeginDraw(const GSRasterizerData* data) = 0;
|
virtual void BeginDraw(const GSRasterizerData* data) = 0;
|
||||||
virtual void EndDraw(uint64 frame, uint64 ticks, int pixels) = 0;
|
virtual void EndDraw(uint64 frame, uint64 ticks, int actual, int total) = 0;
|
||||||
|
|
||||||
#ifdef ENABLE_JIT_RASTERIZER
|
#ifdef ENABLE_JIT_RASTERIZER
|
||||||
|
|
||||||
|
@ -134,7 +134,7 @@ protected:
|
||||||
GSVector4 m_fscissor_x;
|
GSVector4 m_fscissor_x;
|
||||||
GSVector4 m_fscissor_y;
|
GSVector4 m_fscissor_y;
|
||||||
struct {GSVertexSW* buff; int count;} m_edge;
|
struct {GSVertexSW* buff; int count;} m_edge;
|
||||||
int m_pixels;
|
struct {int sum, actual, total;} m_pixels;
|
||||||
|
|
||||||
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
|
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
|
||||||
|
|
||||||
|
@ -151,6 +151,9 @@ protected:
|
||||||
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
|
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
__forceinline void Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge = false);
|
__forceinline void Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge = false);
|
||||||
|
|
||||||
|
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
|
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
|
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
|
||||||
virtual ~GSRasterizer();
|
virtual ~GSRasterizer();
|
||||||
|
|
|
@ -2321,7 +2321,7 @@ void GSState::GrowVertexBuffer()
|
||||||
template<uint32 prim>
|
template<uint32 prim>
|
||||||
__forceinline void GSState::VertexKick(uint32 skip)
|
__forceinline void GSState::VertexKick(uint32 skip)
|
||||||
{
|
{
|
||||||
ASSERT(m_vertex.tail < m_vertex.maxcount);
|
ASSERT(m_vertex.tail < m_vertex.maxcount + 3);
|
||||||
|
|
||||||
size_t head = m_vertex.head;
|
size_t head = m_vertex.head;
|
||||||
size_t tail = m_vertex.tail;
|
size_t tail = m_vertex.tail;
|
||||||
|
@ -2340,7 +2340,7 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
||||||
|
|
||||||
GSVector4i xy = v1.xxxx().sub16(m_ofxy);
|
GSVector4i xy = v1.xxxx().sub16(m_ofxy);
|
||||||
|
|
||||||
#if _M_SSE >= 0x401
|
#if _M_SSE >= 0x501
|
||||||
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.blend32<2>(xy.sra16(4)));
|
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.blend32<2>(xy.sra16(4)));
|
||||||
#else
|
#else
|
||||||
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.upl32(xy.sra16(4).yyyy()));
|
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.upl32(xy.sra16(4).yyyy()));
|
||||||
|
@ -2421,14 +2421,21 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
||||||
case GS_TRIANGLELIST:
|
case GS_TRIANGLELIST:
|
||||||
case GS_TRIANGLESTRIP:
|
case GS_TRIANGLESTRIP:
|
||||||
// TODO: any way to do a 16-bit integer cross product?
|
// TODO: any way to do a 16-bit integer cross product?
|
||||||
|
// cross product is zero most of the time because either of the vertices are the same
|
||||||
|
/*
|
||||||
cross = GSVector4(v2.xyxyl().i16to32().sub32(v0.upl32(v1).i16to32())); // x20, y20, x21, y21
|
cross = GSVector4(v2.xyxyl().i16to32().sub32(v0.upl32(v1).i16to32())); // x20, y20, x21, y21
|
||||||
cross = cross * cross.wzwz(); // x20 * y21, y20 * x21
|
cross = cross * cross.wzwz(); // x20 * y21, y20 * x21
|
||||||
test |= GSVector4i::cast(cross == cross.yxwz());
|
test |= GSVector4i::cast(cross == cross.yxwz());
|
||||||
|
*/
|
||||||
|
test = (test | v0 == v1) | (v1 == v2 | v0 == v2);
|
||||||
break;
|
break;
|
||||||
case GS_TRIANGLEFAN:
|
case GS_TRIANGLEFAN:
|
||||||
|
/*
|
||||||
cross = GSVector4(v2.xyxyl().i16to32().sub32(v3.upl32(v1).i16to32())); // x23, y23, x21, y21
|
cross = GSVector4(v2.xyxyl().i16to32().sub32(v3.upl32(v1).i16to32())); // x23, y23, x21, y21
|
||||||
cross = cross * cross.wzwz(); // x23 * y21, y23 * x21
|
cross = cross * cross.wzwz(); // x23 * y21, y23 * x21
|
||||||
test |= GSVector4i::cast(cross == cross.yxwz());
|
test |= GSVector4i::cast(cross == cross.yxwz());
|
||||||
|
*/
|
||||||
|
test = (test | v0 == v1) | (v1 == v2 | v0 == v2);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1261,7 +1261,7 @@ public:
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if _M_SSE >= 0x401
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
template<int i> __forceinline GSVector4i blend32(const GSVector4i& v) const
|
template<int i> __forceinline GSVector4i blend32(const GSVector4i& v) const
|
||||||
{
|
{
|
||||||
|
@ -3725,6 +3725,68 @@ public:
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
|
static __forceinline GSVector8i i8to16c(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)p)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i u8to16c(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepu8_epi16(_mm_load_si128((__m128i*)p)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i i8to32c(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepi8_epi32(_mm_loadl_epi64((__m128i*)p)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i u8to32c(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)p)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i i8to64c(int i)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepi8_epi64(_mm_cvtsi32_si128(i)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i u8to64c(int i)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepu8_epi64(_mm_cvtsi32_si128(i)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i i16to32c(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepi16_epi32(_mm_load_si128((__m128i*)p)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i u16to32c(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepu16_epi32(_mm_load_si128((__m128i*)p)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i i16to64c(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepi16_epi64(_mm_loadl_epi64((__m128i*)p)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i u16to64c(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepu16_epi64(_mm_loadl_epi64((__m128i*)p)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i i32to64c(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepi32_epi64(_mm_load_si128((__m128i*)p)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline GSVector8i u32to64c(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8i(_mm256_cvtepu32_epi64(_mm_load_si128((__m128i*)p)));
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
|
||||||
template<int i> __forceinline GSVector8i srl() const
|
template<int i> __forceinline GSVector8i srl() const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_srli_si256(m, i));
|
return GSVector8i(_mm256_srli_si256(m, i));
|
||||||
|
|
|
@ -903,7 +903,7 @@ void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F3
|
||||||
void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, true, -1, imm); }
|
void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, true, -1, imm); }
|
||||||
void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, true, -1, imm); }
|
void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, true, -1, imm); }
|
||||||
void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, true, -1, imm); }
|
void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, true, -1, imm); }
|
||||||
void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, false, -1); }
|
void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, true, -1); }
|
||||||
void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x53, true, -1); }
|
void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x53, true, -1); }
|
||||||
void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x52, true, -1); }
|
void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x52, true, -1); }
|
||||||
void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x51, true, -1); }
|
void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x51, true, -1); }
|
||||||
|
|
Loading…
Reference in New Issue