GSdx: optimized the triangle setup of the rasterizer a bit; while it isn't the bottleneck of drawing, it can still add a few percent to the fps.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4404 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-03-08 01:48:15 +00:00
parent b2319c7636
commit fe88ee4102
16 changed files with 397 additions and 372 deletions

View File

@ -379,8 +379,8 @@ void GSDrawScanlineCodeGenerator::Init()
}
else
{
vmovdqa(xmm13, ptr[&m_local.c.rb]);
vmovdqa(xmm14, ptr[&m_local.c.ga]);
vmovdqa(xmm13, ptr[r11 + offsetof(GSScanlineLocalData, c.rb)]);
vmovdqa(xmm14, ptr[r11 + offsetof(GSScanlineLocalData, c.ga)]);
}
}
}

View File

@ -273,12 +273,12 @@ void GSDrawScanlineCodeGenerator::Init()
mov(esi, dword[esp + _top]);
lea(esi, ptr[esi * 8]);
add(esi, dword[&m_local.gd->fzbr]);
add(esi, ptr[&m_local.gd->fzbr]);
// GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2];
lea(edi, ptr[ebx * 2]);
add(edi, dword[&m_local.gd->fzbc]);
add(edi, ptr[&m_local.gd->fzbc]);
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
{
@ -585,8 +585,8 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
// int za = fza_base.y + fza_offset->y;
mov(ebp, dword[esi + 4]);
add(ebp, dword[edi + 4]);
mov(ebp, ptr[esi + 4]);
add(ebp, ptr[edi + 4]);
// GSVector4i zs = zi;
@ -682,7 +682,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
return;
}
mov(ebx, dword[&m_local.gd->tex]);
mov(ebx, ptr[&m_local.gd->tex]);
// ebx = tex
@ -1446,8 +1446,8 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
// int fa = fza_base.x + fza_offset->x;
mov(ebx, dword[esi]);
add(ebx, dword[edi]);
mov(ebx, ptr[esi]);
add(ebx, ptr[edi]);
if(!m_sel.rfb)
{
@ -1805,7 +1805,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
if(m_sel.fpsm == 2 && m_sel.dthe)
{
mov(eax, dword[esp + _top]);
mov(eax, ptr[esp + _top]);
and(eax, 3);
shl(eax, 5);
vpaddw(xmm5, ptr[eax + (size_t)&m_local.gd->dimx[0]]);

View File

@ -268,14 +268,14 @@ void GSDrawScanlineCodeGenerator::Init()
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
mov(esi, dword[esp + _top]);
mov(esi, ptr[esp + _top]);
lea(esi, ptr[esi * 8]);
add(esi, dword[&m_local.gd->fzbr]);
add(esi, ptr[&m_local.gd->fzbr]);
// GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2];
lea(edi, ptr[ebx * 2]);
add(edi, dword[&m_local.gd->fzbc]);
add(edi, ptr[&m_local.gd->fzbc]);
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
{
@ -286,7 +286,7 @@ void GSDrawScanlineCodeGenerator::Init()
// ebx = &v
mov(ebx, dword[esp + _v]);
mov(ebx, ptr[esp + _v]);
}
if(!m_sel.sprite)
@ -587,8 +587,8 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
// int za = fza_base.y + fza_offset->y;
mov(ebp, dword[esi + 4]);
add(ebp, dword[edi + 4]);
mov(ebp, ptr[esi + 4]);
add(ebp, ptr[edi + 4]);
// GSVector4i zs = zi;
@ -684,7 +684,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
return;
}
mov(ebx, dword[&m_local.gd->tex]);
mov(ebx, ptr[&m_local.gd->tex]);
// ebx = tex
@ -1495,8 +1495,8 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
// int fa = fza_base.x + fza_offset->x;
mov(ebx, dword[esi]);
add(ebx, dword[edi]);
mov(ebx, ptr[esi]);
add(ebx, ptr[edi]);
if(!m_sel.rfb)
{
@ -1875,7 +1875,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
if(m_sel.fpsm == 2 && m_sel.dthe)
{
mov(eax, dword[esp + _top]);
mov(eax, ptr[esp + _top]);
and(eax, 3);
shl(eax, 5);
paddw(xmm5, ptr[eax + (size_t)&m_local.gd->dimx[0]]);

View File

@ -234,6 +234,24 @@ public:
ml.method_size = (unsigned int)cg->getSize();
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
/*
name = format("c:/temp/%s_%016llx.bin", m_name.c_str(), (uint64)key);
if(FILE* fp = fopen(name.c_str(), "wb"))
{
fputc(0x0F, fp); fputc(0x0B, fp);
fputc(0xBB, fp); fputc(0x6F, fp); fputc(0x00, fp); fputc(0x00, fp); fputc(0x00, fp);
fputc(0x64, fp); fputc(0x67, fp); fputc(0x90, fp);
fwrite(cg->getCode(), cg->getSize(), 1, fp);
fputc(0xBB, fp); fputc(0xDE, fp); fputc(0x00, fp); fputc(0x00, fp); fputc(0x00, fp);
fputc(0x64, fp); fputc(0x67, fp); fputc(0x90, fp);
fputc(0x0F, fp); fputc(0x0B, fp);
fclose(fp);
}
*/
}
#endif

View File

@ -44,10 +44,14 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds)
, m_id(0)
, m_threads(1)
{
m_edge.buff = (GSScanline*)vmalloc(sizeof(GSScanline) * 2048, false);
m_edge.count = 0;
}
GSRasterizer::~GSRasterizer()
{
	// Give back the queued-scanline buffer, if there is one; the byte count
	// passed to vmfree is the same expression the constructor hands to vmalloc.
	if(m_edge.buff)
	{
		vmfree(m_edge.buff, sizeof(GSScanline) * 2048);
	}

	// The draw-scanline object is deleted here along with the rasterizer.
	delete m_ds;
}
@ -68,10 +72,12 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
{
m_ds->BeginDraw(data->param);
const GSVector4i scissor = data->scissor;
const GSVertexSW* vertices = data->vertices;
const int count = data->count;
m_scissor = data->scissor;
m_fscissor = GSVector4(data->scissor);
m_stats.Reset();
int64 start = __rdtsc();
@ -80,22 +86,22 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
{
case GS_POINT_CLASS:
m_stats.prims = count;
for(int i = 0; i < count; i++) DrawPoint(&vertices[i], scissor);
for(int i = 0; i < count; i++) DrawPoint(&vertices[i]);
break;
case GS_LINE_CLASS:
ASSERT(!(count & 1));
m_stats.prims = count / 2;
for(int i = 0; i < count; i += 2) DrawLine(&vertices[i], scissor);
for(int i = 0; i < count; i += 2) DrawLine(&vertices[i]);
break;
case GS_TRIANGLE_CLASS:
ASSERT(!(count % 3));
m_stats.prims = count / 3;
for(int i = 0; i < count; i += 3) DrawTriangle(&vertices[i], scissor);
for(int i = 0; i < count; i += 3) DrawTriangle(&vertices[i]);
break;
case GS_SPRITE_CLASS:
ASSERT(!(count & 1));
m_stats.prims = count / 2;
for(int i = 0; i < count; i += 2) DrawSprite(&vertices[i], scissor);
for(int i = 0; i < count; i += 2) DrawSprite(&vertices[i]);
break;
default:
__assume(0);
@ -111,26 +117,26 @@ void GSRasterizer::GetStats(GSRasterizerStats& stats)
stats = m_stats;
}
void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
void GSRasterizer::DrawPoint(const GSVertexSW* v)
{
// TODO: round to closest for point, prestep for line
GSVector4i p(v->p);
if(scissor.left <= p.x && p.x < scissor.right && scissor.top <= p.y && p.y < scissor.bottom)
if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
if(IsOneOfMyScanlines(p.y))
{
m_stats.pixels++;
m_ds->SetupPrim(v, *v);
m_ds->DrawScanline(p.x + 1, p.x, p.y, *v);
m_stats.pixels++;
}
}
}
void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
void GSRasterizer::DrawLine(const GSVertexSW* v)
{
GSVertexSW dv = v[1] - v[0];
@ -148,8 +154,10 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
m_ds->SetupPrim(v, dscan);
DrawEdge(v[0], v[1], dv, scissor, i, 0);
DrawEdge(v[0], v[1], dv, scissor, i, 1);
DrawEdge(v[0], v[1], dv, i, 0);
DrawEdge(v[0], v[1], dv, i, 1);
FlushEdge();
return;
}
@ -176,7 +184,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
GSVector4i p(l.p);
if(scissor.top <= p.y && p.y < scissor.bottom)
if(m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
GSVertexSW dscan = dv / dv.p.xxxx();
@ -184,9 +192,9 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
GSVector4 fscissor(scissor);
DrawTriangleSection(p.y, p.y + 1, l, dl, dscan);
DrawTriangleSection(p.y, p.y + 1, l, dl, dscan, fscissor);
Flush();
}
}
@ -199,12 +207,13 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
GSVertexSW dedge = dv / dp.v[i];
// TODO: prestep + clip with the scissor
// TODO: inline drawpoint + Flush()
int steps = dpi.v[i];
while(steps-- > 0)
{
DrawPoint(&edge, scissor);
DrawPoint(&edge);
edge += dedge;
}
@ -222,301 +231,187 @@ static const int s_abc[8][4] =
{2, 1, 0, 0}, // a > b > c
};
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& scissor)
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
{
GSVertexSW v[3];
// edge buffer is used here to avoid xmm save-restores (except when we do aa1 in the middle)
GSVertexSW v[4];
GSVertexSW dv[3];
GSVertexSW ddv[3];
GSVertexSW longest;
GSVertexSW dscan;
GSVector4 aabb = vertices[0].p.yyyy(vertices[1].p);
GSVector4 bccb = vertices[1].p.yyyy(vertices[2].p).xzzx();
int i = (aabb > bccb).mask() & 7;
int abc = (aabb > bccb).mask() & 7;
v[0] = vertices[s_abc[i][0]];
v[1] = vertices[s_abc[i][1]];
v[2] = vertices[s_abc[i][2]];
v[0] = vertices[s_abc[abc][0]];
v[1] = vertices[s_abc[abc][1]];
v[2] = vertices[s_abc[abc][2]];
aabb = v[0].p.yyyy(v[1].p);
bccb = v[1].p.yyyy(v[2].p).xzzx();
i = (aabb == bccb).mask() & 7;
int i = (aabb == bccb).mask() & 7;
if(m_ds->IsEdge())
{
DrawEdge(v, scissor);
}
switch(i)
{
case 0: // a < b < c
DrawTriangleTopBottom(v, scissor);
break;
case 1: // a == b < c
DrawTriangleBottom(v, scissor);
break;
case 4: // a < b == c
DrawTriangleTop(v, scissor);
break;
case 7: // a == b == c
break;
default:
__assume(0);
}
}
void GSRasterizer::DrawEdge(const GSVertexSW* v, const GSVector4i& scissor)
{
GSVertexSW dv[3];
GSVector4 tbf = aabb.xzxz(bccb).ceil();
GSVector4 tbmax = tbf.max(m_fscissor.yyyy());
GSVector4 tbmin = tbf.min(m_fscissor.wwww());
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin));
dv[0] = v[1] - v[0];
dv[1] = v[2] - v[0];
dv[2] = v[2] - v[1];
GSVector4 dx = dv[0].p.upl(dv[1].p).xyxy(dv[2].p);
GSVector4 dy = dv[0].p.upl(dv[1].p).zwyx(dv[2].p);
GSVector4 a = dx.abs() < dy.abs(); // |x| <= |y|
GSVector4 b = dx < GSVector4::zero(); // x < 0
GSVector4 c = dv[1].p * (dv[0].p / dv[1].p).yyyy() < dv[0].p; // longest.p.x < 0
int i = a.mask();
int j = ((a | b) ^ c.xxxx()).mask() ^ 2; // evil
GSVertexSW dscan;
dscan.p = GSVector4::zero();
dscan.t = GSVector4::zero();
dscan.c = GSVector4::zero();
m_ds->SetupPrim(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small)
DrawEdge(v[0], v[1], dv[0], scissor, i & 1, j & 1);
DrawEdge(v[0], v[2], dv[1], scissor, i & 2, j & 2);
DrawEdge(v[1], v[2], dv[2], scissor, i & 4, j & 4);
}
void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
{
GSVertexSW longest;
longest.p = v[2].p - v[1].p;
int i = longest.p.upl(longest.p == GSVector4::zero()).mask();
if(i & 2) return;
i &= 1;
GSVertexSW& l = v[0];
GSVector4& r = v[0].p;
GSVector4 fscissor(scissor);
GSVector4 tb = l.p.upl(v[2].p).ceil();
GSVector4 tbmax = tb.max(fscissor.yyyy());
GSVector4 tbmin = tb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin));
int top = tbi.extract32<0>();
int bottom = tbi.extract32<2>();
if(top >= bottom) return;
longest.t = v[2].t - v[1].t;
longest.c = v[2].c - v[1].c;
GSVertexSW dscan = longest * longest.p.xxxx().rcp();
GSVertexSW vl = v[1 + i] - l;
GSVector4 vr = v[2 - i].p - r;
GSVertexSW dl = vl / vl.p.yyyy();
GSVector4 dr = vr / vr.yyyy();
GSVector4 dy = tbmax.zzzz() - l.p.yyyy();
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
dl.p = dl.p.upl(dr).xyzw(dl.p); // dr.x => dl.y
l += dl * dy;
m_ds->SetupPrim(v, dscan);
DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
}
void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
{
GSVertexSW longest;
longest.p = v[1].p - v[0].p;
int i = longest.p.upl(longest.p == GSVector4::zero()).mask();
if(i & 2) return;
i &= 1;
GSVertexSW& l = v[i];
GSVector4& r = v[1 - i].p;
GSVector4 fscissor(scissor);
GSVector4 tb = l.p.upl(v[2].p).ceil();
GSVector4 tbmax = tb.max(fscissor.yyyy());
GSVector4 tbmin = tb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin));
int top = tbi.extract32<0>();
int bottom = tbi.extract32<2>();
if(top >= bottom) return;
longest.t = v[1].t - v[0].t;
longest.c = v[1].c - v[0].c;
GSVertexSW dscan = longest * longest.p.xxxx().rcp();
GSVertexSW vl = v[2] - l;
GSVector4 vr = v[2].p - r;
GSVertexSW dl = vl / vl.p.yyyy();
GSVector4 dr = vr / vr.yyyy();
GSVector4 dy = tbmax.zzzz() - l.p.yyyy();
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
dl.p = dl.p.upl(dr).xyzw(dl.p); // dr.x => dl.y
l += dl * dy;
m_ds->SetupPrim(v, dscan);
DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
}
void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scissor)
{
GSVertexSW dv[3];
dv[0] = v[1] - v[0];
dv[1] = v[2] - v[0];
GSVertexSW longest = dv[1] * (dv[0].p / dv[1].p).yyyy() - dv[0];
int i = longest.p.upl(longest.p == GSVector4::zero()).mask();
if(i & 2) return;
i &= 1;
GSVertexSW dscan = longest * longest.p.xxxx().rcp();
m_ds->SetupPrim(v, dscan);
GSVector4 fscissor(scissor);
GSVector4 tb = v[0].p.upl(v[1].p).zwzw(v[1].p.upl(v[2].p)).ceil();
GSVector4 tbmax = tb.max(fscissor.yyyy());
GSVector4 tbmin = tb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.xzyw(tbmin));
int top = tbi.extract32<0>();
int bottom = tbi.extract32<2>();
GSVertexSW& l = v[0];
GSVector4 r = v[0].p;
GSVertexSW dl = dv[i] / dv[i].p.yyyy();
GSVector4 dr = dv[1 - i].p / dv[1 - i].p.yyyy();
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
l += dl * dy;
r += dr * dy;
if(top < bottom)
switch(i)
{
DrawTriangleSection(top, bottom, l, dl, r, dr, dscan, fscissor);
case 0: // a < b < c
ddv[0] = dv[0] / dv[0].p.yyyy();
ddv[1] = dv[1] / dv[1].p.yyyy();
ddv[2] = dv[2] / dv[2].p.yyyy();
longest = ddv[1] * dv[0].p.yyyy() - dv[0];
v[3] = v[1] + longest; // point between v[0] and v[2] where y == v[1].y
break;
case 1: // a == b < c
ddv[1] = dv[1] / dv[1].p.yyyy();
ddv[2] = dv[2] / dv[2].p.yyyy();
longest = dv[0];
break;
case 4: // a < b == c
ddv[0] = dv[0] / dv[0].p.yyyy();
ddv[1] = dv[1] / dv[1].p.yyyy();
longest = dv[2];
break;
case 7: // a == b == c
return;
default:
__assume(0);
}
top = tbi.y;
bottom = tbi.w;
int j = longest.p.upl(longest.p == GSVector4::zero()).mask();
if(top < bottom)
if(j & 2) return;
j &= 1;
dscan = longest * longest.p.xxxx().rcp();
if(m_ds->IsEdge())
{
if(i == 0)
{
l = v[1];
dv[2] = v[2] - v[1];
dl = dv[2] / dv[2].p.yyyy();
}
else
{
r = v[1].p;
dv[2].p = v[2].p - v[1].p;
dr = dv[2].p / dv[2].p.yyyy();
}
GSVector4 dx = dv[0].p.upl(dv[1].p).xyxy(dv[2].p);
GSVector4 dy = dv[0].p.upl(dv[1].p).zwyx(dv[2].p);
l += dl * (tbmax.zzzz() - l.p.yyyy());
r += dr * (tbmax.zzzz() - r.yyyy());
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
GSVector4 c = longest.p.xxxx() < GSVector4::zero(); // longest.p.x < 0
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
dl.p = dl.p.upl(dr).xyzw(dl.p); // dr.x => dl.y
int i = a.mask();
int j = ((a | b) ^ c).mask() ^ 2; // evil
DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
DrawEdge(v[0], v[1], dv[0], i & 1, j & 1);
DrawEdge(v[0], v[2], dv[1], i & 2, j & 2);
DrawEdge(v[1], v[2], dv[2], i & 4, j & 4);
GSVertexSW dscan;
dscan.p = GSVector4::zero();
dscan.t = GSVector4::zero();
dscan.c = GSVector4::zero();
m_ds->SetupPrim(v, dscan);
FlushEdge();
}
switch(i)
{
case 0: // a < b < c
if(tb.x < tb.z)
{
GSVertexSW l = v[0];
GSVertexSW dl = ddv[j];
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
l.p = l.p.xxzw(); // r.x => l.y
dl.p = dl.p.upl(ddv[1 - j].p).xyzw(dl.p); // dr.x => dl.y
l += dl * dy;
DrawTriangleSection(tb.x, tb.z, l, dl, dscan);
}
if(tb.y < tb.w)
{
GSVertexSW l = v[1 + (1 << j)];
GSVertexSW dl = ddv[2 - j];
GSVector4 dy = tbmax.zzzz() - l.p.yyyy();
l.p = l.p.upl(v[3 - (1 << j)].p).xyzw(l.p); // r.x => l.y
dl.p = dl.p.upl(ddv[1 + j].p).xyzw(dl.p); // dr.x => dl.y
l += dl * dy;
DrawTriangleSection(tb.y, tb.w, l, dl, dscan);
}
break;
case 1: // a == b < c
if(tb.x < tb.w)
{
GSVertexSW l = v[j];
GSVertexSW dl = ddv[1 + j];
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
l.p = l.p.upl(v[1 - j].p).xyzw(l.p); // r.x => l.y
dl.p = dl.p.upl(ddv[2 - j].p).xyzw(dl.p); // dr.x => dl.y
l += dl * dy;
DrawTriangleSection(tb.x, tb.w, l, dl, dscan);
}
break;
case 4: // a < b == c
if(tb.x < tb.w)
{
GSVertexSW l = v[0];
GSVertexSW dl = ddv[j];
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
l.p = l.p.xxzw(); // r.x => l.y
dl.p = dl.p.upl(ddv[1 - j].p).xyzw(dl.p); // dr.x => dl.y
l += dl * dy;
DrawTriangleSection(tb.x, tb.w, l, dl, dscan);
}
break;
default:
__assume(0);
}
m_ds->SetupPrim(v, dscan);
Flush();
}
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, GSVector4& r, const GSVector4& dr, const GSVertexSW& dscan, const GSVector4& fscissor)
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan)
{
ASSERT(top < bottom);
while(1)
{
do
{
if(IsOneOfMyScanlines(top))
{
GSVector4 lr = l.p.xyxy(r).ceil();
GSVector4 lrmax = lr.max(fscissor.xxxx());
GSVector4 lrmin = lr.min(fscissor.zzzz());
GSVector4i lri = GSVector4i(lrmax.xxzz(lrmin));
int left = lri.extract32<0>();
int right = lri.extract32<2>();
int pixels = right - left;
if(pixels > 0)
{
m_stats.pixels += pixels;
GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx();
m_ds->DrawScanline(right, left, top, scan);
}
}
}
while(0);
if(++top >= bottom) break;
l += dl;
r += dr;
}
}
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan, const GSVector4& fscissor)
{
ASSERT(top < bottom);
GSScanline* RESTRICT e = &m_edge.buff[m_edge.count];
while(1)
{
@ -526,8 +421,8 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
{
GSVector4 lr = l.p.ceil();
GSVector4 lrmax = lr.max(fscissor.xxxx());
GSVector4 lrmin = lr.min(fscissor.zzzz());
GSVector4 lrmax = lr.max(m_fscissor.xxxx());
GSVector4 lrmin = lr.min(m_fscissor.zzzz());
GSVector4i lri = GSVector4i(lrmax.xxyy(lrmin));
@ -540,9 +435,13 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
{
m_stats.pixels += pixels;
GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx();
e->scan = l + dscan * (lrmax - l.p).xxxx();
m_ds->DrawScanline(right, left, top, scan);
e->p.left = left;
e->p.top = top;
e->p.right = right;
e++;
}
}
}
@ -552,9 +451,11 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
l += dl;
}
m_edge.count += e - &m_edge.buff[m_edge.count];
}
void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scissor)
void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
{
GSVertexSW v[2];
@ -569,7 +470,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
r = r.rintersect(scissor);
r = r.rintersect(m_scissor);
if(r.rempty()) return;
@ -611,14 +512,14 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
{
if(IsOneOfMyScanlines(r.top))
{
m_ds->DrawScanline(r.right, r.left, r.top, scan);
m_stats.pixels += r.width();
m_ds->DrawScanline(r.right, r.left, r.top, scan);
}
}
}
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side)
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side)
{
// orientation:
// - true: |dv.p.y| > |dv.p.x|
@ -630,14 +531,14 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
// TODO: bit slow and too much duplicated code
// TODO: inner pre-step is still missing (hardly noticeable)
GSVector4 fscissor(scissor);
GSScanline* RESTRICT dst = &m_edge.buff[m_edge.count];
GSVector4 lrtb = v0.p.upl(v1.p).ceil();
if(orientation)
{
GSVector4 tbmax = lrtb.max(fscissor.yyyy());
GSVector4 tbmin = lrtb.min(fscissor.wwww());
GSVector4 tbmax = lrtb.max(m_fscissor.yyyy());
GSVector4 tbmin = lrtb.min(m_fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin));
@ -684,15 +585,18 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
int xi = x >> 16;
int xf = x & 0xffff;
if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi))
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi))
{
m_stats.pixels++;
edge.t.u32[3] = (0x10000 - xf) & 0xffff;
dst->scan = edge;
dst->scan.t.u32[3] = (0x10000 - xf) & 0xffff;
m_ds->DrawEdge(xi + 1, xi, top, edge);
dst->p.left = xi;
dst->p.top = top;
dst->p.right = xi + 1;
edge.t.u32[3] = 0;
dst++;
}
}
while(0);
@ -712,15 +616,18 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
int xi = (x >> 16) + 1;
int xf = x & 0xffff;
if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi))
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi))
{
m_stats.pixels++;
edge.t.u32[3] = xf;
dst->scan = edge;
dst->scan.t.u32[3] = xf;
m_ds->DrawEdge(xi + 1, xi, top, edge);
dst->p.left = xi;
dst->p.top = top;
dst->p.right = xi + 1;
edge.t.u32[3] = 0;
dst++;
}
}
while(0);
@ -734,8 +641,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
else
{
GSVector4 lrmax = lrtb.max(fscissor.xxxx());
GSVector4 lrmin = lrtb.min(fscissor.zzzz());
GSVector4 lrmax = lrtb.max(m_fscissor.xxxx());
GSVector4 lrmin = lrtb.min(m_fscissor.zzzz());
GSVector4i lri = GSVector4i(lrmax.xyxy(lrmin));
@ -782,15 +689,18 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
int yi = y >> 16;
int yf = y & 0xffff;
if(scissor.top <= yi && yi < scissor.bottom && IsOneOfMyScanlines(yi))
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
{
m_stats.pixels++;
edge.t.u32[3] = (0x10000 - yf) & 0xffff;
dst->scan = edge;
dst->scan.t.u32[3] = (0x10000 - yf) & 0xffff;
m_ds->DrawEdge(left + 1, left, yi, edge);
dst->p.left = left;
dst->p.top = yi;
dst->p.right = left + 1;
edge.t.u32[3] = 0;
dst++;
}
}
while(0);
@ -810,15 +720,18 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
int yi = (y >> 16) + 1;
int yf = y & 0xffff;
if(scissor.top <= yi && yi < scissor.bottom && IsOneOfMyScanlines(yi))
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
{
m_stats.pixels++;
edge.t.u32[3] = yf;
dst->scan = edge;
dst->scan.t.u32[3] = yf;
m_ds->DrawEdge(left + 1, left, yi, edge);
dst->p.left = left;
dst->p.top = yi;
dst->p.right = left + 1;
edge.t.u32[3] = 0;
dst++;
}
}
while(0);
@ -830,6 +743,34 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
}
}
m_edge.count += dst - &m_edge.buff[m_edge.count];
}
void GSRasterizer::Flush()
{
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
const GSScanline* s = m_edge.buff;
for(int count = m_edge.count; count > 0; count--, s++)
{
m_ds->DrawScanline(s->p.right, s->p.left, s->p.top, s->scan);
}
m_edge.count = 0;
}
void GSRasterizer::FlushEdge()
{
const GSScanline* s = m_edge.buff;
for(int count = m_edge.count; count > 0; count--, s++)
{
m_ds->DrawEdge(s->p.right, s->p.left, s->p.top, s->scan);
}
m_edge.count = 0;
}
//

View File

@ -59,7 +59,7 @@ public:
virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0;
virtual void PrintStats() = 0;
__forceinline void SetupPrim(const GSVertexSW* v, const GSVertexSW& dscan) {m_sp(v, dscan);}
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
__forceinline void DrawScanline(int right, int left, int top, const GSVertexSW& scan) {m_ds(right, left, top, scan);}
__forceinline void DrawEdge(int right, int left, int top, const GSVertexSW& scan) {m_de(right, left, top, scan);}
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
@ -79,30 +79,33 @@ public:
virtual void SetThreadId(int id, int threads) = 0;
};
class GSRasterizer : public IRasterizer
__aligned(class, 32) GSRasterizer : public GSAlignedClass<32>, public IRasterizer
{
struct GSScanline {GSVertexSW scan; GSVector4i p;};
protected:
IDrawScanline* m_ds;
int m_id;
int m_threads;
GSRasterizerStats m_stats;
GSVector4i m_scissor;
GSVector4 m_fscissor;
struct {GSScanline* buff; int count;} m_edge;
void DrawPoint(const GSVertexSW* v, const GSVector4i& scissor);
void DrawLine(const GSVertexSW* v, const GSVector4i& scissor);
void DrawTriangle(const GSVertexSW* v, const GSVector4i& scissor);
void DrawEdge(const GSVertexSW* v, const GSVector4i& scissor);
void DrawSprite(const GSVertexSW* v, const GSVector4i& scissor);
void DrawPoint(const GSVertexSW* v);
void DrawLine(const GSVertexSW* v);
void DrawTriangle(const GSVertexSW* v);
void DrawSprite(const GSVertexSW* v);
void DrawEdge(const GSVertexSW* v);
void DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor);
void DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor);
void DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scissor);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, GSVector4& r, const GSVector4& dr, const GSVertexSW& dscan, const GSVector4& scissor);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan, const GSVector4& scissor);
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side);
__forceinline bool IsOneOfMyScanlines(int scanline) const;
inline bool IsOneOfMyScanlines(int scanline) const;
void Flush();
void FlushEdge();
public:
GSRasterizer(IDrawScanline* ds);

View File

@ -76,8 +76,6 @@ bool GSRenderer::CreateDevice(GSDevice* dev)
void GSRenderer::ResetDevice()
{
InvalidateTextureCache();
ResetPrim();
if(m_dev) m_dev->Reset(1, 1);

View File

@ -139,7 +139,7 @@ protected:
{
if(m_vertices != NULL) _aligned_free(m_vertices);
m_maxcount = max(10000, m_maxcount * 3/2);
m_maxcount = std::max<int>(10000, m_maxcount * 3 / 2);
m_vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * m_maxcount, 32);
m_maxcount -= 100;
}

View File

@ -484,13 +484,10 @@ protected:
}
}
void InvalidateTextureCache()
{
m_tc->RemoveAll();
}
void ResetDevice()
{
m_tc->RemoveAll();
__super::ResetDevice();
}

View File

@ -372,12 +372,12 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(gd.sel.ltf)
{
GSVector4 half(0x8000, 0x8000);
if(gd.sel.fst)
{
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
GSVector4 half(0x8000, 0x8000);
GSVertexSW* v = m_vertices;
for(int i = 0, j = m_count; i < j; i++)

View File

@ -196,8 +196,6 @@ void GSState::Reset()
m_env.Reset();
m_context = &m_env.CTXT[0];
InvalidateTextureCache();
}
void GSState::ResetHandlers()
@ -770,8 +768,6 @@ void GSState::GIFRegHandlerFOGCOL(const GIFReg* r)
void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* r)
{
// TRACE(_T("TEXFLUSH\n"));
// InvalidateTextureCache();
}
template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
@ -903,6 +899,7 @@ template<int i> void GSState::GIFRegHandlerFRAME(const GIFReg* r)
template<int i> void GSState::GIFRegHandlerZBUF(const GIFReg* r)
{
GIFRegZBUF ZBUF = r->ZBUF;
if(ZBUF.u32[0] == 0)
{
// during startup all regs are cleared to 0 (by the bios or something), so we mask z until this register becomes valid
@ -1396,9 +1393,6 @@ template void GSState::Transfer<3>(const uint8* mem, uint32 size);
template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
{
// [TODO] make me into a template parameter... I think. --air
static const bool FrameSkipIt = false;
GSPerfMonAutoTimer pmat(m_perfmon);
const uint8* start = mem;
@ -1420,7 +1414,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
// ASSERT(!(path.tag.PRE && path.tag.FLG == GIF_FLG_REGLIST)); // kingdom hearts
if(path.tag.PRE && (path.tag.FLG == GIF_FLG_PACKED) && !FrameSkipIt)
if(path.tag.PRE && path.tag.FLG == GIF_FLG_PACKED)
{
GIFRegPRIM r;
r.u64 = path.tag.PRIM;
@ -1551,7 +1545,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
if(m_mt)
{
// Hackfix for BIOS, which sends an incomplete packet when it does an XGKICK without
// having an EOP specified anywhere in VU1 memory. Needed until PCSX2 is fixed t
// having an EOP specified anywhere in VU1 memory. Needed until PCSX2 is fixed to
// handle it more properly (ie, without looping infinitely).
path.nloop = 0;
@ -1802,7 +1796,7 @@ bool GSState::GSTransferBuffer::Update(int tw, int th, int bpp, int& len)
if(total == 0)
{
start = end = 0;
total = min((tw * bpp >> 3) * th, 1024 * 1024 * 4);
total = std::min<int>((tw * bpp >> 3) * th, 1024 * 1024 * 4);
overflow = false;
}

View File

@ -28,7 +28,6 @@
#include "GSVertex.h"
#include "GSVertexList.h"
#include "GSUtil.h"
#include "GSDirtyRect.h"
#include "GSPerfMon.h"
#include "GSVector.h"
#include "GSDevice.h"
@ -208,7 +207,6 @@ public:
virtual void ResetPrim() = 0;
virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
virtual void InvalidateTextureCache() {}
void Move();
void Write(const uint8* mem, int len);

View File

@ -22,6 +22,7 @@
#pragma once
#include "GSRenderer.h"
#include "GSDirtyRect.h"
class GSTextureCache
{

View File

@ -23,15 +23,15 @@
#include "GSVector.h"
__aligned(struct, 32) GSVertexSW
__aligned(struct, 16) GSVertexSW
{
GSVector4 c, p, t;
GSVertexSW() {}
GSVertexSW(const GSVertexSW& v) {*this = v;}
void operator = (const GSVertexSW& v) {c = v.c; p = v.p; t = v.t;}
void operator += (const GSVertexSW& v) {c += v.c; p += v.p; t += v.t;}
__forceinline void operator = (const GSVertexSW& v) {c = v.c; p = v.p; t = v.t;}
__forceinline void operator += (const GSVertexSW& v) {c += v.c; p += v.p; t += v.t;}
friend GSVertexSW operator + (const GSVertexSW& v1, const GSVertexSW& v2);
friend GSVertexSW operator - (const GSVertexSW& v1, const GSVertexSW& v2);

View File

@ -528,7 +528,7 @@
<ClCompile Include="GSLocalMemory.cpp" />
<ClCompile Include="GSPerfMon.cpp" />
<ClCompile Include="GSRasterizer.cpp">
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">All</AssemblerOutput>
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">AssemblyAndSourceCode</AssemblerOutput>
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
</ClCompile>
<ClCompile Include="GSRenderer.cpp" />

View File

@ -0,0 +1,75 @@
/*
* INTEL CONFIDENTIAL
* Copyright (2008-2009) Intel Corporation All Rights Reserved.
* The source code contained or described herein and all documents
* related to the source code ("Material") are owned by Intel Corporation
* or its suppliers or licensors. Title to the Material remains with
* Intel Corporation or its suppliers and licensors. The Material
* contains trade secrets and proprietary and confidential information
* of Intel or its suppliers and licensors. The Material is protected
* by worldwide copyright and trade secret laws and treaty provisions.
* No part of the Material may be used, copied, reproduced, modified,
* published, uploaded, posted, transmitted, distributed, or disclosed
* in any way without Intels prior express written permission.
*
* No license under any patent, copyright, trade secret or other
* intellectual property right is granted to or conferred upon you by
* disclosure or delivery of the Materials, either expressly, by implication,
* inducement, estoppel or otherwise. Any license under such intellectual
* property rights must be express and approved by Intel in writing.
*/
/********************************************************/
/* Binaries that contain IACA_MARKS will not run. */
/* Define IACA_MARKS_OFF when you compile your sources, */
/* to disable IACA_START, IACA_END, IACA_MSC64_START */
/* and IACA_MSC64_END */
/********************************************************/
#ifdef IACA_MARKS_OFF
#define IACA_START
#define IACA_END
#define IACA_MSC64_START
#define IACA_MSC64_END
#else
#if defined (__GNUC__)
#define IACA_SSC_MARK( MARK_ID ) \
__asm__ __volatile__ ( \
"\n\t movl $"#MARK_ID", %%ebx" \
"\n\t .byte 0x64, 0x67, 0x90" \
: : : "memory" );
#define IACA_UD_BYTES __asm__ __volatile__ ("\n\t .byte 0x0F, 0x0B");
#else
#define IACA_UD_BYTES {__asm _emit 0x0F \
__asm _emit 0x0B}
#define IACA_SSC_MARK(x) {__asm mov ebx, x\
__asm _emit 0x64 \
__asm _emit 0x67 \
__asm _emit 0x90 }
#define IACA_VC64_START __writegsbyte(111, 111);
#define IACA_VC64_END __writegsbyte(222, 222);
#endif
#define IACA_START {IACA_UD_BYTES \
IACA_SSC_MARK(111)}
#define IACA_END {IACA_SSC_MARK(222) \
IACA_UD_BYTES}
#endif
/**************** asm *****************
;START_MARKER
mov ebx, 111
db 0x64, 0x67, 0x90
;END_MARKER
mov ebx, 222
db 0x64, 0x67, 0x90
**************************************/