mirror of https://github.com/PCSX2/pcsx2.git
GSdx: optimized the triangle setup of the rasterizer a bit; while it isn't the bottleneck of drawing, it can still add a few percent to the fps.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4404 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
b2319c7636
commit
fe88ee4102
|
@ -379,8 +379,8 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
vmovdqa(xmm13, ptr[&m_local.c.rb]);
|
vmovdqa(xmm13, ptr[r11 + offsetof(GSScanlineLocalData, c.rb)]);
|
||||||
vmovdqa(xmm14, ptr[&m_local.c.ga]);
|
vmovdqa(xmm14, ptr[r11 + offsetof(GSScanlineLocalData, c.ga)]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -273,12 +273,12 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
|
|
||||||
mov(esi, dword[esp + _top]);
|
mov(esi, dword[esp + _top]);
|
||||||
lea(esi, ptr[esi * 8]);
|
lea(esi, ptr[esi * 8]);
|
||||||
add(esi, dword[&m_local.gd->fzbr]);
|
add(esi, ptr[&m_local.gd->fzbr]);
|
||||||
|
|
||||||
// GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2];
|
// GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2];
|
||||||
|
|
||||||
lea(edi, ptr[ebx * 2]);
|
lea(edi, ptr[ebx * 2]);
|
||||||
add(edi, dword[&m_local.gd->fzbc]);
|
add(edi, ptr[&m_local.gd->fzbc]);
|
||||||
|
|
||||||
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
|
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
|
||||||
{
|
{
|
||||||
|
@ -585,8 +585,8 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||||
|
|
||||||
// int za = fza_base.y + fza_offset->y;
|
// int za = fza_base.y + fza_offset->y;
|
||||||
|
|
||||||
mov(ebp, dword[esi + 4]);
|
mov(ebp, ptr[esi + 4]);
|
||||||
add(ebp, dword[edi + 4]);
|
add(ebp, ptr[edi + 4]);
|
||||||
|
|
||||||
// GSVector4i zs = zi;
|
// GSVector4i zs = zi;
|
||||||
|
|
||||||
|
@ -682,7 +682,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
mov(ebx, dword[&m_local.gd->tex]);
|
mov(ebx, ptr[&m_local.gd->tex]);
|
||||||
|
|
||||||
// ebx = tex
|
// ebx = tex
|
||||||
|
|
||||||
|
@ -1446,8 +1446,8 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
|
||||||
|
|
||||||
// int fa = fza_base.x + fza_offset->x;
|
// int fa = fza_base.x + fza_offset->x;
|
||||||
|
|
||||||
mov(ebx, dword[esi]);
|
mov(ebx, ptr[esi]);
|
||||||
add(ebx, dword[edi]);
|
add(ebx, ptr[edi]);
|
||||||
|
|
||||||
if(!m_sel.rfb)
|
if(!m_sel.rfb)
|
||||||
{
|
{
|
||||||
|
@ -1805,7 +1805,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
||||||
|
|
||||||
if(m_sel.fpsm == 2 && m_sel.dthe)
|
if(m_sel.fpsm == 2 && m_sel.dthe)
|
||||||
{
|
{
|
||||||
mov(eax, dword[esp + _top]);
|
mov(eax, ptr[esp + _top]);
|
||||||
and(eax, 3);
|
and(eax, 3);
|
||||||
shl(eax, 5);
|
shl(eax, 5);
|
||||||
vpaddw(xmm5, ptr[eax + (size_t)&m_local.gd->dimx[0]]);
|
vpaddw(xmm5, ptr[eax + (size_t)&m_local.gd->dimx[0]]);
|
||||||
|
|
|
@ -268,14 +268,14 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
|
|
||||||
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
||||||
|
|
||||||
mov(esi, dword[esp + _top]);
|
mov(esi, ptr[esp + _top]);
|
||||||
lea(esi, ptr[esi * 8]);
|
lea(esi, ptr[esi * 8]);
|
||||||
add(esi, dword[&m_local.gd->fzbr]);
|
add(esi, ptr[&m_local.gd->fzbr]);
|
||||||
|
|
||||||
// GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2];
|
// GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2];
|
||||||
|
|
||||||
lea(edi, ptr[ebx * 2]);
|
lea(edi, ptr[ebx * 2]);
|
||||||
add(edi, dword[&m_local.gd->fzbc]);
|
add(edi, ptr[&m_local.gd->fzbc]);
|
||||||
|
|
||||||
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
|
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
|
||||||
{
|
{
|
||||||
|
@ -286,7 +286,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
|
|
||||||
// ebx = &v
|
// ebx = &v
|
||||||
|
|
||||||
mov(ebx, dword[esp + _v]);
|
mov(ebx, ptr[esp + _v]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!m_sel.sprite)
|
if(!m_sel.sprite)
|
||||||
|
@ -587,8 +587,8 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||||
|
|
||||||
// int za = fza_base.y + fza_offset->y;
|
// int za = fza_base.y + fza_offset->y;
|
||||||
|
|
||||||
mov(ebp, dword[esi + 4]);
|
mov(ebp, ptr[esi + 4]);
|
||||||
add(ebp, dword[edi + 4]);
|
add(ebp, ptr[edi + 4]);
|
||||||
|
|
||||||
// GSVector4i zs = zi;
|
// GSVector4i zs = zi;
|
||||||
|
|
||||||
|
@ -684,7 +684,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
mov(ebx, dword[&m_local.gd->tex]);
|
mov(ebx, ptr[&m_local.gd->tex]);
|
||||||
|
|
||||||
// ebx = tex
|
// ebx = tex
|
||||||
|
|
||||||
|
@ -1495,8 +1495,8 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
|
||||||
|
|
||||||
// int fa = fza_base.x + fza_offset->x;
|
// int fa = fza_base.x + fza_offset->x;
|
||||||
|
|
||||||
mov(ebx, dword[esi]);
|
mov(ebx, ptr[esi]);
|
||||||
add(ebx, dword[edi]);
|
add(ebx, ptr[edi]);
|
||||||
|
|
||||||
if(!m_sel.rfb)
|
if(!m_sel.rfb)
|
||||||
{
|
{
|
||||||
|
@ -1875,7 +1875,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
||||||
|
|
||||||
if(m_sel.fpsm == 2 && m_sel.dthe)
|
if(m_sel.fpsm == 2 && m_sel.dthe)
|
||||||
{
|
{
|
||||||
mov(eax, dword[esp + _top]);
|
mov(eax, ptr[esp + _top]);
|
||||||
and(eax, 3);
|
and(eax, 3);
|
||||||
shl(eax, 5);
|
shl(eax, 5);
|
||||||
paddw(xmm5, ptr[eax + (size_t)&m_local.gd->dimx[0]]);
|
paddw(xmm5, ptr[eax + (size_t)&m_local.gd->dimx[0]]);
|
||||||
|
|
|
@ -234,6 +234,24 @@ public:
|
||||||
ml.method_size = (unsigned int)cg->getSize();
|
ml.method_size = (unsigned int)cg->getSize();
|
||||||
|
|
||||||
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
|
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
|
||||||
|
/*
|
||||||
|
name = format("c:/temp/%s_%016llx.bin", m_name.c_str(), (uint64)key);
|
||||||
|
|
||||||
|
if(FILE* fp = fopen(name.c_str(), "wb"))
|
||||||
|
{
|
||||||
|
fputc(0x0F, fp); fputc(0x0B, fp);
|
||||||
|
fputc(0xBB, fp); fputc(0x6F, fp); fputc(0x00, fp); fputc(0x00, fp); fputc(0x00, fp);
|
||||||
|
fputc(0x64, fp); fputc(0x67, fp); fputc(0x90, fp);
|
||||||
|
|
||||||
|
fwrite(cg->getCode(), cg->getSize(), 1, fp);
|
||||||
|
|
||||||
|
fputc(0xBB, fp); fputc(0xDE, fp); fputc(0x00, fp); fputc(0x00, fp); fputc(0x00, fp);
|
||||||
|
fputc(0x64, fp); fputc(0x67, fp); fputc(0x90, fp);
|
||||||
|
fputc(0x0F, fp); fputc(0x0B, fp);
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -44,10 +44,14 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds)
|
||||||
, m_id(0)
|
, m_id(0)
|
||||||
, m_threads(1)
|
, m_threads(1)
|
||||||
{
|
{
|
||||||
|
m_edge.buff = (GSScanline*)vmalloc(sizeof(GSScanline) * 2048, false);
|
||||||
|
m_edge.count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSRasterizer::~GSRasterizer()
|
GSRasterizer::~GSRasterizer()
|
||||||
{
|
{
|
||||||
|
if(m_edge.buff != NULL) vmfree(m_edge.buff, sizeof(GSScanline) * 2048);
|
||||||
|
|
||||||
delete m_ds;
|
delete m_ds;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,10 +72,12 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||||
{
|
{
|
||||||
m_ds->BeginDraw(data->param);
|
m_ds->BeginDraw(data->param);
|
||||||
|
|
||||||
const GSVector4i scissor = data->scissor;
|
|
||||||
const GSVertexSW* vertices = data->vertices;
|
const GSVertexSW* vertices = data->vertices;
|
||||||
const int count = data->count;
|
const int count = data->count;
|
||||||
|
|
||||||
|
m_scissor = data->scissor;
|
||||||
|
m_fscissor = GSVector4(data->scissor);
|
||||||
|
|
||||||
m_stats.Reset();
|
m_stats.Reset();
|
||||||
|
|
||||||
int64 start = __rdtsc();
|
int64 start = __rdtsc();
|
||||||
|
@ -80,22 +86,22 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||||
{
|
{
|
||||||
case GS_POINT_CLASS:
|
case GS_POINT_CLASS:
|
||||||
m_stats.prims = count;
|
m_stats.prims = count;
|
||||||
for(int i = 0; i < count; i++) DrawPoint(&vertices[i], scissor);
|
for(int i = 0; i < count; i++) DrawPoint(&vertices[i]);
|
||||||
break;
|
break;
|
||||||
case GS_LINE_CLASS:
|
case GS_LINE_CLASS:
|
||||||
ASSERT(!(count & 1));
|
ASSERT(!(count & 1));
|
||||||
m_stats.prims = count / 2;
|
m_stats.prims = count / 2;
|
||||||
for(int i = 0; i < count; i += 2) DrawLine(&vertices[i], scissor);
|
for(int i = 0; i < count; i += 2) DrawLine(&vertices[i]);
|
||||||
break;
|
break;
|
||||||
case GS_TRIANGLE_CLASS:
|
case GS_TRIANGLE_CLASS:
|
||||||
ASSERT(!(count % 3));
|
ASSERT(!(count % 3));
|
||||||
m_stats.prims = count / 3;
|
m_stats.prims = count / 3;
|
||||||
for(int i = 0; i < count; i += 3) DrawTriangle(&vertices[i], scissor);
|
for(int i = 0; i < count; i += 3) DrawTriangle(&vertices[i]);
|
||||||
break;
|
break;
|
||||||
case GS_SPRITE_CLASS:
|
case GS_SPRITE_CLASS:
|
||||||
ASSERT(!(count & 1));
|
ASSERT(!(count & 1));
|
||||||
m_stats.prims = count / 2;
|
m_stats.prims = count / 2;
|
||||||
for(int i = 0; i < count; i += 2) DrawSprite(&vertices[i], scissor);
|
for(int i = 0; i < count; i += 2) DrawSprite(&vertices[i]);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
__assume(0);
|
__assume(0);
|
||||||
|
@ -111,26 +117,26 @@ void GSRasterizer::GetStats(GSRasterizerStats& stats)
|
||||||
stats = m_stats;
|
stats = m_stats;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
|
void GSRasterizer::DrawPoint(const GSVertexSW* v)
|
||||||
{
|
{
|
||||||
// TODO: round to closest for point, prestep for line
|
// TODO: round to closest for point, prestep for line
|
||||||
|
|
||||||
GSVector4i p(v->p);
|
GSVector4i p(v->p);
|
||||||
|
|
||||||
if(scissor.left <= p.x && p.x < scissor.right && scissor.top <= p.y && p.y < scissor.bottom)
|
if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
|
||||||
{
|
{
|
||||||
if(IsOneOfMyScanlines(p.y))
|
if(IsOneOfMyScanlines(p.y))
|
||||||
{
|
{
|
||||||
|
m_stats.pixels++;
|
||||||
|
|
||||||
m_ds->SetupPrim(v, *v);
|
m_ds->SetupPrim(v, *v);
|
||||||
|
|
||||||
m_ds->DrawScanline(p.x + 1, p.x, p.y, *v);
|
m_ds->DrawScanline(p.x + 1, p.x, p.y, *v);
|
||||||
|
|
||||||
m_stats.pixels++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
|
void GSRasterizer::DrawLine(const GSVertexSW* v)
|
||||||
{
|
{
|
||||||
GSVertexSW dv = v[1] - v[0];
|
GSVertexSW dv = v[1] - v[0];
|
||||||
|
|
||||||
|
@ -148,8 +154,10 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
|
||||||
|
|
||||||
m_ds->SetupPrim(v, dscan);
|
m_ds->SetupPrim(v, dscan);
|
||||||
|
|
||||||
DrawEdge(v[0], v[1], dv, scissor, i, 0);
|
DrawEdge(v[0], v[1], dv, i, 0);
|
||||||
DrawEdge(v[0], v[1], dv, scissor, i, 1);
|
DrawEdge(v[0], v[1], dv, i, 1);
|
||||||
|
|
||||||
|
FlushEdge();
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -176,7 +184,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
|
||||||
|
|
||||||
GSVector4i p(l.p);
|
GSVector4i p(l.p);
|
||||||
|
|
||||||
if(scissor.top <= p.y && p.y < scissor.bottom)
|
if(m_scissor.top <= p.y && p.y < m_scissor.bottom)
|
||||||
{
|
{
|
||||||
GSVertexSW dscan = dv / dv.p.xxxx();
|
GSVertexSW dscan = dv / dv.p.xxxx();
|
||||||
|
|
||||||
|
@ -184,9 +192,9 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
|
||||||
|
|
||||||
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
|
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
|
||||||
|
|
||||||
GSVector4 fscissor(scissor);
|
DrawTriangleSection(p.y, p.y + 1, l, dl, dscan);
|
||||||
|
|
||||||
DrawTriangleSection(p.y, p.y + 1, l, dl, dscan, fscissor);
|
Flush();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -199,12 +207,13 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
|
||||||
GSVertexSW dedge = dv / dp.v[i];
|
GSVertexSW dedge = dv / dp.v[i];
|
||||||
|
|
||||||
// TODO: prestep + clip with the scissor
|
// TODO: prestep + clip with the scissor
|
||||||
|
// TODO: inline drawpoint + Flush()
|
||||||
|
|
||||||
int steps = dpi.v[i];
|
int steps = dpi.v[i];
|
||||||
|
|
||||||
while(steps-- > 0)
|
while(steps-- > 0)
|
||||||
{
|
{
|
||||||
DrawPoint(&edge, scissor);
|
DrawPoint(&edge);
|
||||||
|
|
||||||
edge += dedge;
|
edge += dedge;
|
||||||
}
|
}
|
||||||
|
@ -222,301 +231,187 @@ static const int s_abc[8][4] =
|
||||||
{2, 1, 0, 0}, // a > b > c
|
{2, 1, 0, 0}, // a > b > c
|
||||||
};
|
};
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& scissor)
|
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
||||||
{
|
{
|
||||||
GSVertexSW v[3];
|
// edge buffer is used here to avoid xmm save-restores (except when we do aa1 in the middle)
|
||||||
|
|
||||||
|
GSVertexSW v[4];
|
||||||
|
GSVertexSW dv[3];
|
||||||
|
GSVertexSW ddv[3];
|
||||||
|
GSVertexSW longest;
|
||||||
|
GSVertexSW dscan;
|
||||||
|
|
||||||
GSVector4 aabb = vertices[0].p.yyyy(vertices[1].p);
|
GSVector4 aabb = vertices[0].p.yyyy(vertices[1].p);
|
||||||
GSVector4 bccb = vertices[1].p.yyyy(vertices[2].p).xzzx();
|
GSVector4 bccb = vertices[1].p.yyyy(vertices[2].p).xzzx();
|
||||||
|
|
||||||
int i = (aabb > bccb).mask() & 7;
|
int abc = (aabb > bccb).mask() & 7;
|
||||||
|
|
||||||
v[0] = vertices[s_abc[i][0]];
|
v[0] = vertices[s_abc[abc][0]];
|
||||||
v[1] = vertices[s_abc[i][1]];
|
v[1] = vertices[s_abc[abc][1]];
|
||||||
v[2] = vertices[s_abc[i][2]];
|
v[2] = vertices[s_abc[abc][2]];
|
||||||
|
|
||||||
aabb = v[0].p.yyyy(v[1].p);
|
aabb = v[0].p.yyyy(v[1].p);
|
||||||
bccb = v[1].p.yyyy(v[2].p).xzzx();
|
bccb = v[1].p.yyyy(v[2].p).xzzx();
|
||||||
|
|
||||||
i = (aabb == bccb).mask() & 7;
|
int i = (aabb == bccb).mask() & 7;
|
||||||
|
|
||||||
if(m_ds->IsEdge())
|
GSVector4 tbf = aabb.xzxz(bccb).ceil();
|
||||||
{
|
GSVector4 tbmax = tbf.max(m_fscissor.yyyy());
|
||||||
DrawEdge(v, scissor);
|
GSVector4 tbmin = tbf.min(m_fscissor.wwww());
|
||||||
}
|
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin));
|
||||||
|
|
||||||
switch(i)
|
|
||||||
{
|
|
||||||
case 0: // a < b < c
|
|
||||||
DrawTriangleTopBottom(v, scissor);
|
|
||||||
break;
|
|
||||||
case 1: // a == b < c
|
|
||||||
DrawTriangleBottom(v, scissor);
|
|
||||||
break;
|
|
||||||
case 4: // a < b == c
|
|
||||||
DrawTriangleTop(v, scissor);
|
|
||||||
break;
|
|
||||||
case 7: // a == b == c
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
__assume(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSRasterizer::DrawEdge(const GSVertexSW* v, const GSVector4i& scissor)
|
|
||||||
{
|
|
||||||
GSVertexSW dv[3];
|
|
||||||
|
|
||||||
dv[0] = v[1] - v[0];
|
dv[0] = v[1] - v[0];
|
||||||
dv[1] = v[2] - v[0];
|
dv[1] = v[2] - v[0];
|
||||||
dv[2] = v[2] - v[1];
|
dv[2] = v[2] - v[1];
|
||||||
|
|
||||||
GSVector4 dx = dv[0].p.upl(dv[1].p).xyxy(dv[2].p);
|
switch(i)
|
||||||
GSVector4 dy = dv[0].p.upl(dv[1].p).zwyx(dv[2].p);
|
|
||||||
|
|
||||||
GSVector4 a = dx.abs() < dy.abs(); // |x| <= |y|
|
|
||||||
GSVector4 b = dx < GSVector4::zero(); // x < 0
|
|
||||||
GSVector4 c = dv[1].p * (dv[0].p / dv[1].p).yyyy() < dv[0].p; // longest.p.x < 0
|
|
||||||
|
|
||||||
int i = a.mask();
|
|
||||||
int j = ((a | b) ^ c.xxxx()).mask() ^ 2; // evil
|
|
||||||
|
|
||||||
GSVertexSW dscan;
|
|
||||||
|
|
||||||
dscan.p = GSVector4::zero();
|
|
||||||
dscan.t = GSVector4::zero();
|
|
||||||
dscan.c = GSVector4::zero();
|
|
||||||
|
|
||||||
m_ds->SetupPrim(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small)
|
|
||||||
|
|
||||||
DrawEdge(v[0], v[1], dv[0], scissor, i & 1, j & 1);
|
|
||||||
DrawEdge(v[0], v[2], dv[1], scissor, i & 2, j & 2);
|
|
||||||
DrawEdge(v[1], v[2], dv[2], scissor, i & 4, j & 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
|
|
||||||
{
|
|
||||||
GSVertexSW longest;
|
|
||||||
|
|
||||||
longest.p = v[2].p - v[1].p;
|
|
||||||
|
|
||||||
int i = longest.p.upl(longest.p == GSVector4::zero()).mask();
|
|
||||||
|
|
||||||
if(i & 2) return;
|
|
||||||
|
|
||||||
i &= 1;
|
|
||||||
|
|
||||||
GSVertexSW& l = v[0];
|
|
||||||
GSVector4& r = v[0].p;
|
|
||||||
|
|
||||||
GSVector4 fscissor(scissor);
|
|
||||||
|
|
||||||
GSVector4 tb = l.p.upl(v[2].p).ceil();
|
|
||||||
|
|
||||||
GSVector4 tbmax = tb.max(fscissor.yyyy());
|
|
||||||
GSVector4 tbmin = tb.min(fscissor.wwww());
|
|
||||||
|
|
||||||
GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin));
|
|
||||||
|
|
||||||
int top = tbi.extract32<0>();
|
|
||||||
int bottom = tbi.extract32<2>();
|
|
||||||
|
|
||||||
if(top >= bottom) return;
|
|
||||||
|
|
||||||
longest.t = v[2].t - v[1].t;
|
|
||||||
longest.c = v[2].c - v[1].c;
|
|
||||||
|
|
||||||
GSVertexSW dscan = longest * longest.p.xxxx().rcp();
|
|
||||||
|
|
||||||
GSVertexSW vl = v[1 + i] - l;
|
|
||||||
GSVector4 vr = v[2 - i].p - r;
|
|
||||||
|
|
||||||
GSVertexSW dl = vl / vl.p.yyyy();
|
|
||||||
GSVector4 dr = vr / vr.yyyy();
|
|
||||||
|
|
||||||
GSVector4 dy = tbmax.zzzz() - l.p.yyyy();
|
|
||||||
|
|
||||||
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
|
|
||||||
dl.p = dl.p.upl(dr).xyzw(dl.p); // dr.x => dl.y
|
|
||||||
|
|
||||||
l += dl * dy;
|
|
||||||
|
|
||||||
m_ds->SetupPrim(v, dscan);
|
|
||||||
|
|
||||||
DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
|
|
||||||
{
|
|
||||||
GSVertexSW longest;
|
|
||||||
|
|
||||||
longest.p = v[1].p - v[0].p;
|
|
||||||
|
|
||||||
int i = longest.p.upl(longest.p == GSVector4::zero()).mask();
|
|
||||||
|
|
||||||
if(i & 2) return;
|
|
||||||
|
|
||||||
i &= 1;
|
|
||||||
|
|
||||||
GSVertexSW& l = v[i];
|
|
||||||
GSVector4& r = v[1 - i].p;
|
|
||||||
|
|
||||||
GSVector4 fscissor(scissor);
|
|
||||||
|
|
||||||
GSVector4 tb = l.p.upl(v[2].p).ceil();
|
|
||||||
|
|
||||||
GSVector4 tbmax = tb.max(fscissor.yyyy());
|
|
||||||
GSVector4 tbmin = tb.min(fscissor.wwww());
|
|
||||||
|
|
||||||
GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin));
|
|
||||||
|
|
||||||
int top = tbi.extract32<0>();
|
|
||||||
int bottom = tbi.extract32<2>();
|
|
||||||
|
|
||||||
if(top >= bottom) return;
|
|
||||||
|
|
||||||
longest.t = v[1].t - v[0].t;
|
|
||||||
longest.c = v[1].c - v[0].c;
|
|
||||||
|
|
||||||
GSVertexSW dscan = longest * longest.p.xxxx().rcp();
|
|
||||||
|
|
||||||
GSVertexSW vl = v[2] - l;
|
|
||||||
GSVector4 vr = v[2].p - r;
|
|
||||||
|
|
||||||
GSVertexSW dl = vl / vl.p.yyyy();
|
|
||||||
GSVector4 dr = vr / vr.yyyy();
|
|
||||||
|
|
||||||
GSVector4 dy = tbmax.zzzz() - l.p.yyyy();
|
|
||||||
|
|
||||||
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
|
|
||||||
dl.p = dl.p.upl(dr).xyzw(dl.p); // dr.x => dl.y
|
|
||||||
|
|
||||||
l += dl * dy;
|
|
||||||
|
|
||||||
m_ds->SetupPrim(v, dscan);
|
|
||||||
|
|
||||||
DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scissor)
|
|
||||||
{
|
|
||||||
GSVertexSW dv[3];
|
|
||||||
|
|
||||||
dv[0] = v[1] - v[0];
|
|
||||||
dv[1] = v[2] - v[0];
|
|
||||||
|
|
||||||
GSVertexSW longest = dv[1] * (dv[0].p / dv[1].p).yyyy() - dv[0];
|
|
||||||
|
|
||||||
int i = longest.p.upl(longest.p == GSVector4::zero()).mask();
|
|
||||||
|
|
||||||
if(i & 2) return;
|
|
||||||
|
|
||||||
i &= 1;
|
|
||||||
|
|
||||||
GSVertexSW dscan = longest * longest.p.xxxx().rcp();
|
|
||||||
|
|
||||||
m_ds->SetupPrim(v, dscan);
|
|
||||||
|
|
||||||
GSVector4 fscissor(scissor);
|
|
||||||
|
|
||||||
GSVector4 tb = v[0].p.upl(v[1].p).zwzw(v[1].p.upl(v[2].p)).ceil();
|
|
||||||
|
|
||||||
GSVector4 tbmax = tb.max(fscissor.yyyy());
|
|
||||||
GSVector4 tbmin = tb.min(fscissor.wwww());
|
|
||||||
|
|
||||||
GSVector4i tbi = GSVector4i(tbmax.xzyw(tbmin));
|
|
||||||
|
|
||||||
int top = tbi.extract32<0>();
|
|
||||||
int bottom = tbi.extract32<2>();
|
|
||||||
|
|
||||||
GSVertexSW& l = v[0];
|
|
||||||
GSVector4 r = v[0].p;
|
|
||||||
|
|
||||||
GSVertexSW dl = dv[i] / dv[i].p.yyyy();
|
|
||||||
GSVector4 dr = dv[1 - i].p / dv[1 - i].p.yyyy();
|
|
||||||
|
|
||||||
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
|
|
||||||
|
|
||||||
l += dl * dy;
|
|
||||||
r += dr * dy;
|
|
||||||
|
|
||||||
if(top < bottom)
|
|
||||||
{
|
{
|
||||||
DrawTriangleSection(top, bottom, l, dl, r, dr, dscan, fscissor);
|
case 0: // a < b < c
|
||||||
|
ddv[0] = dv[0] / dv[0].p.yyyy();
|
||||||
|
ddv[1] = dv[1] / dv[1].p.yyyy();
|
||||||
|
ddv[2] = dv[2] / dv[2].p.yyyy();
|
||||||
|
longest = ddv[1] * dv[0].p.yyyy() - dv[0];
|
||||||
|
v[3] = v[1] + longest; // point between v[0] and v[2] where y == v[1].y
|
||||||
|
break;
|
||||||
|
case 1: // a == b < c
|
||||||
|
ddv[1] = dv[1] / dv[1].p.yyyy();
|
||||||
|
ddv[2] = dv[2] / dv[2].p.yyyy();
|
||||||
|
longest = dv[0];
|
||||||
|
break;
|
||||||
|
case 4: // a < b == c
|
||||||
|
ddv[0] = dv[0] / dv[0].p.yyyy();
|
||||||
|
ddv[1] = dv[1] / dv[1].p.yyyy();
|
||||||
|
longest = dv[2];
|
||||||
|
break;
|
||||||
|
case 7: // a == b == c
|
||||||
|
return;
|
||||||
|
default:
|
||||||
|
__assume(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
top = tbi.y;
|
int j = longest.p.upl(longest.p == GSVector4::zero()).mask();
|
||||||
bottom = tbi.w;
|
|
||||||
|
|
||||||
if(top < bottom)
|
if(j & 2) return;
|
||||||
|
|
||||||
|
j &= 1;
|
||||||
|
|
||||||
|
dscan = longest * longest.p.xxxx().rcp();
|
||||||
|
|
||||||
|
if(m_ds->IsEdge())
|
||||||
{
|
{
|
||||||
if(i == 0)
|
GSVector4 dx = dv[0].p.upl(dv[1].p).xyxy(dv[2].p);
|
||||||
{
|
GSVector4 dy = dv[0].p.upl(dv[1].p).zwyx(dv[2].p);
|
||||||
l = v[1];
|
|
||||||
dv[2] = v[2] - v[1];
|
|
||||||
dl = dv[2] / dv[2].p.yyyy();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
r = v[1].p;
|
|
||||||
dv[2].p = v[2].p - v[1].p;
|
|
||||||
dr = dv[2].p / dv[2].p.yyyy();
|
|
||||||
}
|
|
||||||
|
|
||||||
l += dl * (tbmax.zzzz() - l.p.yyyy());
|
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
|
||||||
r += dr * (tbmax.zzzz() - r.yyyy());
|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
|
||||||
|
GSVector4 c = longest.p.xxxx() < GSVector4::zero(); // longest.p.x < 0
|
||||||
|
|
||||||
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
|
int i = a.mask();
|
||||||
dl.p = dl.p.upl(dr).xyzw(dl.p); // dr.x => dl.y
|
int j = ((a | b) ^ c).mask() ^ 2; // evil
|
||||||
|
|
||||||
DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
|
DrawEdge(v[0], v[1], dv[0], i & 1, j & 1);
|
||||||
|
DrawEdge(v[0], v[2], dv[1], i & 2, j & 2);
|
||||||
|
DrawEdge(v[1], v[2], dv[2], i & 4, j & 4);
|
||||||
|
|
||||||
|
GSVertexSW dscan;
|
||||||
|
|
||||||
|
dscan.p = GSVector4::zero();
|
||||||
|
dscan.t = GSVector4::zero();
|
||||||
|
dscan.c = GSVector4::zero();
|
||||||
|
|
||||||
|
m_ds->SetupPrim(v, dscan);
|
||||||
|
|
||||||
|
FlushEdge();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch(i)
|
||||||
|
{
|
||||||
|
case 0: // a < b < c
|
||||||
|
|
||||||
|
if(tb.x < tb.z)
|
||||||
|
{
|
||||||
|
GSVertexSW l = v[0];
|
||||||
|
GSVertexSW dl = ddv[j];
|
||||||
|
|
||||||
|
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
|
||||||
|
|
||||||
|
l.p = l.p.xxzw(); // r.x => l.y
|
||||||
|
dl.p = dl.p.upl(ddv[1 - j].p).xyzw(dl.p); // dr.x => dl.y
|
||||||
|
|
||||||
|
l += dl * dy;
|
||||||
|
|
||||||
|
DrawTriangleSection(tb.x, tb.z, l, dl, dscan);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(tb.y < tb.w)
|
||||||
|
{
|
||||||
|
GSVertexSW l = v[1 + (1 << j)];
|
||||||
|
GSVertexSW dl = ddv[2 - j];
|
||||||
|
|
||||||
|
GSVector4 dy = tbmax.zzzz() - l.p.yyyy();
|
||||||
|
|
||||||
|
l.p = l.p.upl(v[3 - (1 << j)].p).xyzw(l.p); // r.x => l.y
|
||||||
|
dl.p = dl.p.upl(ddv[1 + j].p).xyzw(dl.p); // dr.x => dl.y
|
||||||
|
|
||||||
|
l += dl * dy;
|
||||||
|
|
||||||
|
DrawTriangleSection(tb.y, tb.w, l, dl, dscan);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1: // a == b < c
|
||||||
|
|
||||||
|
if(tb.x < tb.w)
|
||||||
|
{
|
||||||
|
GSVertexSW l = v[j];
|
||||||
|
GSVertexSW dl = ddv[1 + j];
|
||||||
|
|
||||||
|
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
|
||||||
|
|
||||||
|
l.p = l.p.upl(v[1 - j].p).xyzw(l.p); // r.x => l.y
|
||||||
|
dl.p = dl.p.upl(ddv[2 - j].p).xyzw(dl.p); // dr.x => dl.y
|
||||||
|
|
||||||
|
l += dl * dy;
|
||||||
|
|
||||||
|
DrawTriangleSection(tb.x, tb.w, l, dl, dscan);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 4: // a < b == c
|
||||||
|
|
||||||
|
if(tb.x < tb.w)
|
||||||
|
{
|
||||||
|
GSVertexSW l = v[0];
|
||||||
|
GSVertexSW dl = ddv[j];
|
||||||
|
|
||||||
|
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
|
||||||
|
|
||||||
|
l.p = l.p.xxzw(); // r.x => l.y
|
||||||
|
dl.p = dl.p.upl(ddv[1 - j].p).xyzw(dl.p); // dr.x => dl.y
|
||||||
|
|
||||||
|
l += dl * dy;
|
||||||
|
|
||||||
|
DrawTriangleSection(tb.x, tb.w, l, dl, dscan);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
__assume(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_ds->SetupPrim(v, dscan);
|
||||||
|
|
||||||
|
Flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, GSVector4& r, const GSVector4& dr, const GSVertexSW& dscan, const GSVector4& fscissor)
|
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan)
|
||||||
{
|
{
|
||||||
ASSERT(top < bottom);
|
ASSERT(top < bottom);
|
||||||
|
|
||||||
while(1)
|
GSScanline* RESTRICT e = &m_edge.buff[m_edge.count];
|
||||||
{
|
|
||||||
do
|
|
||||||
{
|
|
||||||
if(IsOneOfMyScanlines(top))
|
|
||||||
{
|
|
||||||
GSVector4 lr = l.p.xyxy(r).ceil();
|
|
||||||
|
|
||||||
GSVector4 lrmax = lr.max(fscissor.xxxx());
|
|
||||||
GSVector4 lrmin = lr.min(fscissor.zzzz());
|
|
||||||
|
|
||||||
GSVector4i lri = GSVector4i(lrmax.xxzz(lrmin));
|
|
||||||
|
|
||||||
int left = lri.extract32<0>();
|
|
||||||
int right = lri.extract32<2>();
|
|
||||||
|
|
||||||
int pixels = right - left;
|
|
||||||
|
|
||||||
if(pixels > 0)
|
|
||||||
{
|
|
||||||
m_stats.pixels += pixels;
|
|
||||||
|
|
||||||
GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx();
|
|
||||||
|
|
||||||
m_ds->DrawScanline(right, left, top, scan);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while(0);
|
|
||||||
|
|
||||||
if(++top >= bottom) break;
|
|
||||||
|
|
||||||
l += dl;
|
|
||||||
r += dr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan, const GSVector4& fscissor)
|
|
||||||
{
|
|
||||||
ASSERT(top < bottom);
|
|
||||||
|
|
||||||
while(1)
|
while(1)
|
||||||
{
|
{
|
||||||
|
@ -526,8 +421,8 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
||||||
{
|
{
|
||||||
GSVector4 lr = l.p.ceil();
|
GSVector4 lr = l.p.ceil();
|
||||||
|
|
||||||
GSVector4 lrmax = lr.max(fscissor.xxxx());
|
GSVector4 lrmax = lr.max(m_fscissor.xxxx());
|
||||||
GSVector4 lrmin = lr.min(fscissor.zzzz());
|
GSVector4 lrmin = lr.min(m_fscissor.zzzz());
|
||||||
|
|
||||||
GSVector4i lri = GSVector4i(lrmax.xxyy(lrmin));
|
GSVector4i lri = GSVector4i(lrmax.xxyy(lrmin));
|
||||||
|
|
||||||
|
@ -540,9 +435,13 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
||||||
{
|
{
|
||||||
m_stats.pixels += pixels;
|
m_stats.pixels += pixels;
|
||||||
|
|
||||||
GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx();
|
e->scan = l + dscan * (lrmax - l.p).xxxx();
|
||||||
|
|
||||||
m_ds->DrawScanline(right, left, top, scan);
|
e->p.left = left;
|
||||||
|
e->p.top = top;
|
||||||
|
e->p.right = right;
|
||||||
|
|
||||||
|
e++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -552,9 +451,11 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
||||||
|
|
||||||
l += dl;
|
l += dl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_edge.count += e - &m_edge.buff[m_edge.count];
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scissor)
|
void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
|
||||||
{
|
{
|
||||||
GSVertexSW v[2];
|
GSVertexSW v[2];
|
||||||
|
|
||||||
|
@ -569,7 +470,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
|
||||||
|
|
||||||
GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
|
GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
|
||||||
|
|
||||||
r = r.rintersect(scissor);
|
r = r.rintersect(m_scissor);
|
||||||
|
|
||||||
if(r.rempty()) return;
|
if(r.rempty()) return;
|
||||||
|
|
||||||
|
@ -611,14 +512,14 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
|
||||||
{
|
{
|
||||||
if(IsOneOfMyScanlines(r.top))
|
if(IsOneOfMyScanlines(r.top))
|
||||||
{
|
{
|
||||||
m_ds->DrawScanline(r.right, r.left, r.top, scan);
|
|
||||||
|
|
||||||
m_stats.pixels += r.width();
|
m_stats.pixels += r.width();
|
||||||
|
|
||||||
|
m_ds->DrawScanline(r.right, r.left, r.top, scan);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side)
|
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side)
|
||||||
{
|
{
|
||||||
// orientation:
|
// orientation:
|
||||||
// - true: |dv.p.y| > |dv.p.x|
|
// - true: |dv.p.y| > |dv.p.x|
|
||||||
|
@ -630,14 +531,14 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
// TODO: bit slow and too much duplicated code
|
// TODO: bit slow and too much duplicated code
|
||||||
// TODO: inner pre-step is still missing (hardly noticable)
|
// TODO: inner pre-step is still missing (hardly noticable)
|
||||||
|
|
||||||
GSVector4 fscissor(scissor);
|
GSScanline* RESTRICT dst = &m_edge.buff[m_edge.count];
|
||||||
|
|
||||||
GSVector4 lrtb = v0.p.upl(v1.p).ceil();
|
GSVector4 lrtb = v0.p.upl(v1.p).ceil();
|
||||||
|
|
||||||
if(orientation)
|
if(orientation)
|
||||||
{
|
{
|
||||||
GSVector4 tbmax = lrtb.max(fscissor.yyyy());
|
GSVector4 tbmax = lrtb.max(m_fscissor.yyyy());
|
||||||
GSVector4 tbmin = lrtb.min(fscissor.wwww());
|
GSVector4 tbmin = lrtb.min(m_fscissor.wwww());
|
||||||
|
|
||||||
GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin));
|
GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin));
|
||||||
|
|
||||||
|
@ -684,15 +585,18 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
int xi = x >> 16;
|
int xi = x >> 16;
|
||||||
int xf = x & 0xffff;
|
int xf = x & 0xffff;
|
||||||
|
|
||||||
if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi))
|
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi))
|
||||||
{
|
{
|
||||||
m_stats.pixels++;
|
m_stats.pixels++;
|
||||||
|
|
||||||
edge.t.u32[3] = (0x10000 - xf) & 0xffff;
|
dst->scan = edge;
|
||||||
|
dst->scan.t.u32[3] = (0x10000 - xf) & 0xffff;
|
||||||
|
|
||||||
m_ds->DrawEdge(xi + 1, xi, top, edge);
|
dst->p.left = xi;
|
||||||
|
dst->p.top = top;
|
||||||
|
dst->p.right = xi + 1;
|
||||||
|
|
||||||
edge.t.u32[3] = 0;
|
dst++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while(0);
|
while(0);
|
||||||
|
@ -712,15 +616,18 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
int xi = (x >> 16) + 1;
|
int xi = (x >> 16) + 1;
|
||||||
int xf = x & 0xffff;
|
int xf = x & 0xffff;
|
||||||
|
|
||||||
if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi))
|
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi))
|
||||||
{
|
{
|
||||||
m_stats.pixels++;
|
m_stats.pixels++;
|
||||||
|
|
||||||
edge.t.u32[3] = xf;
|
dst->scan = edge;
|
||||||
|
dst->scan.t.u32[3] = xf;
|
||||||
|
|
||||||
m_ds->DrawEdge(xi + 1, xi, top, edge);
|
dst->p.left = xi;
|
||||||
|
dst->p.top = top;
|
||||||
|
dst->p.right = xi + 1;
|
||||||
|
|
||||||
edge.t.u32[3] = 0;
|
dst++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while(0);
|
while(0);
|
||||||
|
@ -734,8 +641,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
GSVector4 lrmax = lrtb.max(fscissor.xxxx());
|
GSVector4 lrmax = lrtb.max(m_fscissor.xxxx());
|
||||||
GSVector4 lrmin = lrtb.min(fscissor.zzzz());
|
GSVector4 lrmin = lrtb.min(m_fscissor.zzzz());
|
||||||
|
|
||||||
GSVector4i lri = GSVector4i(lrmax.xyxy(lrmin));
|
GSVector4i lri = GSVector4i(lrmax.xyxy(lrmin));
|
||||||
|
|
||||||
|
@ -782,15 +689,18 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
int yi = y >> 16;
|
int yi = y >> 16;
|
||||||
int yf = y & 0xffff;
|
int yf = y & 0xffff;
|
||||||
|
|
||||||
if(scissor.top <= yi && yi < scissor.bottom && IsOneOfMyScanlines(yi))
|
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
|
||||||
{
|
{
|
||||||
m_stats.pixels++;
|
m_stats.pixels++;
|
||||||
|
|
||||||
edge.t.u32[3] = (0x10000 - yf) & 0xffff;
|
dst->scan = edge;
|
||||||
|
dst->scan.t.u32[3] = (0x10000 - yf) & 0xffff;
|
||||||
|
|
||||||
m_ds->DrawEdge(left + 1, left, yi, edge);
|
dst->p.left = left;
|
||||||
|
dst->p.top = yi;
|
||||||
|
dst->p.right = left + 1;
|
||||||
|
|
||||||
edge.t.u32[3] = 0;
|
dst++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while(0);
|
while(0);
|
||||||
|
@ -810,15 +720,18 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
int yi = (y >> 16) + 1;
|
int yi = (y >> 16) + 1;
|
||||||
int yf = y & 0xffff;
|
int yf = y & 0xffff;
|
||||||
|
|
||||||
if(scissor.top <= yi && yi < scissor.bottom && IsOneOfMyScanlines(yi))
|
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
|
||||||
{
|
{
|
||||||
m_stats.pixels++;
|
m_stats.pixels++;
|
||||||
|
|
||||||
edge.t.u32[3] = yf;
|
dst->scan = edge;
|
||||||
|
dst->scan.t.u32[3] = yf;
|
||||||
|
|
||||||
m_ds->DrawEdge(left + 1, left, yi, edge);
|
dst->p.left = left;
|
||||||
|
dst->p.top = yi;
|
||||||
|
dst->p.right = left + 1;
|
||||||
|
|
||||||
edge.t.u32[3] = 0;
|
dst++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while(0);
|
while(0);
|
||||||
|
@ -830,6 +743,34 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_edge.count += dst - &m_edge.buff[m_edge.count];
|
||||||
|
}
|
||||||
|
|
||||||
|
void GSRasterizer::Flush()
|
||||||
|
{
|
||||||
|
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
|
||||||
|
|
||||||
|
const GSScanline* s = m_edge.buff;
|
||||||
|
|
||||||
|
for(int count = m_edge.count; count > 0; count--, s++)
|
||||||
|
{
|
||||||
|
m_ds->DrawScanline(s->p.right, s->p.left, s->p.top, s->scan);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_edge.count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void GSRasterizer::FlushEdge()
|
||||||
|
{
|
||||||
|
const GSScanline* s = m_edge.buff;
|
||||||
|
|
||||||
|
for(int count = m_edge.count; count > 0; count--, s++)
|
||||||
|
{
|
||||||
|
m_ds->DrawEdge(s->p.right, s->p.left, s->p.top, s->scan);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_edge.count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
|
@ -59,7 +59,7 @@ public:
|
||||||
virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0;
|
virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0;
|
||||||
virtual void PrintStats() = 0;
|
virtual void PrintStats() = 0;
|
||||||
|
|
||||||
__forceinline void SetupPrim(const GSVertexSW* v, const GSVertexSW& dscan) {m_sp(v, dscan);}
|
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
|
||||||
__forceinline void DrawScanline(int right, int left, int top, const GSVertexSW& scan) {m_ds(right, left, top, scan);}
|
__forceinline void DrawScanline(int right, int left, int top, const GSVertexSW& scan) {m_ds(right, left, top, scan);}
|
||||||
__forceinline void DrawEdge(int right, int left, int top, const GSVertexSW& scan) {m_de(right, left, top, scan);}
|
__forceinline void DrawEdge(int right, int left, int top, const GSVertexSW& scan) {m_de(right, left, top, scan);}
|
||||||
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
|
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
|
||||||
|
@ -79,30 +79,33 @@ public:
|
||||||
virtual void SetThreadId(int id, int threads) = 0;
|
virtual void SetThreadId(int id, int threads) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSRasterizer : public IRasterizer
|
__aligned(class, 32) GSRasterizer : public GSAlignedClass<32>, public IRasterizer
|
||||||
{
|
{
|
||||||
|
struct GSScanline {GSVertexSW scan; GSVector4i p;};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
IDrawScanline* m_ds;
|
IDrawScanline* m_ds;
|
||||||
int m_id;
|
int m_id;
|
||||||
int m_threads;
|
int m_threads;
|
||||||
GSRasterizerStats m_stats;
|
GSRasterizerStats m_stats;
|
||||||
|
GSVector4i m_scissor;
|
||||||
|
GSVector4 m_fscissor;
|
||||||
|
struct {GSScanline* buff; int count;} m_edge;
|
||||||
|
|
||||||
void DrawPoint(const GSVertexSW* v, const GSVector4i& scissor);
|
void DrawPoint(const GSVertexSW* v);
|
||||||
void DrawLine(const GSVertexSW* v, const GSVector4i& scissor);
|
void DrawLine(const GSVertexSW* v);
|
||||||
void DrawTriangle(const GSVertexSW* v, const GSVector4i& scissor);
|
void DrawTriangle(const GSVertexSW* v);
|
||||||
void DrawEdge(const GSVertexSW* v, const GSVector4i& scissor);
|
void DrawSprite(const GSVertexSW* v);
|
||||||
void DrawSprite(const GSVertexSW* v, const GSVector4i& scissor);
|
void DrawEdge(const GSVertexSW* v);
|
||||||
|
|
||||||
void DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor);
|
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan);
|
||||||
void DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor);
|
|
||||||
void DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scissor);
|
|
||||||
|
|
||||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, GSVector4& r, const GSVector4& dr, const GSVertexSW& dscan, const GSVector4& scissor);
|
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
|
||||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan, const GSVector4& scissor);
|
|
||||||
|
|
||||||
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side);
|
__forceinline bool IsOneOfMyScanlines(int scanline) const;
|
||||||
|
|
||||||
inline bool IsOneOfMyScanlines(int scanline) const;
|
void Flush();
|
||||||
|
void FlushEdge();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRasterizer(IDrawScanline* ds);
|
GSRasterizer(IDrawScanline* ds);
|
||||||
|
|
|
@ -76,8 +76,6 @@ bool GSRenderer::CreateDevice(GSDevice* dev)
|
||||||
|
|
||||||
void GSRenderer::ResetDevice()
|
void GSRenderer::ResetDevice()
|
||||||
{
|
{
|
||||||
InvalidateTextureCache();
|
|
||||||
|
|
||||||
ResetPrim();
|
ResetPrim();
|
||||||
|
|
||||||
if(m_dev) m_dev->Reset(1, 1);
|
if(m_dev) m_dev->Reset(1, 1);
|
||||||
|
|
|
@ -139,7 +139,7 @@ protected:
|
||||||
{
|
{
|
||||||
if(m_vertices != NULL) _aligned_free(m_vertices);
|
if(m_vertices != NULL) _aligned_free(m_vertices);
|
||||||
|
|
||||||
m_maxcount = max(10000, m_maxcount * 3/2);
|
m_maxcount = std::max<int>(10000, m_maxcount * 3 / 2);
|
||||||
m_vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * m_maxcount, 32);
|
m_vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * m_maxcount, 32);
|
||||||
m_maxcount -= 100;
|
m_maxcount -= 100;
|
||||||
}
|
}
|
||||||
|
|
|
@ -484,13 +484,10 @@ protected:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void InvalidateTextureCache()
|
|
||||||
{
|
|
||||||
m_tc->RemoveAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
void ResetDevice()
|
void ResetDevice()
|
||||||
{
|
{
|
||||||
|
m_tc->RemoveAll();
|
||||||
|
|
||||||
__super::ResetDevice();
|
__super::ResetDevice();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -372,12 +372,12 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
||||||
|
|
||||||
if(gd.sel.ltf)
|
if(gd.sel.ltf)
|
||||||
{
|
{
|
||||||
GSVector4 half(0x8000, 0x8000);
|
|
||||||
|
|
||||||
if(gd.sel.fst)
|
if(gd.sel.fst)
|
||||||
{
|
{
|
||||||
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
|
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
|
||||||
|
|
||||||
|
GSVector4 half(0x8000, 0x8000);
|
||||||
|
|
||||||
GSVertexSW* v = m_vertices;
|
GSVertexSW* v = m_vertices;
|
||||||
|
|
||||||
for(int i = 0, j = m_count; i < j; i++)
|
for(int i = 0, j = m_count; i < j; i++)
|
||||||
|
|
|
@ -196,8 +196,6 @@ void GSState::Reset()
|
||||||
m_env.Reset();
|
m_env.Reset();
|
||||||
|
|
||||||
m_context = &m_env.CTXT[0];
|
m_context = &m_env.CTXT[0];
|
||||||
|
|
||||||
InvalidateTextureCache();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::ResetHandlers()
|
void GSState::ResetHandlers()
|
||||||
|
@ -770,8 +768,6 @@ void GSState::GIFRegHandlerFOGCOL(const GIFReg* r)
|
||||||
void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* r)
|
void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* r)
|
||||||
{
|
{
|
||||||
// TRACE(_T("TEXFLUSH\n"));
|
// TRACE(_T("TEXFLUSH\n"));
|
||||||
|
|
||||||
// InvalidateTextureCache();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
|
template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* r)
|
||||||
|
@ -903,6 +899,7 @@ template<int i> void GSState::GIFRegHandlerFRAME(const GIFReg* r)
|
||||||
template<int i> void GSState::GIFRegHandlerZBUF(const GIFReg* r)
|
template<int i> void GSState::GIFRegHandlerZBUF(const GIFReg* r)
|
||||||
{
|
{
|
||||||
GIFRegZBUF ZBUF = r->ZBUF;
|
GIFRegZBUF ZBUF = r->ZBUF;
|
||||||
|
|
||||||
if(ZBUF.u32[0] == 0)
|
if(ZBUF.u32[0] == 0)
|
||||||
{
|
{
|
||||||
// during startup all regs are cleared to 0 (by the bios or something), so we mask z until this register becomes valid
|
// during startup all regs are cleared to 0 (by the bios or something), so we mask z until this register becomes valid
|
||||||
|
@ -1396,9 +1393,6 @@ template void GSState::Transfer<3>(const uint8* mem, uint32 size);
|
||||||
|
|
||||||
template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
||||||
{
|
{
|
||||||
// [TODO] make me into a template parameter... I think. --air
|
|
||||||
static const bool FrameSkipIt = false;
|
|
||||||
|
|
||||||
GSPerfMonAutoTimer pmat(m_perfmon);
|
GSPerfMonAutoTimer pmat(m_perfmon);
|
||||||
|
|
||||||
const uint8* start = mem;
|
const uint8* start = mem;
|
||||||
|
@ -1420,7 +1414,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
||||||
|
|
||||||
// ASSERT(!(path.tag.PRE && path.tag.FLG == GIF_FLG_REGLIST)); // kingdom hearts
|
// ASSERT(!(path.tag.PRE && path.tag.FLG == GIF_FLG_REGLIST)); // kingdom hearts
|
||||||
|
|
||||||
if(path.tag.PRE && (path.tag.FLG == GIF_FLG_PACKED) && !FrameSkipIt)
|
if(path.tag.PRE && path.tag.FLG == GIF_FLG_PACKED)
|
||||||
{
|
{
|
||||||
GIFRegPRIM r;
|
GIFRegPRIM r;
|
||||||
r.u64 = path.tag.PRIM;
|
r.u64 = path.tag.PRIM;
|
||||||
|
@ -1551,7 +1545,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
||||||
if(m_mt)
|
if(m_mt)
|
||||||
{
|
{
|
||||||
// Hackfix for BIOS, which sends an incomplete packet when it does an XGKICK without
|
// Hackfix for BIOS, which sends an incomplete packet when it does an XGKICK without
|
||||||
// having an EOP specified anywhere in VU1 memory. Needed until PCSX2 is fixed t
|
// having an EOP specified anywhere in VU1 memory. Needed until PCSX2 is fixed to
|
||||||
// handle it more properly (ie, without looping infinitely).
|
// handle it more properly (ie, without looping infinitely).
|
||||||
|
|
||||||
path.nloop = 0;
|
path.nloop = 0;
|
||||||
|
@ -1802,7 +1796,7 @@ bool GSState::GSTransferBuffer::Update(int tw, int th, int bpp, int& len)
|
||||||
if(total == 0)
|
if(total == 0)
|
||||||
{
|
{
|
||||||
start = end = 0;
|
start = end = 0;
|
||||||
total = min((tw * bpp >> 3) * th, 1024 * 1024 * 4);
|
total = std::min<int>((tw * bpp >> 3) * th, 1024 * 1024 * 4);
|
||||||
overflow = false;
|
overflow = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,6 @@
|
||||||
#include "GSVertex.h"
|
#include "GSVertex.h"
|
||||||
#include "GSVertexList.h"
|
#include "GSVertexList.h"
|
||||||
#include "GSUtil.h"
|
#include "GSUtil.h"
|
||||||
#include "GSDirtyRect.h"
|
|
||||||
#include "GSPerfMon.h"
|
#include "GSPerfMon.h"
|
||||||
#include "GSVector.h"
|
#include "GSVector.h"
|
||||||
#include "GSDevice.h"
|
#include "GSDevice.h"
|
||||||
|
@ -208,7 +207,6 @@ public:
|
||||||
virtual void ResetPrim() = 0;
|
virtual void ResetPrim() = 0;
|
||||||
virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
|
virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
|
||||||
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
|
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
|
||||||
virtual void InvalidateTextureCache() {}
|
|
||||||
|
|
||||||
void Move();
|
void Move();
|
||||||
void Write(const uint8* mem, int len);
|
void Write(const uint8* mem, int len);
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "GSRenderer.h"
|
#include "GSRenderer.h"
|
||||||
|
#include "GSDirtyRect.h"
|
||||||
|
|
||||||
class GSTextureCache
|
class GSTextureCache
|
||||||
{
|
{
|
||||||
|
|
|
@ -23,15 +23,15 @@
|
||||||
|
|
||||||
#include "GSVector.h"
|
#include "GSVector.h"
|
||||||
|
|
||||||
__aligned(struct, 32) GSVertexSW
|
__aligned(struct, 16) GSVertexSW
|
||||||
{
|
{
|
||||||
GSVector4 c, p, t;
|
GSVector4 c, p, t;
|
||||||
|
|
||||||
GSVertexSW() {}
|
GSVertexSW() {}
|
||||||
GSVertexSW(const GSVertexSW& v) {*this = v;}
|
GSVertexSW(const GSVertexSW& v) {*this = v;}
|
||||||
|
|
||||||
void operator = (const GSVertexSW& v) {c = v.c; p = v.p; t = v.t;}
|
__forceinline void operator = (const GSVertexSW& v) {c = v.c; p = v.p; t = v.t;}
|
||||||
void operator += (const GSVertexSW& v) {c += v.c; p += v.p; t += v.t;}
|
__forceinline void operator += (const GSVertexSW& v) {c += v.c; p += v.p; t += v.t;}
|
||||||
|
|
||||||
friend GSVertexSW operator + (const GSVertexSW& v1, const GSVertexSW& v2);
|
friend GSVertexSW operator + (const GSVertexSW& v1, const GSVertexSW& v2);
|
||||||
friend GSVertexSW operator - (const GSVertexSW& v1, const GSVertexSW& v2);
|
friend GSVertexSW operator - (const GSVertexSW& v1, const GSVertexSW& v2);
|
||||||
|
|
|
@ -528,7 +528,7 @@
|
||||||
<ClCompile Include="GSLocalMemory.cpp" />
|
<ClCompile Include="GSLocalMemory.cpp" />
|
||||||
<ClCompile Include="GSPerfMon.cpp" />
|
<ClCompile Include="GSPerfMon.cpp" />
|
||||||
<ClCompile Include="GSRasterizer.cpp">
|
<ClCompile Include="GSRasterizer.cpp">
|
||||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">All</AssemblerOutput>
|
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">AssemblyAndSourceCode</AssemblerOutput>
|
||||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
|
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="GSRenderer.cpp" />
|
<ClCompile Include="GSRenderer.cpp" />
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
/*
|
||||||
|
* INTEL CONFIDENTIAL
|
||||||
|
* Copyright (2008-2009) Intel Corporation All Rights Reserved.
|
||||||
|
* The source code contained or described herein and all documents
|
||||||
|
* related to the source code ("Material") are owned by Intel Corporation
|
||||||
|
* or its suppliers or licensors. Title to the Material remains with
|
||||||
|
* Intel Corporation or its suppliers and licensors. The Material
|
||||||
|
* contains trade secrets and proprietary and confidential information
|
||||||
|
* of Intel or its suppliers and licensors. The Material is protected
|
||||||
|
* by worldwide copyright and trade secret laws and treaty provisions.
|
||||||
|
* No part of the Material may be used, copied, reproduced, modified,
|
||||||
|
* published, uploaded, posted, transmitted, distributed, or disclosed
|
||||||
|
* in any way without Intel’s prior express written permission.
|
||||||
|
*
|
||||||
|
* No license under any patent, copyright, trade secret or other
|
||||||
|
* intellectual property right is granted to or conferred upon you by
|
||||||
|
* disclosure or delivery of the Materials, either expressly, by implication,
|
||||||
|
* inducement, estoppel or otherwise. Any license under such intellectual
|
||||||
|
* property rights must be express and approved by Intel in writing.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/********************************************************/
|
||||||
|
/* Binaries that contain IACA_MARKS will not run. */
|
||||||
|
/* Define IACA_MARKS_OFF when you compile your sources, */
|
||||||
|
/* to disable IACA_START, IACA_END, IACA_MSC64_START */
|
||||||
|
/* and IACA_MSC64_END */
|
||||||
|
/********************************************************/
|
||||||
|
#ifdef IACA_MARKS_OFF
|
||||||
|
|
||||||
|
#define IACA_START
|
||||||
|
#define IACA_END
|
||||||
|
#define IACA_MSC64_START
|
||||||
|
#define IACA_MSC64_END
|
||||||
|
|
||||||
|
#else
|
||||||
|
#if defined (__GNUC__)
|
||||||
|
#define IACA_SSC_MARK( MARK_ID ) \
|
||||||
|
__asm__ __volatile__ ( \
|
||||||
|
"\n\t movl $"#MARK_ID", %%ebx" \
|
||||||
|
"\n\t .byte 0x64, 0x67, 0x90" \
|
||||||
|
: : : "memory" );
|
||||||
|
|
||||||
|
#define IACA_UD_BYTES __asm__ __volatile__ ("\n\t .byte 0x0F, 0x0B");
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define IACA_UD_BYTES {__asm _emit 0x0F \
|
||||||
|
__asm _emit 0x0B}
|
||||||
|
|
||||||
|
#define IACA_SSC_MARK(x) {__asm mov ebx, x\
|
||||||
|
__asm _emit 0x64 \
|
||||||
|
__asm _emit 0x67 \
|
||||||
|
__asm _emit 0x90 }
|
||||||
|
|
||||||
|
#define IACA_VC64_START __writegsbyte(111, 111);
|
||||||
|
#define IACA_VC64_END __writegsbyte(222, 222);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define IACA_START {IACA_UD_BYTES \
|
||||||
|
IACA_SSC_MARK(111)}
|
||||||
|
#define IACA_END {IACA_SSC_MARK(222) \
|
||||||
|
IACA_UD_BYTES}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**************** asm *****************
|
||||||
|
;START_MARKER
|
||||||
|
mov ebx, 111
|
||||||
|
db 0x64, 0x67, 0x90
|
||||||
|
|
||||||
|
;END_MARKER
|
||||||
|
mov ebx, 222
|
||||||
|
db 0x64, 0x67, 0x90
|
||||||
|
|
||||||
|
**************************************/
|
Loading…
Reference in New Issue