mirror of https://github.com/PCSX2/pcsx2.git
GSdx: only minor changes
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4494 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c2266c3edc
commit
8ca01f4b77
|
@ -100,7 +100,7 @@ protected:
|
|||
r.right = r.left + 256;
|
||||
r.bottom = r.top + 256;
|
||||
|
||||
Dump(format("da_%d_%d_%d_%d_%d", m_env.STATUS.TP, r).c_str(), m_env.STATUS.TP, r, false);
|
||||
Dump(format("da_%d_%d_%d_%d_%d", m_env.STATUS.TP, r.left, r.top, r.right, r.bottom).c_str(), m_env.STATUS.TP, r, false);
|
||||
}
|
||||
*/
|
||||
|
||||
|
|
|
@ -610,20 +610,23 @@ int GPUState::PH_Read(GPUReg* r, int size)
|
|||
int w = r[2].XY.X;
|
||||
int h = r[2].XY.Y;
|
||||
|
||||
GSVector4i r2;
|
||||
if(w > 0 && h > 0)
|
||||
{
|
||||
GSVector4i r2;
|
||||
|
||||
r2.left = r[1].XY.X;
|
||||
r2.top = r[1].XY.Y;
|
||||
r2.right = r2.left + w;
|
||||
r2.bottom = r2.top + h;
|
||||
r2.left = r[1].XY.X;
|
||||
r2.top = r[1].XY.Y;
|
||||
r2.right = r2.left + w;
|
||||
r2.bottom = r2.top + h;
|
||||
|
||||
m_read.bytes = ((w * h + 1) & ~1) * 2;
|
||||
m_read.cur = 0;
|
||||
m_read.Reserve(m_read.bytes);
|
||||
m_read.bytes = ((w * h + 1) & ~1) * 2;
|
||||
m_read.cur = 0;
|
||||
m_read.Reserve(m_read.bytes);
|
||||
|
||||
m_mem.ReadRect(r2, (uint16*)m_read.buff);
|
||||
m_mem.ReadRect(r2, (uint16*)m_read.buff);
|
||||
|
||||
Dump("r");
|
||||
Dump("r");
|
||||
}
|
||||
|
||||
m_env.STATUS.IMG = 1;
|
||||
|
||||
|
|
|
@ -2764,42 +2764,33 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
|
||||
if(m_sel.mmin)
|
||||
{
|
||||
int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
|
||||
const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
|
||||
|
||||
if(pixels == 4)
|
||||
{
|
||||
vmovdqa(ptr[&m_local.temp.test], xmm7);
|
||||
}
|
||||
|
||||
for(int j = 0; j < 4; j++)
|
||||
for(int j = 0; j < 4; j++)
|
||||
{
|
||||
mov(ebx, ptr[&lod_i->u32[j]]);
|
||||
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
for(int i = 0; i < pixels; i++)
|
||||
{
|
||||
mov(ebx, ptr[&lod_i->u32[j]]);
|
||||
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
}
|
||||
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
}
|
||||
}
|
||||
|
||||
if(pixels == 4)
|
||||
{
|
||||
vmovdqa(xmm5, xmm7);
|
||||
vmovdqa(xmm7, ptr[&m_local.temp.test]);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int j = 0; j < 4; j++)
|
||||
{
|
||||
mov(ebx, ptr[&lod_i->u32[j]]);
|
||||
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm6, xmm5, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
|
||||
|
||||
// TODO: might be faster to read in columns, inserts into the same register would be further from each other (last one overwrites xmm5, need to use xmm7)
|
||||
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
|
||||
|
||||
for(int i = 0; i < pixels; i++)
|
||||
{
|
||||
|
|
|
@ -2928,36 +2928,29 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
{
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
|
||||
const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
|
||||
|
||||
if(pixels == 4)
|
||||
{
|
||||
movdqa(ptr[&m_local.temp.test], xmm7);
|
||||
}
|
||||
|
||||
for(int j = 0; j < 4; j++)
|
||||
for(int j = 0; j < 4; j++)
|
||||
{
|
||||
mov(ebx, ptr[&lod_i->u32[j]]);
|
||||
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
for(int i = 0; i < pixels; i++)
|
||||
{
|
||||
mov(ebx, ptr[&lod_i->u32[j]]);
|
||||
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
}
|
||||
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
}
|
||||
}
|
||||
|
||||
if(pixels == 4)
|
||||
{
|
||||
movdqa(xmm5, xmm7);
|
||||
movdqa(xmm7, ptr[&m_local.temp.test]);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int j = 0; j < 4; j++)
|
||||
{
|
||||
mov(ebx, ptr[&lod_i->u32[j]]);
|
||||
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm6, xmm5, j);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
|
@ -3082,12 +3075,10 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
}
|
||||
else
|
||||
{
|
||||
int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
|
||||
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
// TODO: might be faster to read in columns, inserts into the same register would be further from each other (last one overwrites xmm5, need to use xmm7)
|
||||
|
||||
for(int i = 0; i < pixels; i++)
|
||||
{
|
||||
for(int j = 0; j < 4; j++)
|
||||
|
@ -3098,7 +3089,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
|
||||
#else
|
||||
|
||||
int t[] = {1, 4, 1, 5, 2, 5, 2, 0};
|
||||
const int t[] = {1, 4, 1, 5, 2, 5, 2, 0};
|
||||
|
||||
for(int i = 0; i < pixels; i++)
|
||||
{
|
||||
|
|
|
@ -86,7 +86,8 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
|
|||
{
|
||||
case GS_POINT_CLASS:
|
||||
m_stats.prims = count;
|
||||
for(int i = 0; i < count; i++) DrawPoint(&vertices[i]);
|
||||
if(data->scissor_test) DrawPoint<true>(vertices, count);
|
||||
else DrawPoint<false>(vertices, count);
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
ASSERT(!(count & 1));
|
||||
|
@ -117,19 +118,23 @@ void GSRasterizer::GetStats(GSRasterizerStats& stats)
|
|||
stats = m_stats;
|
||||
}
|
||||
|
||||
void GSRasterizer::DrawPoint(const GSVertexSW* v)
|
||||
template<bool scissor_test>
|
||||
void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
|
||||
{
|
||||
GSVector4i p(v->p);
|
||||
|
||||
if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
|
||||
for(; count > 0; count--, v++)
|
||||
{
|
||||
if(IsOneOfMyScanlines(p.y))
|
||||
GSVector4i p(v->p);
|
||||
|
||||
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
|
||||
{
|
||||
m_stats.pixels++;
|
||||
if(IsOneOfMyScanlines(p.y))
|
||||
{
|
||||
m_stats.pixels++;
|
||||
|
||||
m_ds->SetupPrim(v, *v);
|
||||
m_ds->SetupPrim(v, *v);
|
||||
|
||||
m_ds->DrawScanline(1, p.x, p.y, *v);
|
||||
m_ds->DrawScanline(1, p.x, p.y, *v);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -206,11 +211,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
{
|
||||
if(IsOneOfMyScanlines(p.y))
|
||||
{
|
||||
*e = edge;
|
||||
|
||||
e->p.i16[0] = (int16)p.x;
|
||||
e->p.i16[1] = (int16)p.y;
|
||||
e->p.i16[2] = 1;
|
||||
AddScanline(e, 1, p.x, p.y, edge);
|
||||
|
||||
e++;
|
||||
}
|
||||
|
@ -243,8 +244,6 @@ static const int s_abc[8][4] =
|
|||
|
||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
||||
{
|
||||
// TODO: GSVertexSW::c/t could be merged into a GSVector8
|
||||
|
||||
GSVertexSW v[4];
|
||||
GSVertexSW dv[3];
|
||||
GSVertexSW ddv[3];
|
||||
|
@ -338,7 +337,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
|
||||
|
||||
l.p = l.p.xxzw(); // r.x => l.y
|
||||
dl.p = dl.p.upl(ddv[1 - j].p).xyzw(dl.p); // dr.x => dl.y
|
||||
dl.p = dl.p.insert<0, 1>(ddv[1 - j].p); // dr.x => dl.y
|
||||
|
||||
l += dl * dy;
|
||||
|
||||
|
@ -351,13 +350,13 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
// v[3] isn't accurate enough, it may leave gaps horizontally if it happens to be on the left side of the triangle
|
||||
// example: previous triangle's scanline ends on 48.9999, this one's starts from 49.0001, the pixel at 49 isn't drawn
|
||||
|
||||
GSVertexSW l = v[1 + (1 << j)];
|
||||
GSVertexSW l = v[1 + (j << 1)];
|
||||
GSVertexSW dl = ddv[2 - j];
|
||||
|
||||
GSVector4 dy = tbmax.zzzz() - l.p.yyyy();
|
||||
|
||||
l.p = l.p.upl(v[3 - (1 << j)].p).xyzw(l.p); // r.x => l.y
|
||||
dl.p = dl.p.upl(ddv[1 + j].p).xyzw(dl.p); // dr.x => dl.y
|
||||
l.p = l.p.insert<0, 1>(v[3 - (j << 1)].p); // r.x => l.y
|
||||
dl.p = dl.p.insert<0, 1>(ddv[1 + j].p); // dr.x => dl.y
|
||||
|
||||
l += dl * dy;
|
||||
|
||||
|
@ -375,8 +374,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
|
||||
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
|
||||
|
||||
l.p = l.p.upl(v[1 - j].p).xyzw(l.p); // r.x => l.y
|
||||
dl.p = dl.p.upl(ddv[2 - j].p).xyzw(dl.p); // dr.x => dl.y
|
||||
l.p = l.p.insert<0, 1>(v[1 - j].p); // r.x => l.y
|
||||
dl.p = dl.p.insert<0, 1>(ddv[2 - j].p); // dr.x => dl.y
|
||||
|
||||
l += dl * dy;
|
||||
|
||||
|
@ -395,7 +394,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
|
||||
|
||||
l.p = l.p.xxzw(); // r.x => l.y
|
||||
dl.p = dl.p.upl(ddv[1 - j].p).xyzw(dl.p); // dr.x => dl.y
|
||||
dl.p = dl.p.insert<0, 1>(ddv[1 - j].p); // dr.x => dl.y
|
||||
|
||||
l += dl * dy;
|
||||
|
||||
|
@ -417,13 +416,15 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
|
||||
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
|
||||
|
||||
GSVector4 scissor = m_fscissor.xzxz();
|
||||
|
||||
while(1)
|
||||
{
|
||||
if(IsOneOfMyScanlines(top))
|
||||
{
|
||||
GSVector4 lrf = l.p.ceil();
|
||||
GSVector4 lrmax = lrf.max(m_fscissor.xzxz());
|
||||
GSVector4 lrmin = lrf.min(m_fscissor.xzxz());
|
||||
GSVector4 lrmax = lrf.max(scissor);
|
||||
GSVector4 lrmin = lrf.min(scissor);
|
||||
GSVector4i lr = GSVector4i(lrmax.xxyy(lrmin));
|
||||
|
||||
int left = lr.extract32<0>();
|
||||
|
@ -435,11 +436,9 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
{
|
||||
m_stats.pixels += pixels;
|
||||
|
||||
*e = l + dscan * (lrmax - l.p).xxxx();
|
||||
GSVector4 prestep = lrmax - l.p;
|
||||
|
||||
e->p.i16[0] = (int16)left;
|
||||
e->p.i16[1] = (int16)top;
|
||||
e->p.i16[2] = (int16)pixels;
|
||||
AddScanline(e, pixels, left, top, l + dscan * prestep.xxxx());
|
||||
|
||||
e++;
|
||||
}
|
||||
|
@ -496,8 +495,12 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
|
|||
dedge.t = (dv.t / dv.p.yyyy()).xyxy(zero).wyww();
|
||||
dscan.t = (dv.t / dv.p.xxxx()).xyxy(zero).xwww();
|
||||
|
||||
if(scan.p.y < (float)r.top) scan.t += dedge.t * ((float)r.top - scan.p.y);
|
||||
if(scan.p.x < (float)r.left) scan.t += dscan.t * ((float)r.left - scan.p.x);
|
||||
GSVector4 prestep = GSVector4(r.left, r.top) - scan.p;
|
||||
|
||||
int m = (prestep == GSVector4::zero()).mask();
|
||||
|
||||
if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
|
||||
if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
|
||||
|
||||
m_ds->SetupPrim(v, dscan);
|
||||
|
||||
|
@ -581,16 +584,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi))
|
||||
{
|
||||
m_stats.pixels++;
|
||||
|
||||
*e = edge;
|
||||
AddScanline(e, 1, xi, top, edge);
|
||||
|
||||
e->t.u32[3] = (0x10000 - xf) & 0xffff;
|
||||
|
||||
e->p.i16[0] = (int16)xi;
|
||||
e->p.i16[1] = (int16)top;
|
||||
e->p.i16[2] = 1;
|
||||
|
||||
e++;
|
||||
}
|
||||
|
||||
|
@ -609,16 +606,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi))
|
||||
{
|
||||
m_stats.pixels++;
|
||||
|
||||
*e = edge;
|
||||
AddScanline(e, 1, xi, top, edge);
|
||||
|
||||
e->t.u32[3] = xf;
|
||||
|
||||
e->p.i16[0] = (int16)xi;
|
||||
e->p.i16[1] = (int16)top;
|
||||
e->p.i16[2] = 1;
|
||||
|
||||
e++;
|
||||
}
|
||||
|
||||
|
@ -678,16 +669,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
|
||||
{
|
||||
m_stats.pixels++;
|
||||
AddScanline(e, 1, left, yi, edge);
|
||||
|
||||
*e = edge;
|
||||
|
||||
e->t.u32[3] = (0x10000 - yf) & 0xffff;
|
||||
|
||||
e->p.i16[0] = (int16)left;
|
||||
e->p.i16[1] = (int16)yi;
|
||||
e->p.i16[2] = 1;
|
||||
|
||||
e++;
|
||||
}
|
||||
|
||||
|
@ -706,16 +691,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
|
||||
{
|
||||
m_stats.pixels++;
|
||||
AddScanline(e, 1, left, yi, edge);
|
||||
|
||||
*e = edge;
|
||||
|
||||
e->t.u32[3] = yf;
|
||||
|
||||
e->p.i16[0] = (int16)left;
|
||||
e->p.i16[1] = (int16)yi;
|
||||
e->p.i16[2] = 1;
|
||||
|
||||
e++;
|
||||
}
|
||||
|
||||
|
@ -727,7 +706,20 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
}
|
||||
}
|
||||
|
||||
m_edge.count += e - &m_edge.buff[m_edge.count];
|
||||
int count = e - &m_edge.buff[m_edge.count];
|
||||
|
||||
m_stats.pixels += count;
|
||||
|
||||
m_edge.count += count;
|
||||
}
|
||||
|
||||
// Writes one pending scanline record into the edge-buffer slot `e`.
//
// The whole vertex `scan` is copied into *e first; the first three 16-bit
// lanes of e->p are then overwritten with the span header:
//   i16[0] = pixels, i16[1] = left, i16[2] = top
// Each value is narrowed with an (int16) cast, so callers are presumably
// expected to pass values that fit in 16 bits -- TODO confirm.
// Flush() reads the header back from the same lanes in the same order.
// The caller is responsible for advancing its own write pointer afterwards.
void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan)
|
||||
{
|
||||
*e = scan;
|
||||
|
||||
// Header lanes are written after the copy so they replace whatever scan.p held there.
e->p.i16[0] = (int16)pixels;
|
||||
e->p.i16[1] = (int16)left;
|
||||
e->p.i16[2] = (int16)top;
|
||||
}
|
||||
|
||||
void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge)
|
||||
|
@ -741,18 +733,31 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
|
|||
m_ds->SetupPrim(vertices, dscan);
|
||||
|
||||
const GSVertexSW* RESTRICT e = m_edge.buff;
|
||||
|
||||
int i = 0;
|
||||
const GSVertexSW* RESTRICT ee = e + count;
|
||||
|
||||
if(!edge)
|
||||
{
|
||||
do {m_ds->DrawScanline(e[i].p.i16[2], e[i].p.i16[0], e[i].p.i16[1], e[i]);}
|
||||
while(++i < count);
|
||||
do
|
||||
{
|
||||
int pixels = e->p.i16[0];
|
||||
int left = e->p.i16[1];
|
||||
int top = e->p.i16[2];
|
||||
|
||||
m_ds->DrawScanline(pixels, left, top, *e++);
|
||||
}
|
||||
while(e < ee);
|
||||
}
|
||||
else
|
||||
{
|
||||
do {m_ds->DrawEdge(e[i].p.i16[2], e[i].p.i16[0], e[i].p.i16[1], e[i]);}
|
||||
while(++i < count);
|
||||
do
|
||||
{
|
||||
int pixels = e->p.i16[0];
|
||||
int left = e->p.i16[1];
|
||||
int top = e->p.i16[2];
|
||||
|
||||
m_ds->DrawEdge(pixels, left, top, *e++);
|
||||
}
|
||||
while(e < ee);
|
||||
}
|
||||
|
||||
m_edge.count = 0;
|
||||
|
|
|
@ -31,11 +31,14 @@ __aligned(class, 32) GSRasterizerData
|
|||
{
|
||||
public:
|
||||
GSVector4i scissor;
|
||||
bool scissor_test;
|
||||
GS_PRIM_CLASS primclass;
|
||||
const GSVertexSW* vertices;
|
||||
int count;
|
||||
uint64 frame;
|
||||
const void* param;
|
||||
|
||||
GSRasterizerData() : scissor_test(true) {}
|
||||
};
|
||||
|
||||
class IDrawScanline : public GSAlignedClass<32>
|
||||
|
@ -60,8 +63,8 @@ public:
|
|||
virtual void PrintStats() = 0;
|
||||
|
||||
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
|
||||
__forceinline void DrawScanline(int right, int left, int top, const GSVertexSW& scan) {m_ds(right, left, top, scan);}
|
||||
__forceinline void DrawEdge(int right, int left, int top, const GSVertexSW& scan) {m_de(right, left, top, scan);}
|
||||
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
|
||||
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
|
||||
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
|
||||
|
||||
__forceinline bool IsEdge() const {return m_de != NULL;}
|
||||
|
@ -90,18 +93,20 @@ protected:
|
|||
GSVector4 m_fscissor;
|
||||
struct {GSVertexSW* buff; int count;} m_edge;
|
||||
|
||||
void DrawPoint(const GSVertexSW* v);
|
||||
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
|
||||
|
||||
template<bool scissor_test>
|
||||
void DrawPoint(const GSVertexSW* v, int count);
|
||||
void DrawLine(const GSVertexSW* v);
|
||||
void DrawTriangle(const GSVertexSW* v);
|
||||
void DrawSprite(const GSVertexSW* v);
|
||||
void DrawEdge(const GSVertexSW* v);
|
||||
|
||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan);
|
||||
|
||||
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
|
||||
|
||||
__forceinline bool IsOneOfMyScanlines(int scanline) const;
|
||||
|
||||
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
|
||||
__forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false);
|
||||
|
||||
public:
|
||||
|
|
|
@ -72,7 +72,8 @@ void GSRendererSW::VSync(int field)
|
|||
m_reset = false;
|
||||
}
|
||||
|
||||
// if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats();
|
||||
//
|
||||
if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats();
|
||||
}
|
||||
|
||||
void GSRendererSW::ResetDevice()
|
||||
|
@ -171,18 +172,21 @@ void GSRendererSW::Draw()
|
|||
s_n++;
|
||||
}
|
||||
|
||||
GSVector4i scissor(m_context->scissor.in);
|
||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p));
|
||||
GSVector4i r = bbox.rintersect(scissor);
|
||||
|
||||
GSRasterizerData data;
|
||||
|
||||
data.scissor = GSVector4i(m_context->scissor.in);
|
||||
data.scissor.z = min(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
|
||||
data.scissor = scissor;
|
||||
data.scissor.z = std::min<int>(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
|
||||
data.scissor_test = !bbox.eq(r);
|
||||
data.primclass = m_vt.m_primclass;
|
||||
data.vertices = m_vertices;
|
||||
data.count = m_count;
|
||||
data.frame = m_perfmon.GetFrame();
|
||||
data.param = &gd;
|
||||
|
||||
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
|
||||
|
||||
m_rl.Draw(&data, r.width(), r.height());
|
||||
|
||||
if(gd.sel.fwrite)
|
||||
|
@ -364,8 +368,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
// 100 l round
|
||||
// 101 l tri
|
||||
|
||||
// TODO: (int)m_vt.m_lod.x >= mxl => LCM == 1
|
||||
|
||||
if(m_vt.m_lod.x > 0)
|
||||
{
|
||||
gd.sel.ltf = context->TEX1.MMIN >> 2;
|
||||
|
@ -383,13 +385,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
if(gd.sel.mmin == 2)
|
||||
{
|
||||
mxl--;
|
||||
mxl--; // don't sample beyond the last level (TODO: add a dummy level instead?)
|
||||
}
|
||||
|
||||
gd.mxl = GSVector4((float)mxl);
|
||||
gd.l = GSVector4((float)(-0x10000 << context->TEX1.L));
|
||||
gd.k = GSVector4((float)k);
|
||||
|
||||
if(gd.sel.fst)
|
||||
{
|
||||
ASSERT(gd.sel.lcm == 1);
|
||||
|
@ -398,6 +396,13 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
gd.sel.lcm = 1;
|
||||
}
|
||||
|
||||
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
|
||||
{
|
||||
k = (int)m_vt.m_lod.x << 16;
|
||||
|
||||
gd.sel.lcm = 1;
|
||||
}
|
||||
|
||||
if(gd.sel.lcm)
|
||||
{
|
||||
int lod = std::max<int>(std::min<int>(k, mxl), 0);
|
||||
|
@ -412,6 +417,12 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
// TODO: lot to optimize when lod is constant
|
||||
}
|
||||
else
|
||||
{
|
||||
gd.mxl = GSVector4((float)mxl);
|
||||
gd.l = GSVector4((float)(-0x10000 << context->TEX1.L));
|
||||
gd.k = GSVector4((float)k);
|
||||
}
|
||||
|
||||
GIFRegTEX0 MIP_TEX0 = context->TEX0;
|
||||
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
|
||||
|
@ -486,8 +497,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
}
|
||||
else
|
||||
{
|
||||
// TODO: these shortcuts are not compatible with mipmapping, yet
|
||||
|
||||
if(gd.sel.fst == 0)
|
||||
{
|
||||
// skip per pixel division if q is constant
|
||||
|
@ -507,8 +516,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
v[i].t *= w;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: q is now destroyed, but since q is constant we should be able to pre-calc gd.lod and change LCM to 1
|
||||
}
|
||||
else if(primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
|
@ -521,8 +528,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
v[i + 0].t *= w;
|
||||
v[i + 1].t *= w;
|
||||
}
|
||||
|
||||
// TODO: preserve q, or if there is only one sprite then see the comment above
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -66,6 +66,20 @@ public:
|
|||
void Set() {SetEvent(m_hEvent);}
|
||||
bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;}
|
||||
};
|
||||
/*
|
||||
class GSAutoResetEvent
|
||||
{
|
||||
protected:
|
||||
long m_sync;
|
||||
|
||||
public:
|
||||
GSAutoResetEvent() {m_sync = 0;}
|
||||
~GSAutoResetEvent() {}
|
||||
|
||||
void Set() {_interlockedbittestandset(&m_sync, 0);}
|
||||
bool Wait() {while(!_interlockedbittestandreset(&m_sync, 0)) _mm_pause(); return true;}
|
||||
};
|
||||
*/
|
||||
|
||||
#else
|
||||
|
||||
|
|
|
@ -26,7 +26,8 @@ const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f);
|
|||
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
|
||||
const GSVector4 GSVector4::m_half(0.5f);
|
||||
const GSVector4 GSVector4::m_one(1.0f);
|
||||
const GSVector4 GSVector4::m_x3f800000(_mm_castsi128_ps(_mm_set1_epi32(0x3f800000)));
|
||||
const GSVector4 GSVector4::m_two(2.0f);
|
||||
const GSVector4 GSVector4::m_four(4.0f);
|
||||
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
||||
|
||||
GSVector4i GSVector4i::fit(int arx, int ary) const
|
||||
|
|
|
@ -2330,8 +2330,8 @@ public:
|
|||
static const GSVector4 m_ps4567;
|
||||
static const GSVector4 m_half;
|
||||
static const GSVector4 m_one;
|
||||
|
||||
static const GSVector4 m_x3f800000;
|
||||
static const GSVector4 m_two;
|
||||
static const GSVector4 m_four;
|
||||
static const GSVector4 m_x4b000000;
|
||||
|
||||
__forceinline GSVector4()
|
||||
|
@ -2462,12 +2462,12 @@ public:
|
|||
|
||||
if((mode & 7) == (Round_NegInf & 7))
|
||||
{
|
||||
return b - ((a < b) & m_x3f800000);
|
||||
return b - ((a < b) & m_one);
|
||||
}
|
||||
|
||||
if((mode & 7) == (Round_PosInf & 7))
|
||||
{
|
||||
return b + ((a > b) & m_x3f800000);
|
||||
return b + ((a > b) & m_one);
|
||||
}
|
||||
|
||||
ASSERT((mode & 7) == (Round_NearestInt & 7)); // other modes aren't implemented
|
||||
|
@ -2702,7 +2702,66 @@ public:
|
|||
#endif
|
||||
}
|
||||
|
||||
// TODO: insert
|
||||
// Compile-time lane insert.
//
// Template parameters:
//   src - lane index (0..3) read from `v`
//   dst - lane index (0..3) of the result that receives that lane
//
// Returns a new GSVector4; this object is not modified (const member).
// Judging by the disabled _mm_insert_ps path below and by call sites such as
// insert<0, 1>(...) annotated "r.x => l.y", the result is presumably *this with
// lane `dst` replaced by lane `src` of `v`. The active path builds that result
// from two-operand swizzle helpers (xxyy, xzzw, xyxz, ...) that are defined
// outside this view -- NOTE(review): confirm their operand order before relying
// on the exact lane mapping.
template<int src, int dst> __forceinline GSVector4 insert(const GSVector4& v) const
|
||||
{
|
||||
// The SSE4.1 single-instruction variant is deliberately compiled out (#if 0).
#if 0 // _M_SSE >= 0x401
|
||||
|
||||
// NOTE: it's faster with shuffles...
|
||||
|
||||
return GSVector4(_mm_insert_ps(m, v.m, _MM_MK_INSERTPS_NDX(src, dst, 0)));
|
||||
|
||||
#else
|
||||
|
||||
// src and dst are template constants, so each instantiation has one fixed case to take.
switch(dst)
|
||||
{
|
||||
case 0:
|
||||
switch(src)
|
||||
{
|
||||
case 0: return v.xxyy(*this).xzzw(*this);
|
||||
case 1: return v.yyyy(*this).xzzw(*this);
|
||||
case 2: return v.zzyy(*this).xzzw(*this);
|
||||
case 3: return v.wwyy(*this).xzzw(*this);
|
||||
// __assume(0): MSVC optimizer hint that this branch is never reached (src outside 0..3 is unsupported).
default: __assume(0);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
switch(src)
|
||||
{
|
||||
case 0: return v.xxxx(*this).zxzw(*this);
|
||||
case 1: return v.yyxx(*this).zxzw(*this);
|
||||
case 2: return v.zzxx(*this).zxzw(*this);
|
||||
case 3: return v.wwxx(*this).zxzw(*this);
|
||||
default: __assume(0);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch(src)
|
||||
{
|
||||
case 0: return xyxz(v.xxww(*this));
|
||||
case 1: return xyxz(v.yyww(*this));
|
||||
case 2: return xyxz(v.zzww(*this));
|
||||
case 3: return xyxz(v.wwww(*this));
|
||||
default: __assume(0);
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
switch(src)
|
||||
{
|
||||
case 0: return xyzx(v.xxzz(*this));
|
||||
case 1: return xyzx(v.yyzz(*this));
|
||||
case 2: return xyzx(v.zzzz(*this));
|
||||
case 3: return xyzx(v.wwzz(*this));
|
||||
default: __assume(0);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// dst outside 0..3 is likewise declared unreachable.
__assume(0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Fallback return after the switch; every valid src/dst case returns above, so this
// is presumably only here to give the function a return on all paths.
return *this;
|
||||
}
|
||||
|
||||
template<int i> __forceinline int extract() const
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue