GSdx: only minor changes

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4494 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-03-27 03:12:12 +00:00
parent c2266c3edc
commit 8ca01f4b77
10 changed files with 225 additions and 151 deletions

View File

@ -100,7 +100,7 @@ protected:
r.right = r.left + 256; r.right = r.left + 256;
r.bottom = r.top + 256; r.bottom = r.top + 256;
Dump(format("da_%d_%d_%d_%d_%d", m_env.STATUS.TP, r).c_str(), m_env.STATUS.TP, r, false); Dump(format("da_%d_%d_%d_%d_%d", m_env.STATUS.TP, r.left, r.top, r.right, r.bottom).c_str(), m_env.STATUS.TP, r, false);
} }
*/ */

View File

@ -610,6 +610,8 @@ int GPUState::PH_Read(GPUReg* r, int size)
int w = r[2].XY.X; int w = r[2].XY.X;
int h = r[2].XY.Y; int h = r[2].XY.Y;
if(w > 0 && h > 0)
{
GSVector4i r2; GSVector4i r2;
r2.left = r[1].XY.X; r2.left = r[1].XY.X;
@ -624,6 +626,7 @@ int GPUState::PH_Read(GPUReg* r, int size)
m_mem.ReadRect(r2, (uint16*)m_read.buff); m_mem.ReadRect(r2, (uint16*)m_read.buff);
Dump("r"); Dump("r");
}
m_env.STATUS.IMG = 1; m_env.STATUS.IMG = 1;

View File

@ -2764,42 +2764,33 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
if(m_sel.mmin) if(m_sel.mmin)
{ {
int r[] = {5, 6, 2, 4, 0, 1, 3, 7}; const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
if(pixels == 4) if(pixels == 4)
{ {
vmovdqa(ptr[&m_local.temp.test], xmm7); vmovdqa(ptr[&m_local.temp.test], xmm7);
}
for(int j = 0; j < 4; j++) for(int j = 0; j < 4; j++)
{ {
mov(ebx, ptr[&lod_i->u32[j]]); mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]); mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
for(int i = 0; i < 4; i++) for(int i = 0; i < pixels; i++)
{ {
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
} }
} }
if(pixels == 4)
{
vmovdqa(xmm5, xmm7); vmovdqa(xmm5, xmm7);
vmovdqa(xmm7, ptr[&m_local.temp.test]); vmovdqa(xmm7, ptr[&m_local.temp.test]);
} }
else
{
for(int j = 0; j < 4; j++)
{
mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm6, xmm5, j);
}
}
} }
else else
{ {
int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
// TODO: might be faster to read in columns, inserts into the same register would be further from eachother (last one overwrites xmm5, need to use xmm7)
for(int i = 0; i < pixels; i++) for(int i = 0; i < pixels; i++)
{ {

View File

@ -2928,36 +2928,29 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
{ {
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401
int r[] = {5, 6, 2, 4, 0, 1, 3, 7}; const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
if(pixels == 4) if(pixels == 4)
{ {
movdqa(ptr[&m_local.temp.test], xmm7); movdqa(ptr[&m_local.temp.test], xmm7);
}
for(int j = 0; j < 4; j++) for(int j = 0; j < 4; j++)
{ {
mov(ebx, ptr[&lod_i->u32[j]]); mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]); mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
for(int i = 0; i < 4; i++) for(int i = 0; i < pixels; i++)
{ {
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
} }
} }
if(pixels == 4)
{
movdqa(xmm5, xmm7); movdqa(xmm5, xmm7);
movdqa(xmm7, ptr[&m_local.temp.test]); movdqa(xmm7, ptr[&m_local.temp.test]);
} }
else
{
for(int j = 0; j < 4; j++)
{
mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm6, xmm5, j);
}
}
#else #else
@ -3082,12 +3075,10 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
} }
else else
{ {
int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401
// TODO: might be faster to read in columns, inserts into the same register would be further from eachother (last one overwrites xmm5, need to use xmm7)
for(int i = 0; i < pixels; i++) for(int i = 0; i < pixels; i++)
{ {
for(int j = 0; j < 4; j++) for(int j = 0; j < 4; j++)
@ -3098,7 +3089,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
#else #else
int t[] = {1, 4, 1, 5, 2, 5, 2, 0}; const int t[] = {1, 4, 1, 5, 2, 5, 2, 0};
for(int i = 0; i < pixels; i++) for(int i = 0; i < pixels; i++)
{ {

View File

@ -86,7 +86,8 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
{ {
case GS_POINT_CLASS: case GS_POINT_CLASS:
m_stats.prims = count; m_stats.prims = count;
for(int i = 0; i < count; i++) DrawPoint(&vertices[i]); if(data->scissor_test) DrawPoint<true>(vertices, count);
else DrawPoint<false>(vertices, count);
break; break;
case GS_LINE_CLASS: case GS_LINE_CLASS:
ASSERT(!(count & 1)); ASSERT(!(count & 1));
@ -117,11 +118,14 @@ void GSRasterizer::GetStats(GSRasterizerStats& stats)
stats = m_stats; stats = m_stats;
} }
void GSRasterizer::DrawPoint(const GSVertexSW* v) template<bool scissor_test>
void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
{ {
for(; count > 0; count--, v++)
{
GSVector4i p(v->p); GSVector4i p(v->p);
if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{ {
if(IsOneOfMyScanlines(p.y)) if(IsOneOfMyScanlines(p.y))
{ {
@ -132,6 +136,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v)
m_ds->DrawScanline(1, p.x, p.y, *v); m_ds->DrawScanline(1, p.x, p.y, *v);
} }
} }
}
} }
void GSRasterizer::DrawLine(const GSVertexSW* v) void GSRasterizer::DrawLine(const GSVertexSW* v)
@ -206,11 +211,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
{ {
if(IsOneOfMyScanlines(p.y)) if(IsOneOfMyScanlines(p.y))
{ {
*e = edge; AddScanline(e, 1, p.x, p.y, edge);
e->p.i16[0] = (int16)p.x;
e->p.i16[1] = (int16)p.y;
e->p.i16[2] = 1;
e++; e++;
} }
@ -243,8 +244,6 @@ static const int s_abc[8][4] =
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices) void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
{ {
// TODO: GSVertexSW::c/t could be merged into a GSVector8
GSVertexSW v[4]; GSVertexSW v[4];
GSVertexSW dv[3]; GSVertexSW dv[3];
GSVertexSW ddv[3]; GSVertexSW ddv[3];
@ -338,7 +337,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 dy = tbmax.xxxx() - l.p.yyyy(); GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
l.p = l.p.xxzw(); // r.x => l.y l.p = l.p.xxzw(); // r.x => l.y
dl.p = dl.p.upl(ddv[1 - j].p).xyzw(dl.p); // dr.x => dl.y dl.p = dl.p.insert<0, 1>(ddv[1 - j].p); // dr.x => dl.y
l += dl * dy; l += dl * dy;
@ -351,13 +350,13 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
// v[3] isn't accurate enough, it may leave gaps horizontally if it happens to be on the left side of the triangle // v[3] isn't accurate enough, it may leave gaps horizontally if it happens to be on the left side of the triangle
// example: previous triangle's scanline ends on 48.9999, this one's starts from 49.0001, the pixel at 49 isn't drawn // example: previous triangle's scanline ends on 48.9999, this one's starts from 49.0001, the pixel at 49 isn't drawn
GSVertexSW l = v[1 + (1 << j)]; GSVertexSW l = v[1 + (j << 1)];
GSVertexSW dl = ddv[2 - j]; GSVertexSW dl = ddv[2 - j];
GSVector4 dy = tbmax.zzzz() - l.p.yyyy(); GSVector4 dy = tbmax.zzzz() - l.p.yyyy();
l.p = l.p.upl(v[3 - (1 << j)].p).xyzw(l.p); // r.x => l.y l.p = l.p.insert<0, 1>(v[3 - (j << 1)].p); // r.x => l.y
dl.p = dl.p.upl(ddv[1 + j].p).xyzw(dl.p); // dr.x => dl.y dl.p = dl.p.insert<0, 1>(ddv[1 + j].p); // dr.x => dl.y
l += dl * dy; l += dl * dy;
@ -375,8 +374,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 dy = tbmax.xxxx() - l.p.yyyy(); GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
l.p = l.p.upl(v[1 - j].p).xyzw(l.p); // r.x => l.y l.p = l.p.insert<0, 1>(v[1 - j].p); // r.x => l.y
dl.p = dl.p.upl(ddv[2 - j].p).xyzw(dl.p); // dr.x => dl.y dl.p = dl.p.insert<0, 1>(ddv[2 - j].p); // dr.x => dl.y
l += dl * dy; l += dl * dy;
@ -395,7 +394,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 dy = tbmax.xxxx() - l.p.yyyy(); GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
l.p = l.p.xxzw(); // r.x => l.y l.p = l.p.xxzw(); // r.x => l.y
dl.p = dl.p.upl(ddv[1 - j].p).xyzw(dl.p); // dr.x => dl.y dl.p = dl.p.insert<0, 1>(ddv[1 - j].p); // dr.x => dl.y
l += dl * dy; l += dl * dy;
@ -417,13 +416,15 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count]; GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
GSVector4 scissor = m_fscissor.xzxz();
while(1) while(1)
{ {
if(IsOneOfMyScanlines(top)) if(IsOneOfMyScanlines(top))
{ {
GSVector4 lrf = l.p.ceil(); GSVector4 lrf = l.p.ceil();
GSVector4 lrmax = lrf.max(m_fscissor.xzxz()); GSVector4 lrmax = lrf.max(scissor);
GSVector4 lrmin = lrf.min(m_fscissor.xzxz()); GSVector4 lrmin = lrf.min(scissor);
GSVector4i lr = GSVector4i(lrmax.xxyy(lrmin)); GSVector4i lr = GSVector4i(lrmax.xxyy(lrmin));
int left = lr.extract32<0>(); int left = lr.extract32<0>();
@ -435,11 +436,9 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
{ {
m_stats.pixels += pixels; m_stats.pixels += pixels;
*e = l + dscan * (lrmax - l.p).xxxx(); GSVector4 prestep = lrmax - l.p;
e->p.i16[0] = (int16)left; AddScanline(e, pixels, left, top, l + dscan * prestep.xxxx());
e->p.i16[1] = (int16)top;
e->p.i16[2] = (int16)pixels;
e++; e++;
} }
@ -496,8 +495,12 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
dedge.t = (dv.t / dv.p.yyyy()).xyxy(zero).wyww(); dedge.t = (dv.t / dv.p.yyyy()).xyxy(zero).wyww();
dscan.t = (dv.t / dv.p.xxxx()).xyxy(zero).xwww(); dscan.t = (dv.t / dv.p.xxxx()).xyxy(zero).xwww();
if(scan.p.y < (float)r.top) scan.t += dedge.t * ((float)r.top - scan.p.y); GSVector4 prestep = GSVector4(r.left, r.top) - scan.p;
if(scan.p.x < (float)r.left) scan.t += dscan.t * ((float)r.left - scan.p.x);
int m = (prestep == GSVector4::zero()).mask();
if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
m_ds->SetupPrim(v, dscan); m_ds->SetupPrim(v, dscan);
@ -581,16 +584,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi)) if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi))
{ {
m_stats.pixels++; AddScanline(e, 1, xi, top, edge);
*e = edge;
e->t.u32[3] = (0x10000 - xf) & 0xffff; e->t.u32[3] = (0x10000 - xf) & 0xffff;
e->p.i16[0] = (int16)xi;
e->p.i16[1] = (int16)top;
e->p.i16[2] = 1;
e++; e++;
} }
@ -609,16 +606,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi)) if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi))
{ {
m_stats.pixels++; AddScanline(e, 1, xi, top, edge);
*e = edge;
e->t.u32[3] = xf; e->t.u32[3] = xf;
e->p.i16[0] = (int16)xi;
e->p.i16[1] = (int16)top;
e->p.i16[2] = 1;
e++; e++;
} }
@ -678,16 +669,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi)) if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
{ {
m_stats.pixels++; AddScanline(e, 1, left, yi, edge);
*e = edge;
e->t.u32[3] = (0x10000 - yf) & 0xffff; e->t.u32[3] = (0x10000 - yf) & 0xffff;
e->p.i16[0] = (int16)left;
e->p.i16[1] = (int16)yi;
e->p.i16[2] = 1;
e++; e++;
} }
@ -706,16 +691,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi)) if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
{ {
m_stats.pixels++; AddScanline(e, 1, left, yi, edge);
*e = edge;
e->t.u32[3] = yf; e->t.u32[3] = yf;
e->p.i16[0] = (int16)left;
e->p.i16[1] = (int16)yi;
e->p.i16[2] = 1;
e++; e++;
} }
@ -727,7 +706,20 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
} }
} }
m_edge.count += e - &m_edge.buff[m_edge.count]; int count = e - &m_edge.buff[m_edge.count];
m_stats.pixels += count;
m_edge.count += count;
}
// Stores one scanline record into the edge buffer slot pointed to by e.
//
// e      - destination slot; it is written in place and is NOT advanced here
//          (callers do their own e++ after the call)
// pixels - pixel count of the span, packed into p.i16[0]
// left   - first x coordinate of the span, packed into p.i16[1]
// top    - y coordinate of the span, packed into p.i16[2]
// scan   - vertex state copied into the slot before the packed fields are written
//
// The packed order (pixels, left, top) must stay in sync with Flush(), which
// reads p.i16[0], p.i16[1] and p.i16[2] back in exactly this order before
// calling DrawScanline/DrawEdge.
void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan)
{
// copy the whole vertex first; the three stores below overwrite part of p
*e = scan;
// values are truncated to 16 bits by the casts
// NOTE(review): presumably coordinates and span lengths always fit in int16 - confirm
e->p.i16[0] = (int16)pixels;
e->p.i16[1] = (int16)left;
e->p.i16[2] = (int16)top;
} }
void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge) void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge)
@ -741,18 +733,31 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
m_ds->SetupPrim(vertices, dscan); m_ds->SetupPrim(vertices, dscan);
const GSVertexSW* RESTRICT e = m_edge.buff; const GSVertexSW* RESTRICT e = m_edge.buff;
const GSVertexSW* RESTRICT ee = e + count;
int i = 0;
if(!edge) if(!edge)
{ {
do {m_ds->DrawScanline(e[i].p.i16[2], e[i].p.i16[0], e[i].p.i16[1], e[i]);} do
while(++i < count); {
int pixels = e->p.i16[0];
int left = e->p.i16[1];
int top = e->p.i16[2];
m_ds->DrawScanline(pixels, left, top, *e++);
}
while(e < ee);
} }
else else
{ {
do {m_ds->DrawEdge(e[i].p.i16[2], e[i].p.i16[0], e[i].p.i16[1], e[i]);} do
while(++i < count); {
int pixels = e->p.i16[0];
int left = e->p.i16[1];
int top = e->p.i16[2];
m_ds->DrawEdge(pixels, left, top, *e++);
}
while(e < ee);
} }
m_edge.count = 0; m_edge.count = 0;

View File

@ -31,11 +31,14 @@ __aligned(class, 32) GSRasterizerData
{ {
public: public:
GSVector4i scissor; GSVector4i scissor;
bool scissor_test;
GS_PRIM_CLASS primclass; GS_PRIM_CLASS primclass;
const GSVertexSW* vertices; const GSVertexSW* vertices;
int count; int count;
uint64 frame; uint64 frame;
const void* param; const void* param;
GSRasterizerData() : scissor_test(true) {}
}; };
class IDrawScanline : public GSAlignedClass<32> class IDrawScanline : public GSAlignedClass<32>
@ -60,8 +63,8 @@ public:
virtual void PrintStats() = 0; virtual void PrintStats() = 0;
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);} __forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
__forceinline void DrawScanline(int right, int left, int top, const GSVertexSW& scan) {m_ds(right, left, top, scan);} __forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
__forceinline void DrawEdge(int right, int left, int top, const GSVertexSW& scan) {m_de(right, left, top, scan);} __forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);} __forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
__forceinline bool IsEdge() const {return m_de != NULL;} __forceinline bool IsEdge() const {return m_de != NULL;}
@ -90,18 +93,20 @@ protected:
GSVector4 m_fscissor; GSVector4 m_fscissor;
struct {GSVertexSW* buff; int count;} m_edge; struct {GSVertexSW* buff; int count;} m_edge;
void DrawPoint(const GSVertexSW* v); typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
template<bool scissor_test>
void DrawPoint(const GSVertexSW* v, int count);
void DrawLine(const GSVertexSW* v); void DrawLine(const GSVertexSW* v);
void DrawTriangle(const GSVertexSW* v); void DrawTriangle(const GSVertexSW* v);
void DrawSprite(const GSVertexSW* v); void DrawSprite(const GSVertexSW* v);
void DrawEdge(const GSVertexSW* v);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan); __forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan);
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side); void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
__forceinline bool IsOneOfMyScanlines(int scanline) const; __forceinline bool IsOneOfMyScanlines(int scanline) const;
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false); __forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false);
public: public:

View File

@ -72,7 +72,8 @@ void GSRendererSW::VSync(int field)
m_reset = false; m_reset = false;
} }
// if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats(); //
if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats();
} }
void GSRendererSW::ResetDevice() void GSRendererSW::ResetDevice()
@ -171,18 +172,21 @@ void GSRendererSW::Draw()
s_n++; s_n++;
} }
GSVector4i scissor(m_context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p));
GSVector4i r = bbox.rintersect(scissor);
GSRasterizerData data; GSRasterizerData data;
data.scissor = GSVector4i(m_context->scissor.in); data.scissor = scissor;
data.scissor.z = min(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour data.scissor.z = std::min<int>(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
data.scissor_test = !bbox.eq(r);
data.primclass = m_vt.m_primclass; data.primclass = m_vt.m_primclass;
data.vertices = m_vertices; data.vertices = m_vertices;
data.count = m_count; data.count = m_count;
data.frame = m_perfmon.GetFrame(); data.frame = m_perfmon.GetFrame();
data.param = &gd; data.param = &gd;
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
m_rl.Draw(&data, r.width(), r.height()); m_rl.Draw(&data, r.width(), r.height());
if(gd.sel.fwrite) if(gd.sel.fwrite)
@ -364,8 +368,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
// 100 l round // 100 l round
// 101 l tri // 101 l tri
// TODO: (int)m_vt.m_lod.x >= mxl => LCM == 1
if(m_vt.m_lod.x > 0) if(m_vt.m_lod.x > 0)
{ {
gd.sel.ltf = context->TEX1.MMIN >> 2; gd.sel.ltf = context->TEX1.MMIN >> 2;
@ -383,13 +385,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(gd.sel.mmin == 2) if(gd.sel.mmin == 2)
{ {
mxl--; mxl--; // don't sample beyond the last level (TODO: add a dummy level instead?)
} }
gd.mxl = GSVector4((float)mxl);
gd.l = GSVector4((float)(-0x10000 << context->TEX1.L));
gd.k = GSVector4((float)k);
if(gd.sel.fst) if(gd.sel.fst)
{ {
ASSERT(gd.sel.lcm == 1); ASSERT(gd.sel.lcm == 1);
@ -398,6 +396,13 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.lcm = 1; gd.sel.lcm = 1;
} }
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
{
k = (int)m_vt.m_lod.x << 16;
gd.sel.lcm = 1;
}
if(gd.sel.lcm) if(gd.sel.lcm)
{ {
int lod = std::max<int>(std::min<int>(k, mxl), 0); int lod = std::max<int>(std::min<int>(k, mxl), 0);
@ -412,6 +417,12 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
// TODO: lot to optimize when lod is constant // TODO: lot to optimize when lod is constant
} }
else
{
gd.mxl = GSVector4((float)mxl);
gd.l = GSVector4((float)(-0x10000 << context->TEX1.L));
gd.k = GSVector4((float)k);
}
GIFRegTEX0 MIP_TEX0 = context->TEX0; GIFRegTEX0 MIP_TEX0 = context->TEX0;
GIFRegCLAMP MIP_CLAMP = context->CLAMP; GIFRegCLAMP MIP_CLAMP = context->CLAMP;
@ -486,8 +497,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
} }
else else
{ {
// TODO: these shortcuts are not compatible with mipmapping, yet
if(gd.sel.fst == 0) if(gd.sel.fst == 0)
{ {
// skip per pixel division if q is constant // skip per pixel division if q is constant
@ -507,8 +516,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
v[i].t *= w; v[i].t *= w;
} }
} }
// TODO: q is now destoroyed, but since q is constant we should be able to pre-calc gd.lod and change LCM to 1
} }
else if(primclass == GS_SPRITE_CLASS) else if(primclass == GS_SPRITE_CLASS)
{ {
@ -521,8 +528,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
v[i + 0].t *= w; v[i + 0].t *= w;
v[i + 1].t *= w; v[i + 1].t *= w;
} }
// TODO: preserve q, or if there only one sprite then see the comment above
} }
} }

View File

@ -66,6 +66,20 @@ public:
void Set() {SetEvent(m_hEvent);} void Set() {SetEvent(m_hEvent);}
bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;} bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;}
}; };
/*
class GSAutoResetEvent
{
protected:
long m_sync;
public:
GSAutoResetEvent() {m_sync = 0;}
~GSAutoResetEvent() {}
void Set() {_interlockedbittestandset(&m_sync, 0);}
bool Wait() {while(!_interlockedbittestandreset(&m_sync, 0)) _mm_pause(); return true;}
};
*/
#else #else

View File

@ -26,7 +26,8 @@ const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f);
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f); const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
const GSVector4 GSVector4::m_half(0.5f); const GSVector4 GSVector4::m_half(0.5f);
const GSVector4 GSVector4::m_one(1.0f); const GSVector4 GSVector4::m_one(1.0f);
const GSVector4 GSVector4::m_x3f800000(_mm_castsi128_ps(_mm_set1_epi32(0x3f800000))); const GSVector4 GSVector4::m_two(2.0f);
const GSVector4 GSVector4::m_four(4.0f);
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000))); const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
GSVector4i GSVector4i::fit(int arx, int ary) const GSVector4i GSVector4i::fit(int arx, int ary) const

View File

@ -2330,8 +2330,8 @@ public:
static const GSVector4 m_ps4567; static const GSVector4 m_ps4567;
static const GSVector4 m_half; static const GSVector4 m_half;
static const GSVector4 m_one; static const GSVector4 m_one;
static const GSVector4 m_two;
static const GSVector4 m_x3f800000; static const GSVector4 m_four;
static const GSVector4 m_x4b000000; static const GSVector4 m_x4b000000;
__forceinline GSVector4() __forceinline GSVector4()
@ -2462,12 +2462,12 @@ public:
if((mode & 7) == (Round_NegInf & 7)) if((mode & 7) == (Round_NegInf & 7))
{ {
return b - ((a < b) & m_x3f800000); return b - ((a < b) & m_one);
} }
if((mode & 7) == (Round_PosInf & 7)) if((mode & 7) == (Round_PosInf & 7))
{ {
return b + ((a > b) & m_x3f800000); return b + ((a > b) & m_one);
} }
ASSERT((mode & 7) == (Round_NearestInt & 7)); // other modes aren't implemented ASSERT((mode & 7) == (Round_NearestInt & 7)); // other modes aren't implemented
@ -2702,7 +2702,66 @@ public:
#endif #endif
} }
// insert<src, dst>(v): returns a copy of *this in which element dst has been
// replaced by element src of v; *this itself is not modified (const method).
// Element indices 0..3 correspond to x, y, z, w - e.g. call sites use
// insert<0, 1>(r) for "r.x => l.y".
//
// src and dst are template parameters, so the switch statements below are on
// compile-time constants.
// TODO: insert template<int src, int dst> __forceinline GSVector4 insert(const GSVector4& v) const
{
#if 0 // _M_SSE >= 0x401
// NOTE: it's faster with shuffles...
// (disabled SSE4.1 path: a single insertps performing the same element copy)
return GSVector4(_mm_insert_ps(m, v.m, _MM_MK_INSERTPS_NDX(src, dst, 0)));
#else
// Active path: the outer switch picks the destination element, the inner one
// the source element; each case builds the result from two shuffles.
// NOTE(review): the exact semantics of the swizzle helpers (xxyy(), xzzw(), ...)
// are defined outside this view - the per-case comments assume the result is
// equivalent to the insertps form above; confirm against their definitions.
switch(dst)
{
case 0:
// replace x
switch(src)
{
case 0: return v.xxyy(*this).xzzw(*this);
case 1: return v.yyyy(*this).xzzw(*this);
case 2: return v.zzyy(*this).xzzw(*this);
case 3: return v.wwyy(*this).xzzw(*this);
default: __assume(0);
}
break;
case 1:
// replace y
switch(src)
{
case 0: return v.xxxx(*this).zxzw(*this);
case 1: return v.yyxx(*this).zxzw(*this);
case 2: return v.zzxx(*this).zxzw(*this);
case 3: return v.wwxx(*this).zxzw(*this);
default: __assume(0);
}
break;
case 2:
// replace z
switch(src)
{
case 0: return xyxz(v.xxww(*this));
case 1: return xyxz(v.yyww(*this));
case 2: return xyxz(v.zzww(*this));
case 3: return xyxz(v.wwww(*this));
default: __assume(0);
}
break;
case 3:
// replace w
switch(src)
{
case 0: return xyzx(v.xxzz(*this));
case 1: return xyzx(v.yyzz(*this));
case 2: return xyzx(v.zzzz(*this));
case 3: return xyzx(v.wwzz(*this));
default: __assume(0);
}
break;
default:
// src/dst outside 0..3 are declared impossible to the compiler
__assume(0);
}
#endif
// Not reached for src/dst in 0..3 (every case above returns).
// NOTE(review): presumably kept so that all control paths formally return a value - confirm
return *this;
}
template<int i> __forceinline int extract() const template<int i> __forceinline int extract() const
{ {