mirror of https://github.com/PCSX2/pcsx2.git
GSdx: using mipmap levels (only per batch, no tri-linear) and a couple of small changes, including the stdcall fix for linux.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4419 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
8f6151945d
commit
b31634df8f
|
@ -128,9 +128,8 @@ void GPUDrawScanlineCodeGenerator::Init()
|
|||
add(edi, edx);
|
||||
lea(edi, ptr[edi * 2 + (size_t)m_local.gd->vm]);
|
||||
|
||||
// int steps = right - left - 8;
|
||||
// int steps = pixels - 8;
|
||||
|
||||
sub(ecx, edx);
|
||||
sub(ecx, 8);
|
||||
|
||||
if(m_sel.dtd)
|
||||
|
|
|
@ -174,10 +174,10 @@ void GPURendererSW::VertexKick()
|
|||
int x = (int)(m_v.XY.X + m_env.DROFF.X) << m_scale.x;
|
||||
int y = (int)(m_v.XY.Y + m_env.DROFF.Y) << m_scale.y;
|
||||
|
||||
int s = m_v.UV.X;
|
||||
int t = m_v.UV.Y;
|
||||
int u = m_v.UV.X;
|
||||
int v = m_v.UV.Y;
|
||||
|
||||
GSVector4 pt(x, y, s, t);
|
||||
GSVector4 pt(x, y, u, v);
|
||||
|
||||
dst.p = pt.xyxy(GSVector4::zero());
|
||||
dst.t = (pt.zwzw(GSVector4::zero()) + GSVector4(0.125f)) * 256.0f;
|
||||
|
|
|
@ -815,7 +815,7 @@ union
|
|||
};
|
||||
};
|
||||
REG_END2
|
||||
__forceinline bool IsRepeating() {return (1 << TW) > (int)(TBW << 6);}
|
||||
__forceinline bool IsRepeating() {return (1 << TW) > (int)(TBW << 6) || (PSM == PSM_PSMT8 || PSM == PSM_PSMT4) && TBW == 1;}
|
||||
REG_END2
|
||||
|
||||
REG64_(GIFReg, TEX1)
|
||||
|
|
|
@ -54,7 +54,7 @@ GSVector4i GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0)
|
|||
}
|
||||
else
|
||||
{
|
||||
r = GSVector4i(left, top, right, bottom).ralign<GSVector4i::Outside>(src);
|
||||
r = GSVector4i(left, top, right, bottom).ralign<Align_Outside>(src);
|
||||
}
|
||||
|
||||
return r;
|
||||
|
@ -77,7 +77,7 @@ GSVector4i GSDirtyRectList::GetDirtyRectAndClear(const GIFRegTEX0& TEX0, const G
|
|||
|
||||
GSVector2i bs = GSLocalMemory::m_psm[TEX0.PSM].bs;
|
||||
|
||||
return r.ralign<GSVector4i::Outside>(bs).rintersect(GSVector4i(0, 0, size.x, size.y));
|
||||
return r.ralign<Align_Outside>(bs).rintersect(GSVector4i(0, 0, size.x, size.y));
|
||||
}
|
||||
|
||||
return GSVector4i::zero();
|
||||
|
|
|
@ -189,7 +189,7 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col,
|
|||
|
||||
color = color.andnot(mask);
|
||||
|
||||
GSVector4i br = r.ralign<GSVector4i::Inside>(GSVector2i(8 * 4 / sizeof(T), 8));
|
||||
GSVector4i br = r.ralign<Align_Inside>(GSVector2i(8 * 4 / sizeof(T), 8));
|
||||
|
||||
if(!br.rempty())
|
||||
{
|
||||
|
|
|
@ -19,9 +19,6 @@
|
|||
*
|
||||
*/
|
||||
|
||||
// TODO: x64 (use the extra regs to avoid spills of zs, zd, uf, vf, rb, ga and keep a few constants in the last two like aref or afix)
|
||||
// TODO: for edges doing 4 pixels is wasteful (needed memory access * 4)
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
|
||||
|
|
|
@ -224,10 +224,9 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
|
||||
sub(rbx, rdx);
|
||||
|
||||
// int steps = right - left - 4;
|
||||
// int steps = pixels + skip - 4;
|
||||
|
||||
sub(rcx, rbx);
|
||||
sub(rcx, 4);
|
||||
lea(rcx, ptr[rcx + rdx - 4]);
|
||||
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
|
@ -478,6 +477,12 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
|
||||
vpaddw(xmm13, xmm1);
|
||||
vpaddw(xmm14, xmm2);
|
||||
|
||||
// FIXME: color may underflow and roll over at the end of the line, if decreasing
|
||||
|
||||
vpxor(xmm0, xmm0);
|
||||
vpmaxsw(xmm13, xmm0);
|
||||
vpmaxsw(xmm14, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -252,10 +252,9 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
|
||||
sub(ebx, edx);
|
||||
|
||||
// int steps = right - left - 4;
|
||||
// int steps = pixels + skip - 4;
|
||||
|
||||
sub(ecx, ebx);
|
||||
sub(ecx, 4);
|
||||
lea(ecx, ptr[ecx + edx - 4]);
|
||||
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
|
@ -553,6 +552,12 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
vpaddw(xmm5, ptr[&m_local.temp.rb]);
|
||||
vpaddw(xmm6, ptr[&m_local.temp.ga]);
|
||||
|
||||
// FIXME: color may underflow and roll over at the end of the line, if decreasing
|
||||
|
||||
vpxor(xmm7, xmm7);
|
||||
vpmaxsw(xmm5, xmm7);
|
||||
vpmaxsw(xmm6, xmm7);
|
||||
|
||||
vmovdqa(ptr[&m_local.temp.rb], xmm5);
|
||||
vmovdqa(ptr[&m_local.temp.ga], xmm6);
|
||||
}
|
||||
|
|
|
@ -251,8 +251,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
|
||||
// int steps = right - left - 4;
|
||||
|
||||
sub(ecx, ebx);
|
||||
sub(ecx, 4);
|
||||
lea(ecx, ptr[ecx + edx - 4]);
|
||||
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
|
@ -555,6 +554,12 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
paddw(xmm5, ptr[&m_local.temp.rb]);
|
||||
paddw(xmm6, ptr[&m_local.temp.ga]);
|
||||
|
||||
// FIXME: color may underflow and roll over at the end of the line, if decreasing
|
||||
|
||||
pxor(xmm7, xmm7);
|
||||
pmaxsw(xmm5, xmm7);
|
||||
pmaxsw(xmm6, xmm7);
|
||||
|
||||
movdqa(ptr[&m_local.temp.rb], xmm5);
|
||||
movdqa(ptr[&m_local.temp.ga], xmm6);
|
||||
}
|
||||
|
|
|
@ -1708,7 +1708,7 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT o, const GSVector4i& r,
|
|||
TEX0.TBW = o->bw;
|
||||
TEX0.PSM = o->psm;
|
||||
|
||||
GSVector4i cr = r.ralign<GSVector4i::Inside>(psm.bs);
|
||||
GSVector4i cr = r.ralign<Align_Inside>(psm.bs);
|
||||
|
||||
bool aligned = ((size_t)(dst + (cr.left - r.left) * sizeof(uint32)) & 0xf) == 0;
|
||||
|
||||
|
|
|
@ -173,14 +173,14 @@ public:
|
|||
|
||||
static uint32 BlockNumber8(int x, int y, uint32 bp, uint32 bw)
|
||||
{
|
||||
ASSERT((bw & 1) == 0);
|
||||
// ASSERT((bw & 1) == 0); // allowed for mipmap levels
|
||||
|
||||
return bp + ((y >> 1) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f) + blockTable8[(y >> 4) & 3][(x >> 4) & 7];
|
||||
}
|
||||
|
||||
static uint32 BlockNumber4(int x, int y, uint32 bp, uint32 bw)
|
||||
{
|
||||
ASSERT((bw & 1) == 0);
|
||||
// ASSERT((bw & 1) == 0); // allowed for mipmap levels
|
||||
|
||||
return bp + ((y >> 2) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f) + blockTable4[(y >> 4) & 7][(x >> 5) & 3];
|
||||
}
|
||||
|
@ -291,6 +291,7 @@ public:
|
|||
{
|
||||
uint32 page = (bp >> 5) + (y >> 5) * bw + (x >> 6);
|
||||
uint32 word = (page << 11) + pageOffset32[bp & 0x1f][y & 0x1f][x & 0x3f];
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
|
@ -298,6 +299,7 @@ public:
|
|||
{
|
||||
uint32 page = (bp >> 5) + (y >> 6) * bw + (x >> 6);
|
||||
uint32 word = (page << 12) + pageOffset16[bp & 0x1f][y & 0x3f][x & 0x3f];
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
|
@ -305,22 +307,27 @@ public:
|
|||
{
|
||||
uint32 page = (bp >> 5) + (y >> 6) * bw + (x >> 6);
|
||||
uint32 word = (page << 12) + pageOffset16S[bp & 0x1f][y & 0x3f][x & 0x3f];
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline uint32 PixelAddress8(int x, int y, uint32 bp, uint32 bw)
|
||||
{
|
||||
ASSERT((bw & 1) == 0);
|
||||
// ASSERT((bw & 1) == 0); // allowed for mipmap levels
|
||||
|
||||
uint32 page = (bp >> 5) + (y >> 6) * (bw >> 1) + (x >> 7);
|
||||
uint32 word = (page << 13) + pageOffset8[bp & 0x1f][y & 0x3f][x & 0x7f];
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline uint32 PixelAddress4(int x, int y, uint32 bp, uint32 bw)
|
||||
{
|
||||
ASSERT((bw & 1) == 0);
|
||||
// ASSERT((bw & 1) == 0); // allowed for mipmap levels
|
||||
|
||||
uint32 page = (bp >> 5) + (y >> 7) * (bw >> 1) + (x >> 7);
|
||||
uint32 word = (page << 14) + pageOffset4[bp & 0x1f][y & 0x7f][x & 0x7f];
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
|
@ -328,6 +335,7 @@ public:
|
|||
{
|
||||
uint32 page = (bp >> 5) + (y >> 5) * bw + (x >> 6);
|
||||
uint32 word = (page << 11) + pageOffset32Z[bp & 0x1f][y & 0x1f][x & 0x3f];
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
|
@ -335,6 +343,7 @@ public:
|
|||
{
|
||||
uint32 page = (bp >> 5) + (y >> 6) * bw + (x >> 6);
|
||||
uint32 word = (page << 12) + pageOffset16Z[bp & 0x1f][y & 0x3f][x & 0x3f];
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
|
@ -342,6 +351,7 @@ public:
|
|||
{
|
||||
uint32 page = (bp >> 5) + (y >> 6) * bw + (x >> 6);
|
||||
uint32 word = (page << 12) + pageOffset16SZ[bp & 0x1f][y & 0x3f][x & 0x3f];
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
|
|
|
@ -129,7 +129,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v)
|
|||
|
||||
m_ds->SetupPrim(v, *v);
|
||||
|
||||
m_ds->DrawScanline(p.x + 1, p.x, p.y, *v);
|
||||
m_ds->DrawScanline(1, p.x, p.y, *v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -144,16 +144,10 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
|
||||
if(m_ds->IsEdge())
|
||||
{
|
||||
GSVertexSW dscan;
|
||||
|
||||
dscan.p = GSVector4::zero();
|
||||
dscan.t = GSVector4::zero();
|
||||
dscan.c = GSVector4::zero();
|
||||
|
||||
DrawEdge(v[0], v[1], dv, i, 0);
|
||||
DrawEdge(v[0], v[1], dv, i, 1);
|
||||
|
||||
Flush(v, dscan, true);
|
||||
Flush(v, GSVertexSW::zero(), true);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -170,13 +164,13 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
|
||||
GSVertexSW l, dl;
|
||||
|
||||
l.p = v[0].p.blend8(v[1].p, mask);
|
||||
l.t = v[0].t.blend8(v[1].t, mask);
|
||||
l.c = v[0].c.blend8(v[1].c, mask);
|
||||
l.p = v[0].p.blend32(v[1].p, mask);
|
||||
l.t = v[0].t.blend32(v[1].t, mask);
|
||||
l.c = v[0].c.blend32(v[1].c, mask);
|
||||
|
||||
GSVector4 r;
|
||||
|
||||
r = v[1].p.blend8(v[0].p, mask);
|
||||
r = v[1].p.blend32(v[0].p, mask);
|
||||
|
||||
GSVector4i p(l.p);
|
||||
|
||||
|
@ -216,7 +210,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
|
||||
e->p.i16[0] = (int16)p.x;
|
||||
e->p.i16[1] = (int16)p.y;
|
||||
e->p.i16[2] = (int16)(p.x + 1);
|
||||
e->p.i16[2] = 1;
|
||||
|
||||
e++;
|
||||
}
|
||||
|
@ -231,13 +225,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
|
||||
m_stats.pixels += m_edge.count;
|
||||
|
||||
GSVertexSW dscan;
|
||||
|
||||
dscan.p = GSVector4::zero();
|
||||
dscan.t = GSVector4::zero();
|
||||
dscan.c = GSVector4::zero();
|
||||
|
||||
Flush(v, dscan);
|
||||
Flush(v, GSVertexSW::zero());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -278,8 +266,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
int i = (aabb == bccb).mask() & 7;
|
||||
|
||||
GSVector4 tbf = aabb.xzxz(bccb).ceil();
|
||||
GSVector4 tbmax = tbf.max(m_fscissor.yyyy());
|
||||
GSVector4 tbmin = tbf.min(m_fscissor.wwww());
|
||||
GSVector4 tbmax = tbf.max(m_fscissor.ywyw());
|
||||
GSVector4 tbmin = tbf.min(m_fscissor.ywyw());
|
||||
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin));
|
||||
|
||||
dv[0] = v[1] - v[0];
|
||||
|
@ -335,13 +323,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
DrawEdge(v[0], v[2], dv[1], i & 2, j & 2);
|
||||
DrawEdge(v[1], v[2], dv[2], i & 4, j & 4);
|
||||
|
||||
GSVertexSW dscan;
|
||||
|
||||
dscan.p = GSVector4::zero();
|
||||
dscan.t = GSVector4::zero();
|
||||
dscan.c = GSVector4::zero();
|
||||
|
||||
Flush(v, dscan, true);
|
||||
Flush(v, GSVertexSW::zero(), true);
|
||||
}
|
||||
|
||||
switch(i)
|
||||
|
@ -365,6 +347,10 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
|
||||
if(tb.y < tb.w)
|
||||
{
|
||||
// TODO: j == 1 (x2 < x3 < x0 < x1)
|
||||
// v[3] isn't accurate enough, it may leave gaps horizontally if it happens to be on the left side of the triangle
|
||||
// example: previous triangle's scanline ends on 48.9999, this one's starts from 49.0001, the pixel at 49 isn't drawn
|
||||
|
||||
GSVertexSW l = v[1 + (1 << j)];
|
||||
GSVertexSW dl = ddv[2 - j];
|
||||
|
||||
|
@ -436,8 +422,8 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
if(IsOneOfMyScanlines(top))
|
||||
{
|
||||
GSVector4 lrf = l.p.ceil();
|
||||
GSVector4 lrmax = lrf.max(m_fscissor.xxxx());
|
||||
GSVector4 lrmin = lrf.min(m_fscissor.zzzz());
|
||||
GSVector4 lrmax = lrf.max(m_fscissor.xzxz());
|
||||
GSVector4 lrmin = lrf.min(m_fscissor.xzxz());
|
||||
GSVector4i lr = GSVector4i(lrmax.xxyy(lrmin));
|
||||
|
||||
int left = lr.extract32<0>();
|
||||
|
@ -453,7 +439,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
|
||||
e->p.i16[0] = (int16)left;
|
||||
e->p.i16[1] = (int16)top;
|
||||
e->p.i16[2] = (int16)right;
|
||||
e->p.i16[2] = (int16)pixels;
|
||||
|
||||
e++;
|
||||
}
|
||||
|
@ -473,12 +459,12 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
|
|||
|
||||
GSVector4 mask = (vertices[0].p < vertices[1].p).xyzw(GSVector4::zero());
|
||||
|
||||
v[0].p = vertices[1].p.blend8(vertices[0].p, mask);
|
||||
v[0].t = vertices[1].t.blend8(vertices[0].t, mask);
|
||||
v[0].p = vertices[1].p.blend32(vertices[0].p, mask);
|
||||
v[0].t = vertices[1].t.blend32(vertices[0].t, mask);
|
||||
v[0].c = vertices[1].c;
|
||||
|
||||
v[1].p = vertices[0].p.blend8(vertices[1].p, mask);
|
||||
v[1].t = vertices[0].t.blend8(vertices[1].t, mask);
|
||||
v[1].p = vertices[0].p.blend32(vertices[1].p, mask);
|
||||
v[1].t = vertices[0].t.blend32(vertices[1].t, mask);
|
||||
|
||||
GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
|
||||
|
||||
|
@ -500,18 +486,13 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
|
|||
return;
|
||||
}
|
||||
|
||||
GSVector4 zero = GSVector4::zero();
|
||||
|
||||
GSVertexSW dedge, dscan;
|
||||
|
||||
dedge.p = zero;
|
||||
dscan.p = zero;
|
||||
|
||||
dedge.c = zero;
|
||||
dscan.c = zero;
|
||||
GSVertexSW dedge = GSVertexSW::zero();
|
||||
GSVertexSW dscan = GSVertexSW::zero();
|
||||
|
||||
GSVertexSW dv = v[1] - v[0];
|
||||
|
||||
GSVector4 zero = GSVector4::zero();
|
||||
|
||||
dedge.t = (dv.t / dv.p.yyyy()).xyxy(zero).wyww();
|
||||
dscan.t = (dv.t / dv.p.xxxx()).xyxy(zero).xwww();
|
||||
|
||||
|
@ -526,7 +507,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
|
|||
{
|
||||
m_stats.pixels += r.width();
|
||||
|
||||
m_ds->DrawScanline(r.right, r.left, r.top, scan);
|
||||
m_ds->DrawScanline(r.width(), r.left, r.top, scan);
|
||||
}
|
||||
|
||||
if(++r.top >= r.bottom) break;
|
||||
|
@ -555,7 +536,6 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
{
|
||||
GSVector4 tbmax = lrtb.max(m_fscissor.yyyy());
|
||||
GSVector4 tbmin = lrtb.min(m_fscissor.wwww());
|
||||
|
||||
GSVector4i tb = GSVector4i(tbmax.zwzw(tbmin));
|
||||
|
||||
int top, bottom;
|
||||
|
@ -609,7 +589,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
e->p.i16[0] = (int16)xi;
|
||||
e->p.i16[1] = (int16)top;
|
||||
e->p.i16[2] = (int16)(xi + 1);
|
||||
e->p.i16[2] = 1;
|
||||
|
||||
e++;
|
||||
}
|
||||
|
@ -637,7 +617,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
e->p.i16[0] = (int16)xi;
|
||||
e->p.i16[1] = (int16)top;
|
||||
e->p.i16[2] = (int16)(xi + 1);
|
||||
e->p.i16[2] = 1;
|
||||
|
||||
e++;
|
||||
}
|
||||
|
@ -653,7 +633,6 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
{
|
||||
GSVector4 lrmax = lrtb.max(m_fscissor.xxxx());
|
||||
GSVector4 lrmin = lrtb.min(m_fscissor.zzzz());
|
||||
|
||||
GSVector4i lr = GSVector4i(lrmax.xyxy(lrmin));
|
||||
|
||||
int left, right;
|
||||
|
@ -707,7 +686,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
e->p.i16[0] = (int16)left;
|
||||
e->p.i16[1] = (int16)yi;
|
||||
e->p.i16[2] = (int16)(left + 1);
|
||||
e->p.i16[2] = 1;
|
||||
|
||||
e++;
|
||||
}
|
||||
|
@ -735,7 +714,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
e->p.i16[0] = (int16)left;
|
||||
e->p.i16[1] = (int16)yi;
|
||||
e->p.i16[2] = (int16)(left + 1);
|
||||
e->p.i16[2] = 1;
|
||||
|
||||
e++;
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ class IDrawScanline : public GSAlignedClass<32>
|
|||
{
|
||||
public:
|
||||
typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
typedef void (__fastcall *DrawScanlinePtr)(int right, int left, int top, const GSVertexSW& scan);
|
||||
typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
|
||||
|
||||
protected:
|
||||
|
|
|
@ -35,6 +35,7 @@ GSRenderer::GSRenderer()
|
|||
m_filter = theApp.GetConfig("filter", 1);
|
||||
m_vsync = !!theApp.GetConfig("vsync", 0);
|
||||
m_aa1 = !!theApp.GetConfig("aa1", 0);
|
||||
m_mipmap = !!theApp.GetConfig("mipmap", 1);
|
||||
|
||||
s_n = 0;
|
||||
s_dump = !!theApp.GetConfig("dump", 0);
|
||||
|
@ -513,6 +514,9 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
|
|||
case VK_DELETE:
|
||||
m_aa1 = !m_aa1;
|
||||
return;
|
||||
case VK_INSERT:
|
||||
m_mipmap = !m_mipmap;
|
||||
return;
|
||||
}
|
||||
|
||||
#else
|
||||
|
@ -523,25 +527,23 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
|
|||
}
|
||||
}
|
||||
|
||||
void GSRenderer::GetTextureMinMax(GSVector4i& r, bool linear)
|
||||
void GSRenderer::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear)
|
||||
{
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
int tw = context->TEX0.TW;
|
||||
int th = context->TEX0.TH;
|
||||
int tw = TEX0.TW;
|
||||
int th = TEX0.TH;
|
||||
|
||||
int w = 1 << tw;
|
||||
int h = 1 << th;
|
||||
|
||||
GSVector4i tr(0, 0, w, h);
|
||||
|
||||
int wms = context->CLAMP.WMS;
|
||||
int wmt = context->CLAMP.WMT;
|
||||
int wms = CLAMP.WMS;
|
||||
int wmt = CLAMP.WMT;
|
||||
|
||||
int minu = (int)context->CLAMP.MINU;
|
||||
int minv = (int)context->CLAMP.MINV;
|
||||
int maxu = (int)context->CLAMP.MAXU;
|
||||
int maxv = (int)context->CLAMP.MAXV;
|
||||
int minu = (int)CLAMP.MINU;
|
||||
int minv = (int)CLAMP.MINV;
|
||||
int maxu = (int)CLAMP.MAXU;
|
||||
int maxv = (int)CLAMP.MAXV;
|
||||
|
||||
GSVector4i vr = tr;
|
||||
|
||||
|
@ -619,7 +621,7 @@ void GSRenderer::GetTextureMinMax(GSVector4i& r, bool linear)
|
|||
if(vr.x < uv.x) vr.x = uv.x;
|
||||
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT: // TODO
|
||||
case CLAMP_REGION_REPEAT:
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
|
@ -635,9 +637,7 @@ void GSRenderer::GetTextureMinMax(GSVector4i& r, bool linear)
|
|||
if(vr.y < uv.y) vr.y = uv.y;
|
||||
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT: // TODO
|
||||
//Xenosaga 2 and 3 use it
|
||||
//printf("gsdx: CLAMP_REGION_REPEAT not implemented, please report\n");
|
||||
case CLAMP_REGION_REPEAT:
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
|
@ -791,39 +791,6 @@ bool GSRenderer::TryAlphaTest(uint32& fm, uint32& zm)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool GSRenderer::IsLinear()
|
||||
{
|
||||
const GIFRegTEX1& TEX1 = m_context->TEX1;
|
||||
|
||||
bool mmin = TEX1.IsMinLinear();
|
||||
bool mmag = TEX1.IsMagLinear();
|
||||
|
||||
if(mmag == mmin || TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2
|
||||
{
|
||||
return mmag;
|
||||
}
|
||||
|
||||
// if FST => assume Q = 1.0f (should not, but Q is very often bogus, 0 or DEN)
|
||||
// Fixme : Why should Q be bogus? (it used to be - Gabest)
|
||||
|
||||
if(!TEX1.LCM && !PRIM->FST)
|
||||
{
|
||||
float K = (float)TEX1.K / 16;
|
||||
float f = (float)(1 << TEX1.L) / log(2.0f);
|
||||
|
||||
// TODO: abs(Qmin) may not be <= abs(Qmax), check the sign
|
||||
|
||||
float LODmin = K + log(1.0f / fabs(m_vt.m_max.t.z)) * f;
|
||||
float LODmax = K + log(1.0f / fabs(m_vt.m_min.t.z)) * f;
|
||||
|
||||
return LODmax <= 0 ? mmag : LODmin > 0 ? mmin : mmag || mmin;
|
||||
}
|
||||
else
|
||||
{
|
||||
return TEX1.K <= 0 ? mmag : TEX1.K > 0 ? mmin : mmag || mmin;
|
||||
}
|
||||
}
|
||||
|
||||
bool GSRenderer::IsOpaque()
|
||||
{
|
||||
if(PRIM->AA1)
|
||||
|
|
|
@ -43,6 +43,7 @@ protected:
|
|||
int m_filter;
|
||||
bool m_vsync;
|
||||
bool m_aa1;
|
||||
bool m_mipmap;
|
||||
bool m_framelimit;
|
||||
|
||||
virtual GSTexture* GetOutput(int i) = 0;
|
||||
|
@ -51,10 +52,9 @@ protected:
|
|||
|
||||
// following functions need m_vt to be initialized
|
||||
|
||||
void GetTextureMinMax(GSVector4i& r, bool linear);
|
||||
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
|
||||
void GetAlphaMinMax();
|
||||
bool TryAlphaTest(uint32& fm, uint32& zm);
|
||||
bool IsLinear();
|
||||
bool IsOpaque();
|
||||
|
||||
public:
|
||||
|
|
|
@ -296,7 +296,7 @@ public:
|
|||
ps_sel.aem = env.TEXA.AEM;
|
||||
ps_sel.tfx = context->TEX0.TFX;
|
||||
ps_sel.tcc = context->TEX0.TCC;
|
||||
ps_sel.ltf = m_filter == 2 ? IsLinear() : m_filter;
|
||||
ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
|
||||
ps_sel.rt = tex->m_target;
|
||||
|
||||
int w = tex->m_texture->GetWidth();
|
||||
|
|
|
@ -566,7 +566,7 @@ protected:
|
|||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, IsLinear());
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
|
||||
|
||||
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
|
||||
|
||||
|
|
|
@ -102,7 +102,7 @@ GSTexture* GSRendererSW::GetOutput(int i)
|
|||
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[DISPFB.PSM];
|
||||
|
||||
(m_mem.*psm.rtx)(m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), r.ralign<GSVector4i::Outside>(psm.bs), m_output, pitch, m_env.TEXA);
|
||||
(m_mem.*psm.rtx)(m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), r.ralign<Align_Outside>(psm.bs), m_output, pitch, m_env.TEXA);
|
||||
|
||||
m_texture[i]->Update(r, m_output, pitch);
|
||||
|
||||
|
@ -136,7 +136,7 @@ void GSRendererSW::Draw()
|
|||
return;
|
||||
}
|
||||
|
||||
if(s_dump)
|
||||
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
|
@ -204,7 +204,7 @@ void GSRendererSW::Draw()
|
|||
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
|
||||
|
||||
if(s_dump)
|
||||
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
|
@ -324,7 +324,7 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
gd.sel.tfx = context->TEX0.TFX;
|
||||
gd.sel.tcc = context->TEX0.TCC;
|
||||
gd.sel.fst = PRIM->FST;
|
||||
gd.sel.ltf = IsLinear();
|
||||
gd.sel.ltf = m_vt.IsLinear();
|
||||
gd.sel.tlu = GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0;
|
||||
gd.sel.wms = context->CLAMP.WMS;
|
||||
gd.sel.wmt = context->CLAMP.WMT;
|
||||
|
@ -370,6 +370,134 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
}
|
||||
}
|
||||
|
||||
GIFRegTEX0 MIP_TEX0 = context->TEX0;
|
||||
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
|
||||
|
||||
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
|
||||
{
|
||||
int level = (int)(m_vt.m_lod.x + 0.5f);
|
||||
|
||||
// FIXME: onimusa 3
|
||||
|
||||
level = std::min<int>(level, context->TEX1.MXL);
|
||||
level = std::min<int>(level, 6);
|
||||
|
||||
if(level > 0)
|
||||
{
|
||||
// printf("lvl %d\n", level);
|
||||
|
||||
switch(level)
|
||||
{
|
||||
case 1:
|
||||
MIP_TEX0.TBP0 = context->MIPTBP1.TBP1;
|
||||
MIP_TEX0.TBW = context->MIPTBP1.TBW1;
|
||||
break;
|
||||
case 2:
|
||||
MIP_TEX0.TBP0 = context->MIPTBP1.TBP2;
|
||||
MIP_TEX0.TBW = context->MIPTBP1.TBW2;
|
||||
break;
|
||||
case 3:
|
||||
MIP_TEX0.TBP0 = context->MIPTBP1.TBP3;
|
||||
MIP_TEX0.TBW = context->MIPTBP1.TBW3;
|
||||
break;
|
||||
case 4:
|
||||
MIP_TEX0.TBP0 = context->MIPTBP2.TBP4;
|
||||
MIP_TEX0.TBW = context->MIPTBP2.TBW4;
|
||||
break;
|
||||
case 5:
|
||||
MIP_TEX0.TBP0 = context->MIPTBP2.TBP5;
|
||||
MIP_TEX0.TBW = context->MIPTBP2.TBW5;
|
||||
break;
|
||||
case 6:
|
||||
MIP_TEX0.TBP0 = context->MIPTBP2.TBP6;
|
||||
MIP_TEX0.TBW = context->MIPTBP2.TBW6;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
ASSERT(MIP_TEX0.TBP0 != 0 && MIP_TEX0.TBW != 0);
|
||||
|
||||
int tw = (int)MIP_TEX0.TW - level;
|
||||
int th = (int)MIP_TEX0.TH - level;
|
||||
|
||||
switch(context->TEX1.MMIN)
|
||||
{
|
||||
case 2: case 3: // point (min size 1)
|
||||
tw = std::max<int>(tw, 0);
|
||||
th = std::max<int>(th, 0);
|
||||
break;
|
||||
case 4: case 5: // linear (min size 8)
|
||||
tw = std::max<int>(tw, 3);
|
||||
th = std::max<int>(th, 3);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
// scale down the texture coordinates, including vertex trace
|
||||
|
||||
GSVector4 scale = GSVector4(1.0f) / GSVector4(1 << ((int)MIP_TEX0.TW - tw), 1 << ((int)MIP_TEX0.TH - th), 1, 1);
|
||||
|
||||
GSVertexSW* v = m_vertices;
|
||||
|
||||
for(int i = 0, j = m_count; i < j; i++)
|
||||
{
|
||||
v[i].t *= scale;
|
||||
}
|
||||
|
||||
m_vt.m_min.t *= scale;
|
||||
m_vt.m_max.t *= scale;
|
||||
|
||||
MIP_TEX0.TW = (uint32)tw;
|
||||
MIP_TEX0.TH = (uint32)th;
|
||||
|
||||
// this shift is done even for repeat modes
|
||||
|
||||
MIP_CLAMP.MINU >>= level;
|
||||
MIP_CLAMP.MAXU >>= level;
|
||||
MIP_CLAMP.MINV >>= level;
|
||||
MIP_CLAMP.MAXV >>= level;
|
||||
/*
|
||||
printf("%d%d%d%d%d L %d K %03x %.2f lod %.2f %.2f q %f %f\n",
|
||||
m_context->TEX1.MXL,
|
||||
m_context->TEX1.MMAG,
|
||||
m_context->TEX1.MMIN,
|
||||
PRIM->FST,
|
||||
m_context->TEX1.LCM,
|
||||
m_context->TEX1.L,
|
||||
m_context->TEX1.K,
|
||||
(float)m_context->TEX1.K / 16,
|
||||
m_context->TEX1.MXL > 0 ? m_vt.m_lod.x : 0,
|
||||
m_context->TEX1.MXL > 0 ? m_vt.m_lod.y : 0,
|
||||
1.0f / m_vt.m_min.t.z,
|
||||
1.0f / m_vt.m_max.t.z);
|
||||
*/
|
||||
if(s_dump)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
string s;
|
||||
|
||||
if(s_save && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp1\\_%05d_f%lld_tex_%05x_%d_(%d%d%d%d%d %.2f %.2f).bmp",
|
||||
s_n, frame, (int)MIP_TEX0.TBP0, (int)MIP_TEX0.PSM,
|
||||
m_context->TEX1.MXL,
|
||||
m_context->TEX1.MMAG,
|
||||
m_vt.m_filter.mmag,
|
||||
m_context->TEX1.MMIN,
|
||||
m_vt.m_filter.mmin,
|
||||
m_context->TEX1.MXL > 0 ? m_vt.m_lod.x : 0,
|
||||
m_context->TEX1.MXL > 0 ? m_vt.m_lod.y : 0
|
||||
);
|
||||
|
||||
m_mem.SaveBMP(s, MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM, 1 << MIP_TEX0.TW, 1 << MIP_TEX0.TH);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(gd.sel.ltf)
|
||||
{
|
||||
if(gd.sel.fst)
|
||||
|
@ -389,9 +517,9 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, gd.sel.ltf);
|
||||
GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf);
|
||||
|
||||
const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, r);
|
||||
const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, r);
|
||||
|
||||
if(!t) {ASSERT(0); return;}
|
||||
|
||||
|
@ -400,10 +528,10 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
gd.sel.tw = t->m_tw - 3;
|
||||
|
||||
uint16 tw = (uint16)(1 << context->TEX0.TW);
|
||||
uint16 th = (uint16)(1 << context->TEX0.TH);
|
||||
uint16 tw = (uint16)(1 << MIP_TEX0.TW);
|
||||
uint16 th = (uint16)(1 << MIP_TEX0.TH);
|
||||
|
||||
switch(context->CLAMP.WMS)
|
||||
switch(MIP_CLAMP.WMS)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
gd.t.min.u16[0] = tw - 1;
|
||||
|
@ -416,20 +544,20 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
gd.t.mask.u32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
gd.t.min.u16[0] = std::min<int>(context->CLAMP.MINU, tw - 1);
|
||||
gd.t.max.u16[0] = std::min<int>(context->CLAMP.MAXU, tw - 1);
|
||||
gd.t.min.u16[0] = std::min<int>(MIP_CLAMP.MINU, tw - 1);
|
||||
gd.t.max.u16[0] = std::min<int>(MIP_CLAMP.MAXU, tw - 1);
|
||||
gd.t.mask.u32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
gd.t.min.u16[0] = context->CLAMP.MINU;
|
||||
gd.t.max.u16[0] = context->CLAMP.MAXU;
|
||||
gd.t.min.u16[0] = MIP_CLAMP.MINU;
|
||||
gd.t.max.u16[0] = MIP_CLAMP.MAXU;
|
||||
gd.t.mask.u32[0] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
switch(context->CLAMP.WMT)
|
||||
switch(MIP_CLAMP.WMT)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
gd.t.min.u16[4] = th - 1;
|
||||
|
@ -442,13 +570,13 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
gd.t.mask.u32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
gd.t.min.u16[4] = std::min<int>(context->CLAMP.MINV, th - 1);
|
||||
gd.t.max.u16[4] = std::min<int>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
|
||||
gd.t.min.u16[4] = std::min<int>(MIP_CLAMP.MINV, th - 1);
|
||||
gd.t.max.u16[4] = std::min<int>(MIP_CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
|
||||
gd.t.mask.u32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
gd.t.min.u16[4] = context->CLAMP.MINV;
|
||||
gd.t.max.u16[4] = context->CLAMP.MAXV;
|
||||
gd.t.min.u16[4] = MIP_CLAMP.MINV;
|
||||
gd.t.max.u16[4] = MIP_CLAMP.MAXV;
|
||||
gd.t.mask.u32[2] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -587,6 +587,47 @@ template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* r)
|
|||
if(TEX0.TH > 10) TEX0.TH = 10;
|
||||
|
||||
ApplyTEX0(i, TEX0);
|
||||
|
||||
if(m_env.CTXT[i].TEX1.MTBA)
|
||||
{
|
||||
uint32 bpp = GSLocalMemory::m_psm[TEX0.PSM].bpp;
|
||||
|
||||
uint32 tbp = TEX0.TBP0;
|
||||
uint32 tbw = TEX0.TBW;
|
||||
uint32 th = TEX0.TH;
|
||||
|
||||
if(th >= 3)
|
||||
{
|
||||
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
|
||||
tbw = std::max<uint32>(tbw >> 1, 1);
|
||||
th--;
|
||||
|
||||
m_env.CTXT[i].MIPTBP1.TBP1 = tbp;
|
||||
m_env.CTXT[i].MIPTBP1.TBW1 = tbw;
|
||||
|
||||
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
|
||||
tbw = std::max<uint32>(tbw >> 1, 1);
|
||||
th--;
|
||||
|
||||
m_env.CTXT[i].MIPTBP1.TBP2 = tbp;
|
||||
m_env.CTXT[i].MIPTBP1.TBW2 = tbw;
|
||||
|
||||
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
|
||||
tbw = std::max<uint32>(tbw >> 1, 1);
|
||||
th--;
|
||||
|
||||
m_env.CTXT[i].MIPTBP1.TBP3 = tbp;
|
||||
m_env.CTXT[i].MIPTBP1.TBW3 = tbw;
|
||||
|
||||
// NOTE: TEX1.MXL must not be automatically set to 3 here
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
// printf("MTBA\n");
|
||||
}
|
||||
}
|
||||
|
||||
template<int i> void GSState::GIFRegHandlerCLAMP(const GIFReg* r)
|
||||
|
|
|
@ -292,7 +292,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec
|
|||
|
||||
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
|
||||
|
||||
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
|
||||
GSVector4i r = rect.ralign<Align_Outside>(bs);
|
||||
|
||||
if(!target)
|
||||
{
|
||||
|
@ -881,7 +881,7 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
|||
int tw = std::max<int>(1 << m_TEX0.TW, bs.x);
|
||||
int th = std::max<int>(1 << m_TEX0.TH, bs.y);
|
||||
|
||||
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
|
||||
GSVector4i r = rect.ralign<Align_Outside>(bs);
|
||||
|
||||
if(r.eq(GSVector4i(0, 0, tw, th)))
|
||||
{
|
||||
|
|
|
@ -66,12 +66,12 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
|
|||
|
||||
if(t == NULL)
|
||||
{
|
||||
t = new GSTexture(m_state);
|
||||
const GSOffset* o = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||
|
||||
t = new GSTexture(m_state, o);
|
||||
|
||||
m_textures.insert(t);
|
||||
|
||||
const GSOffset* o = m_state->m_context->offset.tex;
|
||||
|
||||
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
|
||||
|
||||
int tw = 1 << TEX0.TW;
|
||||
|
@ -132,7 +132,7 @@ void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& r
|
|||
|
||||
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
|
||||
|
||||
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
|
||||
GSVector4i r = rect.ralign<Align_Outside>(bs);
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
{
|
||||
|
@ -209,8 +209,9 @@ void GSTextureCacheSW::IncAge()
|
|||
|
||||
//
|
||||
|
||||
GSTextureCacheSW::GSTexture::GSTexture(GSState* state)
|
||||
GSTextureCacheSW::GSTexture::GSTexture(GSState* state, const GSOffset* offset)
|
||||
: m_state(state)
|
||||
, m_offset(offset)
|
||||
, m_buff(NULL)
|
||||
, m_tw(0)
|
||||
, m_age(0)
|
||||
|
@ -241,10 +242,25 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
|||
|
||||
GSVector2i bs = psm.bs;
|
||||
|
||||
int shift = psm.pal == 0 ? 2 : 0;
|
||||
|
||||
int tw = std::max<int>(1 << TEX0.TW, bs.x);
|
||||
int th = std::max<int>(1 << TEX0.TH, bs.y);
|
||||
|
||||
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
|
||||
GSVector4i r = rect;
|
||||
|
||||
bool repeating = m_TEX0.IsRepeating();
|
||||
|
||||
if(m_TEX0.TBW == 1) // repeating)
|
||||
{
|
||||
// FIXME:
|
||||
// - marking a block prevents fetching it again to a different part of the texture
|
||||
// - only a real issue for TBW = 1 mipmap levels, where the repeating part is below and often exploited
|
||||
|
||||
r = GSVector4i(0, 0, tw, th);
|
||||
}
|
||||
|
||||
r = r.ralign<Align_Outside>(bs);
|
||||
|
||||
if(r.eq(GSVector4i(0, 0, tw, th)))
|
||||
{
|
||||
|
@ -260,20 +276,20 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
|||
return false;
|
||||
}
|
||||
|
||||
m_tw = std::max<int>(TEX0.TW, psm.pal > 0 ? 5 : 3); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
|
||||
#ifdef DEBUG
|
||||
for(uint32 i = 0, j = tw * th * sizeof(uint8); i < j; i++) ((uint8*)m_buff)[i] = 0xff;
|
||||
#endif
|
||||
|
||||
m_tw = std::max<int>(TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
|
||||
}
|
||||
|
||||
GSLocalMemory& mem = m_state->m_mem;
|
||||
|
||||
const GSOffset* o = m_state->m_context->offset.tex;
|
||||
|
||||
bool repeating = m_TEX0.IsRepeating();
|
||||
const GSOffset* RESTRICT o = m_offset;
|
||||
|
||||
uint32 blocks = 0;
|
||||
|
||||
GSLocalMemory::readTextureBlock rtxb = psm.rtxbP;
|
||||
|
||||
int shift = psm.pal == 0 ? 2 : 0;
|
||||
GSLocalMemory::readTextureBlock rtxbP = psm.rtxbP;
|
||||
|
||||
uint32 pitch = (1 << m_tw) << shift;
|
||||
|
||||
|
@ -299,7 +315,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
|||
m_valid[row] |= col;
|
||||
}
|
||||
|
||||
(mem.*rtxb)(block, &dst[x << shift], pitch, TEXA);
|
||||
(mem.*rtxbP)(block, &dst[x << shift], pitch, TEXA);
|
||||
|
||||
blocks++;
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ public:
|
|||
{
|
||||
public:
|
||||
GSState* m_state;
|
||||
const GSOffset* m_offset;
|
||||
GIFRegTEX0 m_TEX0;
|
||||
GIFRegTEXA m_TEXA;
|
||||
void* m_buff;
|
||||
|
@ -38,7 +39,7 @@ public:
|
|||
uint32 m_age;
|
||||
bool m_complete;
|
||||
|
||||
explicit GSTexture(GSState* state);
|
||||
explicit GSTexture(GSState* state, const GSOffset* offset);
|
||||
virtual ~GSTexture();
|
||||
|
||||
bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -30,13 +30,73 @@ __aligned(struct, 16) GSVertexSW
|
|||
__forceinline GSVertexSW() {}
|
||||
__forceinline GSVertexSW(const GSVertexSW& v) {*this = v;}
|
||||
|
||||
__forceinline void operator = (const GSVertexSW& v) {c = v.c; p = v.p; t = v.t;}
|
||||
__forceinline void operator += (const GSVertexSW& v) {c += v.c; p += v.p; t += v.t;}
|
||||
__forceinline static GSVertexSW zero()
|
||||
{
|
||||
GSVertexSW v;
|
||||
|
||||
friend GSVertexSW operator + (const GSVertexSW& v1, const GSVertexSW& v2);
|
||||
friend GSVertexSW operator - (const GSVertexSW& v1, const GSVertexSW& v2);
|
||||
friend GSVertexSW operator * (const GSVertexSW& v, const GSVector4& vv);
|
||||
friend GSVertexSW operator / (const GSVertexSW& v, const GSVector4& vv);
|
||||
v.p = GSVector4::zero();
|
||||
v.t = GSVector4::zero();
|
||||
v.c = GSVector4::zero();
|
||||
|
||||
return v;
|
||||
}
|
||||
// Member-wise assignment: copies the c, t and p members of v into this vertex.
// Returns void, so assignments cannot be chained.
__forceinline void operator = (const GSVertexSW& v)
{
	c = v.c;
	t = v.t;
	p = v.p;
}
|
||||
|
||||
// In-place accumulation: adds v's c, t and p members to the matching members
// of this vertex. Returns void.
__forceinline void operator += (const GSVertexSW& v)
{
	c += v.c;
	t += v.t;
	p += v.p;
}
|
||||
|
||||
// Member-wise sum of two vertices: p, t and c are each added independently.
__forceinline friend GSVertexSW operator + (const GSVertexSW& a, const GSVertexSW& b)
{
	GSVertexSW sum;

	sum.c = a.c + b.c;
	sum.t = a.t + b.t;
	sum.p = a.p + b.p;

	return sum;
}
|
||||
|
||||
// Member-wise difference of two vertices (a - b) over p, t and c.
__forceinline friend GSVertexSW operator - (const GSVertexSW& a, const GSVertexSW& b)
{
	GSVertexSW diff;

	diff.c = a.c - b.c;
	diff.t = a.t - b.t;
	diff.p = a.p - b.p;

	return diff;
}
|
||||
|
||||
// Multiplies each of the p, t and c members of a by the same vector b.
__forceinline friend GSVertexSW operator * (const GSVertexSW& a, const GSVector4& b)
{
	GSVertexSW scaled;

	scaled.c = a.c * b;
	scaled.t = a.t * b;
	scaled.p = a.p * b;

	return scaled;
}
|
||||
|
||||
// Divides each of the p, t and c members of a by the same vector b.
__forceinline friend GSVertexSW operator / (const GSVertexSW& a, const GSVector4& b)
{
	GSVertexSW quotient;

	quotient.c = a.c / b;
	quotient.t = a.t / b;
	quotient.p = a.p / b;

	return quotient;
}
|
||||
|
||||
static bool IsQuad(const GSVertexSW* v, int& tl, int& br)
|
||||
{
|
||||
|
@ -122,6 +182,25 @@ __aligned(struct, 16) GSVertexSW
|
|||
|
||||
br = i;
|
||||
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
{
|
||||
// p.z, p.w, t.z, t.w, c.x, c.y, c.z, c.w
|
||||
|
||||
GSVector8 v0 = GSVector8(v[0].p.zwzw(v[0].t), v[0].c);
|
||||
GSVector8 v1 = GSVector8(v[1].p.zwzw(v[1].t), v[1].c);
|
||||
GSVector8 v2 = GSVector8(v[2].p.zwzw(v[2].t), v[2].c);
|
||||
GSVector8 v3 = GSVector8(v[3].p.zwzw(v[3].t), v[3].c);
|
||||
GSVector8 v4 = GSVector8(v[4].p.zwzw(v[4].t), v[4].c);
|
||||
GSVector8 v5 = GSVector8(v[5].p.zwzw(v[5].t), v[5].c);
|
||||
|
||||
GSVector8 test = ((v0 == v1) & (v0 == v2)) & ((v0 == v3) & (v0 == v4)) & (v0 == v5);
|
||||
|
||||
return test.alltrue();
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
v0 = v[0].p.zwzw(v[0].t);
|
||||
v1 = v[1].p.zwzw(v[1].t);
|
||||
v2 = v[2].p.zwzw(v[2].t);
|
||||
|
@ -151,42 +230,7 @@ __aligned(struct, 16) GSVertexSW
|
|||
}
|
||||
|
||||
return true;
|
||||
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
__forceinline GSVertexSW operator + (const GSVertexSW& v1, const GSVertexSW& v2)
|
||||
{
|
||||
GSVertexSW v0;
|
||||
v0.c = v1.c + v2.c;
|
||||
v0.p = v1.p + v2.p;
|
||||
v0.t = v1.t + v2.t;
|
||||
return v0;
|
||||
}
|
||||
|
||||
__forceinline GSVertexSW operator - (const GSVertexSW& v1, const GSVertexSW& v2)
|
||||
{
|
||||
GSVertexSW v0;
|
||||
v0.c = v1.c - v2.c;
|
||||
v0.p = v1.p - v2.p;
|
||||
v0.t = v1.t - v2.t;
|
||||
return v0;
|
||||
}
|
||||
|
||||
__forceinline GSVertexSW operator * (const GSVertexSW& v, const GSVector4& vv)
|
||||
{
|
||||
GSVertexSW v0;
|
||||
v0.c = v.c * vv;
|
||||
v0.p = v.p * vv;
|
||||
v0.t = v.t * vv;
|
||||
return v0;
|
||||
}
|
||||
|
||||
__forceinline GSVertexSW operator / (const GSVertexSW& v, const GSVector4& vv)
|
||||
{
|
||||
GSVertexSW v0;
|
||||
v0.c = v.c / vv;
|
||||
v0.p = v.p / vv;
|
||||
v0.t = v.t / vv;
|
||||
return v0;
|
||||
}
|
||||
|
||||
};
|
|
@ -48,6 +48,52 @@ uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
|
|||
return hash;
|
||||
}
|
||||
|
||||
void GSVertexTrace::UpdateLOD()
|
||||
{
|
||||
if(!m_state->PRIM->TME) return;
|
||||
|
||||
const GIFRegTEX1& TEX1 = m_state->m_context->TEX1;
|
||||
|
||||
m_filter.mmag = TEX1.IsMagLinear();
|
||||
m_filter.mmin = TEX1.IsMinLinear();
|
||||
|
||||
if(TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2
|
||||
{
|
||||
m_filter.linear = m_filter.mmag;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
float K = (float)TEX1.K / 16;
|
||||
|
||||
if(TEX1.LCM == 0) // && m_state->PRIM->FST == 0 // if FST => assume Q = 1.0f (should not, but Q is very often bogus, 0 or DEN)
|
||||
{
|
||||
// LOD = log2(1/|Q|) * (1 << L) + K
|
||||
|
||||
GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(2).neg() * (float)(1 << TEX1.L) + K);
|
||||
|
||||
if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.x; m_lod.y = tmp;}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_lod.x = K;
|
||||
m_lod.y = K;
|
||||
}
|
||||
|
||||
if(m_lod.y <= 0)
|
||||
{
|
||||
m_filter.linear = m_filter.mmag;
|
||||
}
|
||||
else if(m_lod.x > 0)
|
||||
{
|
||||
m_filter.linear = m_filter.mmin;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_filter.linear = m_filter.mmag | m_filter.mmin;
|
||||
}
|
||||
}
|
||||
|
||||
void GSVertexTrace::Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_map_sw[Hash(primclass)](count, v, m_min, m_max);
|
||||
|
@ -55,6 +101,8 @@ void GSVertexTrace::Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primcla
|
|||
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
||||
m_alpha.valid = false;
|
||||
|
||||
UpdateLOD();
|
||||
}
|
||||
|
||||
void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass)
|
||||
|
@ -87,6 +135,8 @@ void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primcl
|
|||
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
||||
m_alpha.valid = false;
|
||||
|
||||
UpdateLOD();
|
||||
}
|
||||
|
||||
void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass)
|
||||
|
@ -119,4 +169,7 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc
|
|||
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
||||
m_alpha.valid = false;
|
||||
|
||||
UpdateLOD();
|
||||
}
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ class GSState;
|
|||
__aligned(class, 32) GSVertexTrace
|
||||
{
|
||||
public:
|
||||
struct Vertex {GSVector4i c; GSVector4 p, t;};
|
||||
struct Vertex {GSVector4i c; GSVector4 p, t;}; // t.xy * 0x10000
|
||||
struct VertexAlpha {int min, max; bool valid;};
|
||||
|
||||
private:
|
||||
|
@ -60,16 +60,23 @@ private:
|
|||
GSCodeGeneratorFunctionMap<CGHW9, uint32, VertexTracePtr> m_map_hw9;
|
||||
GSCodeGeneratorFunctionMap<CGHW11, uint32, VertexTracePtr> m_map_hw11;
|
||||
|
||||
const GSState* m_state;
|
||||
|
||||
uint32 Hash(GS_PRIM_CLASS primclass);
|
||||
|
||||
const GSState* m_state;
|
||||
void UpdateLOD();
|
||||
|
||||
static const GSVector4 s_minmax;
|
||||
|
||||
public:
|
||||
GS_PRIM_CLASS m_primclass;
|
||||
Vertex m_min, m_max; // t.xy * 0x10000
|
||||
VertexAlpha m_alpha; // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
|
||||
|
||||
Vertex m_min;
|
||||
Vertex m_max;
|
||||
|
||||
// source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
|
||||
|
||||
VertexAlpha m_alpha;
|
||||
|
||||
union
|
||||
{
|
||||
|
@ -78,10 +85,19 @@ public:
|
|||
struct {uint32 rgba:16, xyzf:4, stq:4;};
|
||||
} m_eq;
|
||||
|
||||
union
|
||||
{
|
||||
struct {uint32 mmag:1, mmin:1, linear:1;};
|
||||
} m_filter;
|
||||
|
||||
GSVector2 m_lod; // x = min, y = max
|
||||
|
||||
GSVertexTrace(const GSState* state);
|
||||
|
||||
void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass);
|
||||
void Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass);
|
||||
void Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass);
|
||||
// Empty overload for GSVertexNull input: the body does nothing, so no trace state is updated.
void Update(const GSVertexNull* v, int count, GS_PRIM_CLASS primclass) {}
|
||||
|
||||
// Returns the m_filter.linear bit (the filtering decision stored by UpdateLOD()).
bool IsLinear() const {return m_filter.linear;}
|
||||
};
|
||||
|
|
|
@ -107,7 +107,7 @@ using namespace stdext;
|
|||
#define __aligned(t, n) t __attribute__((aligned(n)))
|
||||
#define __fastcall __attribute__((fastcall))
|
||||
|
||||
#define EXPORT_C_(type) extern "C" type
|
||||
#define EXPORT_C_(type) extern "C" __attribute__((stdcall,externally_visible,visibility("default"))) type
|
||||
#define EXPORT_C EXPORT_C_(void)
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
|
Loading…
Reference in New Issue