GSdx: Found where the bypassed int z was destroyed. (bug appeared in r4967)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4975 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-12-01 17:08:10 +00:00
parent 9d54677055
commit f1537f53a2
6 changed files with 429 additions and 43 deletions

View File

@ -90,17 +90,406 @@ void GPUDrawScanline::PrintStats()
void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
{
// TODO
GPUScanlineSelector sel = m_global.sel;
const GSVector4* shift = GPUSetupPrimCodeGenerator::m_shift;
if(sel.tme && !sel.twin)
{
if(sel.sprite)
{
GSVector4i t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
t = t.ps32(t);
t = t.upl16(t);
m_local.twin[2].u = t.xxxx();
m_local.twin[2].v = t.yyyy();
}
else
{
// TODO: not really needed
m_local.twin[2].u = GSVector4i::x00ff();
m_local.twin[2].v = GSVector4i::x00ff();
}
}
if(sel.tme || sel.iip && sel.tfx != 3)
{
GSVector4 dt = dscan.t;
GSVector4 dc = dscan.c;
GSVector4i dtc8 = GSVector4i(dt * shift[0]).ps32(GSVector4i(dc * shift[0]));
if(sel.tme)
{
m_local.d8.st = dtc8.upl16(dtc8);
}
if(sel.iip && sel.tfx != 3)
{
m_local.d8.c = dtc8.uph16(dtc8);
}
if(sel.tme)
{
GSVector4 dtx = dt.xxxx();
GSVector4 dty = dt.yyyy();
m_local.d.s = GSVector4i(dtx * shift[1]).ps32(GSVector4i(dtx * shift[2]));
m_local.d.t = GSVector4i(dty * shift[1]).ps32(GSVector4i(dty * shift[2]));
}
if(sel.iip && sel.tfx != 3)
{
GSVector4 dcx = dc.xxxx();
GSVector4 dcy = dc.yyyy();
GSVector4 dcz = dc.zzzz();
m_local.d.r = GSVector4i(dcx * shift[1]).ps32(GSVector4i(dcx * shift[2]));
m_local.d.g = GSVector4i(dcy * shift[1]).ps32(GSVector4i(dcy * shift[2]));
m_local.d.b = GSVector4i(dcz * shift[1]).ps32(GSVector4i(dcz * shift[2]));
}
}
}
// C++ version of the scanline routine: renders one horizontal run into the 16-bit
// frame buffer at m_global.vm, advancing 8 uint16 elements per loop iteration.
//
// pixels : number of pixels in the run; the last iteration may be partial and is
//          masked through the m_test table.
// left   : x position of the first pixel (added to the frame buffer pointer).
// top    : y position; the row offset is top << (10 + sel.scalex).
// scan   : starting interpolants; scan.t (texture coords) and scan.c (colour) are read.
//
// Per-primitive increments (m_local.d, m_local.d8, m_local.twin) are read from
// m_local; in this file they are written by SetupPrim.
void GPUDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan)
{
// TODO
// TODO: not tested yet, probably bogus
GPUScanlineSelector sel = m_global.sel;
// s/t: current texture coordinates, uf/vf: bilinear weights, rf/gf/bf: current vertex colour.
GSVector4i s, t;
GSVector4i uf, vf;
GSVector4i rf, gf, bf;
GSVector4i dither;
// Init
uint16* fb = (uint16*)m_global.vm + (top << (10 + sel.scalex)) + left;
// steps is the number of pixels left after the current group of 8; it is <= 0 on the last group.
int steps = pixels - 8;
if(sel.dtd)
{
// Dither row/column selected by the low two bits of top and left.
dither = GSVector4i::load<false>(&GPUDrawScanlineCodeGenerator::m_dither[top & 3][left & 3]);
}
if(sel.tme)
{
GSVector4i vt = GSVector4i(scan.t).xxzzl();
s = vt.xxxx().add16(m_local.d.s);
t = vt.yyyy();
if(!sel.sprite)
{
t = t.add16(m_local.d.t);
}
else
{
if(sel.ltf)
{
// NOTE(review): this value of vf is never read — inside the loop the sel.ltf branch
// of SampleTexture reassigns vf before its first use. Confirm whether sprites were
// meant to keep this row-constant weight.
vf = t.sll16(1).srl16(1);
}
}
}
if(sel.tfx != 3)
{
GSVector4i vc = GSVector4i(scan.c).xxzzlh();
rf = vc.xxxx();
gf = vc.yyyy();
bf = vc.zzzz();
if(sel.iip)
{
rf = rf.add16(m_local.d.r);
gf = gf.add16(m_local.d.g);
bf = bf.add16(m_local.d.b);
}
}
// Main loop: one iteration per group. The inner do { } while(0) only exists so that
// `continue` can skip the rest of the pixel pipeline and fall through to the stepping code.
while(1)
{
do
{
// steps >> 31 is all ones only for negative steps (assumes 32-bit int and arithmetic shift),
// so the index is 7 + steps for a partial last group and 7 otherwise.
GSVector4i test = GPUDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
// Destination pixels; a set bit in `test` keeps the destination value (see WriteFrame).
GSVector4i fd = GSVector4i::load(fb, fb + 8);
// r/g/b/a are only assigned by the stages below that are enabled in sel.
GSVector4i r, g, b, a;
// TestMask
if(sel.me)
{
// Bit 15 of each destination pixel is spread over the whole 16-bit lane and OR-ed into
// the mask (assuming sra16 is a per-lane arithmetic shift).
test |= fd.sra16(15);
if(test.alltrue()) continue;
}
// SampleTexture
if(sel.tme)
{
GSVector4i u0, v0, u1, v1;
GSVector4i addr00, addr01, addr10, addr11;
GSVector4i c00, c01, c10, c11;
if(sel.ltf)
{
// Coordinates are treated as 8.8 fixed point here: 0x20 / 0x100 = 0.125.
u0 = s.sub16(GSVector4i(0x00200020)); // - 0.125f
v0 = t.sub16(GSVector4i(0x00200020)); // - 0.125f
// Low 8 bits (the fraction) moved to bits 7..14 to serve as the lerp weight.
uf = u0.sll16(8).srl16(1);
vf = v0.sll16(8).srl16(1);;
}
else
{
u0 = s;
v0 = t;
}
// Integer texel coordinates (upper 8 bits of each 16-bit lane).
u0 = u0.srl16(8);
v0 = v0.srl16(8);
if(sel.ltf)
{
// Bilinear path: fetch the 2x2 neighbourhood (u0,v0)..(u1,v1).
u1 = u0.add16(GSVector4i::x0001());
v1 = v0.add16(GSVector4i::x0001());
if(sel.twin)
{
// Texture window: mask with twin[0], then offset by twin[1].
u0 = (u0 & m_local.twin[0].u).add16(m_local.twin[1].u);
v0 = (v0 & m_local.twin[0].v).add16(m_local.twin[1].v);
u1 = (u1 & m_local.twin[0].u).add16(m_local.twin[1].u);
v1 = (v1 & m_local.twin[0].v).add16(m_local.twin[1].v);
}
else
{
// No window: clamp to the limits stored in twin[2].
u0 = u0.min_i16(m_local.twin[2].u);
v0 = v0.min_i16(m_local.twin[2].v);
u1 = u1.min_i16(m_local.twin[2].u);
v1 = v1.min_i16(m_local.twin[2].v);
}
// Texel address = (v << 8) | u.
addr00 = v0.sll16(8) | u0;
addr01 = v0.sll16(8) | u1;
addr10 = v1.sll16(8) | u0;
addr11 = v1.sll16(8) | u1;
// TODO
if(sel.tlu)
{
// Palettised: the gather is additionally given m_global.clut.
c00 = addr00.gather16_16((const uint16*)m_global.vm, m_global.clut);
c01 = addr01.gather16_16((const uint16*)m_global.vm, m_global.clut);
c10 = addr10.gather16_16((const uint16*)m_global.vm, m_global.clut);
c11 = addr11.gather16_16((const uint16*)m_global.vm, m_global.clut);
}
else
{
c00 = addr00.gather16_16((const uint16*)m_global.vm);
c01 = addr01.gather16_16((const uint16*)m_global.vm);
c10 = addr10.gather16_16((const uint16*)m_global.vm);
c11 = addr11.gather16_16((const uint16*)m_global.vm);
}
// Each 5-bit channel is isolated with a shift pair and left in bits 3..7 of its lane,
// then interpolated horizontally with uf (bits 0-4 = r, 5-9 = g, 10-14 = b, 15 = a).
GSVector4i r00 = c00.sll16(11).srl16(8);
GSVector4i r01 = c01.sll16(11).srl16(8);
GSVector4i r10 = c10.sll16(11).srl16(8);
GSVector4i r11 = c11.sll16(11).srl16(8);
r00 = r00.lerp16<0>(r01, uf);
r10 = r10.lerp16<0>(r11, uf);
GSVector4i g00 = c00.sll16(6).srl16(11).sll16(3);
GSVector4i g01 = c01.sll16(6).srl16(11).sll16(3);
GSVector4i g10 = c10.sll16(6).srl16(11).sll16(3);
GSVector4i g11 = c11.sll16(6).srl16(11).sll16(3);
g00 = g00.lerp16<0>(g01, uf);
g10 = g10.lerp16<0>(g11, uf);
GSVector4i b00 = c00.sll16(1).srl16(11).sll16(3);
GSVector4i b01 = c01.sll16(1).srl16(11).sll16(3);
GSVector4i b10 = c10.sll16(1).srl16(11).sll16(3);
GSVector4i b11 = c11.sll16(1).srl16(11).sll16(3);
b00 = b00.lerp16<0>(b01, uf);
b10 = b10.lerp16<0>(b11, uf);
GSVector4i a00 = c00.sra16(15).sll16(8);
GSVector4i a01 = c01.sra16(15).sll16(8);
GSVector4i a10 = c10.sra16(15).sll16(8);
GSVector4i a11 = c11.sra16(15).sll16(8);
a00 = a00.lerp16<0>(a01, uf);
a10 = a10.lerp16<0>(a11, uf);
// Vertical interpolation between the two rows with vf.
r = r00.lerp16<0>(r10, vf);
g = g00.lerp16<0>(g10, vf);
b = b00.lerp16<0>(b10, vf);
a = a00.lerp16<0>(a10, vf);
test |= (r | g | b | a).eq16(GSVector4i::zero()); // mask out blank pixels (not perfect)
// Turn the interpolated alpha into a per-lane compare result (a > 0).
a = a.gt16(GSVector4i::zero());
}
else
{
// Point-sampled path: a single texel per pixel.
if(sel.twin)
{
u0 = (u0 & m_local.twin[0].u).add16(m_local.twin[1].u);
v0 = (v0 & m_local.twin[0].v).add16(m_local.twin[1].v);
}
else
{
u0 = u0.min_i16(m_local.twin[2].u);
v0 = v0.min_i16(m_local.twin[2].v);
}
addr00 = v0.sll16(8) | u0;
// TODO
if(sel.tlu)
{
c00 = addr00.gather16_16((const uint16*)m_global.vm, m_global.clut);
}
else
{
c00 = addr00.gather16_16((const uint16*)m_global.vm);
}
// Move each 5-bit channel into bits 3..7 of its 16-bit lane (mask 0x00f8).
r = (c00 << 3) & 0x00f800f8;
g = (c00 >> 2) & 0x00f800f8;
b = (c00 >> 7) & 0x00f800f8;
a = c00.sra16(15);
test |= c00.eq16(GSVector4i::zero()); // mask out blank pixels
}
}
// ColorTFX
switch(sel.tfx)
{
case 0: // none (tfx = 0)
case 1: // none (tfx = tge)
// Untextured: colour comes straight from the interpolated vertex colour.
r = rf.srl16(7);
g = gf.srl16(7);
b = bf.srl16(7);
break;
case 2: // modulate (tfx = tme | tge)
r = r.modulate16<1>(rf).clamp8();
g = g.modulate16<1>(gf).clamp8();
b = b.modulate16<1>(bf).clamp8();
break;
case 3: // decal (tfx = tme)
// Texel colour is used unchanged.
break;
default:
__assume(0);
}
// AlphaBlend
if(sel.abe)
{
// rs/gs/bs: source colour, rd/gd/bd: destination channels unpacked from fd.
GSVector4i rs = r;
GSVector4i gs = g;
GSVector4i bs = b;
GSVector4i rd = (fd & 0x001f001f) << 3;
GSVector4i gd = (fd & 0x03e003e0) >> 2;
GSVector4i bd = (fd & 0x7c007c00) >> 7;
switch(sel.abr)
{
case 0:
// average of destination and source
r = rd.avg8(rs);
g = gd.avg8(gs);
b = bd.avg8(bs);
break;
case 1:
// destination + source, saturating
r = rd.addus8(rs);
g = gd.addus8(gs);
b = bd.addus8(bs);
break;
case 2:
// destination - source, saturating
r = rd.subus8(rs);
g = gd.subus8(gs);
b = bd.subus8(bs);
break;
case 3:
// destination + source / 4, saturating
r = rd.addus8(rs.srl16(2));
g = gd.addus8(gs.srl16(2));
b = bd.addus8(bs.srl16(2));
break;
default:
__assume(0);
}
if(sel.tme)
{
// NOTE(review): these assignments take only rs/gs/bs, rd/gd/bd and a as inputs, so the
// r/g/b values produced by the abr switch above are overwritten whenever sel.tme is set.
// Presumably the intent was to select per pixel between the unblended source and the
// blended result — confirm against the code generator's AlphaBlend().
r = rs.blend8(rd, a);
g = gs.blend8(gd, a);
b = bs.blend8(bd, a);
}
}
// Dither
if(sel.dtd)
{
r = r.addus8(dither);
g = g.addus8(dither);
b = b.addus8(dither);
}
// WriteFrame
// Bit 15 source: forced to 0x8000 when sel.md is set, otherwise the texel alpha (textured) or zero.
// NOTE(review): r, g and b are OR-ed together here without being repacked into 5:5:5 bit
// positions in this function — confirm this matches the generated code.
GSVector4i fs = r | g | b | (sel.md ? GSVector4i(0x80008000) : sel.tme ? a : GSVector4i::zero());
// Lanes flagged in `test` keep the destination value.
fs = fs.blend8(fd, test);
GSVector4i::store(fb, fb + 8, fs);
}
while(0);
// Advance to the next group; steps <= 0 means the group just processed was the last one.
if(steps <= 0) break;
steps -= 8;
fb += 8;
if(sel.tme)
{
// Step the texture coordinates by the per-iteration increment.
GSVector4i st = m_local.d8.st;
s = s.add16(st.xxxx());
t = t.add16(st.yyyy());
}
if(sel.tfx != 3) // != decal
{
if(sel.iip)
{
// Step the interpolated colour by the per-iteration increment.
GSVector4i c = m_local.d8.c;
rf = rf.add16(c.xxxx());
gf = gf.add16(c.yyyy());
bf = bf.add16(c.zzzz());
}
}
}
}
// Edge drawing has no implementation in this renderer yet. The body only fires
// ASSERT(0) (what that does depends on how ASSERT is defined for the build);
// none of the parameters are read.
void GPUDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
{
	// TODO: not implemented

	ASSERT(0);
}
void GPUDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)

View File

@ -121,7 +121,7 @@ void GPUDrawScanlineCodeGenerator::Init()
{
mov(eax, dword[esp + _top]);
// uint16* fb = &m_local.vm[(top << (10 + m_sel.scalex)) + left];
// uint16* fb = (uint16*)m_global.vm + (top << (10 + sel.scalex)) + left;
mov(edi, eax);
shl(edi, 10 + m_sel.scalex);
@ -134,7 +134,7 @@ void GPUDrawScanlineCodeGenerator::Init()
if(m_sel.dtd)
{
// dither = GSVector4i::load<false>(&s_dither[top & 3][left & 3]);
// dither = GSVector4i::load<false>(&m_dither[top & 3][left & 3]);
and(eax, 3);
shl(eax, 5);
@ -741,7 +741,7 @@ void GPUDrawScanlineCodeGenerator::AlphaBlend()
// xmm7 = test
// xmm0, xmm2 = free
// GSVector4i r = (d & 0x001f001f) << 3;
// GSVector4i r = (fd & 0x001f001f) << 3;
pcmpeqd(xmm0, xmm0);
psrlw(xmm0, 11); // 0x001f

View File

@ -30,9 +30,6 @@ class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
{
void operator = (const GPUDrawScanlineCodeGenerator&);
static const GSVector4i m_test[8];
static const uint16 m_dither[4][16];
GPUScanlineSelector m_sel;
GPUScanlineLocalData& m_local;
@ -57,4 +54,7 @@ class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
public:
GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
static const GSVector4i m_test[8];
static __aligned(const uint16, 32) m_dither[4][16];
};

View File

@ -28,8 +28,6 @@ class GPUSetupPrimCodeGenerator : public GSCodeGenerator
{
void operator = (const GPUSetupPrimCodeGenerator&);
static const GSVector4 m_shift[3];
GPUScanlineSelector m_sel;
GPUScanlineLocalData& m_local;
@ -37,4 +35,6 @@ class GPUSetupPrimCodeGenerator : public GSCodeGenerator
public:
GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
static const GSVector4 m_shift[3];
};

View File

@ -412,6 +412,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
GSVector4i u, v, uv[2];
GSVector4i lodi, lodf;
GSVector4i minuv, maxuv;
GSVector4i addr00, addr01, addr10, addr11;
GSVector4i c00, c01, c10, c11;
if(sel.mmin)
{
@ -529,12 +531,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
GSVector4i y1 = uv1.uph16() << (sel.tw + 3);
GSVector4i x1 = uv1.upl16();
GSVector4i addr00 = y0 + x0;
GSVector4i addr01 = y0 + x1;
GSVector4i addr10 = y1 + x0;
GSVector4i addr11 = y1 + x1;
GSVector4i c00, c01, c10, c11;
addr00 = y0 + x0;
addr01 = y0 + x1;
addr10 = y1 + x0;
addr11 = y1 + x1;
if(sel.tlu)
{
@ -582,9 +582,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
}
else
{
GSVector4i addr00 = y0 + x0;
GSVector4i c00;
addr00 = y0 + x0;
if(sel.tlu)
{
@ -654,12 +652,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
GSVector4i y1 = uv1.uph16() << (sel.tw + 3);
GSVector4i x1 = uv1.upl16();
GSVector4i addr00 = y0 + x0;
GSVector4i addr01 = y0 + x1;
GSVector4i addr10 = y1 + x0;
GSVector4i addr11 = y1 + x1;
GSVector4i c00, c01, c10, c11;
addr00 = y0 + x0;
addr01 = y0 + x1;
addr10 = y1 + x0;
addr11 = y1 + x1;
if(sel.tlu)
{
@ -707,9 +703,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
}
else
{
GSVector4i addr00 = y0 + x0;
GSVector4i c00;
addr00 = y0 + x0;
if(sel.tlu)
{
@ -797,12 +791,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
GSVector4i y1 = uv1.uph16() << (sel.tw + 3);
GSVector4i x1 = uv1.upl16();
GSVector4i addr00 = y0 + x0;
GSVector4i addr01 = y0 + x1;
GSVector4i addr10 = y1 + x0;
GSVector4i addr11 = y1 + x1;
GSVector4i c00, c01, c10, c11;
addr00 = y0 + x0;
addr01 = y0 + x1;
addr10 = y1 + x0;
addr11 = y1 + x1;
if(sel.tlu)
{
@ -844,9 +836,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
}
else
{
GSVector4i addr00 = y0 + x0;
GSVector4i c00;
addr00 = y0 + x0;
if(sel.tlu)
{

View File

@ -552,7 +552,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
for(int i = 0, j = m_count; i < j; i++)
{
v[i].t *= w;
GSVector4 t = v[i].t;
v[i].t = (t * w).xyzw(t);
}
}
}
@ -562,10 +564,13 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
for(int i = 0, j = m_count; i < j; i += 2)
{
GSVector4 w = v[i + 1].t.zzzz().rcpnr();
GSVector4 t0 = v[i + 0].t;
GSVector4 t1 = v[i + 1].t;
v[i + 0].t *= w;
v[i + 1].t *= w;
GSVector4 w = t1.zzzz().rcpnr();
v[i + 0].t = (t0 * w).xyzw(t0);
v[i + 1].t = (t1 * w).xyzw(t1);
}
}
}
@ -582,7 +587,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
for(int i = 0, j = m_count; i < j; i++)
{
v[i].t -= half;
GSVector4 t = v[i].t;
v[i].t = (t - half).xyzw(t);
}
}
}