GSdx: This may fix the Silent Hill shadows and the Mister Mosquito intro blur. Also reduced the texture cache keep-alive time from 30 to 10 frames, and found and fixed two memory leaks; Killzone can now run a few seconds longer before crashing. I think something in PCSX2 is allocating too much memory.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5096 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2012-02-12 17:56:06 +00:00
parent c641767431
commit 67ef781116
11 changed files with 174 additions and 158 deletions

View File

@ -324,7 +324,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
}
else if(sel.ltf)
{
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
vf = v.xxzzlh().srl16(12);
}
s = GSVector4::cast(u);
@ -514,8 +514,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
u -= 0x8000;
v -= 0x8000;
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
uf = u.xxzzlh().srl16(12);
vf = v.xxzzlh().srl16(12);
}
GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
@ -581,19 +581,19 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
GSVector4i rb01 = c01.sll16(8).srl16(8);
GSVector4i ga01 = c01.srl16(8);
rb00 = rb00.lerp16<0>(rb01, uf);
ga00 = ga00.lerp16<0>(ga01, uf);
rb00 = rb00.lerp16_4(rb01, uf);
ga00 = ga00.lerp16_4(ga01, uf);
GSVector4i rb10 = c10.sll16(8).srl16(8);
GSVector4i ga10 = c10.srl16(8);
GSVector4i rb11 = c11.sll16(8).srl16(8);
GSVector4i ga11 = c11.srl16(8);
rb10 = rb10.lerp16<0>(rb11, uf);
ga10 = ga10.lerp16<0>(ga11, uf);
rb10 = rb10.lerp16_4(rb11, uf);
ga10 = ga10.lerp16_4(ga11, uf);
rb = rb00.lerp16<0>(rb10, vf);
ga = ga00.lerp16<0>(ga10, vf);
rb = rb00.lerp16_4(rb10, vf);
ga = ga00.lerp16_4(ga10, vf);
}
else
{
@ -635,8 +635,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
u -= 0x8000;
v -= 0x8000;
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
uf = u.xxzzlh().srl16(12);
vf = v.xxzzlh().srl16(12);
}
GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
@ -702,19 +702,19 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
GSVector4i rb01 = c01.sll16(8).srl16(8);
GSVector4i ga01 = c01.srl16(8);
rb00 = rb00.lerp16<0>(rb01, uf);
ga00 = ga00.lerp16<0>(ga01, uf);
rb00 = rb00.lerp16_4(rb01, uf);
ga00 = ga00.lerp16_4(ga01, uf);
GSVector4i rb10 = c10.sll16(8).srl16(8);
GSVector4i ga10 = c10.srl16(8);
GSVector4i rb11 = c11.sll16(8).srl16(8);
GSVector4i ga11 = c11.srl16(8);
rb10 = rb10.lerp16<0>(rb11, uf);
ga10 = ga10.lerp16<0>(ga11, uf);
rb10 = rb10.lerp16_4(rb11, uf);
ga10 = ga10.lerp16_4(ga11, uf);
rb2 = rb00.lerp16<0>(rb10, vf);
ga2 = ga00.lerp16<0>(ga10, vf);
rb2 = rb00.lerp16_4(rb10, vf);
ga2 = ga00.lerp16_4(ga10, vf);
}
else
{
@ -745,7 +745,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
rb = rb.lerp16<0>(rb2, lodf);
ga = ga.lerp16<0>(ga2, lodf);
}
}
}
else
{
@ -770,11 +770,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
if(sel.ltf)
{
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
uf = u.xxzzlh().srl16(12);
if(sel.prim != GS_SPRITE_CLASS)
{
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
vf = v.xxzzlh().srl16(12);
}
}
@ -835,19 +835,19 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
GSVector4i rb01 = c01.sll16(8).srl16(8);
GSVector4i ga01 = c01.srl16(8);
rb00 = rb00.lerp16<0>(rb01, uf);
ga00 = ga00.lerp16<0>(ga01, uf);
rb00 = rb00.lerp16_4(rb01, uf);
ga00 = ga00.lerp16_4(ga01, uf);
GSVector4i rb10 = c10.sll16(8).srl16(8);
GSVector4i ga10 = c10.srl16(8);
GSVector4i rb11 = c11.sll16(8).srl16(8);
GSVector4i ga11 = c11.srl16(8);
rb10 = rb10.lerp16<0>(rb11, uf);
ga10 = ga10.lerp16<0>(ga11, uf);
rb10 = rb10.lerp16_4(rb11, uf);
ga10 = ga10.lerp16_4(ga11, uf);
rb = rb00.lerp16<0>(rb10, vf);
ga = ga00.lerp16<0>(ga10, vf);
rb = rb00.lerp16_4(rb10, vf);
ga = ga00.lerp16_4(ga10, vf);
}
else
{

View File

@ -97,6 +97,25 @@ void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm&
#endif
}
// Emits a linear interpolation on packed 16-bit lanes with a right shift of 4:
//   a = (((a - b) * f) >> 4) + b
// a: destination register, also holds the first interpolation operand on entry
// b: second interpolation operand; only read by this sequence
// f: per-lane weight; presumably a 4-bit fraction (0..15) so the 16-bit product does not overflow - TODO confirm against callers
void GSDrawScanlineCodeGenerator::lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f)
{
#if _M_SSE >= 0x500
// AVX mnemonics; NOTE(review): the two-operand forms are assumed to use a as both destination and first source, mirroring the SSE path below - confirm
vpsubw(a, b);
vpmullw(a, f);
vpsraw(a, 4);
vpaddw(a, b);
#else
// a -= b
psubw(a, b);
// a = low 16 bits of a * f
pmullw(a, f);
// arithmetic shift, so a negative difference keeps its sign
psraw(a, 4);
// a += b
paddw(a, b);
#endif
}
void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& temp)
{
#if _M_SSE >= 0x500

View File

@ -71,6 +71,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void modulate16(const Xmm& a, const Operand& f, int shift);
void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift);
void lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f);
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
void clamp16(const Xmm& a, const Xmm& temp);
void alltrue();

View File

@ -389,8 +389,7 @@ void GSDrawScanlineCodeGenerator::Init()
{
vpshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm6, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm6, 15 - GS_BILINEAR_PRECISION);
vpsrlw(xmm6, 12);
vmovdqa(ptr[&m_local.temp.vf], xmm6);
}
}
@ -743,8 +742,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vpsrlw(xmm0, 12);
vmovdqa(ptr[&m_local.temp.uf], xmm0);
if(m_sel.prim != GS_SPRITE_CLASS)
@ -753,8 +751,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vpsrlw(xmm0, 12);
vmovdqa(ptr[&m_local.temp.vf], xmm0);
}
}
@ -878,11 +875,11 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// xmm5 = c11
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf);
// rb00 = rb00.lerp16_4(rb01, uf);
// ga00 = ga00.lerp16_4(ga01, uf);
lerp16(xmm3, xmm2, xmm0, 0);
lerp16(xmm4, xmm6, xmm0, 0);
lerp16_4(xmm3, xmm2, xmm0);
lerp16_4(xmm4, xmm6, xmm0);
// xmm0 = uf
// xmm3 = rb00
@ -915,11 +912,11 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// xmm6 = ga11
// xmm7 = used
// rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf);
// rb10 = rb10.lerp16_4(rb11, uf);
// ga10 = ga10.lerp16_4(ga11, uf);
lerp16(xmm5, xmm1, xmm0, 0);
lerp16(xmm6, xmm2, xmm0, 0);
lerp16_4(xmm5, xmm1, xmm0);
lerp16_4(xmm6, xmm2, xmm0);
// xmm3 = rb00
// xmm4 = ga00
@ -928,13 +925,13 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// xmm0, xmm1, xmm2 = free
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf);
// rb00 = rb00.lerp16_4(rb10, vf);
// ga00 = ga00.lerp16_4(ga10, vf);
vmovdqa(xmm0, ptr[&m_local.temp.vf]);
lerp16(xmm5, xmm3, xmm0, 0);
lerp16(xmm6, xmm4, xmm0, 0);
lerp16_4(xmm5, xmm3, xmm0);
lerp16_4(xmm6, xmm4, xmm0);
}
else
{
@ -1298,16 +1295,14 @@ return;
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vpsrlw(xmm0, 12);
vmovdqa(ptr[&m_local.temp.uf], xmm0);
// GSVector4i vf = v.xxzzlh().srl16(12);
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vpsrlw(xmm0, 12);
vmovdqa(ptr[&m_local.temp.vf], xmm0);
}
@ -1430,11 +1425,11 @@ return;
// xmm5 = c11
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf);
// rb00 = rb00.lerp16_4(rb01, uf);
// ga00 = ga00.lerp16_4(ga01, uf);
lerp16(xmm3, xmm2, xmm0, 0);
lerp16(xmm4, xmm6, xmm0, 0);
lerp16_4(xmm3, xmm2, xmm0);
lerp16_4(xmm4, xmm6, xmm0);
// xmm0 = uf
// xmm3 = rb00
@ -1467,11 +1462,11 @@ return;
// xmm6 = ga11
// xmm7 = used
// rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf);
// rb10 = rb10.lerp16_4(rb11, uf);
// ga10 = ga10.lerp16_4(ga11, uf);
lerp16(xmm5, xmm1, xmm0, 0);
lerp16(xmm6, xmm2, xmm0, 0);
lerp16_4(xmm5, xmm1, xmm0);
lerp16_4(xmm6, xmm2, xmm0);
// xmm3 = rb00
// xmm4 = ga00
@ -1480,13 +1475,13 @@ return;
// xmm0, xmm1, xmm2 = free
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf);
// rb00 = rb00.lerp16_4(rb10, vf);
// ga00 = ga00.lerp16_4(ga10, vf);
vmovdqa(xmm0, ptr[&m_local.temp.vf]);
lerp16(xmm5, xmm3, xmm0, 0);
lerp16(xmm6, xmm4, xmm0, 0);
lerp16_4(xmm5, xmm3, xmm0);
lerp16_4(xmm6, xmm4, xmm0);
}
else
{
@ -1541,16 +1536,14 @@ return;
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vpsrlw(xmm0, 12);
vmovdqa(ptr[&m_local.temp.uf], xmm0);
// GSVector4i vf = v.xxzzlh().srl16(12);
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vpsrlw(xmm0, 12);
vmovdqa(ptr[&m_local.temp.vf], xmm0);
}
@ -1673,11 +1666,11 @@ return;
// xmm5 = c11
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf);
// rb00 = rb00.lerp16_4(rb01, uf);
// ga00 = ga00.lerp16_4(ga01, uf);
lerp16(xmm3, xmm2, xmm0, 0);
lerp16(xmm4, xmm6, xmm0, 0);
lerp16_4(xmm3, xmm2, xmm0);
lerp16_4(xmm4, xmm6, xmm0);
// xmm0 = uf
// xmm3 = rb00
@ -1710,11 +1703,11 @@ return;
// xmm6 = ga11
// xmm7 = used
// rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf);
// rb10 = rb10.lerp16_4(rb11, uf);
// ga10 = ga10.lerp16_4(ga11, uf);
lerp16(xmm5, xmm1, xmm0, 0);
lerp16(xmm6, xmm2, xmm0, 0);
lerp16_4(xmm5, xmm1, xmm0);
lerp16_4(xmm6, xmm2, xmm0);
// xmm3 = rb00
// xmm4 = ga00
@ -1723,13 +1716,13 @@ return;
// xmm0, xmm1, xmm2 = free
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf);
// rb00 = rb00.lerp16_4(rb10, vf);
// ga00 = ga00.lerp16_4(ga10, vf);
vmovdqa(xmm0, ptr[&m_local.temp.vf]);
lerp16(xmm5, xmm3, xmm0, 0);
lerp16(xmm6, xmm4, xmm0, 0);
lerp16_4(xmm5, xmm3, xmm0);
lerp16_4(xmm6, xmm4, xmm0);
}
else
{

View File

@ -389,8 +389,7 @@ void GSDrawScanlineCodeGenerator::Init()
{
pshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm6, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm6, 15 - GS_BILINEAR_PRECISION);
psrlw(xmm6, 12);
movdqa(ptr[&m_local.temp.vf], xmm6);
}
}
@ -748,8 +747,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
psrlw(xmm0, 12);
movdqa(ptr[&m_local.temp.uf], xmm0);
if(m_sel.prim != GS_SPRITE_CLASS)
@ -758,8 +756,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
psrlw(xmm0, 12);
movdqa(ptr[&m_local.temp.vf], xmm0);
}
}
@ -891,11 +888,11 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// xmm5 = c11
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf);
// rb00 = rb00.lerp16_4(rb01, uf);
// ga00 = ga00.lerp16_4(ga01, uf);
lerp16(xmm3, xmm2, xmm0, 0);
lerp16(xmm4, xmm6, xmm0, 0);
lerp16_4(xmm3, xmm2, xmm0);
lerp16_4(xmm4, xmm6, xmm0);
// xmm0 = uf
// xmm3 = rb00
@ -930,11 +927,11 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// xmm6 = ga11
// xmm7 = used
// rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf);
// rb10 = rb10.lerp16_4(rb11, uf);
// ga10 = ga10.lerp16_4(ga11, uf);
lerp16(xmm5, xmm1, xmm0, 0);
lerp16(xmm6, xmm2, xmm0, 0);
lerp16_4(xmm5, xmm1, xmm0);
lerp16_4(xmm6, xmm2, xmm0);
// xmm3 = rb00
// xmm4 = ga00
@ -943,13 +940,13 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// xmm0, xmm1, xmm2 = free
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf);
// rb00 = rb00.lerp16_4(rb10, vf);
// ga00 = ga00.lerp16_4(ga10, vf);
movdqa(xmm0, ptr[&m_local.temp.vf]);
lerp16(xmm5, xmm3, xmm0, 0);
lerp16(xmm6, xmm4, xmm0, 0);
lerp16_4(xmm5, xmm3, xmm0);
lerp16_4(xmm6, xmm4, xmm0);
}
else
{
@ -1353,16 +1350,14 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
psrlw(xmm0, 12);
movdqa(ptr[&m_local.temp.uf], xmm0);
// GSVector4i vf = v.xxzzlh().srl16(12);
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
psrlw(xmm0, 12);
movdqa(ptr[&m_local.temp.vf], xmm0);
}
@ -1493,11 +1488,11 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// xmm5 = c11
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf);
// rb00 = rb00.lerp16_4(rb01, uf);
// ga00 = ga00.lerp16_4(ga01, uf);
lerp16(xmm3, xmm2, xmm0, 0);
lerp16(xmm4, xmm6, xmm0, 0);
lerp16_4(xmm3, xmm2, xmm0);
lerp16_4(xmm4, xmm6, xmm0);
// xmm0 = uf
// xmm3 = rb00
@ -1532,11 +1527,11 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// xmm6 = ga11
// xmm7 = used
// rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf);
// rb10 = rb10.lerp16_4(rb11, uf);
// ga10 = ga10.lerp16_4(ga11, uf);
lerp16(xmm5, xmm1, xmm0, 0);
lerp16(xmm6, xmm2, xmm0, 0);
lerp16_4(xmm5, xmm1, xmm0);
lerp16_4(xmm6, xmm2, xmm0);
// xmm3 = rb00
// xmm4 = ga00
@ -1545,13 +1540,13 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// xmm0, xmm1, xmm2 = free
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf);
// rb00 = rb00.lerp16_4(rb10, vf);
// ga00 = ga00.lerp16_4(ga10, vf);
movdqa(xmm0, ptr[&m_local.temp.vf]);
lerp16(xmm5, xmm3, xmm0, 0);
lerp16(xmm6, xmm4, xmm0, 0);
lerp16_4(xmm5, xmm3, xmm0);
lerp16_4(xmm6, xmm4, xmm0);
}
else
{
@ -1608,16 +1603,14 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
psrlw(xmm0, 12);
movdqa(ptr[&m_local.temp.uf], xmm0);
// GSVector4i vf = v.xxzzlh().srl16(12);
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
psrlw(xmm0, 12);
movdqa(ptr[&m_local.temp.vf], xmm0);
}
@ -1748,11 +1741,11 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// xmm5 = c11
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf);
// rb00 = rb00.lerp16_4(rb01, uf);
// ga00 = ga00.lerp16_4(ga01, uf);
lerp16(xmm3, xmm2, xmm0, 0);
lerp16(xmm4, xmm6, xmm0, 0);
lerp16_4(xmm3, xmm2, xmm0);
lerp16_4(xmm4, xmm6, xmm0);
// xmm0 = uf
// xmm3 = rb00
@ -1787,11 +1780,11 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// xmm6 = ga11
// xmm7 = used
// rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf);
// rb10 = rb10.lerp16_4(rb11, uf);
// ga10 = ga10.lerp16_4(ga11, uf);
lerp16(xmm5, xmm1, xmm0, 0);
lerp16(xmm6, xmm2, xmm0, 0);
lerp16_4(xmm5, xmm1, xmm0);
lerp16_4(xmm6, xmm2, xmm0);
// xmm3 = rb00
// xmm4 = ga00
@ -1800,13 +1793,13 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// xmm0, xmm1, xmm2 = free
// xmm7 = used
// rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf);
// rb00 = rb00.lerp16_4(rb10, vf);
// ga00 = ga00.lerp16_4(ga10, vf);
movdqa(xmm0, ptr[&m_local.temp.vf]);
lerp16(xmm5, xmm3, xmm0, 0);
lerp16(xmm6, xmm4, xmm0, 0);
lerp16_4(xmm5, xmm3, xmm0);
lerp16_4(xmm6, xmm4, xmm0);
}
else
{

View File

@ -447,6 +447,7 @@ GSLocalMemory::~GSLocalMemory()
vmfree(m_vm8, m_vmsize * 2);
for_each(m_omap.begin(), m_omap.end(), aligned_free_second());
for_each(m_pomap.begin(), m_pomap.end(), aligned_free_second());
for_each(m_po4map.begin(), m_po4map.end(), aligned_free_second());
for(hash_map<uint64, vector<GSVector2i>*>::iterator i = m_p2tmap.begin(); i != m_p2tmap.end(); i++)

View File

@ -680,6 +680,8 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
bool fb = fb_pages != NULL;
bool zb = zb_pages != NULL;
bool res = false;
if(m_fzb != m_context->offset.fzb4)
{
// targets changed, check everything
@ -724,7 +726,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
{
if(LOG) {fprintf(s_fp, "syncpoint 0\n"); fflush(s_fp);}
return true;
res = true;
}
//if(LOG) {fprintf(s_fp, "no syncpoint *\n"); fflush(s_fp);}
@ -785,7 +787,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
{
if(LOG) {fprintf(s_fp, "syncpoint 1\n"); fflush(s_fp);}
return true;
res = true;
}
}
}
@ -795,7 +797,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
// cross-check frame and z-buffer pages, they cannot overlap with each other and with previous batches in queue,
// have to be careful when the two buffers are mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
if(fb)
if(fb && !res)
{
for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
{
@ -803,12 +805,14 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
{
if(LOG) {fprintf(s_fp, "syncpoint 2\n"); fflush(s_fp);}
return true;
res = true;
break;
}
}
}
if(zb)
if(zb && !res)
{
for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
{
@ -816,14 +820,19 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
{
if(LOG) {fprintf(s_fp, "syncpoint 3\n"); fflush(s_fp);}
return true;
res = true;
break;
}
}
}
}
}
return false;
if(!fb && fb_pages != NULL) delete [] fb_pages;
if(!zb && zb_pages != NULL) delete [] zb_pages;
return res;
}
bool GSRendererSW::CheckSourcePages(SharedData* sd)
@ -1334,7 +1343,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
gd.zm |= GSVector4i::xffff0000();
}
if(gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data->bbox.eq(data->bbox.rintersect(data->scissor)))
if(gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data->bbox.eq(data->bbox.rintersect(data->scissor))) // TODO: check scissor horizontally only
{
gd.sel.notest = 1;

View File

@ -24,8 +24,6 @@
#include "GSLocalMemory.h"
#include "GSVector.h"
#define GS_BILINEAR_PRECISION 4 // max precision 15, but several games like okami, rogue galaxy, dq8 break above 4
union GSScanlineSelector
{
struct
@ -70,6 +68,7 @@ union GSScanlineSelector
uint32 lcm:1; // 52
uint32 mmin:2; // 53
uint32 notest:1; // 54 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels)
// TODO: 1D texture flag? could save 2 texture reads and 4 lerps with bilinear, and also the texture coordinate clamp/wrap code in one direction
};
struct

View File

@ -131,25 +131,6 @@ void GSTextureCacheSW::RemoveAll()
}
}
// Removes a single texture from the cache: drops it from m_textures, unlinks it
// from the m_map lists, then deletes the object.
void GSTextureCacheSW::RemoveAt(Texture* t)
{
	m_textures.erase(t);

	// Walk every m_map entry from index (TBP0 >> 5) up to and including the last one.
	const uint32 last = countof(m_map) - 1;

	for(uint32 page = t->m_TEX0.TBP0 >> 5; page <= last; page++)
	{
		list<Texture*>& bucket = m_map[page];

		// Unlink at most one occurrence of t per list; the iterator is not used
		// again after the erase because the loop exits immediately.
		for(list<Texture*>::iterator it = bucket.begin(); it != bucket.end(); ++it)
		{
			if(*it == t)
			{
				bucket.erase(it);

				break;
			}
		}
	}

	delete t;
}
void GSTextureCacheSW::IncAge()
{
for(hash_set<Texture*>::iterator i = m_textures.begin(); i != m_textures.end(); )
@ -158,9 +139,23 @@ void GSTextureCacheSW::IncAge()
Texture* t = *j;
if(++t->m_age > 30)
if(++t->m_age > 10)
{
RemoveAt(t);
m_textures.erase(j);
for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++)
{
list<Texture*>& m = m_map[*p];
for(list<Texture*>::iterator i = m.begin(); i != m.end(); )
{
list<Texture*>::iterator j = i++;
if(*j == t) {m.erase(j); break;}
}
}
delete t;
}
}
}

View File

@ -68,6 +68,5 @@ public:
void InvalidatePages(const uint32* pages, uint32 psm);
void RemoveAll();
void RemoveAt(Texture* t);
void IncAge();
};

View File

@ -1004,6 +1004,13 @@ public:
return d.add16(a.sub16(b).modulate16<shift>(c));
}
__forceinline GSVector4i lerp16_4(const GSVector4i& a, const GSVector4i& f) const
{
	// this + (((a - this) * f) >> 4)   (a, this: 8-bit, f: 4-bit)

	GSVector4i delta = a.sub16(*this);
	GSVector4i weighted = delta.mul16l(f).sra16(4);

	return add16(weighted);
}
template<int shift> __forceinline GSVector4i modulate16(const GSVector4i& f) const
{
// a * f << shift