mirror of https://github.com/PCSX2/pcsx2.git
GSdx: this may fix the Silent Hill shadows and the Mister Mosquito intro blur. Also reduced the texture cache keep-alive time from 30 to 10 frames and found two memory leaks. Killzone can run a few seconds longer before crashing; I think there is something in pcsx2 allocating too much memory.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5096 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c641767431
commit
67ef781116
|
@ -324,7 +324,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
}
|
||||
else if(sel.ltf)
|
||||
{
|
||||
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
vf = v.xxzzlh().srl16(12);
|
||||
}
|
||||
|
||||
s = GSVector4::cast(u);
|
||||
|
@ -514,8 +514,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
u -= 0x8000;
|
||||
v -= 0x8000;
|
||||
|
||||
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
uf = u.xxzzlh().srl16(12);
|
||||
vf = v.xxzzlh().srl16(12);
|
||||
}
|
||||
|
||||
GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
|
||||
|
@ -581,19 +581,19 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
GSVector4i rb01 = c01.sll16(8).srl16(8);
|
||||
GSVector4i ga01 = c01.srl16(8);
|
||||
|
||||
rb00 = rb00.lerp16<0>(rb01, uf);
|
||||
ga00 = ga00.lerp16<0>(ga01, uf);
|
||||
rb00 = rb00.lerp16_4(rb01, uf);
|
||||
ga00 = ga00.lerp16_4(ga01, uf);
|
||||
|
||||
GSVector4i rb10 = c10.sll16(8).srl16(8);
|
||||
GSVector4i ga10 = c10.srl16(8);
|
||||
GSVector4i rb11 = c11.sll16(8).srl16(8);
|
||||
GSVector4i ga11 = c11.srl16(8);
|
||||
|
||||
rb10 = rb10.lerp16<0>(rb11, uf);
|
||||
ga10 = ga10.lerp16<0>(ga11, uf);
|
||||
rb10 = rb10.lerp16_4(rb11, uf);
|
||||
ga10 = ga10.lerp16_4(ga11, uf);
|
||||
|
||||
rb = rb00.lerp16<0>(rb10, vf);
|
||||
ga = ga00.lerp16<0>(ga10, vf);
|
||||
rb = rb00.lerp16_4(rb10, vf);
|
||||
ga = ga00.lerp16_4(ga10, vf);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -635,8 +635,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
u -= 0x8000;
|
||||
v -= 0x8000;
|
||||
|
||||
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
uf = u.xxzzlh().srl16(12);
|
||||
vf = v.xxzzlh().srl16(12);
|
||||
}
|
||||
|
||||
GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
|
||||
|
@ -702,19 +702,19 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
GSVector4i rb01 = c01.sll16(8).srl16(8);
|
||||
GSVector4i ga01 = c01.srl16(8);
|
||||
|
||||
rb00 = rb00.lerp16<0>(rb01, uf);
|
||||
ga00 = ga00.lerp16<0>(ga01, uf);
|
||||
rb00 = rb00.lerp16_4(rb01, uf);
|
||||
ga00 = ga00.lerp16_4(ga01, uf);
|
||||
|
||||
GSVector4i rb10 = c10.sll16(8).srl16(8);
|
||||
GSVector4i ga10 = c10.srl16(8);
|
||||
GSVector4i rb11 = c11.sll16(8).srl16(8);
|
||||
GSVector4i ga11 = c11.srl16(8);
|
||||
|
||||
rb10 = rb10.lerp16<0>(rb11, uf);
|
||||
ga10 = ga10.lerp16<0>(ga11, uf);
|
||||
rb10 = rb10.lerp16_4(rb11, uf);
|
||||
ga10 = ga10.lerp16_4(ga11, uf);
|
||||
|
||||
rb2 = rb00.lerp16<0>(rb10, vf);
|
||||
ga2 = ga00.lerp16<0>(ga10, vf);
|
||||
rb2 = rb00.lerp16_4(rb10, vf);
|
||||
ga2 = ga00.lerp16_4(ga10, vf);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -745,7 +745,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
rb = rb.lerp16<0>(rb2, lodf);
|
||||
ga = ga.lerp16<0>(ga2, lodf);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -770,11 +770,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
if(sel.ltf)
|
||||
{
|
||||
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
uf = u.xxzzlh().srl16(12);
|
||||
|
||||
if(sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
vf = v.xxzzlh().srl16(12);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -835,19 +835,19 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
GSVector4i rb01 = c01.sll16(8).srl16(8);
|
||||
GSVector4i ga01 = c01.srl16(8);
|
||||
|
||||
rb00 = rb00.lerp16<0>(rb01, uf);
|
||||
ga00 = ga00.lerp16<0>(ga01, uf);
|
||||
rb00 = rb00.lerp16_4(rb01, uf);
|
||||
ga00 = ga00.lerp16_4(ga01, uf);
|
||||
|
||||
GSVector4i rb10 = c10.sll16(8).srl16(8);
|
||||
GSVector4i ga10 = c10.srl16(8);
|
||||
GSVector4i rb11 = c11.sll16(8).srl16(8);
|
||||
GSVector4i ga11 = c11.srl16(8);
|
||||
|
||||
rb10 = rb10.lerp16<0>(rb11, uf);
|
||||
ga10 = ga10.lerp16<0>(ga11, uf);
|
||||
rb10 = rb10.lerp16_4(rb11, uf);
|
||||
ga10 = ga10.lerp16_4(ga11, uf);
|
||||
|
||||
rb = rb00.lerp16<0>(rb10, vf);
|
||||
ga = ga00.lerp16<0>(ga10, vf);
|
||||
rb = rb00.lerp16_4(rb10, vf);
|
||||
ga = ga00.lerp16_4(ga10, vf);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -97,6 +97,25 @@ void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm&
|
|||
#endif
|
||||
}
|
||||
|
||||
// Emits a linear interpolation on packed 16-bit words with a shift of 4:
//
//   a = (((a - b) * f) >> 4) + b
//
// a is both an input and the destination register; b and f are only read.
// The multiply keeps the low 16 bits of each product and the shift is arithmetic
// (sign-preserving), so a negative (a - b) difference stays negative.
// When _M_SSE >= 0x500 the v-prefixed (AVX) mnemonics are emitted, otherwise the
// plain SSE2 ones; both branches perform the same four steps.
void GSDrawScanlineCodeGenerator::lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
// a -= b
vpsubw(a, b);
|
||||
// a *= f (low 16 bits of each product)
vpmullw(a, f);
|
||||
// a >>= 4 (arithmetic)
vpsraw(a, 4);
|
||||
// a += b
vpaddw(a, b);
|
||||
|
||||
#else
|
||||
|
||||
// a -= b
psubw(a, b);
|
||||
// a *= f (low 16 bits of each product)
pmullw(a, f);
|
||||
// a >>= 4 (arithmetic)
psraw(a, 4);
|
||||
// a += b
paddw(a, b);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& temp)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
|
|
@ -71,6 +71,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
|
||||
void modulate16(const Xmm& a, const Operand& f, int shift);
|
||||
void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift);
|
||||
void lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f);
|
||||
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
|
||||
void clamp16(const Xmm& a, const Xmm& temp);
|
||||
void alltrue();
|
||||
|
|
|
@ -389,8 +389,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
vpshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm6, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm6, 15 - GS_BILINEAR_PRECISION);
|
||||
vpsrlw(xmm6, 12);
|
||||
vmovdqa(ptr[&m_local.temp.vf], xmm6);
|
||||
}
|
||||
}
|
||||
|
@ -743,8 +742,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vpsrlw(xmm0, 12);
|
||||
vmovdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
|
@ -753,8 +751,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vpsrlw(xmm0, 12);
|
||||
vmovdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
}
|
||||
|
@ -878,11 +875,11 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm5 = c11
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb01, uf);
|
||||
// ga00 = ga00.lerp16<0>(ga01, uf);
|
||||
// rb00 = rb00.lerp16_4(rb01, uf);
|
||||
// ga00 = ga00.lerp16_4(ga01, uf);
|
||||
|
||||
lerp16(xmm3, xmm2, xmm0, 0);
|
||||
lerp16(xmm4, xmm6, xmm0, 0);
|
||||
lerp16_4(xmm3, xmm2, xmm0);
|
||||
lerp16_4(xmm4, xmm6, xmm0);
|
||||
|
||||
// xmm0 = uf
|
||||
// xmm3 = rb00
|
||||
|
@ -915,11 +912,11 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm6 = ga11
|
||||
// xmm7 = used
|
||||
|
||||
// rb10 = rb10.lerp16<0>(rb11, uf);
|
||||
// ga10 = ga10.lerp16<0>(ga11, uf);
|
||||
// rb10 = rb10.lerp16_4(rb11, uf);
|
||||
// ga10 = ga10.lerp16_4(ga11, uf);
|
||||
|
||||
lerp16(xmm5, xmm1, xmm0, 0);
|
||||
lerp16(xmm6, xmm2, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm1, xmm0);
|
||||
lerp16_4(xmm6, xmm2, xmm0);
|
||||
|
||||
// xmm3 = rb00
|
||||
// xmm4 = ga00
|
||||
|
@ -928,13 +925,13 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm0, xmm1, xmm2 = free
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb10, vf);
|
||||
// ga00 = ga00.lerp16<0>(ga10, vf);
|
||||
// rb00 = rb00.lerp16_4(rb10, vf);
|
||||
// ga00 = ga00.lerp16_4(ga10, vf);
|
||||
|
||||
vmovdqa(xmm0, ptr[&m_local.temp.vf]);
|
||||
|
||||
lerp16(xmm5, xmm3, xmm0, 0);
|
||||
lerp16(xmm6, xmm4, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm3, xmm0);
|
||||
lerp16_4(xmm6, xmm4, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1298,16 +1295,14 @@ return;
|
|||
|
||||
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vpsrlw(xmm0, 12);
|
||||
vmovdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
// GSVector4i vf = v.xxzzlh().srl16(12);
|
||||
|
||||
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vpsrlw(xmm0, 12);
|
||||
vmovdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
|
||||
|
@ -1430,11 +1425,11 @@ return;
|
|||
// xmm5 = c11
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb01, uf);
|
||||
// ga00 = ga00.lerp16<0>(ga01, uf);
|
||||
// rb00 = rb00.lerp16_4(rb01, uf);
|
||||
// ga00 = ga00.lerp16_4(ga01, uf);
|
||||
|
||||
lerp16(xmm3, xmm2, xmm0, 0);
|
||||
lerp16(xmm4, xmm6, xmm0, 0);
|
||||
lerp16_4(xmm3, xmm2, xmm0);
|
||||
lerp16_4(xmm4, xmm6, xmm0);
|
||||
|
||||
// xmm0 = uf
|
||||
// xmm3 = rb00
|
||||
|
@ -1467,11 +1462,11 @@ return;
|
|||
// xmm6 = ga11
|
||||
// xmm7 = used
|
||||
|
||||
// rb10 = rb10.lerp16<0>(rb11, uf);
|
||||
// ga10 = ga10.lerp16<0>(ga11, uf);
|
||||
// rb10 = rb10.lerp16_4(rb11, uf);
|
||||
// ga10 = ga10.lerp16_4(ga11, uf);
|
||||
|
||||
lerp16(xmm5, xmm1, xmm0, 0);
|
||||
lerp16(xmm6, xmm2, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm1, xmm0);
|
||||
lerp16_4(xmm6, xmm2, xmm0);
|
||||
|
||||
// xmm3 = rb00
|
||||
// xmm4 = ga00
|
||||
|
@ -1480,13 +1475,13 @@ return;
|
|||
// xmm0, xmm1, xmm2 = free
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb10, vf);
|
||||
// ga00 = ga00.lerp16<0>(ga10, vf);
|
||||
// rb00 = rb00.lerp16_4(rb10, vf);
|
||||
// ga00 = ga00.lerp16_4(ga10, vf);
|
||||
|
||||
vmovdqa(xmm0, ptr[&m_local.temp.vf]);
|
||||
|
||||
lerp16(xmm5, xmm3, xmm0, 0);
|
||||
lerp16(xmm6, xmm4, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm3, xmm0);
|
||||
lerp16_4(xmm6, xmm4, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1541,16 +1536,14 @@ return;
|
|||
|
||||
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vpsrlw(xmm0, 12);
|
||||
vmovdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
// GSVector4i vf = v.xxzzlh().srl16(12);
|
||||
|
||||
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vpsrlw(xmm0, 12);
|
||||
vmovdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
|
||||
|
@ -1673,11 +1666,11 @@ return;
|
|||
// xmm5 = c11
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb01, uf);
|
||||
// ga00 = ga00.lerp16<0>(ga01, uf);
|
||||
// rb00 = rb00.lerp16_4(rb01, uf);
|
||||
// ga00 = ga00.lerp16_4(ga01, uf);
|
||||
|
||||
lerp16(xmm3, xmm2, xmm0, 0);
|
||||
lerp16(xmm4, xmm6, xmm0, 0);
|
||||
lerp16_4(xmm3, xmm2, xmm0);
|
||||
lerp16_4(xmm4, xmm6, xmm0);
|
||||
|
||||
// xmm0 = uf
|
||||
// xmm3 = rb00
|
||||
|
@ -1710,11 +1703,11 @@ return;
|
|||
// xmm6 = ga11
|
||||
// xmm7 = used
|
||||
|
||||
// rb10 = rb10.lerp16<0>(rb11, uf);
|
||||
// ga10 = ga10.lerp16<0>(ga11, uf);
|
||||
// rb10 = rb10.lerp16_4(rb11, uf);
|
||||
// ga10 = ga10.lerp16_4(ga11, uf);
|
||||
|
||||
lerp16(xmm5, xmm1, xmm0, 0);
|
||||
lerp16(xmm6, xmm2, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm1, xmm0);
|
||||
lerp16_4(xmm6, xmm2, xmm0);
|
||||
|
||||
// xmm3 = rb00
|
||||
// xmm4 = ga00
|
||||
|
@ -1723,13 +1716,13 @@ return;
|
|||
// xmm0, xmm1, xmm2 = free
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb10, vf);
|
||||
// ga00 = ga00.lerp16<0>(ga10, vf);
|
||||
// rb00 = rb00.lerp16_4(rb10, vf);
|
||||
// ga00 = ga00.lerp16_4(ga10, vf);
|
||||
|
||||
vmovdqa(xmm0, ptr[&m_local.temp.vf]);
|
||||
|
||||
lerp16(xmm5, xmm3, xmm0, 0);
|
||||
lerp16(xmm6, xmm4, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm3, xmm0);
|
||||
lerp16_4(xmm6, xmm4, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -389,8 +389,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
pshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm6, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm6, 15 - GS_BILINEAR_PRECISION);
|
||||
psrlw(xmm6, 12);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm6);
|
||||
}
|
||||
}
|
||||
|
@ -748,8 +747,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
psrlw(xmm0, 12);
|
||||
movdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
|
@ -758,8 +756,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
psrlw(xmm0, 12);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
}
|
||||
|
@ -891,11 +888,11 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm5 = c11
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb01, uf);
|
||||
// ga00 = ga00.lerp16<0>(ga01, uf);
|
||||
// rb00 = rb00.lerp16_4(rb01, uf);
|
||||
// ga00 = ga00.lerp16_4(ga01, uf);
|
||||
|
||||
lerp16(xmm3, xmm2, xmm0, 0);
|
||||
lerp16(xmm4, xmm6, xmm0, 0);
|
||||
lerp16_4(xmm3, xmm2, xmm0);
|
||||
lerp16_4(xmm4, xmm6, xmm0);
|
||||
|
||||
// xmm0 = uf
|
||||
// xmm3 = rb00
|
||||
|
@ -930,11 +927,11 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm6 = ga11
|
||||
// xmm7 = used
|
||||
|
||||
// rb10 = rb10.lerp16<0>(rb11, uf);
|
||||
// ga10 = ga10.lerp16<0>(ga11, uf);
|
||||
// rb10 = rb10.lerp16_4(rb11, uf);
|
||||
// ga10 = ga10.lerp16_4(ga11, uf);
|
||||
|
||||
lerp16(xmm5, xmm1, xmm0, 0);
|
||||
lerp16(xmm6, xmm2, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm1, xmm0);
|
||||
lerp16_4(xmm6, xmm2, xmm0);
|
||||
|
||||
// xmm3 = rb00
|
||||
// xmm4 = ga00
|
||||
|
@ -943,13 +940,13 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm0, xmm1, xmm2 = free
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb10, vf);
|
||||
// ga00 = ga00.lerp16<0>(ga10, vf);
|
||||
// rb00 = rb00.lerp16_4(rb10, vf);
|
||||
// ga00 = ga00.lerp16_4(ga10, vf);
|
||||
|
||||
movdqa(xmm0, ptr[&m_local.temp.vf]);
|
||||
|
||||
lerp16(xmm5, xmm3, xmm0, 0);
|
||||
lerp16(xmm6, xmm4, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm3, xmm0);
|
||||
lerp16_4(xmm6, xmm4, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1353,16 +1350,14 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
|
||||
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
psrlw(xmm0, 12);
|
||||
movdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
// GSVector4i vf = v.xxzzlh().srl16(12);
|
||||
|
||||
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
psrlw(xmm0, 12);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
|
||||
|
@ -1493,11 +1488,11 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
// xmm5 = c11
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb01, uf);
|
||||
// ga00 = ga00.lerp16<0>(ga01, uf);
|
||||
// rb00 = rb00.lerp16_4(rb01, uf);
|
||||
// ga00 = ga00.lerp16_4(ga01, uf);
|
||||
|
||||
lerp16(xmm3, xmm2, xmm0, 0);
|
||||
lerp16(xmm4, xmm6, xmm0, 0);
|
||||
lerp16_4(xmm3, xmm2, xmm0);
|
||||
lerp16_4(xmm4, xmm6, xmm0);
|
||||
|
||||
// xmm0 = uf
|
||||
// xmm3 = rb00
|
||||
|
@ -1532,11 +1527,11 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
// xmm6 = ga11
|
||||
// xmm7 = used
|
||||
|
||||
// rb10 = rb10.lerp16<0>(rb11, uf);
|
||||
// ga10 = ga10.lerp16<0>(ga11, uf);
|
||||
// rb10 = rb10.lerp16_4(rb11, uf);
|
||||
// ga10 = ga10.lerp16_4(ga11, uf);
|
||||
|
||||
lerp16(xmm5, xmm1, xmm0, 0);
|
||||
lerp16(xmm6, xmm2, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm1, xmm0);
|
||||
lerp16_4(xmm6, xmm2, xmm0);
|
||||
|
||||
// xmm3 = rb00
|
||||
// xmm4 = ga00
|
||||
|
@ -1545,13 +1540,13 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
// xmm0, xmm1, xmm2 = free
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb10, vf);
|
||||
// ga00 = ga00.lerp16<0>(ga10, vf);
|
||||
// rb00 = rb00.lerp16_4(rb10, vf);
|
||||
// ga00 = ga00.lerp16_4(ga10, vf);
|
||||
|
||||
movdqa(xmm0, ptr[&m_local.temp.vf]);
|
||||
|
||||
lerp16(xmm5, xmm3, xmm0, 0);
|
||||
lerp16(xmm6, xmm4, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm3, xmm0);
|
||||
lerp16_4(xmm6, xmm4, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1608,16 +1603,14 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
|
||||
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
psrlw(xmm0, 12);
|
||||
movdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
// GSVector4i vf = v.xxzzlh().srl16(12);
|
||||
|
||||
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
psrlw(xmm0, 12);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
|
||||
|
@ -1748,11 +1741,11 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
// xmm5 = c11
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb01, uf);
|
||||
// ga00 = ga00.lerp16<0>(ga01, uf);
|
||||
// rb00 = rb00.lerp16_4(rb01, uf);
|
||||
// ga00 = ga00.lerp16_4(ga01, uf);
|
||||
|
||||
lerp16(xmm3, xmm2, xmm0, 0);
|
||||
lerp16(xmm4, xmm6, xmm0, 0);
|
||||
lerp16_4(xmm3, xmm2, xmm0);
|
||||
lerp16_4(xmm4, xmm6, xmm0);
|
||||
|
||||
// xmm0 = uf
|
||||
// xmm3 = rb00
|
||||
|
@ -1787,11 +1780,11 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
// xmm6 = ga11
|
||||
// xmm7 = used
|
||||
|
||||
// rb10 = rb10.lerp16<0>(rb11, uf);
|
||||
// ga10 = ga10.lerp16<0>(ga11, uf);
|
||||
// rb10 = rb10.lerp16_4(rb11, uf);
|
||||
// ga10 = ga10.lerp16_4(ga11, uf);
|
||||
|
||||
lerp16(xmm5, xmm1, xmm0, 0);
|
||||
lerp16(xmm6, xmm2, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm1, xmm0);
|
||||
lerp16_4(xmm6, xmm2, xmm0);
|
||||
|
||||
// xmm3 = rb00
|
||||
// xmm4 = ga00
|
||||
|
@ -1800,13 +1793,13 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
// xmm0, xmm1, xmm2 = free
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb10, vf);
|
||||
// ga00 = ga00.lerp16<0>(ga10, vf);
|
||||
// rb00 = rb00.lerp16_4(rb10, vf);
|
||||
// ga00 = ga00.lerp16_4(ga10, vf);
|
||||
|
||||
movdqa(xmm0, ptr[&m_local.temp.vf]);
|
||||
|
||||
lerp16(xmm5, xmm3, xmm0, 0);
|
||||
lerp16(xmm6, xmm4, xmm0, 0);
|
||||
lerp16_4(xmm5, xmm3, xmm0);
|
||||
lerp16_4(xmm6, xmm4, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -447,6 +447,7 @@ GSLocalMemory::~GSLocalMemory()
|
|||
vmfree(m_vm8, m_vmsize * 2);
|
||||
|
||||
for_each(m_omap.begin(), m_omap.end(), aligned_free_second());
|
||||
for_each(m_pomap.begin(), m_pomap.end(), aligned_free_second());
|
||||
for_each(m_po4map.begin(), m_po4map.end(), aligned_free_second());
|
||||
|
||||
for(hash_map<uint64, vector<GSVector2i>*>::iterator i = m_p2tmap.begin(); i != m_p2tmap.end(); i++)
|
||||
|
|
|
@ -680,6 +680,8 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
|||
bool fb = fb_pages != NULL;
|
||||
bool zb = zb_pages != NULL;
|
||||
|
||||
bool res = false;
|
||||
|
||||
if(m_fzb != m_context->offset.fzb4)
|
||||
{
|
||||
// targets changed, check everything
|
||||
|
@ -724,7 +726,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
|||
{
|
||||
if(LOG) {fprintf(s_fp, "syncpoint 0\n"); fflush(s_fp);}
|
||||
|
||||
return true;
|
||||
res = true;
|
||||
}
|
||||
|
||||
//if(LOG) {fprintf(s_fp, "no syncpoint *\n"); fflush(s_fp);}
|
||||
|
@ -785,7 +787,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
|||
{
|
||||
if(LOG) {fprintf(s_fp, "syncpoint 1\n"); fflush(s_fp);}
|
||||
|
||||
return true;
|
||||
res = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -795,7 +797,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
|||
// cross-check frame and z-buffer pages, they cannot overlap with each other and with previous batches in queue,
|
||||
// have to be careful when the two buffers are mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
|
||||
|
||||
if(fb)
|
||||
if(fb && !res)
|
||||
{
|
||||
for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
|
@ -803,12 +805,14 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
|||
{
|
||||
if(LOG) {fprintf(s_fp, "syncpoint 2\n"); fflush(s_fp);}
|
||||
|
||||
return true;
|
||||
res = true;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(zb)
|
||||
if(zb && !res)
|
||||
{
|
||||
for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
|
@ -816,14 +820,19 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
|||
{
|
||||
if(LOG) {fprintf(s_fp, "syncpoint 3\n"); fflush(s_fp);}
|
||||
|
||||
return true;
|
||||
res = true;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
if(!fb && fb_pages != NULL) delete [] fb_pages;
|
||||
if(!zb && zb_pages != NULL) delete [] zb_pages;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
bool GSRendererSW::CheckSourcePages(SharedData* sd)
|
||||
|
@ -1334,7 +1343,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
gd.zm |= GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
if(gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data->bbox.eq(data->bbox.rintersect(data->scissor)))
|
||||
if(gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data->bbox.eq(data->bbox.rintersect(data->scissor))) // TODO: check scissor horizontally only
|
||||
{
|
||||
gd.sel.notest = 1;
|
||||
|
||||
|
|
|
@ -24,8 +24,6 @@
|
|||
#include "GSLocalMemory.h"
|
||||
#include "GSVector.h"
|
||||
|
||||
#define GS_BILINEAR_PRECISION 4 // max precision 15, but several games like okami, rogue galaxy, dq8 break above 4
|
||||
|
||||
union GSScanlineSelector
|
||||
{
|
||||
struct
|
||||
|
@ -70,6 +68,7 @@ union GSScanlineSelector
|
|||
uint32 lcm:1; // 52
|
||||
uint32 mmin:2; // 53
|
||||
uint32 notest:1; // 54 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels)
|
||||
// TODO: 1D texture flag? could save 2 texture reads and 4 lerps with bilinear, and also the texture coordinate clamp/wrap code in one direction
|
||||
};
|
||||
|
||||
struct
|
||||
|
|
|
@ -131,25 +131,6 @@ void GSTextureCacheSW::RemoveAll()
|
|||
}
|
||||
}
|
||||
|
||||
// Removes texture t from the cache's bookkeeping and destroys it:
// erases it from m_textures, drops its entry from the m_map lists, then deletes t.
// t must not be used by the caller afterwards.
void GSTextureCacheSW::RemoveAt(Texture* t)
|
||||
{
|
||||
m_textures.erase(t);
|
||||
|
||||
// Walks every m_map slot from index (t->m_TEX0.TBP0 >> 5) through the last one,
// not only the slots t was registered in.
// NOTE(review): presumably TBP0 >> 5 is the first page covered by the texture - confirm.
for(uint32 start = t->m_TEX0.TBP0 >> 5, end = countof(m_map) - 1; start <= end; start++)
|
||||
{
|
||||
list<Texture*>& m = m_map[start];
|
||||
|
||||
for(list<Texture*>::iterator i = m.begin(); i != m.end(); )
|
||||
{
|
||||
// advance i first so that erasing j leaves i valid
list<Texture*>::iterator j = i++;
|
||||
|
||||
// at most one entry per list is removed; stop scanning this list once found
if(*j == t) {m.erase(j); break;}
|
||||
}
|
||||
}
|
||||
|
||||
delete t;
|
||||
}
|
||||
|
||||
void GSTextureCacheSW::IncAge()
|
||||
{
|
||||
for(hash_set<Texture*>::iterator i = m_textures.begin(); i != m_textures.end(); )
|
||||
|
@ -158,9 +139,23 @@ void GSTextureCacheSW::IncAge()
|
|||
|
||||
Texture* t = *j;
|
||||
|
||||
if(++t->m_age > 30)
|
||||
if(++t->m_age > 10)
|
||||
{
|
||||
RemoveAt(t);
|
||||
m_textures.erase(j);
|
||||
|
||||
for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
list<Texture*>& m = m_map[*p];
|
||||
|
||||
for(list<Texture*>::iterator i = m.begin(); i != m.end(); )
|
||||
{
|
||||
list<Texture*>::iterator j = i++;
|
||||
|
||||
if(*j == t) {m.erase(j); break;}
|
||||
}
|
||||
}
|
||||
|
||||
delete t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,6 +68,5 @@ public:
|
|||
void InvalidatePages(const uint32* pages, uint32 psm);
|
||||
|
||||
void RemoveAll();
|
||||
void RemoveAt(Texture* t);
|
||||
void IncAge();
|
||||
};
|
||||
|
|
|
@ -1004,6 +1004,13 @@ public:
|
|||
return d.add16(a.sub16(b).modulate16<shift>(c));
|
||||
}
|
||||
|
||||
// Linear interpolation from *this towards a with weight f and a shift of 4.
// Returns a new vector; *this is not modified.
__forceinline GSVector4i lerp16_4(const GSVector4i& a, const GSVector4i& f) const
|
||||
{
|
||||
// this + (((a - this) * f) >> 4) (a, this: 8-bit, f: 4-bit)
// the shift is applied to the product before the final add
|
||||
|
||||
return add16(a.sub16(*this).mul16l(f).sra16(4));
|
||||
}
|
||||
|
||||
template<int shift> __forceinline GSVector4i modulate16(const GSVector4i& f) const
|
||||
{
|
||||
// a * f << shift
|
||||
|
|
Loading…
Reference in New Issue