From fcc09c2ccad0725cf3066c813461580258370dd7 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Sun, 29 Jan 2012 10:12:20 +0000 Subject: [PATCH] GSdx: fixing two different bugs of the sw renderer, addressing outside the texture in region wrap mode (skygunner), and little gaps in shadows and other random places (dq8, rogue galaxy, okami). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5085 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GS.h | 11 +++++- plugins/GSdx/GSDrawScanline.cpp | 14 +++---- .../GSDrawScanlineCodeGenerator.x86.avx.cpp | 21 ++++++---- .../GSdx/GSDrawScanlineCodeGenerator.x86.cpp | 21 ++++++---- plugins/GSdx/GSRasterizer.cpp | 38 +++++++++---------- plugins/GSdx/GSRenderer.cpp | 6 --- plugins/GSdx/GSRenderer.h | 6 --- plugins/GSdx/GSRendererSW.cpp | 8 ++-- plugins/GSdx/GSScanlineEnvironment.h | 2 + plugins/GSdx/GSState.cpp | 6 +++ plugins/GSdx/GSState.h | 6 +++ 11 files changed, 80 insertions(+), 59 deletions(-) diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 83036e351b..981322bcf3 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -827,7 +827,16 @@ union }; }; REG_END2 - __forceinline bool IsRepeating() {return (1 << TW) > (int)(TBW << 6) || (PSM == PSM_PSMT8 || PSM == PSM_PSMT4) && TBW == 1;} + __forceinline bool IsRepeating() + { + if(TBW < 2) + { + if(PSM == PSM_PSMT8) return TW > 7 || TH > 6; + if(PSM == PSM_PSMT4) return TW > 7 || TH > 7; + } + + return (TBW << 6) < (1u << TW); + } REG_END2 REG64_(GIFReg, TEX1) diff --git a/plugins/GSdx/GSDrawScanline.cpp b/plugins/GSdx/GSDrawScanline.cpp index a5d23f05ea..a151d1038b 100644 --- a/plugins/GSdx/GSDrawScanline.cpp +++ b/plugins/GSdx/GSDrawScanline.cpp @@ -318,7 +318,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else if(sel.ltf) { - vf = v.xxzzlh().srl16(1); + vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); } s = GSVector4::cast(u); @@ -508,8 +508,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS u -= 0x8000; v -= 0x8000; - uf = u.xxzzlh().srl16(1); - vf = v.xxzzlh().srl16(1); + uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); + vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); } GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); @@ -629,8 +629,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS u -= 0x8000; v -= 0x8000; - uf = u.xxzzlh().srl16(1); - vf = v.xxzzlh().srl16(1); + uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); + vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); } GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); @@ -764,11 +764,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS if(sel.ltf) { - uf = u.xxzzlh().srl16(1); + uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); if(sel.prim != GS_SPRITE_CLASS) { - vf = v.xxzzlh().srl16(1); + vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); } } diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp index b67d9dfd30..d66da34b2a 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp @@ -380,7 +380,8 @@ void GSDrawScanlineCodeGenerator::Init() { vpshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm6, 1); + vpsrlw(xmm6, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm6, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.vf], xmm6); } } @@ -730,7 +731,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.uf], xmm0); if(m_sel.prim != GS_SPRITE_CLASS) @@ -739,7 +741,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.vf], xmm0); } } @@ -1283,14 +1286,16 @@ return; vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.uf], xmm0); // GSVector4i vf = v.xxzzlh().srl16(1); vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.vf], xmm0); } @@ -1524,14 +1529,16 @@ return; vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.uf], xmm0); // GSVector4i vf = v.xxzzlh().srl16(1); vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.vf], xmm0); } diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp index aad049e47b..a806d383f4 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp @@ -380,7 +380,8 @@ void GSDrawScanlineCodeGenerator::Init() { pshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm6, 1); + psrlw(xmm6, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm6, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.vf], xmm6); } } @@ -735,7 +736,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture() pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.uf], xmm0); if(m_sel.prim != GS_SPRITE_CLASS) @@ -744,7 +746,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture() pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.vf], xmm0); } } @@ -1338,14 +1341,16 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.uf], xmm0); // GSVector4i vf = v.xxzzlh().srl16(1); pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.vf], xmm0); } @@ -1591,14 +1596,16 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.uf], xmm0); // GSVector4i vf = v.xxzzlh().srl16(1); pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.vf], xmm0); } diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index 5b1af85f85..7252cfaa89 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -444,28 +444,18 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) GSVector4 dxy01c = dxy01 * cross; - GSVector4 _z = dxy01c * dv[1].p.zzzz(dv[0].p); // dx0 * z1, dy0 * z1, dx1 * z0, dy1 * z0 - GSVector4 _f = dxy01c * dv[1].p.wwww(dv[0].p); // dx0 * f1, dy0 * f1, dx1 * f0, dy1 * f0 + /* + dscan = dv[1] * dxy01c.yyyy() - dv[0] * dxy01c.wwww(); + dedge = dv[0] * dxy01c.zzzz() - dv[1] * dxy01c.xxxx(); + */ - GSVector4 _zf = _z.ywyw(_f).hsub(_z.zxzx(_f)); // dy0 * z1 - dy1 * z0, dy0 * f1 - dy1 * f0, dx1 * z0 - dx0 * z1, dx1 * f0 - dx0 * f1 + dscan.p = dv[1].p * dxy01c.yyyy() - dv[0].p * dxy01c.wwww(); + dscan.t = dv[1].t * dxy01c.yyyy() - dv[0].t * dxy01c.wwww(); + dscan.c = dv[1].c * dxy01c.yyyy() - dv[0].c * dxy01c.wwww(); - dscan.p = _zf.zwxy(); // dy0 * z1 - dy1 * z0, dy0 * f1 - dy1 * f0 - dedge.p = _zf; // dx1 * z0 - dx0 * z1, dx1 * f0 - dx0 * f1 - - GSVector4 _s = dxy01c * dv[1].t.xxxx(dv[0].t); // dx0 * s1, dy0 * s1, dx1 * s0, dy1 * s0 - GSVector4 _t = dxy01c * dv[1].t.yyyy(dv[0].t); // dx0 * t1, dy0 * t1, dx1 * t0, dy1 * t0 - GSVector4 _q = dxy01c * dv[1].t.zzzz(dv[0].t); // dx0 * q1, dy0 * q1, dx1 * q0, dy1 * q0 - - dscan.t = _s.ywyw(_t).hsub(_q.ywyw()); // dy0 * s1 - dy1 * s0, dy0 * t1 - dy1 * t0, dy0 * q1 - dy1 * q0 - dedge.t = _s.zxzx(_t).hsub(_q.zxzx()); // dx1 * s0 - dx0 * s1, dx1 * t0 - dx0 * t1, dx1 * q0 - dx0 * q1 - - GSVector4 _r = dxy01c * dv[1].c.xxxx(dv[0].c); // dx0 * r1, dy0 * r1, dx1 * r0, dy1 * r0 - GSVector4 _g = dxy01c * dv[1].c.yyyy(dv[0].c); // dx0 * g1, dy0 * g1, dx1 * g0, dy1 * g0 - GSVector4 _b = dxy01c * dv[1].c.zzzz(dv[0].c); // dx0 * b1, dy0 * b1, dx1 * b0, dy1 * b0 - GSVector4 _a = dxy01c * dv[1].c.wwww(dv[0].c); // dx0 * a1, dy0 * a1, dx1 * a0, dy1 * a0 - - dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0 - dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1 + dedge.p = dv[0].p * dxy01c.zzzz() - dv[1].p * dxy01c.xxxx(); + dedge.t = dv[0].t * dxy01c.zzzz() - dv[1].t * dxy01c.xxxx(); + dedge.c = dv[0].c * dxy01c.zzzz() - dv[1].c * dxy01c.xxxx(); if(m1 & 1) { @@ -555,7 +545,13 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co scan.t = edge.t + dedge.t * dy; scan.c = edge.c + dedge.c * dy; - AddScanline(e++, pixels, left, top, scan + dscan * (l - p0).xxxx()); + GSVector4 prestep = (l - p0).xxxx(); + + scan.p += dscan.p * prestep; + scan.t += dscan.t * prestep; + scan.c += dscan.c * prestep; + + AddScanline(e++, pixels, left, top, scan); } top++; diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index 8840f81b90..a23ad61742 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ b/plugins/GSdx/GSRenderer.cpp @@ -35,12 +35,6 @@ GSRenderer::GSRenderer() m_aa1 = !!theApp.GetConfig("aa1", 0); m_mipmap = !!theApp.GetConfig("mipmap", 1); m_fxaa = !!theApp.GetConfig("fxaa", 0); - - s_n = 0; - s_dump = !!theApp.GetConfig("dump", 0); - s_save = !!theApp.GetConfig("save", 0); - s_savez = !!theApp.GetConfig("savez", 0); - s_saven = theApp.GetConfig("saven", 0); } GSRenderer::~GSRenderer() diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index de83f32801..8f9db7ba13 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -51,12 +51,6 @@ public: GSWnd m_wnd; GSDevice* m_dev; - int s_n; - bool s_dump; - bool s_save; - bool s_savez; - int s_saven; - public: GSRenderer(); virtual ~GSRenderer(); diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 73b877c60a..480e1de678 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -1176,8 +1176,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.t.mask.u32[0] = 0; break; case CLAMP_REGION_REPEAT: - gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU; - gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU; + gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU & (tw - 1); + gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU & (tw - 1); gd.t.mask.u32[0] = 0xffffffff; break; default: @@ -1202,8 +1202,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.t.mask.u32[2] = 0; break; case CLAMP_REGION_REPEAT: - gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV; - gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV; + gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127 + gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV & (th - 1); gd.t.mask.u32[2] = 0xffffffff; break; default: diff --git a/plugins/GSdx/GSScanlineEnvironment.h b/plugins/GSdx/GSScanlineEnvironment.h index f6ecaced11..75b82c8c27 100644 --- a/plugins/GSdx/GSScanlineEnvironment.h +++ b/plugins/GSdx/GSScanlineEnvironment.h @@ -24,6 +24,8 @@ #include "GSLocalMemory.h" #include "GSVector.h" +#define GS_BILINEAR_PRECISION 4 // max precision 15, but several games like okami, rogue galaxy, dq8 break above 4 + union GSScanlineSelector { struct diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 5292df8761..9244bd162b 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -105,6 +105,12 @@ GSState::GSState() Reset(); ResetHandlers(); + + s_n = 0; + s_dump = !!theApp.GetConfig("dump", 0); + s_save = !!theApp.GetConfig("save", 0); + s_savez = !!theApp.GetConfig("savez", 0); + s_saven = theApp.GetConfig("saven", 0); } GSState::~GSState() diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 325cdc0bda..38ae4a1044 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -193,6 +193,12 @@ public: GSDump m_dump; bool m_nativeres; + int s_n; + bool s_dump; + bool s_save; + bool s_savez; + int s_saven; + public: GSState(); virtual ~GSState();