diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 83036e351b..981322bcf3 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -827,7 +827,16 @@ union }; }; REG_END2 - __forceinline bool IsRepeating() {return (1 << TW) > (int)(TBW << 6) || (PSM == PSM_PSMT8 || PSM == PSM_PSMT4) && TBW == 1;} + __forceinline bool IsRepeating() + { + if(TBW < 2) + { + if(PSM == PSM_PSMT8) return TW > 7 || TH > 6; + if(PSM == PSM_PSMT4) return TW > 7 || TH > 7; + } + + return (TBW << 6) < (1u << TW); + } REG_END2 REG64_(GIFReg, TEX1) diff --git a/plugins/GSdx/GSDrawScanline.cpp b/plugins/GSdx/GSDrawScanline.cpp index a5d23f05ea..a151d1038b 100644 --- a/plugins/GSdx/GSDrawScanline.cpp +++ b/plugins/GSdx/GSDrawScanline.cpp @@ -318,7 +318,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else if(sel.ltf) { - vf = v.xxzzlh().srl16(1); + vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); } s = GSVector4::cast(u); @@ -508,8 +508,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS u -= 0x8000; v -= 0x8000; - uf = u.xxzzlh().srl16(1); - vf = v.xxzzlh().srl16(1); + uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); + vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); } GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); @@ -629,8 +629,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS u -= 0x8000; v -= 0x8000; - uf = u.xxzzlh().srl16(1); - vf = v.xxzzlh().srl16(1); + uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); + vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); } GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16)); @@ -764,11 +764,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS if(sel.ltf) { - uf = u.xxzzlh().srl16(1); + uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); if(sel.prim != GS_SPRITE_CLASS) { - vf = v.xxzzlh().srl16(1); + vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION); } } diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp index b67d9dfd30..d66da34b2a 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp @@ -380,7 +380,8 @@ void GSDrawScanlineCodeGenerator::Init() { vpshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm6, 1); + vpsrlw(xmm6, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm6, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.vf], xmm6); } } @@ -730,7 +731,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.uf], xmm0); if(m_sel.prim != GS_SPRITE_CLASS) @@ -739,7 +741,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.vf], xmm0); } } @@ -1283,14 +1286,16 @@ return; vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.uf], xmm0); // GSVector4i vf = v.xxzzlh().srl16(1); vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.vf], xmm0); } @@ -1524,14 +1529,16 @@ return; vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.uf], xmm0); // GSVector4i vf = v.xxzzlh().srl16(1); vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vpsrlw(xmm0, 1); + vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION); vmovdqa(ptr[&m_local.temp.vf], xmm0); } diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp index aad049e47b..a806d383f4 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp @@ -380,7 +380,8 @@ void GSDrawScanlineCodeGenerator::Init() { pshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm6, 1); + psrlw(xmm6, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm6, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.vf], xmm6); } } @@ -735,7 +736,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture() pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.uf], xmm0); if(m_sel.prim != GS_SPRITE_CLASS) @@ -744,7 +746,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture() pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.vf], xmm0); } } @@ -1338,14 +1341,16 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.uf], xmm0); // GSVector4i vf = v.xxzzlh().srl16(1); pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.vf], xmm0); } @@ -1591,14 +1596,16 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.uf], xmm0); // GSVector4i vf = v.xxzzlh().srl16(1); pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - psrlw(xmm0, 1); + psrlw(xmm0, 16 - GS_BILINEAR_PRECISION); + if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION); movdqa(ptr[&m_local.temp.vf], xmm0); } diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index 5b1af85f85..7252cfaa89 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -444,28 +444,18 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) GSVector4 dxy01c = dxy01 * cross; - GSVector4 _z = dxy01c * dv[1].p.zzzz(dv[0].p); // dx0 * z1, dy0 * z1, dx1 * z0, dy1 * z0 - GSVector4 _f = dxy01c * dv[1].p.wwww(dv[0].p); // dx0 * f1, dy0 * f1, dx1 * f0, dy1 * f0 + /* + dscan = dv[1] * dxy01c.yyyy() - dv[0] * dxy01c.wwww(); + dedge = dv[0] * dxy01c.zzzz() - dv[1] * dxy01c.xxxx(); + */ - GSVector4 _zf = _z.ywyw(_f).hsub(_z.zxzx(_f)); // dy0 * z1 - dy1 * z0, dy0 * f1 - dy1 * f0, dx1 * z0 - dx0 * z1, dx1 * f0 - dx0 * f1 + dscan.p = dv[1].p * dxy01c.yyyy() - dv[0].p * dxy01c.wwww(); + dscan.t = dv[1].t * dxy01c.yyyy() - dv[0].t * dxy01c.wwww(); + dscan.c = dv[1].c * dxy01c.yyyy() - dv[0].c * dxy01c.wwww(); - dscan.p = _zf.zwxy(); // dy0 * z1 - dy1 * z0, dy0 * f1 - dy1 * f0 - dedge.p = _zf; // dx1 * z0 - dx0 * z1, dx1 * f0 - dx0 * f1 - - GSVector4 _s = dxy01c * dv[1].t.xxxx(dv[0].t); // dx0 * s1, dy0 * s1, dx1 * s0, dy1 * s0 - GSVector4 _t = dxy01c * dv[1].t.yyyy(dv[0].t); // dx0 * t1, dy0 * t1, dx1 * t0, dy1 * t0 - GSVector4 _q = dxy01c * dv[1].t.zzzz(dv[0].t); // dx0 * q1, dy0 * q1, dx1 * q0, dy1 * q0 - - dscan.t = _s.ywyw(_t).hsub(_q.ywyw()); // dy0 * s1 - dy1 * s0, dy0 * t1 - dy1 * t0, dy0 * q1 - dy1 * q0 - dedge.t = _s.zxzx(_t).hsub(_q.zxzx()); // dx1 * s0 - dx0 * s1, dx1 * t0 - dx0 * t1, dx1 * q0 - dx0 * q1 - - GSVector4 _r = dxy01c * dv[1].c.xxxx(dv[0].c); // dx0 * r1, dy0 * r1, dx1 * r0, dy1 * r0 - GSVector4 _g = dxy01c * dv[1].c.yyyy(dv[0].c); // dx0 * g1, dy0 * g1, dx1 * g0, dy1 * g0 - GSVector4 _b = dxy01c * dv[1].c.zzzz(dv[0].c); // dx0 * b1, dy0 * b1, dx1 * b0, dy1 * b0 - GSVector4 _a = dxy01c * dv[1].c.wwww(dv[0].c); // dx0 * a1, dy0 * a1, dx1 * a0, dy1 * a0 - - dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0 - dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1 + dedge.p = dv[0].p * dxy01c.zzzz() - dv[1].p * dxy01c.xxxx(); + dedge.t = dv[0].t * dxy01c.zzzz() - dv[1].t * dxy01c.xxxx(); + dedge.c = dv[0].c * dxy01c.zzzz() - dv[1].c * dxy01c.xxxx(); if(m1 & 1) { @@ -555,7 +545,13 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co scan.t = edge.t + dedge.t * dy; scan.c = edge.c + dedge.c * dy; - AddScanline(e++, pixels, left, top, scan + dscan * (l - p0).xxxx()); + GSVector4 prestep = (l - p0).xxxx(); + + scan.p += dscan.p * prestep; + scan.t += dscan.t * prestep; + scan.c += dscan.c * prestep; + + AddScanline(e++, pixels, left, top, scan); } top++; diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index 8840f81b90..a23ad61742 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ b/plugins/GSdx/GSRenderer.cpp @@ -35,12 +35,6 @@ GSRenderer::GSRenderer() m_aa1 = !!theApp.GetConfig("aa1", 0); m_mipmap = !!theApp.GetConfig("mipmap", 1); m_fxaa = !!theApp.GetConfig("fxaa", 0); - - s_n = 0; - s_dump = !!theApp.GetConfig("dump", 0); - s_save = !!theApp.GetConfig("save", 0); - s_savez = !!theApp.GetConfig("savez", 0); - s_saven = theApp.GetConfig("saven", 0); } GSRenderer::~GSRenderer() diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index de83f32801..8f9db7ba13 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -51,12 +51,6 @@ public: GSWnd m_wnd; GSDevice* m_dev; - int s_n; - bool s_dump; - bool s_save; - bool s_savez; - int s_saven; - public: GSRenderer(); virtual ~GSRenderer(); diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 73b877c60a..480e1de678 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -1176,8 +1176,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.t.mask.u32[0] = 0; break; case CLAMP_REGION_REPEAT: - gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU; - gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU; + gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU & (tw - 1); + gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU & (tw - 1); gd.t.mask.u32[0] = 0xffffffff; break; default: @@ -1202,8 +1202,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.t.mask.u32[2] = 0; break; case CLAMP_REGION_REPEAT: - gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV; - gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV; + gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127 + gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV & (th - 1); gd.t.mask.u32[2] = 0xffffffff; break; default: diff --git a/plugins/GSdx/GSScanlineEnvironment.h b/plugins/GSdx/GSScanlineEnvironment.h index f6ecaced11..75b82c8c27 100644 --- a/plugins/GSdx/GSScanlineEnvironment.h +++ b/plugins/GSdx/GSScanlineEnvironment.h @@ -24,6 +24,8 @@ #include "GSLocalMemory.h" #include "GSVector.h" +#define GS_BILINEAR_PRECISION 4 // max precision 15, but several games like okami, rogue galaxy, dq8 break above 4 + union GSScanlineSelector { struct diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 5292df8761..9244bd162b 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -105,6 +105,12 @@ GSState::GSState() Reset(); ResetHandlers(); + + s_n = 0; + s_dump = !!theApp.GetConfig("dump", 0); + s_save = !!theApp.GetConfig("save", 0); + s_savez = !!theApp.GetConfig("savez", 0); + s_saven = theApp.GetConfig("saven", 0); } GSState::~GSState() diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 325cdc0bda..38ae4a1044 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -193,6 +193,12 @@ public: GSDump m_dump; bool m_nativeres; + int s_n; + bool s_dump; + bool s_save; + bool s_savez; + int s_saven; + public: GSState(); virtual ~GSState();