GSdx: fix two separate bugs in the sw renderer: texture addressing that went outside the texture in region wrap (repeat) mode (skygunner), and little gaps in shadows and other random places (dq8, rogue galaxy, okami).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5085 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2012-01-29 10:12:20 +00:00
parent bd12a8b7b3
commit fcc09c2cca
11 changed files with 80 additions and 59 deletions

View File

@ -827,7 +827,16 @@ union
};
};
REG_END2
// Returns true when (1 << TW) is greater than TBW * 64, or when PSM is
// PSM_PSMT8 or PSM_PSMT4 and TBW equals 1. Since && binds tighter than ||,
// the TBW == 1 test applies only to the 8-bit/4-bit format check.
__forceinline bool IsRepeating() {return (1 << TW) > (int)(TBW << 6) || (PSM == PSM_PSMT8 || PSM == PSM_PSMT4) && TBW == 1;}
// Decides whether the texture described by this register is larger than the
// buffer it is read from.
//
// - TBW < 2 with PSM_PSMT8: true when TW > 7 or TH > 6.
// - TBW < 2 with PSM_PSMT4: true when TW > 7 or TH > 7.
// - Every other case:       true when (1 << TW) exceeds TBW * 64.
//
// NOTE(review): the 7/6 and 7/7 limits presumably correspond to the log2 page
// dimensions of the 8-bit and 4-bit formats - confirm against the GS manual.
__forceinline bool IsRepeating()
{
	const bool narrow_buffer = TBW < 2;

	// Indexed formats on a narrow buffer are judged by explicit log2 size limits.
	if(narrow_buffer && PSM == PSM_PSMT8) return TW > 7 || TH > 6;

	if(narrow_buffer && PSM == PSM_PSMT4) return TW > 7 || TH > 7;

	// General rule: compare the texture width against the buffer width (TBW * 64).
	return (1u << TW) > (TBW << 6);
}
REG_END2
REG64_(GIFReg, TEX1)

View File

@ -318,7 +318,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
}
else if(sel.ltf)
{
vf = v.xxzzlh().srl16(1);
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
}
s = GSVector4::cast(u);
@ -508,8 +508,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
u -= 0x8000;
v -= 0x8000;
uf = u.xxzzlh().srl16(1);
vf = v.xxzzlh().srl16(1);
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
}
GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
@ -629,8 +629,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
u -= 0x8000;
v -= 0x8000;
uf = u.xxzzlh().srl16(1);
vf = v.xxzzlh().srl16(1);
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
}
GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
@ -764,11 +764,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
if(sel.ltf)
{
uf = u.xxzzlh().srl16(1);
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
if(sel.prim != GS_SPRITE_CLASS)
{
vf = v.xxzzlh().srl16(1);
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
}
}

View File

@ -380,7 +380,8 @@ void GSDrawScanlineCodeGenerator::Init()
{
vpshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm6, 1);
vpsrlw(xmm6, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm6, 15 - GS_BILINEAR_PRECISION);
vmovdqa(ptr[&m_local.temp.vf], xmm6);
}
}
@ -730,7 +731,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 1);
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vmovdqa(ptr[&m_local.temp.uf], xmm0);
if(m_sel.prim != GS_SPRITE_CLASS)
@ -739,7 +741,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 1);
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vmovdqa(ptr[&m_local.temp.vf], xmm0);
}
}
@ -1283,14 +1286,16 @@ return;
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 1);
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vmovdqa(ptr[&m_local.temp.uf], xmm0);
// GSVector4i vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 1);
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vmovdqa(ptr[&m_local.temp.vf], xmm0);
}
@ -1524,14 +1529,16 @@ return;
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 1);
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vmovdqa(ptr[&m_local.temp.uf], xmm0);
// GSVector4i vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm0, 1);
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
vmovdqa(ptr[&m_local.temp.vf], xmm0);
}

View File

@ -380,7 +380,8 @@ void GSDrawScanlineCodeGenerator::Init()
{
pshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm6, 1);
psrlw(xmm6, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm6, 15 - GS_BILINEAR_PRECISION);
movdqa(ptr[&m_local.temp.vf], xmm6);
}
}
@ -735,7 +736,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 1);
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
movdqa(ptr[&m_local.temp.uf], xmm0);
if(m_sel.prim != GS_SPRITE_CLASS)
@ -744,7 +746,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 1);
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
movdqa(ptr[&m_local.temp.vf], xmm0);
}
}
@ -1338,14 +1341,16 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 1);
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
movdqa(ptr[&m_local.temp.uf], xmm0);
// GSVector4i vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 1);
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
movdqa(ptr[&m_local.temp.vf], xmm0);
}
@ -1591,14 +1596,16 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 1);
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
movdqa(ptr[&m_local.temp.uf], xmm0);
// GSVector4i vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
psrlw(xmm0, 1);
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
movdqa(ptr[&m_local.temp.vf], xmm0);
}

View File

@ -444,28 +444,18 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
GSVector4 dxy01c = dxy01 * cross;
GSVector4 _z = dxy01c * dv[1].p.zzzz(dv[0].p); // dx0 * z1, dy0 * z1, dx1 * z0, dy1 * z0
GSVector4 _f = dxy01c * dv[1].p.wwww(dv[0].p); // dx0 * f1, dy0 * f1, dx1 * f0, dy1 * f0
/*
dscan = dv[1] * dxy01c.yyyy() - dv[0] * dxy01c.wwww();
dedge = dv[0] * dxy01c.zzzz() - dv[1] * dxy01c.xxxx();
*/
GSVector4 _zf = _z.ywyw(_f).hsub(_z.zxzx(_f)); // dy0 * z1 - dy1 * z0, dy0 * f1 - dy1 * f0, dx1 * z0 - dx0 * z1, dx1 * f0 - dx0 * f1
dscan.p = dv[1].p * dxy01c.yyyy() - dv[0].p * dxy01c.wwww();
dscan.t = dv[1].t * dxy01c.yyyy() - dv[0].t * dxy01c.wwww();
dscan.c = dv[1].c * dxy01c.yyyy() - dv[0].c * dxy01c.wwww();
dscan.p = _zf.zwxy(); // dy0 * z1 - dy1 * z0, dy0 * f1 - dy1 * f0
dedge.p = _zf; // dx1 * z0 - dx0 * z1, dx1 * f0 - dx0 * f1
GSVector4 _s = dxy01c * dv[1].t.xxxx(dv[0].t); // dx0 * s1, dy0 * s1, dx1 * s0, dy1 * s0
GSVector4 _t = dxy01c * dv[1].t.yyyy(dv[0].t); // dx0 * t1, dy0 * t1, dx1 * t0, dy1 * t0
GSVector4 _q = dxy01c * dv[1].t.zzzz(dv[0].t); // dx0 * q1, dy0 * q1, dx1 * q0, dy1 * q0
dscan.t = _s.ywyw(_t).hsub(_q.ywyw()); // dy0 * s1 - dy1 * s0, dy0 * t1 - dy1 * t0, dy0 * q1 - dy1 * q0
dedge.t = _s.zxzx(_t).hsub(_q.zxzx()); // dx1 * s0 - dx0 * s1, dx1 * t0 - dx0 * t1, dx1 * q0 - dx0 * q1
GSVector4 _r = dxy01c * dv[1].c.xxxx(dv[0].c); // dx0 * r1, dy0 * r1, dx1 * r0, dy1 * r0
GSVector4 _g = dxy01c * dv[1].c.yyyy(dv[0].c); // dx0 * g1, dy0 * g1, dx1 * g0, dy1 * g0
GSVector4 _b = dxy01c * dv[1].c.zzzz(dv[0].c); // dx0 * b1, dy0 * b1, dx1 * b0, dy1 * b0
GSVector4 _a = dxy01c * dv[1].c.wwww(dv[0].c); // dx0 * a1, dy0 * a1, dx1 * a0, dy1 * a0
dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0
dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1
dedge.p = dv[0].p * dxy01c.zzzz() - dv[1].p * dxy01c.xxxx();
dedge.t = dv[0].t * dxy01c.zzzz() - dv[1].t * dxy01c.xxxx();
dedge.c = dv[0].c * dxy01c.zzzz() - dv[1].c * dxy01c.xxxx();
if(m1 & 1)
{
@ -555,7 +545,13 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
scan.t = edge.t + dedge.t * dy;
scan.c = edge.c + dedge.c * dy;
AddScanline(e++, pixels, left, top, scan + dscan * (l - p0).xxxx());
GSVector4 prestep = (l - p0).xxxx();
scan.p += dscan.p * prestep;
scan.t += dscan.t * prestep;
scan.c += dscan.c * prestep;
AddScanline(e++, pixels, left, top, scan);
}
top++;

View File

@ -35,12 +35,6 @@ GSRenderer::GSRenderer()
m_aa1 = !!theApp.GetConfig("aa1", 0);
m_mipmap = !!theApp.GetConfig("mipmap", 1);
m_fxaa = !!theApp.GetConfig("fxaa", 0);
s_n = 0;
s_dump = !!theApp.GetConfig("dump", 0);
s_save = !!theApp.GetConfig("save", 0);
s_savez = !!theApp.GetConfig("savez", 0);
s_saven = theApp.GetConfig("saven", 0);
}
GSRenderer::~GSRenderer()

View File

@ -51,12 +51,6 @@ public:
GSWnd m_wnd;
GSDevice* m_dev;
int s_n;
bool s_dump;
bool s_save;
bool s_savez;
int s_saven;
public:
GSRenderer();
virtual ~GSRenderer();

View File

@ -1176,8 +1176,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
gd.t.mask.u32[0] = 0;
break;
case CLAMP_REGION_REPEAT:
gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU;
gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU;
gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU & (tw - 1);
gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU & (tw - 1);
gd.t.mask.u32[0] = 0xffffffff;
break;
default:
@ -1202,8 +1202,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
gd.t.mask.u32[2] = 0;
break;
case CLAMP_REGION_REPEAT:
gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV;
gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV;
gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127
gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV & (th - 1);
gd.t.mask.u32[2] = 0xffffffff;
break;
default:

View File

@ -24,6 +24,8 @@
#include "GSLocalMemory.h"
#include "GSVector.h"
#define GS_BILINEAR_PRECISION 4 // max precision 15, but several games like okami, rogue galaxy, dq8 break above 4
union GSScanlineSelector
{
struct

View File

@ -105,6 +105,12 @@ GSState::GSState()
Reset();
ResetHandlers();
s_n = 0;
s_dump = !!theApp.GetConfig("dump", 0);
s_save = !!theApp.GetConfig("save", 0);
s_savez = !!theApp.GetConfig("savez", 0);
s_saven = theApp.GetConfig("saven", 0);
}
GSState::~GSState()

View File

@ -193,6 +193,12 @@ public:
GSDump m_dump;
bool m_nativeres;
int s_n;
bool s_dump;
bool s_save;
bool s_savez;
int s_saven;
public:
GSState();
virtual ~GSState();