GSdx: Implemented edge anti-aliasing (AA1) for software mode; the BIOS and FFX are good test subjects (not many other games use it). It's still a bit slow, but it could be improved a lot by not processing 4 pixels with SSE for each single edge pixel, which just causes a lot of unnecessary texture lookups. The BIOS config screen cubes are still bogus, and gs_user isn't too helpful on AA1...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@721 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-03-09 01:42:56 +00:00
parent ca4ce225a0
commit 120971ec4f
19 changed files with 629 additions and 291 deletions

View File

@ -117,6 +117,7 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer)
rs.m_filter = AfxGetApp()->GetProfileInt(_T("Settings"), _T("filter"), 1);
rs.m_vsync = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("vsync"), FALSE);
rs.m_nativeres = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE);
rs.m_aa1 = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("aa1"), FALSE);
int threads = AfxGetApp()->GetProfileInt(_T("Settings"), _T("swthreads"), 1);

View File

@ -685,32 +685,6 @@ REG64_(GIFReg, PABE)
UINT32 _PAD2:32;
REG_END
/*
AA1 == 1 (for triangles, as tested on the real thing)
C ABE A Ae Ao Aoe
0 0 c c c c
0 1 s c s* c
1 0 d d c c
1 1 d d s* c
2 0 f f ? ?
2 1 f f ? ?
C = ALPHA::C
A = alpha used for blending
Ae = edge alpha used for blending
Ao = alpha to output
Aoe = edge alpha to output
c = coverage
s = source alpha
d = destination alpha
f = fixed alpha (TODO: test with 0x80/2)
* = only if s != 0x80 (s == 0x80 => s == c, but what about s > 0x80? s or 0x80? TODO)
*/
REG64_(GIFReg, PRIM)
UINT32 PRIM:3;
UINT32 IIP:1;

View File

@ -43,7 +43,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
const GSScanlineParam* p = (const GSScanlineParam*)data->param;
m_env.sel = p->sel;
m_sel = p->sel;
m_env.vm = p->vm;
m_env.fbr = p->fbo->row;
@ -60,11 +60,11 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.fga = GSVector4i((int)(env.FOGCOL.ai32[0] >> 8) & 0x00ff00ff);
m_env.dimx = env.dimx;
if(m_env.sel.fpsm == 1)
if(m_sel.fpsm == 1)
{
m_env.fm |= GSVector4i::xff000000();
}
else if(m_env.sel.fpsm == 2)
else if(m_sel.fpsm == 2)
{
GSVector4i rb = m_env.fm & 0x00f800f8;
GSVector4i ga = m_env.fm & 0x8000f800;
@ -72,29 +72,29 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000();
}
if(m_env.sel.zpsm == 1)
if(m_sel.zpsm == 1)
{
m_env.zm |= GSVector4i::xff000000();
}
else if(m_env.sel.zpsm == 2)
else if(m_sel.zpsm == 2)
{
m_env.zm |= GSVector4i::xffff0000();
}
if(m_env.sel.atst == ATST_LESS)
if(m_sel.atst == ATST_LESS)
{
m_env.sel.atst = ATST_LEQUAL;
m_sel.atst = ATST_LEQUAL;
m_env.aref -= GSVector4i::x00000001();
}
else if(m_env.sel.atst == ATST_GREATER)
else if(m_sel.atst == ATST_GREATER)
{
m_env.sel.atst = ATST_GEQUAL;
m_sel.atst = ATST_GEQUAL;
m_env.aref += GSVector4i::x00000001();
}
if(m_env.sel.tfx != TFX_NONE)
if(m_sel.tfx != TFX_NONE)
{
m_env.tex = p->tex;
m_env.clut = p->clut;
@ -163,9 +163,20 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
//
f->ssl = m_ds.Lookup(m_env.sel);
f->ssl = m_ds.Lookup(m_sel);
if(m_env.sel.IsSolidRect())
if(m_sel.aa1)// && (m_state->m_perfmon.GetFrame() & 0x40))
{
GSScanlineSelector sel;
sel.key = m_sel.key;
sel.zwrite = 0;
sel.edge = 1;
f->ssle = m_ds.Lookup(sel);
}
if(m_sel.IsSolidRect())
{
f->sr = (DrawSolidRectPtr)&GSDrawScanline::DrawSolidRect;
}
@ -176,14 +187,14 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
sel.key = 0;
sel.iip = m_env.sel.iip;
sel.tfx = m_env.sel.tfx;
sel.fst = m_env.sel.fst;
sel.fge = m_env.sel.fge;
sel.sprite = m_env.sel.sprite;
sel.fb = m_env.sel.fb;
sel.zb = m_env.sel.zb;
sel.zoverflow = m_env.sel.zoverflow;
sel.iip = m_sel.iip;
sel.tfx = m_sel.tfx;
sel.fst = m_sel.fst;
sel.fge = m_sel.fge;
sel.sprite = m_sel.sprite;
sel.fb = m_sel.fb;
sel.zb = m_sel.zb;
sel.zoverflow = m_sel.zoverflow;
f->ssp = m_sp.Lookup(sel);
}
@ -208,7 +219,7 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
{
DWORD z = (DWORD)(float)v.p.z;
if(m_env.sel.zpsm != 2)
if(m_sel.zpsm != 2)
{
if(m == 0)
{
@ -243,7 +254,7 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
c |= 0x80000000;
}
if(m_env.sel.fpsm != 2)
if(m_sel.fpsm != 2)
{
if(m == 0)
{
@ -353,7 +364,7 @@ GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap(GSScanlineEnvironment& env)
GSSetupPrimCodeGenerator* GSDrawScanline::GSSetupPrimMap::Create(UINT64 key, void* ptr, size_t maxsize)
{
return new GSSetupPrimCodeGenerator(m_env, ptr, maxsize);
return new GSSetupPrimCodeGenerator(m_env, key, ptr, maxsize);
}
//
@ -366,5 +377,5 @@ GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap(GSScanlineEnvironment& env)
GSDrawScanlineCodeGenerator* GSDrawScanline::GSDrawScanlineMap::Create(UINT64 key, void* ptr, size_t maxsize)
{
return new GSDrawScanlineCodeGenerator(m_env, ptr, maxsize);
return new GSDrawScanlineCodeGenerator(m_env, key, ptr, maxsize);
}

View File

@ -31,6 +31,7 @@
class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
{
GSScanlineEnvironment m_env;
GSScanlineSelector m_sel;
//

View File

@ -20,11 +20,12 @@
*/
// TODO: x64 (use the extra regs to avoid spills of zs, zd, uf, vf, rb, ga and keep a few constants in the last two like aref or afix)
// TODO: for edges, processing 4 pixels at a time is wasteful (4x the memory accesses actually needed)
#include "StdAfx.h"
#include "GSDrawScanlineCodeGenerator.h"
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize)
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
, m_env(env)
{
@ -32,6 +33,8 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment&
#error TODO
#endif
m_sel.key = key;
Generate();
}
@ -46,7 +49,10 @@ void GSDrawScanlineCodeGenerator::Generate()
Init(params);
align(16);
if(!m_sel.edge)
{
align(16);
}
L("loop");
@ -60,7 +66,7 @@ L("loop");
// xmm6 = ga (!tme)
// xmm7 = test
bool tme = m_env.sel.tfx != TFX_NONE;
bool tme = m_sel.tfx != TFX_NONE;
TestZ(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3);
@ -98,12 +104,12 @@ L("loop");
// xmm6 = ga
// xmm7 = test
if(m_env.sel.fwrite)
if(m_sel.fwrite)
{
movdqa(xmm3, xmmword[&m_env.fm]);
}
if(m_env.sel.zwrite)
if(m_sel.zwrite)
{
movdqa(xmm4, xmmword[&m_env.zm]);
}
@ -174,12 +180,12 @@ L("loop");
// fm |= test;
// zm |= test;
if(m_env.sel.fwrite)
if(m_sel.fwrite)
{
por(xmm3, xmm7);
}
if(m_env.sel.zwrite)
if(m_sel.zwrite)
{
por(xmm4, xmm7);
}
@ -188,19 +194,19 @@ L("loop");
pcmpeqd(xmm1, xmm1);
if(m_env.sel.fwrite && m_env.sel.zwrite)
if(m_sel.fwrite && m_sel.zwrite)
{
movdqa(xmm0, xmm1);
pcmpeqd(xmm1, xmm3);
pcmpeqd(xmm0, xmm4);
packssdw(xmm1, xmm0);
}
else if(m_env.sel.fwrite)
else if(m_sel.fwrite)
{
pcmpeqd(xmm1, xmm3);
packssdw(xmm1, xmm1);
}
else if(m_env.sel.zwrite)
else if(m_sel.zwrite)
{
pcmpeqd(xmm1, xmm4);
packssdw(xmm1, xmm1);
@ -253,12 +259,15 @@ L("step");
// if(steps <= 0) break;
test(ecx, ecx);
jle("exit", T_NEAR);
if(!m_sel.edge)
{
test(ecx, ecx);
jle("exit", T_NEAR);
Step();
Step();
jmp("loop", T_NEAR);
jmp("loop", T_NEAR);
}
L("exit");
@ -313,7 +322,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
lea(edi, ptr[ebx * 2]);
add(edi, dword[&m_env.fzbc]);
if(!m_env.sel.sprite && (m_env.sel.fwrite && m_env.sel.fge || m_env.sel.zb) || m_env.sel.fb && (m_env.sel.tfx != TFX_NONE || m_env.sel.iip))
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
{
// edx = &m_env.d[skip]
@ -325,13 +334,13 @@ void GSDrawScanlineCodeGenerator::Init(int params)
mov(ebx, dword[esp + _v]);
}
if(!m_env.sel.sprite)
if(!m_sel.sprite)
{
if(m_env.sel.fwrite && m_env.sel.fge || m_env.sel.zb)
if(m_sel.fwrite && m_sel.fge || m_sel.zb)
{
movaps(xmm0, xmmword[ebx + 16]); // v.p
if(m_env.sel.fwrite && m_env.sel.fge)
if(m_sel.fwrite && m_sel.fge)
{
// f = GSVector4i(vp).zzzzh().zzzz().add16(m_env.d[skip].f);
@ -343,7 +352,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
movdqa(xmmword[&m_env.temp.f], xmm1);
}
if(m_env.sel.zb)
if(m_sel.zb)
{
// z = vp.zzzz() + m_env.d[skip].z;
@ -356,19 +365,31 @@ void GSDrawScanlineCodeGenerator::Init(int params)
}
else
{
if(m_env.sel.ztest)
if(m_sel.ztest)
{
movdqa(xmm0, xmmword[&m_env.p.z]);
}
}
if(m_env.sel.fb)
if(m_sel.fb)
{
if(m_env.sel.tfx != TFX_NONE)
if(m_sel.edge)
{
movaps(xmm4, xmmword[ebx + 32]); // v.t
if(m_env.sel.fst)
cvttps2dq(xmm4, xmm4);
pshufhw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
pshufd(xmm4, xmm4, _MM_SHUFFLE(3, 3, 3, 3));
movdqa(xmmword[&m_env.temp.cov], xmm4);
}
if(m_sel.tfx != TFX_NONE)
{
movaps(xmm4, xmmword[ebx + 32]); // v.t
if(m_sel.fst)
{
// GSVector4i vti(vt);
@ -382,13 +403,13 @@ void GSDrawScanlineCodeGenerator::Init(int params)
paddd(xmm2, xmmword[edx + 16 * 7]);
if(!m_env.sel.sprite)
if(!m_sel.sprite)
{
paddd(xmm3, xmmword[edx + 16 * 8]);
}
else
{
if(m_env.sel.ltf)
if(m_sel.ltf)
{
movdqa(xmm4, xmm3);
pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
@ -428,9 +449,9 @@ void GSDrawScanlineCodeGenerator::Init(int params)
}
}
if(m_env.sel.tfx != TFX_DECAL)
if(m_sel.tfx != TFX_DECAL)
{
if(m_env.sel.iip)
if(m_sel.iip)
{
// GSVector4i vc = GSVector4i(v.c);
@ -455,7 +476,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
}
else
{
if(m_env.sel.tfx == TFX_NONE)
if(m_sel.tfx == TFX_NONE)
{
movdqa(xmm5, xmmword[&m_env.c.rb]);
movdqa(xmm6, xmmword[&m_env.c.ga]);
@ -475,11 +496,11 @@ void GSDrawScanlineCodeGenerator::Step()
add(edi, 8);
if(!m_env.sel.sprite)
if(!m_sel.sprite)
{
// z += m_env.d4.z;
if(m_env.sel.zb)
if(m_sel.zb)
{
movaps(xmm0, xmmword[&m_env.temp.z]);
addps(xmm0, xmmword[&m_env.d4.z]);
@ -488,7 +509,7 @@ void GSDrawScanlineCodeGenerator::Step()
// f = f.add16(m_env.d4.f);
if(m_env.sel.fwrite && m_env.sel.fge)
if(m_sel.fwrite && m_sel.fge)
{
movdqa(xmm1, xmmword[&m_env.temp.f]);
paddw(xmm1, xmmword[&m_env.d4.f]);
@ -497,17 +518,17 @@ void GSDrawScanlineCodeGenerator::Step()
}
else
{
if(m_env.sel.ztest)
if(m_sel.ztest)
{
movdqa(xmm0, xmmword[&m_env.p.z]);
}
}
if(m_env.sel.fb)
if(m_sel.fb)
{
if(m_env.sel.tfx != TFX_NONE)
if(m_sel.tfx != TFX_NONE)
{
if(m_env.sel.fst)
if(m_sel.fst)
{
// GSVector4i st = m_env.d4.st;
@ -520,7 +541,7 @@ void GSDrawScanlineCodeGenerator::Step()
paddd(xmm2, xmmword[&m_env.temp.s]);
movdqa(xmmword[&m_env.temp.s], xmm2);
if(!m_env.sel.sprite)
if(!m_sel.sprite)
{
pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
paddd(xmm3, xmmword[&m_env.temp.t]);
@ -561,9 +582,9 @@ void GSDrawScanlineCodeGenerator::Step()
}
}
if(m_env.sel.tfx != TFX_DECAL)
if(m_sel.tfx != TFX_DECAL)
{
if(m_env.sel.iip)
if(m_sel.iip)
{
// GSVector4i c = m_env.d4.c;
@ -583,7 +604,7 @@ void GSDrawScanlineCodeGenerator::Step()
}
else
{
if(m_env.sel.tfx == TFX_NONE)
if(m_sel.tfx == TFX_NONE)
{
movdqa(xmm5, xmmword[&m_env.c.rb]);
movdqa(xmm6, xmmword[&m_env.c.ga]);
@ -604,7 +625,7 @@ void GSDrawScanlineCodeGenerator::Step()
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
{
if(!m_env.sel.zb)
if(!m_sel.zb)
{
return;
}
@ -616,9 +637,9 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
// GSVector4i zs = zi;
if(!m_env.sel.sprite)
if(!m_sel.sprite)
{
if(m_env.sel.zoverflow)
if(m_sel.zoverflow)
{
// zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
@ -644,30 +665,30 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
cvttps2dq(xmm0, xmm0);
}
if(m_env.sel.zwrite)
if(m_sel.zwrite)
{
movdqa(xmmword[&m_env.temp.zs], xmm0);
}
}
if(m_env.sel.ztest)
if(m_sel.ztest)
{
ReadPixel(xmm1, ebp);
if(m_env.sel.zwrite && m_env.sel.zpsm < 2)
if(m_sel.zwrite && m_sel.zpsm < 2)
{
movdqa(xmmword[&m_env.temp.zd], xmm1);
}
// zd &= 0xffffffff >> m_env.sel.zpsm * 8;
// zd &= 0xffffffff >> m_sel.zpsm * 8;
if(m_env.sel.zpsm)
if(m_sel.zpsm)
{
pslld(xmm1, m_env.sel.zpsm * 8);
psrld(xmm1, m_env.sel.zpsm * 8);
pslld(xmm1, m_sel.zpsm * 8);
psrld(xmm1, m_sel.zpsm * 8);
}
if(m_env.sel.zoverflow || m_env.sel.zpsm == 0)
if(m_sel.zoverflow || m_sel.zpsm == 0)
{
// GSVector4i o = GSVector4i::x80000000();
@ -683,7 +704,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
psubd(xmm1, xmm4);
}
switch(m_env.sel.ztst)
switch(m_sel.ztst)
{
case ZTST_GEQUAL:
// test |= zso < zdo;
@ -707,7 +728,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
void GSDrawScanlineCodeGenerator::SampleTexture()
{
if(!m_env.sel.fb || m_env.sel.tfx == TFX_NONE)
if(!m_sel.fb || m_sel.tfx == TFX_NONE)
{
return;
}
@ -716,14 +737,14 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// ebx = tex
if(!m_env.sel.fst)
if(!m_sel.fst)
{
// TODO: move these into Init/Step too?
cvttps2dq(xmm2, xmm2);
cvttps2dq(xmm3, xmm3);
if(m_env.sel.ltf)
if(m_sel.ltf)
{
// u -= 0x8000;
// v -= 0x8000;
@ -739,7 +760,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// xmm2 = u
// xmm3 = v
if(m_env.sel.ltf)
if(m_sel.ltf)
{
// GSVector4i uf = u.xxzzlh().srl16(1);
@ -749,7 +770,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
psrlw(xmm0, 1);
movdqa(xmmword[&m_env.temp.uf], xmm0);
if(!m_env.sel.sprite)
if(!m_sel.sprite)
{
// GSVector4i vf = v.xxzzlh().srl16(1);
@ -767,7 +788,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
psrad(xmm3, 16);
packssdw(xmm2, xmm3);
if(m_env.sel.ltf)
if(m_sel.ltf)
{
// GSVector4i uv1 = uv0.add16(GSVector4i::x0001());
@ -812,7 +833,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// xmm5, xmm6 = free
// xmm7 = used
if(m_env.sel.ltf)
if(m_sel.ltf)
{
// GSVector4i y1 = uv1.uph16() << tw;
// GSVector4i x1 = uv1.upl16();
@ -989,10 +1010,10 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv)
{
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
int wms_clamp = ((m_env.sel.wms + 1) >> 1) & 1;
int wmt_clamp = ((m_env.sel.wmt + 1) >> 1) & 1;
int wms_clamp = ((m_sel.wms + 1) >> 1) & 1;
int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1;
int region = ((m_env.sel.wms | m_env.sel.wmt) >> 1) & 1;
int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1;
if(wms_clamp == wmt_clamp)
{
@ -1052,10 +1073,10 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
{
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
int wms_clamp = ((m_env.sel.wms + 1) >> 1) & 1;
int wmt_clamp = ((m_env.sel.wmt + 1) >> 1) & 1;
int wms_clamp = ((m_sel.wms + 1) >> 1) & 1;
int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1;
int region = ((m_env.sel.wms | m_env.sel.wmt) >> 1) & 1;
int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1;
if(wms_clamp == wmt_clamp)
{
@ -1149,18 +1170,18 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
void GSDrawScanlineCodeGenerator::AlphaTFX()
{
if(!m_env.sel.fb)
if(!m_sel.fb)
{
return;
}
switch(m_env.sel.tfx)
switch(m_sel.tfx)
{
case TFX_MODULATE:
// GSVector4i ga = iip ? gaf : m_env.c.ga;
movdqa(xmm4, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
// gat = gat.modulate16<1>(ga).clamp8();
@ -1170,7 +1191,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
// if(!tcc) gat = gat.mix16(ga.srl16(7));
if(!m_env.sel.tcc)
if(!m_sel.tcc)
{
psrlw(xmm4, 7);
@ -1187,14 +1208,14 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
// GSVector4i ga = iip ? gaf : m_env.c.ga;
movdqa(xmm4, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
movdqa(xmm2, xmm4);
// gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7)));
psrlw(xmm4, 7);
if(m_env.sel.tcc)
if(m_sel.tcc)
{
paddusb(xmm4, xmm6);
}
@ -1207,11 +1228,11 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
// if(!tcc) gat = gat.mix16(ga.srl16(7));
if(!m_env.sel.tcc)
if(!m_sel.tcc)
{
// GSVector4i ga = iip ? gaf : m_env.c.ga;
movdqa(xmm4, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
movdqa(xmm2, xmm4);
psrlw(xmm4, 7);
@ -1225,33 +1246,81 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
// gat = iip ? ga.srl16(7) : ga;
if(m_env.sel.iip)
if(m_sel.iip)
{
psrlw(xmm6, 7);
}
break;
}
if(m_sel.aa1)
{
// gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha
// FIXME: bios config screen cubes
if(!m_sel.abe)
{
// a = cov
if(m_sel.edge)
{
movdqa(xmm0, xmmword[&m_env.temp.cov]);
}
else
{
pcmpeqd(xmm0, xmm0);
psllw(xmm0, 15);
psrlw(xmm0, 8);
}
mix16(xmm6, xmm0, xmm1);
}
else
{
// a = a == 0x80 ? cov : a
pcmpeqd(xmm0, xmm0);
psllw(xmm0, 15);
psrlw(xmm0, 8);
if(m_sel.edge)
{
movdqa(xmm1, xmmword[&m_env.temp.cov]);
}
else
{
movdqa(xmm1, xmm0);
}
pcmpeqw(xmm0, xmm6);
psrld(xmm0, 16);
pslld(xmm0, 16);
blend8(xmm6, xmm1);
}
}
}
void GSDrawScanlineCodeGenerator::TestAlpha()
{
switch(m_env.sel.afail)
switch(m_sel.afail)
{
case AFAIL_FB_ONLY:
if(!m_env.sel.zwrite) return;
if(!m_sel.zwrite) return;
break;
case AFAIL_ZB_ONLY:
if(!m_env.sel.fwrite) return;
if(!m_sel.fwrite) return;
break;
case AFAIL_RGB_ONLY:
if(!m_env.sel.zwrite && m_env.sel.fpsm == 1) return;
if(!m_sel.zwrite && m_sel.fpsm == 1) return;
break;
}
switch(m_env.sel.atst)
switch(m_sel.atst)
{
case ATST_NEVER:
// t = GSVector4i::xffffffff();
@ -1295,7 +1364,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
break;
}
switch(m_env.sel.afail)
switch(m_sel.afail)
{
case AFAIL_KEEP:
// test |= t;
@ -1326,12 +1395,12 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
void GSDrawScanlineCodeGenerator::ColorTFX()
{
if(!m_env.sel.fwrite)
if(!m_sel.fwrite)
{
return;
}
switch(m_env.sel.tfx)
switch(m_sel.tfx)
{
case TFX_MODULATE:
@ -1339,7 +1408,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
// rbt = rbt.modulate16<1>(rb).clamp8();
modulate16<1>(xmm5, xmmword[m_env.sel.iip ? &m_env.temp.rb : &m_env.c.rb]);
modulate16<1>(xmm5, xmmword[m_sel.iip ? &m_env.temp.rb : &m_env.c.rb]);
clamp16(xmm5, xmm1);
@ -1352,11 +1421,11 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
case TFX_HIGHLIGHT:
case TFX_HIGHLIGHT2:
if(m_env.sel.tfx == TFX_HIGHLIGHT2 && m_env.sel.tcc)
if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc)
{
// GSVector4i ga = iip ? gaf : m_env.c.ga;
movdqa(xmm2, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
movdqa(xmm2, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
}
// gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat);
@ -1379,7 +1448,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
// rbt = rbt.modulate16<1>(rb).add16(af).clamp8();
modulate16<1>(xmm5, xmmword[m_env.sel.iip ? &m_env.temp.rb : &m_env.c.rb]);
modulate16<1>(xmm5, xmmword[m_sel.iip ? &m_env.temp.rb : &m_env.c.rb]);
paddw(xmm5, xmm2);
@ -1391,7 +1460,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
// rbt = iip ? rb.srl16(7) : rb;
if(m_env.sel.iip)
if(m_sel.iip)
{
psrlw(xmm5, 7);
}
@ -1402,7 +1471,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
void GSDrawScanlineCodeGenerator::Fog()
{
if(!m_env.sel.fwrite || !m_env.sel.fge)
if(!m_sel.fwrite || !m_sel.fge)
{
return;
}
@ -1410,7 +1479,7 @@ void GSDrawScanlineCodeGenerator::Fog()
// rb = m_env.frb.lerp16<0>(rb, f);
// ga = m_env.fga.lerp16<0>(ga, f).mix16(ga);
movdqa(xmm0, xmmword[!m_env.sel.sprite ? &m_env.temp.f : &m_env.p.f]);
movdqa(xmm0, xmmword[!m_sel.sprite ? &m_env.temp.f : &m_env.p.f]);
movdqa(xmm1, xmm6);
movdqa(xmm2, xmmword[&m_env.frb]);
@ -1424,7 +1493,7 @@ void GSDrawScanlineCodeGenerator::Fog()
void GSDrawScanlineCodeGenerator::ReadFrame()
{
if(!m_env.sel.fb)
if(!m_sel.fb)
{
return;
}
@ -1434,7 +1503,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
mov(ebx, dword[esi]);
add(ebx, dword[edi]);
if(!m_env.sel.rfb)
if(!m_sel.rfb)
{
return;
}
@ -1444,7 +1513,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
void GSDrawScanlineCodeGenerator::TestDestAlpha()
{
if(!m_env.sel.date || m_env.sel.fpsm != 0 && m_env.sel.fpsm != 2)
if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2)
{
return;
}
@ -1453,9 +1522,9 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
movdqa(xmm1, xmm2);
if(m_env.sel.datm)
if(m_sel.datm)
{
if(m_env.sel.fpsm == 2)
if(m_sel.fpsm == 2)
{
pxor(xmm0, xmm0);
psrld(xmm1, 15);
@ -1470,7 +1539,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
}
else
{
if(m_env.sel.fpsm == 2)
if(m_sel.fpsm == 2)
{
pslld(xmm1, 16);
}
@ -1485,16 +1554,16 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
void GSDrawScanlineCodeGenerator::WriteZBuf()
{
if(!m_env.sel.zwrite)
if(!m_sel.zwrite)
{
return;
}
movdqa(xmm1, xmmword[!m_env.sel.sprite ? &m_env.temp.zs : &m_env.p.z]);
movdqa(xmm1, xmmword[!m_sel.sprite ? &m_env.temp.zs : &m_env.p.z]);
bool fast = false;
if(m_env.sel.ztest && m_env.sel.zpsm < 2)
if(m_sel.ztest && m_sel.zpsm < 2)
{
// zs = zs.blend8(zd, zm);
@ -1505,43 +1574,24 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
fast = true;
}
WritePixel(xmm1, xmm0, ebp, dh, fast, m_env.sel.zpsm);
WritePixel(xmm1, xmm0, ebp, dh, fast, m_sel.zpsm);
}
void GSDrawScanlineCodeGenerator::AlphaBlend()
{
if(!m_env.sel.fwrite)
{
return;
}
/*
if(m_env.sel.aa1)
{
// hmm, the playstation logo does not look good...
printf("aa1 %016I64x\n", m_env.sel.key);
if(m_env.sel.fpsm != 1) // TODO: fm == 0xffxxxxxx
{
// a = 0x80
pcmpeqd(xmm0, xmm0);
psllw(xmm0, 15);
psrlw(xmm0, 8);
mix16(xmm6, xmm0, xmm1);
}
return;
}
*/
if(m_env.sel.abe == 255)
if(!m_sel.fwrite)
{
return;
}
if((m_env.sel.abea != m_env.sel.abeb) && (m_env.sel.abea == 1 || m_env.sel.abeb == 1 || m_env.sel.abec == 1) || m_env.sel.abed == 1)
if(m_sel.abe == 0 && m_sel.aa1 == 0)
{
switch(m_env.sel.fpsm)
return;
}
if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1)
{
switch(m_sel.fpsm)
{
case 0:
case 1:
@ -1599,40 +1649,40 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
// xmm2, xmm3 = used
// xmm4, xmm7 = free
if(m_env.sel.pabe || (m_env.sel.abea != m_env.sel.abeb) && (m_env.sel.abeb == 0 || m_env.sel.abed == 0))
if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0))
{
movdqa(xmm4, xmm5);
}
if(m_env.sel.abea != m_env.sel.abeb)
if(m_sel.aba != m_sel.abb)
{
// rb = c[abea * 2 + 0];
// rb = c[aba * 2 + 0];
switch(m_env.sel.abea)
switch(m_sel.aba)
{
case 0: break;
case 1: movdqa(xmm5, xmm0); break;
case 2: pxor(xmm5, xmm5); break;
}
// rb = rb.sub16(c[abeb * 2 + 0]);
// rb = rb.sub16(c[abb * 2 + 0]);
switch(m_env.sel.abeb)
switch(m_sel.abb)
{
case 0: psubw(xmm5, xmm4); break;
case 1: psubw(xmm5, xmm0); break;
case 2: break;
}
if(!(m_env.sel.fpsm == 1 && m_env.sel.abec == 1))
if(!(m_sel.fpsm == 1 && m_sel.abc == 1))
{
// GSVector4i a = abec < 2 ? c[abec * 2 + 1].yywwlh().sll16(7) : m_env.afix;
// GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_env.afix;
switch(m_env.sel.abec)
switch(m_sel.abc)
{
case 0:
case 1:
movdqa(xmm7, m_env.sel.abec ? xmm1 : xmm6);
movdqa(xmm7, m_sel.abc ? xmm1 : xmm6);
pshuflw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1));
pshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1));
psllw(xmm7, 7);
@ -1647,9 +1697,9 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
modulate16<1>(xmm5, xmm7);
}
// rb = rb.add16(c[abed * 2 + 0]);
// rb = rb.add16(c[abd * 2 + 0]);
switch(m_env.sel.abed)
switch(m_sel.abd)
{
case 0: paddw(xmm5, xmm4); break;
case 1: paddw(xmm5, xmm0); break;
@ -1658,9 +1708,9 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
}
else
{
// rb = c[abed * 2 + 0];
// rb = c[abd * 2 + 0];
switch(m_env.sel.abed)
switch(m_sel.abd)
{
case 0: break;
case 1: movdqa(xmm5, xmm0); break;
@ -1668,7 +1718,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
}
}
if(m_env.sel.pabe)
if(m_sel.pabe)
{
// mask = (c[1] << 8).sra32(31);
@ -1690,11 +1740,11 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
movdqa(xmm4, xmm6);
if(m_env.sel.abea != m_env.sel.abeb)
if(m_sel.aba != m_sel.abb)
{
// ga = c[abea * 2 + 1];
// ga = c[aba * 2 + 1];
switch(m_env.sel.abea)
switch(m_sel.aba)
{
case 0: break;
case 1: movdqa(xmm6, xmm1); break;
@ -1703,23 +1753,23 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
// ga = ga.sub16(c[abeb * 2 + 1]);
switch(m_env.sel.abeb)
switch(m_sel.abb)
{
case 0: psubw(xmm6, xmm4); break;
case 1: psubw(xmm6, xmm1); break;
case 2: break;
}
if(!(m_env.sel.fpsm == 1 && m_env.sel.abec == 1))
if(!(m_sel.fpsm == 1 && m_sel.abc == 1))
{
// ga = ga.modulate16<1>(a);
modulate16<1>(xmm6, xmm7);
}
// ga = ga.add16(c[abed * 2 + 1]);
// ga = ga.add16(c[abd * 2 + 1]);
switch(m_env.sel.abed)
switch(m_sel.abd)
{
case 0: paddw(xmm6, xmm4); break;
case 1: paddw(xmm6, xmm1); break;
@ -1728,9 +1778,9 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
}
else
{
// ga = c[abed * 2 + 1];
// ga = c[abd * 2 + 1];
switch(m_env.sel.abed)
switch(m_sel.abd)
{
case 0: break;
case 1: movdqa(xmm6, xmm1); break;
@ -1738,7 +1788,13 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
}
}
if(m_env.sel.pabe)
// xmm4 = src ga
// xmm5 = rb
// xmm6 = ga
// xmm2, xmm3 = used
// xmm0, xmm1, xmm7 = free
if(m_sel.pabe)
{
if(!m_cpu.has(util::Cpu::tSSE41))
{
@ -1757,7 +1813,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
}
else
{
if(m_env.sel.fpsm != 1) // TODO: fm == 0xffxxxxxx
if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx
{
mix16(xmm6, xmm4, xmm7);
}
@ -1768,12 +1824,12 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
{
const int _top = params + 4;
if(!m_env.sel.fwrite)
if(!m_sel.fwrite)
{
return;
}
if(m_env.sel.colclamp == 0)
if(m_sel.colclamp == 0)
{
// c[0] &= 0x000000ff;
// c[1] &= 0x000000ff;
@ -1784,7 +1840,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
pand(xmm6, xmm7);
}
if(m_env.sel.fpsm == 2 && m_env.sel.dthe)
if(m_sel.fpsm == 2 && m_sel.dthe)
{
mov(eax, dword[esp + _top]);
and(eax, 3);
@ -1800,7 +1856,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
punpckhwd(xmm7, xmm6);
packuswb(xmm5, xmm7);
if(m_env.sel.fba && m_env.sel.fpsm != 1)
if(m_sel.fba && m_sel.fpsm != 1)
{
// fs |= 0x80000000;
@ -1809,7 +1865,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
por(xmm5, xmm7);
}
if(m_env.sel.fpsm == 2)
if(m_sel.fpsm == 2)
{
// GSVector4i rb = fs & 0x00f800f8;
// GSVector4i ga = fs & 0x8000f800;
@ -1841,16 +1897,16 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
por(xmm5, xmm7);
}
if(m_env.sel.rfb)
if(m_sel.rfb)
{
// fs = fs.blend(fd, fm);
blend(xmm5, xmm2, xmm3); // TODO: could be skipped in certain cases, depending on fpsm and fm
}
bool fast = m_env.sel.rfb && m_env.sel.fpsm < 2;
bool fast = m_sel.rfb && m_sel.fpsm < 2;
WritePixel(xmm5, xmm0, ebx, dl, fast, m_env.sel.fpsm);
WritePixel(xmm5, xmm0, ebx, dl, fast, m_sel.fpsm);
}
void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
@ -1992,9 +2048,9 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uin
if(i == 0) movd(eax, addr);
else pextrd(eax, addr, i);
if(m_env.sel.tlu) movzx(eax, byte[ebx + eax]);
if(m_sel.tlu) movzx(eax, byte[ebx + eax]);
const Address& src = m_env.sel.tlu ? ptr[eax * 4 + (size_t)m_env.clut] : ptr[ebx + eax * 4];
const Address& src = m_sel.tlu ? ptr[eax * 4 + (size_t)m_env.clut] : ptr[ebx + eax * 4];
if(i == 0) movd(dst, src);
else pinsrd(dst, src, i);

View File

@ -36,6 +36,7 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
util::Cpu m_cpu;
GSScanlineEnvironment& m_env;
GSScanlineSelector m_sel;
void Generate();
@ -72,5 +73,5 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
public:
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize);
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize);
};

View File

@ -38,9 +38,10 @@ GSRasterizer::~GSRasterizer()
void GSRasterizer::Draw(const GSRasterizerData* data)
{
m_dsf.sr = NULL;
m_dsf.ssl = NULL;
m_dsf.ssle = NULL;
m_dsf.ssp = NULL;
m_dsf.sr = NULL;
m_ds->BeginDraw(data, &m_dsf);
@ -111,6 +112,25 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
GSVertexSW dv = v[1] - v[0];
GSVector4 dp = dv.p.abs();
if(m_dsf.ssle)
{
int i = (dp < dp.yxwz()).mask() & 1; // |x| <= |y|
GSVertexSW dscan;
dscan.p = GSVector4::zero();
dscan.t = GSVector4::zero();
dscan.c = GSVector4::zero();
m_dsf.ssp(v, dscan);
DrawEdge(v[0], v[1], dv, scissor, i, 0);
DrawEdge(v[0], v[1], dv, scissor, i, 1);
return;
}
GSVector4i dpi(dp);
if(dpi.y == 0)
@ -197,6 +217,11 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc
i = (aabb == bccb).mask() & 7;
if(m_dsf.ssle)
{
DrawTriangleEdge(v, scissor);
}
switch(i)
{
case 0: // a < b < c
@ -215,6 +240,37 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc
}
}
void GSRasterizer::DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor)
{
GSVertexSW dv[3];
dv[0] = v[1] - v[0];
dv[1] = v[2] - v[0];
dv[2] = v[2] - v[1];
GSVector4 dx = dv[0].p.upl(dv[1].p).xyxy(dv[2].p);
GSVector4 dy = dv[0].p.upl(dv[1].p).zwyx(dv[2].p);
GSVector4 a = dx.abs() < dy.abs(); // |x| <= |y|
GSVector4 b = dx < GSVector4::zero(); // x < 0
GSVector4 c = dv[1].p * (dv[0].p / dv[1].p).yyyy() < dv[0].p; // longest.p.x < 0
int i = a.mask();
int j = ((a | b) ^ c.xxxx()).mask() ^ 2; // evil
GSVertexSW dscan;
dscan.p = GSVector4::zero();
dscan.t = GSVector4::zero();
dscan.c = GSVector4::zero();
m_dsf.ssp(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small)
DrawEdge(v[0], v[1], dv[0], scissor, i & 1, j & 1);
DrawEdge(v[0], v[2], dv[1], scissor, i & 2, j & 2);
DrawEdge(v[1], v[2], dv[2], scissor, i & 4, j & 4);
}
void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
{
GSVertexSW longest;
@ -559,6 +615,222 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
}
}
// Draws the anti-aliased pixels along one edge (v0 -> v1) of a line or triangle.
//
// The edge is walked one pixel at a time along its major axis (rows when
// `orientation` is set, columns otherwise). For every step a single pixel next
// to the edge is handed to the edge scanline function (m_dsf.ssle), with the
// fractional distance between the edge and the pixel boundary, scaled by 0x80,
// stored in v.t.w as the coverage value.
//
// v0, v1      - end points of the edge
// dv          - v1 - v0, as computed by the caller
// scissor     - clipping rectangle; used as x = left, y = top, z = right, w = bottom
// orientation - non-zero: the edge is steeper than 45 degrees (iterate over y)
// side        - non-zero: top/left edge, zero: bottom/right edge
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side)
{
// orientation:
// - true: |dv.p.y| > |dv.p.x|
// - false: |dv.p.x| >= |dv.p.y| (the callers test |x| < |y| with a strict compare)
// side:
// - true: top/left edge
// - false: bottom/right edge
// TODO: bit slow and too much duplicated code
// TODO: inner pre-step is still missing (hardly noticeable)
GSVector4 fscissor(scissor);
// presumably (v0.x, v1.x, v0.y, v1.y), each rounded up - TODO confirm upl() semantics
GSVector4 lrtb = v0.p.upl(v1.p).ceil();
if(orientation)
{
// y-major edge: one pixel per row between the clamped top and bottom.
// Clamp the vertical extent against the top (y) and bottom (w) of the scissor.
GSVector4 tbmax = lrtb.maxv(fscissor.yyyy());
GSVector4 tbmin = lrtb.minv(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin));
int top, bottom;
GSVertexSW edge, dedge;
// Mask bit 1 is the y lane: pick the end point with the smaller y as the start.
if((dv.p >= GSVector4::zero()).mask() & 2)
{
top = tbi.extract32<0>();
bottom = tbi.extract32<3>();
if(top >= bottom) return;
edge = v0;
// Per-row increment of all vertex attributes.
dedge = dv / dv.p.yyyy();
// Pre-step from the start vertex to the first row that is drawn.
edge += dedge * (tbmax.zzzz() - edge.p.yyyy());
}
else
{
top = tbi.extract32<1>();
bottom = tbi.extract32<2>();
if(top >= bottom) return;
edge = v1;
dedge = dv / dv.p.yyyy();
edge += dedge * (tbmax.wwww() - edge.p.yyyy());
}
if(side)
{
// Top/left edge: the pixel drawn is the one just left of ceil(edge.x).
while(1)
{
do
{
// Rows are distributed over the rasterizer threads; skip rows owned by others.
if((top % m_threads) == m_id)
{
GSVector4 p = edge.p.ceil();
// Horizontal scissor test; shifted by one because the pixel drawn is p.x - 1.
if(((fscissor.xxxx() < p) & (p <= fscissor.zzzz())).mask() & 1)
{
// Distance from the edge to the next integer x.
GSVector4 coverage = (p - edge.p).xxxx();
edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w
int x = GSVector4i(p).extract32<0>() - 1;
m_stats.pixels++;
// Arguments are (right, left, top, v): a one pixel wide span.
m_dsf.ssle(x + 1, x, top, edge);
}
}
}
while(0);
if(++top >= bottom) break;
edge += dedge;
}
}
else
{
// Bottom/right edge: the pixel drawn is the one just right of floor(edge.x).
while(1)
{
do
{
if((top % m_threads) == m_id)
{
GSVector4 p = edge.p.floor();
// NOTE(review): the test is applied to p but the pixel drawn is p.x + 1, so x can
// equal the scissor's right value - confirm that this is intended.
if(((fscissor.xxxx() <= p) & (p < fscissor.zzzz())).mask() & 1)
{
// Distance from the previous integer x to the edge.
GSVector4 coverage = (edge.p - p).xxxx();
edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w
int x = GSVector4i(p).extract32<0>() + 1;
m_stats.pixels++;
m_dsf.ssle(x + 1, x, top, edge);
}
}
}
while(0);
if(++top >= bottom) break;
edge += dedge;
}
}
}
else
{
// x-major edge: one pixel per column between the clamped left and right.
// Clamp the horizontal extent against the left (x) and right (z) of the scissor.
GSVector4 lrmax = lrtb.maxv(fscissor.xxxx());
GSVector4 lrmin = lrtb.minv(fscissor.zzzz());
GSVector4i lri = GSVector4i(lrmax.xyxy(lrmin));
int left, right;
GSVertexSW edge, dedge;
// Mask bit 0 is the x lane: pick the end point with the smaller x as the start.
if((dv.p >= GSVector4::zero()).mask() & 1)
{
left = lri.extract32<0>();
right = lri.extract32<3>();
if(left >= right) return;
edge = v0;
// Per-column increment of all vertex attributes.
dedge = dv / dv.p.xxxx();
// Pre-step from the start vertex to the first column that is drawn.
edge += dedge * (lrmax.xxxx() - edge.p.xxxx());
}
else
{
left = lri.extract32<1>();
right = lri.extract32<2>();
if(left >= right) return;
edge = v1;
dedge = dv / dv.p.xxxx();
edge += dedge * (lrmax.yyyy() - edge.p.xxxx());
}
if(side)
{
// Top/left edge: the pixel drawn is the one just above ceil(edge.y).
while(1)
{
do
{
GSVector4 p = edge.p.ceil();
// Vertical scissor test (mask bit 1 is the y lane); shifted by one because the row drawn is p.y - 1.
if(((fscissor.yyyy() < p) & (p <= fscissor.wwww())).mask() & 2)
{
int y = GSVector4i(p).extract32<1>() - 1;
// The row is only known here, so the thread ownership test comes after the scissor test.
if((y % m_threads) == m_id)
{
// Distance from the edge to the next integer y.
GSVector4 coverage = (p - edge.p).yyyy();
edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w
m_stats.pixels++;
// Arguments are (right, left, top, v): a one pixel wide span.
m_dsf.ssle(left + 1, left, y, edge);
}
}
}
while(0);
if(++left >= right) break;
edge += dedge;
}
}
else
{
// Bottom/right edge: the pixel drawn is the one just below floor(edge.y).
while(1)
{
do
{
GSVector4 p = edge.p.floor();
// NOTE(review): the test is applied to p but the row drawn is p.y + 1, so y can
// equal the scissor's bottom value - confirm that this is intended.
if(((fscissor.yyyy() <= p) & (p < fscissor.wwww())).mask() & 2)
{
int y = GSVector4i(p).extract32<1>() + 1;
if((y % m_threads) == m_id)
{
// Distance from the previous integer y to the edge.
GSVector4 coverage = (edge.p - p).yyyy();
edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w
m_stats.pixels++;
m_dsf.ssle(left + 1, left, y, edge);
}
}
}
while(0);
if(++left >= right) break;
edge += dedge;
}
}
}
}
//
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, long* sync)

View File

@ -48,15 +48,16 @@ public:
class IDrawScanline
{
public:
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
typedef void (__fastcall *DrawScanlineStaticPtr)(int right, int left, int top, const GSVertexSW& v);
typedef void (__fastcall *SetupPrimStaticPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
struct Functions
{
DrawSolidRectPtr sr; // TODO
DrawScanlineStaticPtr ssl;
DrawScanlineStaticPtr ssle;
SetupPrimStaticPtr ssp;
DrawSolidRectPtr sr; // TODO
};
virtual ~IDrawScanline() {}
@ -78,6 +79,7 @@ protected:
void DrawPoint(const GSVertexSW* v, const GSVector4i& scissor);
void DrawLine(const GSVertexSW* v, const GSVector4i& scissor);
void DrawTriangle(const GSVertexSW* v, const GSVector4i& scissor);
void DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor);
void DrawSprite(const GSVertexSW* v, const GSVector4i& scissor);
void DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor);
@ -87,6 +89,8 @@ protected:
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, GSVector4& r, const GSVector4& dr, const GSVertexSW& dscan, const GSVector4& scissor);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan, const GSVector4& scissor);
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side);
public:
GSRasterizer(IDrawScanline* ds, int id = 0, int threads = 0);
virtual ~GSRasterizer();

View File

@ -34,6 +34,7 @@ struct GSRendererSettings
int m_filter;
bool m_vsync;
bool m_nativeres;
bool m_aa1;
};
class GSRendererBase : public GSState, protected GSRendererSettings
@ -84,6 +85,12 @@ protected:
m_osd = !m_osd;
return true;
}
// DELETE key: flip the m_aa1 flag (the "Edge anti-aliasing (AA1)" setting) at runtime
// and report the key message as handled.
if(msg.wParam == VK_DELETE)
{
m_aa1 = !m_aa1;
return true;
}
}
return false;
@ -103,6 +110,7 @@ public:
m_filter = rs.m_filter;
m_vsync = rs.m_vsync;
m_nativeres = rs.m_nativeres;
m_aa1 = rs.m_aa1;
};
virtual bool Create(LPCTSTR title) = 0;

View File

@ -262,11 +262,11 @@ protected:
p.sel.zpsm = 3;
p.sel.atst = ATST_ALWAYS;
p.sel.tfx = TFX_NONE;
p.sel.abe = 255;
p.sel.ababcd = 255;
p.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0;
p.fm = context->FRAME.FBMSK;
p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 || PRIM->AA1 && primclass == GS_LINE_CLASS ? 0xffffffff : 0;
p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
{
@ -449,22 +449,24 @@ protected:
p.sel.datm = context->TEST.DATM;
}
if(PRIM->ABE)
if(PRIM->ABE && !context->ALPHA.IsOpaque() || PRIM->AA1)
{
if(!context->ALPHA.IsOpaque())
p.sel.abe = PRIM->ABE;
p.sel.ababcd = context->ALPHA.ai32[0];
if(env.PABE.PABE)
{
p.sel.abe = context->ALPHA.ai32[0];
p.sel.pabe = env.PABE.PABE;
p.sel.pabe = 1;
}
if(PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS))
{
p.sel.aa1 = m_aa1 ? 1 : 0;
}
}
if(PRIM->AA1)
{
p.sel.aa1 = 1;
}
if(p.sel.date
|| p.sel.abea == 1 || p.sel.abeb == 1 || p.sel.abec == 1 || p.sel.abed == 1
|| p.sel.aba == 1 || p.sel.abb == 1 || p.sel.abc == 1 || p.sel.abd == 1
|| p.sel.atst != ATST_ALWAYS && p.sel.afail == AFAIL_RGB_ONLY
|| p.sel.fpsm == 0 && p.fm != 0 && p.fm != 0xffffffff
|| p.sel.fpsm == 1 && (p.fm & 0x00ffffff) != 0 && (p.fm & 0x00ffffff) != 0x00ffffff
@ -532,7 +534,7 @@ protected:
GSRasterizerStats stats;
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
@ -576,6 +578,15 @@ protected:
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);}
}
// Diagnostic dump of the rasterizer stats for this draw. Permanently disabled by the
// constant 0 condition; the commented-out condition would fire for draws that took
// more than 1000000 ticks. Prints the frame number, the scanline selector key, the
// total ticks, and the prim / pixel counts, each followed by the average ticks per
// prim / per pixel (-1 when the corresponding count is zero, to avoid dividing by 0).
if(0)//stats.ticks > 1000000)
{
printf("* [%I64d | %012I64x] ticks %I64d prims %d (%d) pixels %d (%d)\n",
m_perfmon.GetFrame(), p.sel.key,
stats.ticks,
stats.prims, stats.prims > 0 ? (int)(stats.ticks / stats.prims) : -1,
stats.pixels, stats.pixels > 0 ? (int)(stats.ticks / stats.pixels) : -1);
}
}
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r)

View File

@ -41,34 +41,37 @@ union GSScanlineSelector
DWORD tlu:1; // 18
DWORD fge:1; // 19
DWORD date:1; // 20
DWORD abea:2; // 21
DWORD abeb:2; // 23
DWORD abec:2; // 25
DWORD abed:2; // 27
DWORD pabe:1; // 29
DWORD rfb:1; // 30
DWORD sprite:1; // 31
DWORD abe:1; // 21
DWORD aba:2; // 22
DWORD abb:2; // 24
DWORD abc:2; // 26
DWORD abd:2; // 28
DWORD pabe:1; // 30
DWORD aa1:1; // 31
DWORD fwrite:1; // 32
DWORD ftest:1; // 33
DWORD zwrite:1; // 34
DWORD ztest:1; // 35
DWORD wms:2; // 36
DWORD wmt:2; // 38
DWORD datm:1; // 40
DWORD colclamp:1; // 41
DWORD fba:1; // 42
DWORD dthe:1; // 43
DWORD zoverflow:1; // 44 (z max >= 0x80000000)
DWORD aa1:1; // 45
DWORD rfb:1; // 34
DWORD zwrite:1; // 35
DWORD ztest:1; // 36
DWORD zoverflow:1; // 37 (z max >= 0x80000000)
DWORD wms:2; // 38
DWORD wmt:2; // 40
DWORD datm:1; // 42
DWORD colclamp:1; // 43
DWORD fba:1; // 44
DWORD dthe:1; // 45
DWORD sprite:1; // 46
DWORD edge:1; // 47
};
struct
{
DWORD _pad1:21;
DWORD abe:8;
DWORD _pad2:3;
DWORD _pad1:22;
DWORD ababcd:8;
DWORD _pad2:2;
DWORD fb:2;
DWORD _pad3:1;
DWORD zb:2;
};
@ -88,7 +91,7 @@ union GSScanlineSelector
return sprite
&& iip == 0
&& tfx == TFX_NONE
&& abe == 255
&& abe == 0
&& ztst <= 1
&& atst <= 1
&& date == 0
@ -114,8 +117,6 @@ __declspec(align(16)) struct GSScanlineParam
__declspec(align(16)) struct GSScanlineEnvironment
{
GSScanlineSelector sel;
void* vm;
const void* tex;
const DWORD* clut;
@ -140,5 +141,5 @@ __declspec(align(16)) struct GSScanlineEnvironment
struct {GSVector4 z, stq; GSVector4i c, f, st;} d4;
struct {GSVector4i rb, ga;} c;
struct {GSVector4i z, f;} p;
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf;} temp;
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf, cov;} temp;
};

View File

@ -75,6 +75,7 @@ GSSettingsDlg::GSSettingsDlg(CWnd* pParent /*=NULL*/)
, m_vsync(FALSE)
, m_logz(FALSE)
, m_fba(TRUE)
, m_aa1(FALSE)
{
}
@ -115,6 +116,7 @@ void GSSettingsDlg::DoDataExchange(CDataExchange* pDX)
DDX_Check(pDX, IDC_CHECK2, m_vsync);
DDX_Check(pDX, IDC_CHECK5, m_logz);
DDX_Check(pDX, IDC_CHECK7, m_fba);
DDX_Check(pDX, IDC_CHECK8, m_aa1);
}
BEGIN_MESSAGE_MAP(GSSettingsDlg, CDialog)
@ -220,6 +222,7 @@ BOOL GSSettingsDlg::OnInitDialog()
m_vsync = !!pApp->GetProfileInt(_T("Settings"), _T("vsync"), FALSE);
m_logz = !!pApp->GetProfileInt(_T("Settings"), _T("logz"), FALSE);
m_fba = !!pApp->GetProfileInt(_T("Settings"), _T("fba"), TRUE);
m_aa1 = !!pApp->GetProfileInt(_T("Settings"), _T("aa1"), FALSE);
m_resx.SetRange(512, 4096);
m_resy.SetRange(512, 4096);
@ -283,6 +286,7 @@ void GSSettingsDlg::OnOK()
pApp->WriteProfileInt(_T("Settings"), _T("vsync"), m_vsync);
pApp->WriteProfileInt(_T("Settings"), _T("logz"), m_logz);
pApp->WriteProfileInt(_T("Settings"), _T("fba"), m_fba);
pApp->WriteProfileInt(_T("Settings"), _T("aa1"), m_aa1);
pApp->WriteProfileInt(_T("Settings"), _T("resx"), m_resx.GetPos());
pApp->WriteProfileInt(_T("Settings"), _T("resy"), m_resy.GetPos());

View File

@ -60,6 +60,7 @@ public:
BOOL m_vsync;
BOOL m_logz;
BOOL m_fba;
BOOL m_aa1;
protected:
virtual LRESULT DefWindowProc(UINT message, WPARAM wParam, LPARAM lParam);

View File

@ -24,14 +24,16 @@
#include "StdAfx.h"
#include "GSSetupPrimCodeGenerator.h"
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize)
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
, m_env(env)
{
m_en.z = m_env.sel.zb ? 1 : 0;
m_en.f = m_env.sel.fb && m_env.sel.fge ? 1 : 0;
m_en.t = m_env.sel.fb && m_env.sel.tfx != TFX_NONE ? 1 : 0;
m_en.c = m_env.sel.fb && m_env.sel.tfx != TFX_DECAL ? 1 : 0;
m_sel.key = key;
m_en.z = m_sel.zb ? 1 : 0;
m_en.f = m_sel.fb && m_sel.fge ? 1 : 0;
m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0;
m_en.c = m_sel.fb && m_sel.tfx != TFX_DECAL ? 1 : 0;
#if _M_AMD64
#error TODO
@ -44,7 +46,7 @@ void GSSetupPrimCodeGenerator::Generate()
{
const int params = 0;
if((m_en.z || m_en.f) && !m_env.sel.sprite || m_en.t || m_en.c && m_env.sel.iip)
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip)
{
for(int i = 0; i < 5; i++)
{
@ -68,7 +70,7 @@ void GSSetupPrimCodeGenerator::Depth()
return;
}
if(!m_env.sel.sprite)
if(!m_sel.sprite)
{
// GSVector4 t = dscan.p;
@ -148,7 +150,7 @@ void GSSetupPrimCodeGenerator::Depth()
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
if(m_env.sel.zoverflow)
if(m_sel.zoverflow)
{
// m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
@ -193,7 +195,7 @@ void GSSetupPrimCodeGenerator::Texture()
movaps(xmm1, xmm0);
mulps(xmm1, xmm3);
if(m_env.sel.fst)
if(m_sel.fst)
{
// m_env.d4.st = GSVector4i(t * 4.0f);
@ -207,7 +209,7 @@ void GSSetupPrimCodeGenerator::Texture()
movaps(xmmword[&m_env.d4.stq], xmm1);
}
for(int j = 0, k = m_env.sel.fst ? 2 : 3; j < k; j++)
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
{
// GSVector4 ds = t.xxxx();
// GSVector4 dt = t.yyyy();
@ -223,7 +225,7 @@ void GSSetupPrimCodeGenerator::Texture()
movaps(xmm2, xmm1);
mulps(xmm2, Xmm(4 + i));
if(m_env.sel.fst)
if(m_sel.fst)
{
// m_env.d[i].si/ti = GSVector4i(v);
@ -257,7 +259,7 @@ void GSSetupPrimCodeGenerator::Color()
return;
}
if(m_env.sel.iip)
if(m_sel.iip)
{
// GSVector4 c = dscan.c;
@ -351,7 +353,7 @@ void GSSetupPrimCodeGenerator::Color()
// if(!tme) c = c.srl16(7);
if(m_env.sel.tfx == TFX_NONE)
if(m_sel.tfx == TFX_NONE)
{
psrlw(xmm0, 7);
}

View File

@ -36,6 +36,7 @@ class GSSetupPrimCodeGenerator : public CodeGenerator
util::Cpu m_cpu;
GSScanlineEnvironment& m_env;
GSScanlineSelector m_sel;
struct {DWORD z:1, f:1, t:1, c:1;} m_en;
@ -46,5 +47,5 @@ class GSSetupPrimCodeGenerator : public CodeGenerator
void Color();
public:
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize);
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize);
};

View File

@ -2193,12 +2193,12 @@ public:
// Returns the per-element absolute value of this vector, obtained by masking off the
// sign bit of each element (AND with 0x7fffffff).
// NOTE(review): there are two consecutive return statements here; as written only the
// first one is reachable. Presumably one form replaces the other - confirm which one
// is meant to stay.
GSVector4 abs() const
{
return GSVector4(_mm_abs_ps(m));
return *this & cast(GSVector4i::x7fffffff());
}
// Returns this vector with the sign of every element flipped, obtained by toggling
// the sign bit of each element (XOR with 0x80000000).
// NOTE(review): there are two consecutive return statements here; as written only the
// first one is reachable. Presumably one form replaces the other - confirm which one
// is meant to stay.
GSVector4 neg() const
{
return GSVector4(_mm_neg_ps(m));
return *this ^ cast(GSVector4i::x80000000());
}
GSVector4 rcp() const

View File

@ -82,7 +82,7 @@ IDB_LOGO10 BITMAP "res\\logo10.bmp"
// Dialog
//
IDD_CONFIG DIALOGEX 0, 0, 189, 231
IDD_CONFIG DIALOGEX 0, 0, 189, 245
STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "Settings..."
FONT 8, "MS Shell Dlg", 400, 0, 0x1
@ -105,17 +105,19 @@ BEGIN
EDITTEXT IDC_EDIT2,109,132,35,13,ES_AUTOHSCROLL | ES_NUMBER
CONTROL "",IDC_SPIN2,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,133,135,11,14
CONTROL "Native",IDC_CHECK1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,149,134,33,10
CONTROL "Texture filtering",IDC_CHECK4,"Button",BS_AUTO3STATE | WS_TABSTOP,7,180,67,10
CONTROL "Enable tv-out",IDC_CHECK3,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,167,57,10
CONTROL "Wait vsync",IDC_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,193,51,10
CONTROL "NLOOP hack",IDC_CHECK6,"Button",BS_AUTO3STATE | WS_TABSTOP,7,167,55,10
CONTROL "Logarithmic Z",IDC_CHECK5,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,180,58,10
CONTROL "Alpha correction (FBA)",IDC_CHECK7,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,193,102,10
DEFPUSHBUTTON "OK",IDOK,43,210,50,14
PUSHBUTTON "Cancel",IDCANCEL,96,210,50,14
LTEXT "SW rend. threads:",IDC_STATIC,7,149,60,8
EDITTEXT IDC_EDIT3,71,147,35,13,ES_AUTOHSCROLL | ES_NUMBER
CONTROL "",IDC_SPIN3,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,150,11,14
CONTROL "NLOOP hack",IDC_CHECK6,"Button",BS_AUTO3STATE | WS_TABSTOP,7,167,55,10
CONTROL "Enable tv-out",IDC_CHECK3,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,167,57,10
CONTROL "Texture filtering",IDC_CHECK4,"Button",BS_AUTO3STATE | WS_TABSTOP,7,180,67,10
CONTROL "Logarithmic Z",IDC_CHECK5,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,180,58,10
CONTROL "Wait vsync",IDC_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,193,51,10
CONTROL "Alpha correction (FBA)",IDC_CHECK7,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,193,102,10
CONTROL "Edge anti-aliasing (AA1, sw-mode only)",IDC_CHECK8,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,206,141,10
DEFPUSHBUTTON "OK",IDOK,43,224,50,14
PUSHBUTTON "Cancel",IDCANCEL,96,224,50,14
END
IDD_CAPTURE DIALOGEX 0, 0, 279, 71
@ -179,7 +181,7 @@ BEGIN
VERTGUIDE, 80
VERTGUIDE, 182
TOPMARGIN, 7
BOTTOMMARGIN, 224
BOTTOMMARGIN, 238
END
IDD_CAPTURE, DIALOG

View File

@ -13,6 +13,7 @@
#define IDC_EDIT1 2009
#define IDC_EDIT2 2010
#define IDC_BUTTON1 2011
#define IDC_CHECK8 2011
#define IDC_BUTTON2 2012
#define IDC_EDIT3 2012
#define IDC_CUSTOM1 2013

View File

@ -47,19 +47,6 @@
const __m128 ps_3f800000 = _mm_castsi128_ps(_mm_set1_epi32(0x3f800000));
const __m128 ps_4b000000 = _mm_castsi128_ps(_mm_set1_epi32(0x4b000000));
const __m128 ps_7fffffff = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
const __m128 ps_80000000 = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
const __m128 ps_ffffffff = _mm_castsi128_ps(_mm_set1_epi32(0xffffffff));
// Negates all four packed floats of r by toggling each lane's sign bit.
__forceinline __m128 _mm_neg_ps(__m128 r)
{
	// XOR against the 0x80000000 mask flips only the sign bit of every lane
	const __m128 negated = _mm_xor_ps(r, ps_80000000);

	return negated;
}
// Takes the absolute value of all four packed floats of r by clearing each lane's sign bit.
__forceinline __m128 _mm_abs_ps(__m128 r)
{
	// AND against the 0x7fffffff mask keeps everything except the sign bit of every lane
	const __m128 magnitude = _mm_and_ps(r, ps_7fffffff);

	return magnitude;
}
#define _MM_TRANSPOSE4_SI128(row0, row1, row2, row3) \
{ \