mirror of https://github.com/PCSX2/pcsx2.git
GSdx: Implemented edge anti-aliasing (aa1) for software mode. The BIOS and FFX are good test subjects (not many other games use it). It's still a bit slow, but it could be improved a lot by not processing 4 pixels with SSE for each single edge pixel; that's just a lot of unnecessary texture lookups. The BIOS config screen cubes are still bogus, and gs_user isn't too helpful on aa1...
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@721 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
ca4ce225a0
commit
120971ec4f
|
@ -117,6 +117,7 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer)
|
|||
rs.m_filter = AfxGetApp()->GetProfileInt(_T("Settings"), _T("filter"), 1);
|
||||
rs.m_vsync = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("vsync"), FALSE);
|
||||
rs.m_nativeres = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE);
|
||||
rs.m_aa1 = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("aa1"), FALSE);
|
||||
|
||||
int threads = AfxGetApp()->GetProfileInt(_T("Settings"), _T("swthreads"), 1);
|
||||
|
||||
|
|
|
@ -685,32 +685,6 @@ REG64_(GIFReg, PABE)
|
|||
UINT32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
/*
|
||||
|
||||
AA1 == 1 (for triangles, as tested on the real thing)
|
||||
|
||||
C ABE A Ae Ao Aoe
|
||||
0 0 c c c c
|
||||
0 1 s c s* c
|
||||
1 0 d d c c
|
||||
1 1 d d s* c
|
||||
2 0 f f ? ?
|
||||
2 1 f f ? ?
|
||||
|
||||
C = ALPHA::C
|
||||
A = alpha used for blending
|
||||
Ae = edge alpha used for blending
|
||||
Ao = alpha to output
|
||||
Aoe = edge alpha to output
|
||||
|
||||
c = coverage
|
||||
s = source alpha
|
||||
d = destination alpha
|
||||
f = fixed alpha (TODO: test with 0x80/2)
|
||||
* = only if s != 0x80 (s == 0x80 => s == c, but what about s > 0x80? s or 0x80? TODO)
|
||||
|
||||
*/
|
||||
|
||||
REG64_(GIFReg, PRIM)
|
||||
UINT32 PRIM:3;
|
||||
UINT32 IIP:1;
|
||||
|
|
|
@ -43,7 +43,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
|
||||
const GSScanlineParam* p = (const GSScanlineParam*)data->param;
|
||||
|
||||
m_env.sel = p->sel;
|
||||
m_sel = p->sel;
|
||||
|
||||
m_env.vm = p->vm;
|
||||
m_env.fbr = p->fbo->row;
|
||||
|
@ -60,11 +60,11 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
m_env.fga = GSVector4i((int)(env.FOGCOL.ai32[0] >> 8) & 0x00ff00ff);
|
||||
m_env.dimx = env.dimx;
|
||||
|
||||
if(m_env.sel.fpsm == 1)
|
||||
if(m_sel.fpsm == 1)
|
||||
{
|
||||
m_env.fm |= GSVector4i::xff000000();
|
||||
}
|
||||
else if(m_env.sel.fpsm == 2)
|
||||
else if(m_sel.fpsm == 2)
|
||||
{
|
||||
GSVector4i rb = m_env.fm & 0x00f800f8;
|
||||
GSVector4i ga = m_env.fm & 0x8000f800;
|
||||
|
@ -72,29 +72,29 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
m_env.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
if(m_env.sel.zpsm == 1)
|
||||
if(m_sel.zpsm == 1)
|
||||
{
|
||||
m_env.zm |= GSVector4i::xff000000();
|
||||
}
|
||||
else if(m_env.sel.zpsm == 2)
|
||||
else if(m_sel.zpsm == 2)
|
||||
{
|
||||
m_env.zm |= GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
if(m_env.sel.atst == ATST_LESS)
|
||||
if(m_sel.atst == ATST_LESS)
|
||||
{
|
||||
m_env.sel.atst = ATST_LEQUAL;
|
||||
m_sel.atst = ATST_LEQUAL;
|
||||
|
||||
m_env.aref -= GSVector4i::x00000001();
|
||||
}
|
||||
else if(m_env.sel.atst == ATST_GREATER)
|
||||
else if(m_sel.atst == ATST_GREATER)
|
||||
{
|
||||
m_env.sel.atst = ATST_GEQUAL;
|
||||
m_sel.atst = ATST_GEQUAL;
|
||||
|
||||
m_env.aref += GSVector4i::x00000001();
|
||||
}
|
||||
|
||||
if(m_env.sel.tfx != TFX_NONE)
|
||||
if(m_sel.tfx != TFX_NONE)
|
||||
{
|
||||
m_env.tex = p->tex;
|
||||
m_env.clut = p->clut;
|
||||
|
@ -163,9 +163,20 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
|
||||
//
|
||||
|
||||
f->ssl = m_ds.Lookup(m_env.sel);
|
||||
f->ssl = m_ds.Lookup(m_sel);
|
||||
|
||||
if(m_env.sel.IsSolidRect())
|
||||
if(m_sel.aa1)// && (m_state->m_perfmon.GetFrame() & 0x40))
|
||||
{
|
||||
GSScanlineSelector sel;
|
||||
|
||||
sel.key = m_sel.key;
|
||||
sel.zwrite = 0;
|
||||
sel.edge = 1;
|
||||
|
||||
f->ssle = m_ds.Lookup(sel);
|
||||
}
|
||||
|
||||
if(m_sel.IsSolidRect())
|
||||
{
|
||||
f->sr = (DrawSolidRectPtr)&GSDrawScanline::DrawSolidRect;
|
||||
}
|
||||
|
@ -176,14 +187,14 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
|
||||
sel.key = 0;
|
||||
|
||||
sel.iip = m_env.sel.iip;
|
||||
sel.tfx = m_env.sel.tfx;
|
||||
sel.fst = m_env.sel.fst;
|
||||
sel.fge = m_env.sel.fge;
|
||||
sel.sprite = m_env.sel.sprite;
|
||||
sel.fb = m_env.sel.fb;
|
||||
sel.zb = m_env.sel.zb;
|
||||
sel.zoverflow = m_env.sel.zoverflow;
|
||||
sel.iip = m_sel.iip;
|
||||
sel.tfx = m_sel.tfx;
|
||||
sel.fst = m_sel.fst;
|
||||
sel.fge = m_sel.fge;
|
||||
sel.sprite = m_sel.sprite;
|
||||
sel.fb = m_sel.fb;
|
||||
sel.zb = m_sel.zb;
|
||||
sel.zoverflow = m_sel.zoverflow;
|
||||
|
||||
f->ssp = m_sp.Lookup(sel);
|
||||
}
|
||||
|
@ -208,7 +219,7 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
|
|||
{
|
||||
DWORD z = (DWORD)(float)v.p.z;
|
||||
|
||||
if(m_env.sel.zpsm != 2)
|
||||
if(m_sel.zpsm != 2)
|
||||
{
|
||||
if(m == 0)
|
||||
{
|
||||
|
@ -243,7 +254,7 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
|
|||
c |= 0x80000000;
|
||||
}
|
||||
|
||||
if(m_env.sel.fpsm != 2)
|
||||
if(m_sel.fpsm != 2)
|
||||
{
|
||||
if(m == 0)
|
||||
{
|
||||
|
@ -353,7 +364,7 @@ GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap(GSScanlineEnvironment& env)
|
|||
|
||||
GSSetupPrimCodeGenerator* GSDrawScanline::GSSetupPrimMap::Create(UINT64 key, void* ptr, size_t maxsize)
|
||||
{
|
||||
return new GSSetupPrimCodeGenerator(m_env, ptr, maxsize);
|
||||
return new GSSetupPrimCodeGenerator(m_env, key, ptr, maxsize);
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -366,5 +377,5 @@ GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap(GSScanlineEnvironment& env)
|
|||
|
||||
GSDrawScanlineCodeGenerator* GSDrawScanline::GSDrawScanlineMap::Create(UINT64 key, void* ptr, size_t maxsize)
|
||||
{
|
||||
return new GSDrawScanlineCodeGenerator(m_env, ptr, maxsize);
|
||||
return new GSDrawScanlineCodeGenerator(m_env, key, ptr, maxsize);
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
||||
{
|
||||
GSScanlineEnvironment m_env;
|
||||
GSScanlineSelector m_sel;
|
||||
|
||||
//
|
||||
|
||||
|
|
|
@ -20,11 +20,12 @@
|
|||
*/
|
||||
|
||||
// TODO: x64 (use the extra regs to avoid spills of zs, zd, uf, vf, rb, ga and keep a few constants in the last two like aref or afix)
|
||||
// TODO: for edges doing 4 pixels is wasteful (needed memory access * 4)
|
||||
|
||||
#include "StdAfx.h"
|
||||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
|
||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize)
|
||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
, m_env(env)
|
||||
{
|
||||
|
@ -32,6 +33,8 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment&
|
|||
#error TODO
|
||||
#endif
|
||||
|
||||
m_sel.key = key;
|
||||
|
||||
Generate();
|
||||
}
|
||||
|
||||
|
@ -46,7 +49,10 @@ void GSDrawScanlineCodeGenerator::Generate()
|
|||
|
||||
Init(params);
|
||||
|
||||
align(16);
|
||||
if(!m_sel.edge)
|
||||
{
|
||||
align(16);
|
||||
}
|
||||
|
||||
L("loop");
|
||||
|
||||
|
@ -60,7 +66,7 @@ L("loop");
|
|||
// xmm6 = ga (!tme)
|
||||
// xmm7 = test
|
||||
|
||||
bool tme = m_env.sel.tfx != TFX_NONE;
|
||||
bool tme = m_sel.tfx != TFX_NONE;
|
||||
|
||||
TestZ(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3);
|
||||
|
||||
|
@ -98,12 +104,12 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
if(m_env.sel.fwrite)
|
||||
if(m_sel.fwrite)
|
||||
{
|
||||
movdqa(xmm3, xmmword[&m_env.fm]);
|
||||
}
|
||||
|
||||
if(m_env.sel.zwrite)
|
||||
if(m_sel.zwrite)
|
||||
{
|
||||
movdqa(xmm4, xmmword[&m_env.zm]);
|
||||
}
|
||||
|
@ -174,12 +180,12 @@ L("loop");
|
|||
// fm |= test;
|
||||
// zm |= test;
|
||||
|
||||
if(m_env.sel.fwrite)
|
||||
if(m_sel.fwrite)
|
||||
{
|
||||
por(xmm3, xmm7);
|
||||
}
|
||||
|
||||
if(m_env.sel.zwrite)
|
||||
if(m_sel.zwrite)
|
||||
{
|
||||
por(xmm4, xmm7);
|
||||
}
|
||||
|
@ -188,19 +194,19 @@ L("loop");
|
|||
|
||||
pcmpeqd(xmm1, xmm1);
|
||||
|
||||
if(m_env.sel.fwrite && m_env.sel.zwrite)
|
||||
if(m_sel.fwrite && m_sel.zwrite)
|
||||
{
|
||||
movdqa(xmm0, xmm1);
|
||||
pcmpeqd(xmm1, xmm3);
|
||||
pcmpeqd(xmm0, xmm4);
|
||||
packssdw(xmm1, xmm0);
|
||||
}
|
||||
else if(m_env.sel.fwrite)
|
||||
else if(m_sel.fwrite)
|
||||
{
|
||||
pcmpeqd(xmm1, xmm3);
|
||||
packssdw(xmm1, xmm1);
|
||||
}
|
||||
else if(m_env.sel.zwrite)
|
||||
else if(m_sel.zwrite)
|
||||
{
|
||||
pcmpeqd(xmm1, xmm4);
|
||||
packssdw(xmm1, xmm1);
|
||||
|
@ -253,12 +259,15 @@ L("step");
|
|||
|
||||
// if(steps <= 0) break;
|
||||
|
||||
test(ecx, ecx);
|
||||
jle("exit", T_NEAR);
|
||||
if(!m_sel.edge)
|
||||
{
|
||||
test(ecx, ecx);
|
||||
jle("exit", T_NEAR);
|
||||
|
||||
Step();
|
||||
Step();
|
||||
|
||||
jmp("loop", T_NEAR);
|
||||
jmp("loop", T_NEAR);
|
||||
}
|
||||
|
||||
L("exit");
|
||||
|
||||
|
@ -313,7 +322,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
lea(edi, ptr[ebx * 2]);
|
||||
add(edi, dword[&m_env.fzbc]);
|
||||
|
||||
if(!m_env.sel.sprite && (m_env.sel.fwrite && m_env.sel.fge || m_env.sel.zb) || m_env.sel.fb && (m_env.sel.tfx != TFX_NONE || m_env.sel.iip))
|
||||
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
|
||||
{
|
||||
// edx = &m_env.d[skip]
|
||||
|
||||
|
@ -325,13 +334,13 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
mov(ebx, dword[esp + _v]);
|
||||
}
|
||||
|
||||
if(!m_env.sel.sprite)
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
if(m_env.sel.fwrite && m_env.sel.fge || m_env.sel.zb)
|
||||
if(m_sel.fwrite && m_sel.fge || m_sel.zb)
|
||||
{
|
||||
movaps(xmm0, xmmword[ebx + 16]); // v.p
|
||||
|
||||
if(m_env.sel.fwrite && m_env.sel.fge)
|
||||
if(m_sel.fwrite && m_sel.fge)
|
||||
{
|
||||
// f = GSVector4i(vp).zzzzh().zzzz().add16(m_env.d[skip].f);
|
||||
|
||||
|
@ -343,7 +352,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
movdqa(xmmword[&m_env.temp.f], xmm1);
|
||||
}
|
||||
|
||||
if(m_env.sel.zb)
|
||||
if(m_sel.zb)
|
||||
{
|
||||
// z = vp.zzzz() + m_env.d[skip].z;
|
||||
|
||||
|
@ -356,19 +365,31 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.ztest)
|
||||
if(m_sel.ztest)
|
||||
{
|
||||
movdqa(xmm0, xmmword[&m_env.p.z]);
|
||||
}
|
||||
}
|
||||
|
||||
if(m_env.sel.fb)
|
||||
if(m_sel.fb)
|
||||
{
|
||||
if(m_env.sel.tfx != TFX_NONE)
|
||||
if(m_sel.edge)
|
||||
{
|
||||
movaps(xmm4, xmmword[ebx + 32]); // v.t
|
||||
|
||||
if(m_env.sel.fst)
|
||||
cvttps2dq(xmm4, xmm4);
|
||||
|
||||
pshufhw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pshufd(xmm4, xmm4, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
movdqa(xmmword[&m_env.temp.cov], xmm4);
|
||||
}
|
||||
|
||||
if(m_sel.tfx != TFX_NONE)
|
||||
{
|
||||
movaps(xmm4, xmmword[ebx + 32]); // v.t
|
||||
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// GSVector4i vti(vt);
|
||||
|
||||
|
@ -382,13 +403,13 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
|
||||
paddd(xmm2, xmmword[edx + 16 * 7]);
|
||||
|
||||
if(!m_env.sel.sprite)
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
paddd(xmm3, xmmword[edx + 16 * 8]);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.ltf)
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
movdqa(xmm4, xmm3);
|
||||
pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
|
@ -428,9 +449,9 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
}
|
||||
}
|
||||
|
||||
if(m_env.sel.tfx != TFX_DECAL)
|
||||
if(m_sel.tfx != TFX_DECAL)
|
||||
{
|
||||
if(m_env.sel.iip)
|
||||
if(m_sel.iip)
|
||||
{
|
||||
// GSVector4i vc = GSVector4i(v.c);
|
||||
|
||||
|
@ -455,7 +476,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.tfx == TFX_NONE)
|
||||
if(m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
movdqa(xmm5, xmmword[&m_env.c.rb]);
|
||||
movdqa(xmm6, xmmword[&m_env.c.ga]);
|
||||
|
@ -475,11 +496,11 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
|
||||
add(edi, 8);
|
||||
|
||||
if(!m_env.sel.sprite)
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
// z += m_env.d4.z;
|
||||
|
||||
if(m_env.sel.zb)
|
||||
if(m_sel.zb)
|
||||
{
|
||||
movaps(xmm0, xmmword[&m_env.temp.z]);
|
||||
addps(xmm0, xmmword[&m_env.d4.z]);
|
||||
|
@ -488,7 +509,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
|
||||
// f = f.add16(m_env.d4.f);
|
||||
|
||||
if(m_env.sel.fwrite && m_env.sel.fge)
|
||||
if(m_sel.fwrite && m_sel.fge)
|
||||
{
|
||||
movdqa(xmm1, xmmword[&m_env.temp.f]);
|
||||
paddw(xmm1, xmmword[&m_env.d4.f]);
|
||||
|
@ -497,17 +518,17 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.ztest)
|
||||
if(m_sel.ztest)
|
||||
{
|
||||
movdqa(xmm0, xmmword[&m_env.p.z]);
|
||||
}
|
||||
}
|
||||
|
||||
if(m_env.sel.fb)
|
||||
if(m_sel.fb)
|
||||
{
|
||||
if(m_env.sel.tfx != TFX_NONE)
|
||||
if(m_sel.tfx != TFX_NONE)
|
||||
{
|
||||
if(m_env.sel.fst)
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// GSVector4i st = m_env.d4.st;
|
||||
|
||||
|
@ -520,7 +541,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
paddd(xmm2, xmmword[&m_env.temp.s]);
|
||||
movdqa(xmmword[&m_env.temp.s], xmm2);
|
||||
|
||||
if(!m_env.sel.sprite)
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
paddd(xmm3, xmmword[&m_env.temp.t]);
|
||||
|
@ -561,9 +582,9 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
}
|
||||
|
||||
if(m_env.sel.tfx != TFX_DECAL)
|
||||
if(m_sel.tfx != TFX_DECAL)
|
||||
{
|
||||
if(m_env.sel.iip)
|
||||
if(m_sel.iip)
|
||||
{
|
||||
// GSVector4i c = m_env.d4.c;
|
||||
|
||||
|
@ -583,7 +604,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.tfx == TFX_NONE)
|
||||
if(m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
movdqa(xmm5, xmmword[&m_env.c.rb]);
|
||||
movdqa(xmm6, xmmword[&m_env.c.ga]);
|
||||
|
@ -604,7 +625,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||
{
|
||||
if(!m_env.sel.zb)
|
||||
if(!m_sel.zb)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -616,9 +637,9 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
|
||||
// GSVector4i zs = zi;
|
||||
|
||||
if(!m_env.sel.sprite)
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
if(m_env.sel.zoverflow)
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
// zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
|
||||
|
@ -644,30 +665,30 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
cvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
|
||||
if(m_env.sel.zwrite)
|
||||
if(m_sel.zwrite)
|
||||
{
|
||||
movdqa(xmmword[&m_env.temp.zs], xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
if(m_env.sel.ztest)
|
||||
if(m_sel.ztest)
|
||||
{
|
||||
ReadPixel(xmm1, ebp);
|
||||
|
||||
if(m_env.sel.zwrite && m_env.sel.zpsm < 2)
|
||||
if(m_sel.zwrite && m_sel.zpsm < 2)
|
||||
{
|
||||
movdqa(xmmword[&m_env.temp.zd], xmm1);
|
||||
}
|
||||
|
||||
// zd &= 0xffffffff >> m_env.sel.zpsm * 8;
|
||||
// zd &= 0xffffffff >> m_sel.zpsm * 8;
|
||||
|
||||
if(m_env.sel.zpsm)
|
||||
if(m_sel.zpsm)
|
||||
{
|
||||
pslld(xmm1, m_env.sel.zpsm * 8);
|
||||
psrld(xmm1, m_env.sel.zpsm * 8);
|
||||
pslld(xmm1, m_sel.zpsm * 8);
|
||||
psrld(xmm1, m_sel.zpsm * 8);
|
||||
}
|
||||
|
||||
if(m_env.sel.zoverflow || m_env.sel.zpsm == 0)
|
||||
if(m_sel.zoverflow || m_sel.zpsm == 0)
|
||||
{
|
||||
// GSVector4i o = GSVector4i::x80000000();
|
||||
|
||||
|
@ -683,7 +704,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
psubd(xmm1, xmm4);
|
||||
}
|
||||
|
||||
switch(m_env.sel.ztst)
|
||||
switch(m_sel.ztst)
|
||||
{
|
||||
case ZTST_GEQUAL:
|
||||
// test |= zso < zdo;
|
||||
|
@ -707,7 +728,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
|
||||
void GSDrawScanlineCodeGenerator::SampleTexture()
|
||||
{
|
||||
if(!m_env.sel.fb || m_env.sel.tfx == TFX_NONE)
|
||||
if(!m_sel.fb || m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -716,14 +737,14 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// ebx = tex
|
||||
|
||||
if(!m_env.sel.fst)
|
||||
if(!m_sel.fst)
|
||||
{
|
||||
// TODO: move these into Init/Step too?
|
||||
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
cvttps2dq(xmm3, xmm3);
|
||||
|
||||
if(m_env.sel.ltf)
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
// u -= 0x8000;
|
||||
// v -= 0x8000;
|
||||
|
@ -739,7 +760,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm2 = u
|
||||
// xmm3 = v
|
||||
|
||||
if(m_env.sel.ltf)
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
// GSVector4i uf = u.xxzzlh().srl16(1);
|
||||
|
||||
|
@ -749,7 +770,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
psrlw(xmm0, 1);
|
||||
movdqa(xmmword[&m_env.temp.uf], xmm0);
|
||||
|
||||
if(!m_env.sel.sprite)
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
// GSVector4i vf = v.xxzzlh().srl16(1);
|
||||
|
||||
|
@ -767,7 +788,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
psrad(xmm3, 16);
|
||||
packssdw(xmm2, xmm3);
|
||||
|
||||
if(m_env.sel.ltf)
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
// GSVector4i uv1 = uv0.add16(GSVector4i::x0001());
|
||||
|
||||
|
@ -812,7 +833,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm5, xmm6 = free
|
||||
// xmm7 = used
|
||||
|
||||
if(m_env.sel.ltf)
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
// GSVector4i y1 = uv1.uph16() << tw;
|
||||
// GSVector4i x1 = uv1.upl16();
|
||||
|
@ -989,10 +1010,10 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv)
|
|||
{
|
||||
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
|
||||
|
||||
int wms_clamp = ((m_env.sel.wms + 1) >> 1) & 1;
|
||||
int wmt_clamp = ((m_env.sel.wmt + 1) >> 1) & 1;
|
||||
int wms_clamp = ((m_sel.wms + 1) >> 1) & 1;
|
||||
int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1;
|
||||
|
||||
int region = ((m_env.sel.wms | m_env.sel.wmt) >> 1) & 1;
|
||||
int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1;
|
||||
|
||||
if(wms_clamp == wmt_clamp)
|
||||
{
|
||||
|
@ -1052,10 +1073,10 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
|||
{
|
||||
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
|
||||
|
||||
int wms_clamp = ((m_env.sel.wms + 1) >> 1) & 1;
|
||||
int wmt_clamp = ((m_env.sel.wmt + 1) >> 1) & 1;
|
||||
int wms_clamp = ((m_sel.wms + 1) >> 1) & 1;
|
||||
int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1;
|
||||
|
||||
int region = ((m_env.sel.wms | m_env.sel.wmt) >> 1) & 1;
|
||||
int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1;
|
||||
|
||||
if(wms_clamp == wmt_clamp)
|
||||
{
|
||||
|
@ -1149,18 +1170,18 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
|||
|
||||
void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||
{
|
||||
if(!m_env.sel.fb)
|
||||
if(!m_sel.fb)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
switch(m_env.sel.tfx)
|
||||
switch(m_sel.tfx)
|
||||
{
|
||||
case TFX_MODULATE:
|
||||
|
||||
// GSVector4i ga = iip ? gaf : m_env.c.ga;
|
||||
|
||||
movdqa(xmm4, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
|
||||
movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
|
||||
|
||||
// gat = gat.modulate16<1>(ga).clamp8();
|
||||
|
||||
|
@ -1170,7 +1191,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
|
||||
// if(!tcc) gat = gat.mix16(ga.srl16(7));
|
||||
|
||||
if(!m_env.sel.tcc)
|
||||
if(!m_sel.tcc)
|
||||
{
|
||||
psrlw(xmm4, 7);
|
||||
|
||||
|
@ -1187,14 +1208,14 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
|
||||
// GSVector4i ga = iip ? gaf : m_env.c.ga;
|
||||
|
||||
movdqa(xmm4, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
|
||||
movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
|
||||
movdqa(xmm2, xmm4);
|
||||
|
||||
// gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7)));
|
||||
|
||||
psrlw(xmm4, 7);
|
||||
|
||||
if(m_env.sel.tcc)
|
||||
if(m_sel.tcc)
|
||||
{
|
||||
paddusb(xmm4, xmm6);
|
||||
}
|
||||
|
@ -1207,11 +1228,11 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
|
||||
// if(!tcc) gat = gat.mix16(ga.srl16(7));
|
||||
|
||||
if(!m_env.sel.tcc)
|
||||
if(!m_sel.tcc)
|
||||
{
|
||||
// GSVector4i ga = iip ? gaf : m_env.c.ga;
|
||||
|
||||
movdqa(xmm4, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
|
||||
movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
|
||||
movdqa(xmm2, xmm4);
|
||||
|
||||
psrlw(xmm4, 7);
|
||||
|
@ -1225,33 +1246,81 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
|
||||
// gat = iip ? ga.srl16(7) : ga;
|
||||
|
||||
if(m_env.sel.iip)
|
||||
if(m_sel.iip)
|
||||
{
|
||||
psrlw(xmm6, 7);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if(m_sel.aa1)
|
||||
{
|
||||
// gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha
|
||||
|
||||
// FIXME: bios config screen cubes
|
||||
|
||||
if(!m_sel.abe)
|
||||
{
|
||||
// a = cov
|
||||
|
||||
if(m_sel.edge)
|
||||
{
|
||||
movdqa(xmm0, xmmword[&m_env.temp.cov]);
|
||||
}
|
||||
else
|
||||
{
|
||||
pcmpeqd(xmm0, xmm0);
|
||||
psllw(xmm0, 15);
|
||||
psrlw(xmm0, 8);
|
||||
}
|
||||
|
||||
mix16(xmm6, xmm0, xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// a = a == 0x80 ? cov : a
|
||||
|
||||
pcmpeqd(xmm0, xmm0);
|
||||
psllw(xmm0, 15);
|
||||
psrlw(xmm0, 8);
|
||||
|
||||
if(m_sel.edge)
|
||||
{
|
||||
movdqa(xmm1, xmmword[&m_env.temp.cov]);
|
||||
}
|
||||
else
|
||||
{
|
||||
movdqa(xmm1, xmm0);
|
||||
}
|
||||
|
||||
pcmpeqw(xmm0, xmm6);
|
||||
psrld(xmm0, 16);
|
||||
pslld(xmm0, 16);
|
||||
|
||||
blend8(xmm6, xmm1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestAlpha()
|
||||
{
|
||||
switch(m_env.sel.afail)
|
||||
switch(m_sel.afail)
|
||||
{
|
||||
case AFAIL_FB_ONLY:
|
||||
if(!m_env.sel.zwrite) return;
|
||||
if(!m_sel.zwrite) return;
|
||||
break;
|
||||
|
||||
case AFAIL_ZB_ONLY:
|
||||
if(!m_env.sel.fwrite) return;
|
||||
if(!m_sel.fwrite) return;
|
||||
break;
|
||||
|
||||
case AFAIL_RGB_ONLY:
|
||||
if(!m_env.sel.zwrite && m_env.sel.fpsm == 1) return;
|
||||
if(!m_sel.zwrite && m_sel.fpsm == 1) return;
|
||||
break;
|
||||
}
|
||||
|
||||
switch(m_env.sel.atst)
|
||||
switch(m_sel.atst)
|
||||
{
|
||||
case ATST_NEVER:
|
||||
// t = GSVector4i::xffffffff();
|
||||
|
@ -1295,7 +1364,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
|
|||
break;
|
||||
}
|
||||
|
||||
switch(m_env.sel.afail)
|
||||
switch(m_sel.afail)
|
||||
{
|
||||
case AFAIL_KEEP:
|
||||
// test |= t;
|
||||
|
@ -1326,12 +1395,12 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::ColorTFX()
|
||||
{
|
||||
if(!m_env.sel.fwrite)
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
switch(m_env.sel.tfx)
|
||||
switch(m_sel.tfx)
|
||||
{
|
||||
case TFX_MODULATE:
|
||||
|
||||
|
@ -1339,7 +1408,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
|||
|
||||
// rbt = rbt.modulate16<1>(rb).clamp8();
|
||||
|
||||
modulate16<1>(xmm5, xmmword[m_env.sel.iip ? &m_env.temp.rb : &m_env.c.rb]);
|
||||
modulate16<1>(xmm5, xmmword[m_sel.iip ? &m_env.temp.rb : &m_env.c.rb]);
|
||||
|
||||
clamp16(xmm5, xmm1);
|
||||
|
||||
|
@ -1352,11 +1421,11 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
|||
case TFX_HIGHLIGHT:
|
||||
case TFX_HIGHLIGHT2:
|
||||
|
||||
if(m_env.sel.tfx == TFX_HIGHLIGHT2 && m_env.sel.tcc)
|
||||
if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc)
|
||||
{
|
||||
// GSVector4i ga = iip ? gaf : m_env.c.ga;
|
||||
|
||||
movdqa(xmm2, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
|
||||
movdqa(xmm2, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]);
|
||||
}
|
||||
|
||||
// gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat);
|
||||
|
@ -1379,7 +1448,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
|||
|
||||
// rbt = rbt.modulate16<1>(rb).add16(af).clamp8();
|
||||
|
||||
modulate16<1>(xmm5, xmmword[m_env.sel.iip ? &m_env.temp.rb : &m_env.c.rb]);
|
||||
modulate16<1>(xmm5, xmmword[m_sel.iip ? &m_env.temp.rb : &m_env.c.rb]);
|
||||
|
||||
paddw(xmm5, xmm2);
|
||||
|
||||
|
@ -1391,7 +1460,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
|||
|
||||
// rbt = iip ? rb.srl16(7) : rb;
|
||||
|
||||
if(m_env.sel.iip)
|
||||
if(m_sel.iip)
|
||||
{
|
||||
psrlw(xmm5, 7);
|
||||
}
|
||||
|
@ -1402,7 +1471,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::Fog()
|
||||
{
|
||||
if(!m_env.sel.fwrite || !m_env.sel.fge)
|
||||
if(!m_sel.fwrite || !m_sel.fge)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -1410,7 +1479,7 @@ void GSDrawScanlineCodeGenerator::Fog()
|
|||
// rb = m_env.frb.lerp16<0>(rb, f);
|
||||
// ga = m_env.fga.lerp16<0>(ga, f).mix16(ga);
|
||||
|
||||
movdqa(xmm0, xmmword[!m_env.sel.sprite ? &m_env.temp.f : &m_env.p.f]);
|
||||
movdqa(xmm0, xmmword[!m_sel.sprite ? &m_env.temp.f : &m_env.p.f]);
|
||||
movdqa(xmm1, xmm6);
|
||||
|
||||
movdqa(xmm2, xmmword[&m_env.frb]);
|
||||
|
@ -1424,7 +1493,7 @@ void GSDrawScanlineCodeGenerator::Fog()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::ReadFrame()
|
||||
{
|
||||
if(!m_env.sel.fb)
|
||||
if(!m_sel.fb)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -1434,7 +1503,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
|
|||
mov(ebx, dword[esi]);
|
||||
add(ebx, dword[edi]);
|
||||
|
||||
if(!m_env.sel.rfb)
|
||||
if(!m_sel.rfb)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -1444,7 +1513,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
||||
{
|
||||
if(!m_env.sel.date || m_env.sel.fpsm != 0 && m_env.sel.fpsm != 2)
|
||||
if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -1453,9 +1522,9 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
|
||||
movdqa(xmm1, xmm2);
|
||||
|
||||
if(m_env.sel.datm)
|
||||
if(m_sel.datm)
|
||||
{
|
||||
if(m_env.sel.fpsm == 2)
|
||||
if(m_sel.fpsm == 2)
|
||||
{
|
||||
pxor(xmm0, xmm0);
|
||||
psrld(xmm1, 15);
|
||||
|
@ -1470,7 +1539,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.fpsm == 2)
|
||||
if(m_sel.fpsm == 2)
|
||||
{
|
||||
pslld(xmm1, 16);
|
||||
}
|
||||
|
@ -1485,16 +1554,16 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::WriteZBuf()
|
||||
{
|
||||
if(!m_env.sel.zwrite)
|
||||
if(!m_sel.zwrite)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
movdqa(xmm1, xmmword[!m_env.sel.sprite ? &m_env.temp.zs : &m_env.p.z]);
|
||||
movdqa(xmm1, xmmword[!m_sel.sprite ? &m_env.temp.zs : &m_env.p.z]);
|
||||
|
||||
bool fast = false;
|
||||
|
||||
if(m_env.sel.ztest && m_env.sel.zpsm < 2)
|
||||
if(m_sel.ztest && m_sel.zpsm < 2)
|
||||
{
|
||||
// zs = zs.blend8(zd, zm);
|
||||
|
||||
|
@ -1505,43 +1574,24 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
fast = true;
|
||||
}
|
||||
|
||||
WritePixel(xmm1, xmm0, ebp, dh, fast, m_env.sel.zpsm);
|
||||
WritePixel(xmm1, xmm0, ebp, dh, fast, m_sel.zpsm);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::AlphaBlend()
|
||||
{
|
||||
if(!m_env.sel.fwrite)
|
||||
{
|
||||
return;
|
||||
}
|
||||
/*
|
||||
if(m_env.sel.aa1)
|
||||
{
|
||||
// hmm, the playstation logo does not look good...
|
||||
|
||||
printf("aa1 %016I64x\n", m_env.sel.key);
|
||||
|
||||
if(m_env.sel.fpsm != 1) // TODO: fm == 0xffxxxxxx
|
||||
{
|
||||
// a = 0x80
|
||||
|
||||
pcmpeqd(xmm0, xmm0);
|
||||
psllw(xmm0, 15);
|
||||
psrlw(xmm0, 8);
|
||||
mix16(xmm6, xmm0, xmm1);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
*/
|
||||
if(m_env.sel.abe == 255)
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if((m_env.sel.abea != m_env.sel.abeb) && (m_env.sel.abea == 1 || m_env.sel.abeb == 1 || m_env.sel.abec == 1) || m_env.sel.abed == 1)
|
||||
if(m_sel.abe == 0 && m_sel.aa1 == 0)
|
||||
{
|
||||
switch(m_env.sel.fpsm)
|
||||
return;
|
||||
}
|
||||
|
||||
if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1)
|
||||
{
|
||||
switch(m_sel.fpsm)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
|
@ -1599,40 +1649,40 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
// xmm2, xmm3 = used
|
||||
// xmm4, xmm7 = free
|
||||
|
||||
if(m_env.sel.pabe || (m_env.sel.abea != m_env.sel.abeb) && (m_env.sel.abeb == 0 || m_env.sel.abed == 0))
|
||||
if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0))
|
||||
{
|
||||
movdqa(xmm4, xmm5);
|
||||
}
|
||||
|
||||
if(m_env.sel.abea != m_env.sel.abeb)
|
||||
if(m_sel.aba != m_sel.abb)
|
||||
{
|
||||
// rb = c[abea * 2 + 0];
|
||||
// rb = c[aba * 2 + 0];
|
||||
|
||||
switch(m_env.sel.abea)
|
||||
switch(m_sel.aba)
|
||||
{
|
||||
case 0: break;
|
||||
case 1: movdqa(xmm5, xmm0); break;
|
||||
case 2: pxor(xmm5, xmm5); break;
|
||||
}
|
||||
|
||||
// rb = rb.sub16(c[abeb * 2 + 0]);
|
||||
// rb = rb.sub16(c[abb * 2 + 0]);
|
||||
|
||||
switch(m_env.sel.abeb)
|
||||
switch(m_sel.abb)
|
||||
{
|
||||
case 0: psubw(xmm5, xmm4); break;
|
||||
case 1: psubw(xmm5, xmm0); break;
|
||||
case 2: break;
|
||||
}
|
||||
|
||||
if(!(m_env.sel.fpsm == 1 && m_env.sel.abec == 1))
|
||||
if(!(m_sel.fpsm == 1 && m_sel.abc == 1))
|
||||
{
|
||||
// GSVector4i a = abec < 2 ? c[abec * 2 + 1].yywwlh().sll16(7) : m_env.afix;
|
||||
// GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_env.afix;
|
||||
|
||||
switch(m_env.sel.abec)
|
||||
switch(m_sel.abc)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
movdqa(xmm7, m_env.sel.abec ? xmm1 : xmm6);
|
||||
movdqa(xmm7, m_sel.abc ? xmm1 : xmm6);
|
||||
pshuflw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
pshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
psllw(xmm7, 7);
|
||||
|
@ -1647,9 +1697,9 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
modulate16<1>(xmm5, xmm7);
|
||||
}
|
||||
|
||||
// rb = rb.add16(c[abed * 2 + 0]);
|
||||
// rb = rb.add16(c[abd * 2 + 0]);
|
||||
|
||||
switch(m_env.sel.abed)
|
||||
switch(m_sel.abd)
|
||||
{
|
||||
case 0: paddw(xmm5, xmm4); break;
|
||||
case 1: paddw(xmm5, xmm0); break;
|
||||
|
@ -1658,9 +1708,9 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
else
|
||||
{
|
||||
// rb = c[abed * 2 + 0];
|
||||
// rb = c[abd * 2 + 0];
|
||||
|
||||
switch(m_env.sel.abed)
|
||||
switch(m_sel.abd)
|
||||
{
|
||||
case 0: break;
|
||||
case 1: movdqa(xmm5, xmm0); break;
|
||||
|
@ -1668,7 +1718,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
}
|
||||
|
||||
if(m_env.sel.pabe)
|
||||
if(m_sel.pabe)
|
||||
{
|
||||
// mask = (c[1] << 8).sra32(31);
|
||||
|
||||
|
@ -1690,11 +1740,11 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
|
||||
movdqa(xmm4, xmm6);
|
||||
|
||||
if(m_env.sel.abea != m_env.sel.abeb)
|
||||
if(m_sel.aba != m_sel.abb)
|
||||
{
|
||||
// ga = c[abea * 2 + 1];
|
||||
// ga = c[aba * 2 + 1];
|
||||
|
||||
switch(m_env.sel.abea)
|
||||
switch(m_sel.aba)
|
||||
{
|
||||
case 0: break;
|
||||
case 1: movdqa(xmm6, xmm1); break;
|
||||
|
@ -1703,23 +1753,23 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
|
||||
// ga = ga.sub16(c[abeb * 2 + 1]);
|
||||
|
||||
switch(m_env.sel.abeb)
|
||||
switch(m_sel.abb)
|
||||
{
|
||||
case 0: psubw(xmm6, xmm4); break;
|
||||
case 1: psubw(xmm6, xmm1); break;
|
||||
case 2: break;
|
||||
}
|
||||
|
||||
if(!(m_env.sel.fpsm == 1 && m_env.sel.abec == 1))
|
||||
if(!(m_sel.fpsm == 1 && m_sel.abc == 1))
|
||||
{
|
||||
// ga = ga.modulate16<1>(a);
|
||||
|
||||
modulate16<1>(xmm6, xmm7);
|
||||
}
|
||||
|
||||
// ga = ga.add16(c[abed * 2 + 1]);
|
||||
// ga = ga.add16(c[abd * 2 + 1]);
|
||||
|
||||
switch(m_env.sel.abed)
|
||||
switch(m_sel.abd)
|
||||
{
|
||||
case 0: paddw(xmm6, xmm4); break;
|
||||
case 1: paddw(xmm6, xmm1); break;
|
||||
|
@ -1728,9 +1778,9 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
else
|
||||
{
|
||||
// ga = c[abed * 2 + 1];
|
||||
// ga = c[abd * 2 + 1];
|
||||
|
||||
switch(m_env.sel.abed)
|
||||
switch(m_sel.abd)
|
||||
{
|
||||
case 0: break;
|
||||
case 1: movdqa(xmm6, xmm1); break;
|
||||
|
@ -1738,7 +1788,13 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
}
|
||||
|
||||
if(m_env.sel.pabe)
|
||||
// xmm4 = src ga
|
||||
// xmm5 = rb
|
||||
// xmm6 = ga
|
||||
// xmm2, xmm3 = used
|
||||
// xmm0, xmm1, xmm7 = free
|
||||
|
||||
if(m_sel.pabe)
|
||||
{
|
||||
if(!m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
|
@ -1757,7 +1813,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.fpsm != 1) // TODO: fm == 0xffxxxxxx
|
||||
if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx
|
||||
{
|
||||
mix16(xmm6, xmm4, xmm7);
|
||||
}
|
||||
|
@ -1768,12 +1824,12 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
|
|||
{
|
||||
const int _top = params + 4;
|
||||
|
||||
if(!m_env.sel.fwrite)
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if(m_env.sel.colclamp == 0)
|
||||
if(m_sel.colclamp == 0)
|
||||
{
|
||||
// c[0] &= 0x000000ff;
|
||||
// c[1] &= 0x000000ff;
|
||||
|
@ -1784,7 +1840,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
|
|||
pand(xmm6, xmm7);
|
||||
}
|
||||
|
||||
if(m_env.sel.fpsm == 2 && m_env.sel.dthe)
|
||||
if(m_sel.fpsm == 2 && m_sel.dthe)
|
||||
{
|
||||
mov(eax, dword[esp + _top]);
|
||||
and(eax, 3);
|
||||
|
@ -1800,7 +1856,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
|
|||
punpckhwd(xmm7, xmm6);
|
||||
packuswb(xmm5, xmm7);
|
||||
|
||||
if(m_env.sel.fba && m_env.sel.fpsm != 1)
|
||||
if(m_sel.fba && m_sel.fpsm != 1)
|
||||
{
|
||||
// fs |= 0x80000000;
|
||||
|
||||
|
@ -1809,7 +1865,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
|
|||
por(xmm5, xmm7);
|
||||
}
|
||||
|
||||
if(m_env.sel.fpsm == 2)
|
||||
if(m_sel.fpsm == 2)
|
||||
{
|
||||
// GSVector4i rb = fs & 0x00f800f8;
|
||||
// GSVector4i ga = fs & 0x8000f800;
|
||||
|
@ -1841,16 +1897,16 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
|
|||
por(xmm5, xmm7);
|
||||
}
|
||||
|
||||
if(m_env.sel.rfb)
|
||||
if(m_sel.rfb)
|
||||
{
|
||||
// fs = fs.blend(fd, fm);
|
||||
|
||||
blend(xmm5, xmm2, xmm3); // TODO: could be skipped in certain cases, depending on fpsm and fm
|
||||
}
|
||||
|
||||
bool fast = m_env.sel.rfb && m_env.sel.fpsm < 2;
|
||||
bool fast = m_sel.rfb && m_sel.fpsm < 2;
|
||||
|
||||
WritePixel(xmm5, xmm0, ebx, dl, fast, m_env.sel.fpsm);
|
||||
WritePixel(xmm5, xmm0, ebx, dl, fast, m_sel.fpsm);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
|
||||
|
@ -1992,9 +2048,9 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uin
|
|||
if(i == 0) movd(eax, addr);
|
||||
else pextrd(eax, addr, i);
|
||||
|
||||
if(m_env.sel.tlu) movzx(eax, byte[ebx + eax]);
|
||||
if(m_sel.tlu) movzx(eax, byte[ebx + eax]);
|
||||
|
||||
const Address& src = m_env.sel.tlu ? ptr[eax * 4 + (size_t)m_env.clut] : ptr[ebx + eax * 4];
|
||||
const Address& src = m_sel.tlu ? ptr[eax * 4 + (size_t)m_env.clut] : ptr[ebx + eax * 4];
|
||||
|
||||
if(i == 0) movd(dst, src);
|
||||
else pinsrd(dst, src, i);
|
||||
|
|
|
@ -36,6 +36,7 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
|
|||
util::Cpu m_cpu;
|
||||
|
||||
GSScanlineEnvironment& m_env;
|
||||
GSScanlineSelector m_sel;
|
||||
|
||||
void Generate();
|
||||
|
||||
|
@ -72,5 +73,5 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
|
|||
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
|
||||
|
||||
public:
|
||||
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize);
|
||||
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize);
|
||||
};
|
|
@ -38,9 +38,10 @@ GSRasterizer::~GSRasterizer()
|
|||
|
||||
void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||
{
|
||||
m_dsf.sr = NULL;
|
||||
m_dsf.ssl = NULL;
|
||||
m_dsf.ssle = NULL;
|
||||
m_dsf.ssp = NULL;
|
||||
m_dsf.sr = NULL;
|
||||
|
||||
m_ds->BeginDraw(data, &m_dsf);
|
||||
|
||||
|
@ -111,6 +112,25 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
|
|||
GSVertexSW dv = v[1] - v[0];
|
||||
|
||||
GSVector4 dp = dv.p.abs();
|
||||
|
||||
if(m_dsf.ssle)
|
||||
{
|
||||
int i = (dp < dp.yxwz()).mask() & 1; // |x| <= |y|
|
||||
|
||||
GSVertexSW dscan;
|
||||
|
||||
dscan.p = GSVector4::zero();
|
||||
dscan.t = GSVector4::zero();
|
||||
dscan.c = GSVector4::zero();
|
||||
|
||||
m_dsf.ssp(v, dscan);
|
||||
|
||||
DrawEdge(v[0], v[1], dv, scissor, i, 0);
|
||||
DrawEdge(v[0], v[1], dv, scissor, i, 1);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
GSVector4i dpi(dp);
|
||||
|
||||
if(dpi.y == 0)
|
||||
|
@ -197,6 +217,11 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc
|
|||
|
||||
i = (aabb == bccb).mask() & 7;
|
||||
|
||||
if(m_dsf.ssle)
|
||||
{
|
||||
DrawTriangleEdge(v, scissor);
|
||||
}
|
||||
|
||||
switch(i)
|
||||
{
|
||||
case 0: // a < b < c
|
||||
|
@ -215,6 +240,37 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc
|
|||
}
|
||||
}
|
||||
|
||||
// Draws the three edges of the triangle v[0], v[1], v[2] by calling DrawEdge
// once per edge (v0-v1, v0-v2, v1-v2). DrawTriangle calls this only when
// m_dsf.ssle is non-null.
//
// v       - the three triangle vertices
// scissor - clip rectangle, forwarded unchanged to DrawEdge
void GSRasterizer::DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor)
|
||||
{
|
||||
// per-edge deltas: dv[0] = v0->v1, dv[1] = v0->v2, dv[2] = v1->v2
GSVertexSW dv[3];
|
||||
|
||||
dv[0] = v[1] - v[0];
|
||||
dv[1] = v[2] - v[0];
|
||||
dv[2] = v[2] - v[1];
|
||||
|
||||
// NOTE(review): presumably upl/xyxy/zwyx gather the x deltas of the three edges
// into lanes 0..2 of dx and the matching y deltas into lanes 0..2 of dy, so the
// comparisons below test all edges at once - confirm against GSVector4.
GSVector4 dx = dv[0].p.upl(dv[1].p).xyxy(dv[2].p);
|
||||
GSVector4 dy = dv[0].p.upl(dv[1].p).zwyx(dv[2].p);
|
||||
|
||||
// note: the comparison used for 'a' is strict (<), despite the "<=" in the trailing note
GSVector4 a = dx.abs() < dy.abs(); // |x| <= |y|
|
||||
GSVector4 b = dx < GSVector4::zero(); // x < 0
|
||||
GSVector4 c = dv[1].p * (dv[0].p / dv[1].p).yyyy() < dv[0].p; // longest.p.x < 0
|
||||
|
||||
// i: one bit per edge, passed to DrawEdge as 'orientation'
int i = a.mask();
|
||||
// j: one bit per edge, passed to DrawEdge as 'side'
int j = ((a | b) ^ c.xxxx()).mask() ^ 2; // evil
|
||||
|
||||
// the primitive setup callback is invoked with all scanline gradients zeroed
GSVertexSW dscan;
|
||||
|
||||
dscan.p = GSVector4::zero();
|
||||
dscan.t = GSVector4::zero();
|
||||
dscan.c = GSVector4::zero();
|
||||
|
||||
m_dsf.ssp(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small)
|
||||
|
||||
// the masked bits are passed unshifted (1, 2, 4); DrawEdge only tests them for non-zero
DrawEdge(v[0], v[1], dv[0], scissor, i & 1, j & 1);
|
||||
DrawEdge(v[0], v[2], dv[1], scissor, i & 2, j & 2);
|
||||
DrawEdge(v[1], v[2], dv[2], scissor, i & 4, j & 4);
|
||||
}
|
||||
|
||||
void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
|
||||
{
|
||||
GSVertexSW longest;
|
||||
|
@ -559,6 +615,222 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
|
|||
}
|
||||
}
|
||||
|
||||
// Walks the edge v0 -> v1 one step at a time along its major axis and hands one
// pixel per step to m_dsf.ssle, with that pixel's coverage stored in the vertex's
// t.w (see the "coverage => t.w" notes below).
//
// v0, v1      - edge end points
// dv          - v1 - v0, as computed by the callers
// scissor     - clip rectangle; both the stepping range and each pixel are tested against it
// orientation - non-zero: step along y (one pixel per row); zero: step along x (one pixel per column)
// side        - non-zero: pixel position derived from ceil() - 1; zero: from floor() + 1
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side)
|
||||
{
|
||||
// orientation:
|
||||
// - true: |dv.p.y| > |dv.p.x|
|
||||
// - false: |dv.p.x| >= |dv.p.y| (the callers derive the flag from a strict |x| < |y| test)
|
||||
// side:
|
||||
// - true: top/left edge
|
||||
// - false: bottom/right edge
|
||||
|
||||
// TODO: bit slow and too much duplicated code
|
||||
// TODO: inner pre-step is still missing (hardly noticeable)
|
||||
|
||||
GSVector4 fscissor(scissor);
|
||||
|
||||
// NOTE(review): presumably lrtb = ceil(v0.x, v1.x, v0.y, v1.y) - confirm upl() lane order
GSVector4 lrtb = v0.p.upl(v1.p).ceil();
|
||||
|
||||
if(orientation)
|
||||
{
|
||||
// y-major edge: clamp the end point rows against the scissor's y and w components
GSVector4 tbmax = lrtb.maxv(fscissor.yyyy());
|
||||
GSVector4 tbmin = lrtb.minv(fscissor.wwww());
|
||||
|
||||
GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin));
|
||||
|
||||
int top, bottom;
|
||||
|
||||
GSVertexSW edge, dedge;
|
||||
|
||||
// mask bit 1 set => dv.p.y >= 0: start stepping from v0, otherwise from v1
if((dv.p >= GSVector4::zero()).mask() & 2)
|
||||
{
|
||||
top = tbi.extract32<0>();
|
||||
bottom = tbi.extract32<3>();
|
||||
|
||||
// nothing to draw if the clamped row range is empty
if(top >= bottom) return;
|
||||
|
||||
edge = v0;
|
||||
// per-row increment of all vertex attributes
dedge = dv / dv.p.yyyy();
|
||||
|
||||
// pre-step from the start vertex to the first (clamped, ceiled) row
edge += dedge * (tbmax.zzzz() - edge.p.yyyy());
|
||||
}
|
||||
else
|
||||
{
|
||||
top = tbi.extract32<1>();
|
||||
bottom = tbi.extract32<2>();
|
||||
|
||||
if(top >= bottom) return;
|
||||
|
||||
edge = v1;
|
||||
dedge = dv / dv.p.yyyy();
|
||||
|
||||
edge += dedge * (tbmax.wwww() - edge.p.yyyy());
|
||||
}
|
||||
|
||||
if(side)
|
||||
{
|
||||
while(1)
|
||||
{
|
||||
// do { } while(0) executes its body exactly once
do
|
||||
{
|
||||
// rows are split between rasterizer instances: only rows with top % m_threads == m_id are drawn here
if((top % m_threads) == m_id)
|
||||
{
|
||||
GSVector4 p = edge.p.ceil();
|
||||
|
||||
// horizontal scissor test on the ceiled x
if(((fscissor.xxxx() < p) & (p <= fscissor.zzzz())).mask() & 1)
|
||||
{
|
||||
// distance from the exact edge x up to the next integer x
GSVector4 coverage = (p - edge.p).xxxx();
|
||||
|
||||
edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w
|
||||
|
||||
int x = GSVector4i(p).extract32<0>() - 1;
|
||||
|
||||
m_stats.pixels++;
|
||||
|
||||
// arguments are (right, left, top, v): a span exactly one pixel wide
m_dsf.ssle(x + 1, x, top, edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
while(0);
|
||||
|
||||
if(++top >= bottom) break;
|
||||
|
||||
edge += dedge;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while(1)
|
||||
{
|
||||
do
|
||||
{
|
||||
if((top % m_threads) == m_id)
|
||||
{
|
||||
GSVector4 p = edge.p.floor();
|
||||
|
||||
if(((fscissor.xxxx() <= p) & (p < fscissor.zzzz())).mask() & 1)
|
||||
{
|
||||
// distance from the previous integer x up to the exact edge x
GSVector4 coverage = (edge.p - p).xxxx();
|
||||
|
||||
edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w
|
||||
|
||||
int x = GSVector4i(p).extract32<0>() + 1;
|
||||
|
||||
m_stats.pixels++;
|
||||
|
||||
m_dsf.ssle(x + 1, x, top, edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
while(0);
|
||||
|
||||
if(++top >= bottom) break;
|
||||
|
||||
edge += dedge;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// x-major edge: same scheme as above with the roles of x and y swapped
GSVector4 lrmax = lrtb.maxv(fscissor.xxxx());
|
||||
GSVector4 lrmin = lrtb.minv(fscissor.zzzz());
|
||||
|
||||
GSVector4i lri = GSVector4i(lrmax.xyxy(lrmin));
|
||||
|
||||
int left, right;
|
||||
|
||||
GSVertexSW edge, dedge;
|
||||
|
||||
// mask bit 0 set => dv.p.x >= 0: start stepping from v0, otherwise from v1
if((dv.p >= GSVector4::zero()).mask() & 1)
|
||||
{
|
||||
left = lri.extract32<0>();
|
||||
right = lri.extract32<3>();
|
||||
|
||||
// nothing to draw if the clamped column range is empty
if(left >= right) return;
|
||||
|
||||
edge = v0;
|
||||
// per-column increment of all vertex attributes
dedge = dv / dv.p.xxxx();
|
||||
|
||||
edge += dedge * (lrmax.xxxx() - edge.p.xxxx());
|
||||
}
|
||||
else
|
||||
{
|
||||
left = lri.extract32<1>();
|
||||
right = lri.extract32<2>();
|
||||
|
||||
if(left >= right) return;
|
||||
|
||||
edge = v1;
|
||||
dedge = dv / dv.p.xxxx();
|
||||
|
||||
edge += dedge * (lrmax.yyyy() - edge.p.xxxx());
|
||||
}
|
||||
|
||||
if(side)
|
||||
{
|
||||
while(1)
|
||||
{
|
||||
do
|
||||
{
|
||||
GSVector4 p = edge.p.ceil();
|
||||
|
||||
// vertical scissor test on the ceiled y (mask bit 1 is the y lane)
if(((fscissor.yyyy() < p) & (p <= fscissor.wwww())).mask() & 2)
|
||||
{
|
||||
int y = GSVector4i(p).extract32<1>() - 1;
|
||||
|
||||
// here the thread check uses the computed row y, since the loop steps over columns
if((y % m_threads) == m_id)
|
||||
{
|
||||
GSVector4 coverage = (p - edge.p).yyyy();
|
||||
|
||||
edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w
|
||||
|
||||
m_stats.pixels++;
|
||||
|
||||
m_dsf.ssle(left + 1, left, y, edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
while(0);
|
||||
|
||||
if(++left >= right) break;
|
||||
|
||||
edge += dedge;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while(1)
|
||||
{
|
||||
do
|
||||
{
|
||||
GSVector4 p = edge.p.floor();
|
||||
|
||||
if(((fscissor.yyyy() <= p) & (p < fscissor.wwww())).mask() & 2)
|
||||
{
|
||||
int y = GSVector4i(p).extract32<1>() + 1;
|
||||
|
||||
if((y % m_threads) == m_id)
|
||||
{
|
||||
GSVector4 coverage = (edge.p - p).yyyy();
|
||||
|
||||
edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w
|
||||
|
||||
m_stats.pixels++;
|
||||
|
||||
m_dsf.ssle(left + 1, left, y, edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
while(0);
|
||||
|
||||
if(++left >= right) break;
|
||||
|
||||
edge += dedge;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, long* sync)
|
||||
|
|
|
@ -48,15 +48,16 @@ public:
|
|||
class IDrawScanline
|
||||
{
|
||||
public:
|
||||
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
|
||||
typedef void (__fastcall *DrawScanlineStaticPtr)(int right, int left, int top, const GSVertexSW& v);
|
||||
typedef void (__fastcall *SetupPrimStaticPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
||||
struct Functions
|
||||
{
|
||||
DrawSolidRectPtr sr; // TODO
|
||||
DrawScanlineStaticPtr ssl;
|
||||
DrawScanlineStaticPtr ssle;
|
||||
SetupPrimStaticPtr ssp;
|
||||
DrawSolidRectPtr sr; // TODO
|
||||
};
|
||||
|
||||
virtual ~IDrawScanline() {}
|
||||
|
@ -78,6 +79,7 @@ protected:
|
|||
void DrawPoint(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
void DrawLine(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
void DrawTriangle(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
void DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
void DrawSprite(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
|
||||
void DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor);
|
||||
|
@ -87,6 +89,8 @@ protected:
|
|||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, GSVector4& r, const GSVector4& dr, const GSVertexSW& dscan, const GSVector4& scissor);
|
||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan, const GSVector4& scissor);
|
||||
|
||||
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side);
|
||||
|
||||
public:
|
||||
GSRasterizer(IDrawScanline* ds, int id = 0, int threads = 0);
|
||||
virtual ~GSRasterizer();
|
||||
|
|
|
@ -34,6 +34,7 @@ struct GSRendererSettings
|
|||
int m_filter;
|
||||
bool m_vsync;
|
||||
bool m_nativeres;
|
||||
bool m_aa1;
|
||||
};
|
||||
|
||||
class GSRendererBase : public GSState, protected GSRendererSettings
|
||||
|
@ -84,6 +85,12 @@ protected:
|
|||
m_osd = !m_osd;
|
||||
return true;
|
||||
}
|
||||
|
||||
if(msg.wParam == VK_DELETE)
|
||||
{
|
||||
m_aa1 = !m_aa1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -103,6 +110,7 @@ public:
|
|||
m_filter = rs.m_filter;
|
||||
m_vsync = rs.m_vsync;
|
||||
m_nativeres = rs.m_nativeres;
|
||||
m_aa1 = rs.m_aa1;
|
||||
};
|
||||
|
||||
virtual bool Create(LPCTSTR title) = 0;
|
||||
|
|
|
@ -262,11 +262,11 @@ protected:
|
|||
p.sel.zpsm = 3;
|
||||
p.sel.atst = ATST_ALWAYS;
|
||||
p.sel.tfx = TFX_NONE;
|
||||
p.sel.abe = 255;
|
||||
p.sel.ababcd = 255;
|
||||
p.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0;
|
||||
|
||||
p.fm = context->FRAME.FBMSK;
|
||||
p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 || PRIM->AA1 && primclass == GS_LINE_CLASS ? 0xffffffff : 0;
|
||||
p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
|
||||
if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
|
||||
{
|
||||
|
@ -449,22 +449,24 @@ protected:
|
|||
p.sel.datm = context->TEST.DATM;
|
||||
}
|
||||
|
||||
if(PRIM->ABE)
|
||||
if(PRIM->ABE && !context->ALPHA.IsOpaque() || PRIM->AA1)
|
||||
{
|
||||
if(!context->ALPHA.IsOpaque())
|
||||
p.sel.abe = PRIM->ABE;
|
||||
p.sel.ababcd = context->ALPHA.ai32[0];
|
||||
|
||||
if(env.PABE.PABE)
|
||||
{
|
||||
p.sel.abe = context->ALPHA.ai32[0];
|
||||
p.sel.pabe = env.PABE.PABE;
|
||||
p.sel.pabe = 1;
|
||||
}
|
||||
|
||||
if(PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS))
|
||||
{
|
||||
p.sel.aa1 = m_aa1 ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
if(PRIM->AA1)
|
||||
{
|
||||
p.sel.aa1 = 1;
|
||||
}
|
||||
|
||||
if(p.sel.date
|
||||
|| p.sel.abea == 1 || p.sel.abeb == 1 || p.sel.abec == 1 || p.sel.abed == 1
|
||||
|| p.sel.aba == 1 || p.sel.abb == 1 || p.sel.abc == 1 || p.sel.abd == 1
|
||||
|| p.sel.atst != ATST_ALWAYS && p.sel.afail == AFAIL_RGB_ONLY
|
||||
|| p.sel.fpsm == 0 && p.fm != 0 && p.fm != 0xffffffff
|
||||
|| p.sel.fpsm == 1 && (p.fm & 0x00ffffff) != 0 && (p.fm & 0x00ffffff) != 0x00ffffff
|
||||
|
@ -532,7 +534,7 @@ protected:
|
|||
GSRasterizerStats stats;
|
||||
|
||||
m_rl.GetStats(stats);
|
||||
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
|
||||
|
@ -576,6 +578,15 @@ protected:
|
|||
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
|
||||
if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);}
|
||||
}
|
||||
|
||||
if(0)//stats.ticks > 1000000)
|
||||
{
|
||||
printf("* [%I64d | %012I64x] ticks %I64d prims %d (%d) pixels %d (%d)\n",
|
||||
m_perfmon.GetFrame(), p.sel.key,
|
||||
stats.ticks,
|
||||
stats.prims, stats.prims > 0 ? (int)(stats.ticks / stats.prims) : -1,
|
||||
stats.pixels, stats.pixels > 0 ? (int)(stats.ticks / stats.pixels) : -1);
|
||||
}
|
||||
}
|
||||
|
||||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r)
|
||||
|
|
|
@ -41,34 +41,37 @@ union GSScanlineSelector
|
|||
DWORD tlu:1; // 18
|
||||
DWORD fge:1; // 19
|
||||
DWORD date:1; // 20
|
||||
DWORD abea:2; // 21
|
||||
DWORD abeb:2; // 23
|
||||
DWORD abec:2; // 25
|
||||
DWORD abed:2; // 27
|
||||
DWORD pabe:1; // 29
|
||||
DWORD rfb:1; // 30
|
||||
DWORD sprite:1; // 31
|
||||
DWORD abe:1; // 21
|
||||
DWORD aba:2; // 22
|
||||
DWORD abb:2; // 24
|
||||
DWORD abc:2; // 26
|
||||
DWORD abd:2; // 28
|
||||
DWORD pabe:1; // 30
|
||||
DWORD aa1:1; // 31
|
||||
|
||||
DWORD fwrite:1; // 32
|
||||
DWORD ftest:1; // 33
|
||||
DWORD zwrite:1; // 34
|
||||
DWORD ztest:1; // 35
|
||||
DWORD wms:2; // 36
|
||||
DWORD wmt:2; // 38
|
||||
DWORD datm:1; // 40
|
||||
DWORD colclamp:1; // 41
|
||||
DWORD fba:1; // 42
|
||||
DWORD dthe:1; // 43
|
||||
DWORD zoverflow:1; // 44 (z max >= 0x80000000)
|
||||
DWORD aa1:1; // 45
|
||||
DWORD rfb:1; // 34
|
||||
DWORD zwrite:1; // 35
|
||||
DWORD ztest:1; // 36
|
||||
DWORD zoverflow:1; // 37 (z max >= 0x80000000)
|
||||
DWORD wms:2; // 38
|
||||
DWORD wmt:2; // 40
|
||||
DWORD datm:1; // 42
|
||||
DWORD colclamp:1; // 43
|
||||
DWORD fba:1; // 44
|
||||
DWORD dthe:1; // 45
|
||||
DWORD sprite:1; // 46
|
||||
DWORD edge:1; // 47
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
DWORD _pad1:21;
|
||||
DWORD abe:8;
|
||||
DWORD _pad2:3;
|
||||
DWORD _pad1:22;
|
||||
DWORD ababcd:8;
|
||||
DWORD _pad2:2;
|
||||
DWORD fb:2;
|
||||
DWORD _pad3:1;
|
||||
DWORD zb:2;
|
||||
};
|
||||
|
||||
|
@ -88,7 +91,7 @@ union GSScanlineSelector
|
|||
return sprite
|
||||
&& iip == 0
|
||||
&& tfx == TFX_NONE
|
||||
&& abe == 255
|
||||
&& abe == 0
|
||||
&& ztst <= 1
|
||||
&& atst <= 1
|
||||
&& date == 0
|
||||
|
@ -114,8 +117,6 @@ __declspec(align(16)) struct GSScanlineParam
|
|||
|
||||
__declspec(align(16)) struct GSScanlineEnvironment
|
||||
{
|
||||
GSScanlineSelector sel;
|
||||
|
||||
void* vm;
|
||||
const void* tex;
|
||||
const DWORD* clut;
|
||||
|
@ -140,5 +141,5 @@ __declspec(align(16)) struct GSScanlineEnvironment
|
|||
struct {GSVector4 z, stq; GSVector4i c, f, st;} d4;
|
||||
struct {GSVector4i rb, ga;} c;
|
||||
struct {GSVector4i z, f;} p;
|
||||
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf;} temp;
|
||||
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf, cov;} temp;
|
||||
};
|
||||
|
|
|
@ -75,6 +75,7 @@ GSSettingsDlg::GSSettingsDlg(CWnd* pParent /*=NULL*/)
|
|||
, m_vsync(FALSE)
|
||||
, m_logz(FALSE)
|
||||
, m_fba(TRUE)
|
||||
, m_aa1(FALSE)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -115,6 +116,7 @@ void GSSettingsDlg::DoDataExchange(CDataExchange* pDX)
|
|||
DDX_Check(pDX, IDC_CHECK2, m_vsync);
|
||||
DDX_Check(pDX, IDC_CHECK5, m_logz);
|
||||
DDX_Check(pDX, IDC_CHECK7, m_fba);
|
||||
DDX_Check(pDX, IDC_CHECK8, m_aa1);
|
||||
}
|
||||
|
||||
BEGIN_MESSAGE_MAP(GSSettingsDlg, CDialog)
|
||||
|
@ -220,6 +222,7 @@ BOOL GSSettingsDlg::OnInitDialog()
|
|||
m_vsync = !!pApp->GetProfileInt(_T("Settings"), _T("vsync"), FALSE);
|
||||
m_logz = !!pApp->GetProfileInt(_T("Settings"), _T("logz"), FALSE);
|
||||
m_fba = !!pApp->GetProfileInt(_T("Settings"), _T("fba"), TRUE);
|
||||
m_aa1 = !!pApp->GetProfileInt(_T("Settings"), _T("aa1"), FALSE);
|
||||
|
||||
m_resx.SetRange(512, 4096);
|
||||
m_resy.SetRange(512, 4096);
|
||||
|
@ -283,6 +286,7 @@ void GSSettingsDlg::OnOK()
|
|||
pApp->WriteProfileInt(_T("Settings"), _T("vsync"), m_vsync);
|
||||
pApp->WriteProfileInt(_T("Settings"), _T("logz"), m_logz);
|
||||
pApp->WriteProfileInt(_T("Settings"), _T("fba"), m_fba);
|
||||
pApp->WriteProfileInt(_T("Settings"), _T("aa1"), m_aa1);
|
||||
|
||||
pApp->WriteProfileInt(_T("Settings"), _T("resx"), m_resx.GetPos());
|
||||
pApp->WriteProfileInt(_T("Settings"), _T("resy"), m_resy.GetPos());
|
||||
|
|
|
@ -60,6 +60,7 @@ public:
|
|||
BOOL m_vsync;
|
||||
BOOL m_logz;
|
||||
BOOL m_fba;
|
||||
BOOL m_aa1;
|
||||
|
||||
protected:
|
||||
virtual LRESULT DefWindowProc(UINT message, WPARAM wParam, LPARAM lParam);
|
||||
|
|
|
@ -24,14 +24,16 @@
|
|||
#include "StdAfx.h"
|
||||
#include "GSSetupPrimCodeGenerator.h"
|
||||
|
||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize)
|
||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
, m_env(env)
|
||||
{
|
||||
m_en.z = m_env.sel.zb ? 1 : 0;
|
||||
m_en.f = m_env.sel.fb && m_env.sel.fge ? 1 : 0;
|
||||
m_en.t = m_env.sel.fb && m_env.sel.tfx != TFX_NONE ? 1 : 0;
|
||||
m_en.c = m_env.sel.fb && m_env.sel.tfx != TFX_DECAL ? 1 : 0;
|
||||
m_sel.key = key;
|
||||
|
||||
m_en.z = m_sel.zb ? 1 : 0;
|
||||
m_en.f = m_sel.fb && m_sel.fge ? 1 : 0;
|
||||
m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0;
|
||||
m_en.c = m_sel.fb && m_sel.tfx != TFX_DECAL ? 1 : 0;
|
||||
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
|
@ -44,7 +46,7 @@ void GSSetupPrimCodeGenerator::Generate()
|
|||
{
|
||||
const int params = 0;
|
||||
|
||||
if((m_en.z || m_en.f) && !m_env.sel.sprite || m_en.t || m_en.c && m_env.sel.iip)
|
||||
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip)
|
||||
{
|
||||
for(int i = 0; i < 5; i++)
|
||||
{
|
||||
|
@ -68,7 +70,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
return;
|
||||
}
|
||||
|
||||
if(!m_env.sel.sprite)
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
// GSVector4 t = dscan.p;
|
||||
|
||||
|
@ -148,7 +150,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
if(m_env.sel.zoverflow)
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
// m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
|
||||
|
@ -193,7 +195,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(xmm1, xmm0);
|
||||
mulps(xmm1, xmm3);
|
||||
|
||||
if(m_env.sel.fst)
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_env.d4.st = GSVector4i(t * 4.0f);
|
||||
|
||||
|
@ -207,7 +209,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(xmmword[&m_env.d4.stq], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_env.sel.fst ? 2 : 3; j < k; j++)
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
|
@ -223,7 +225,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(xmm2, xmm1);
|
||||
mulps(xmm2, Xmm(4 + i));
|
||||
|
||||
if(m_env.sel.fst)
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_env.d[i].si/ti = GSVector4i(v);
|
||||
|
||||
|
@ -257,7 +259,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
return;
|
||||
}
|
||||
|
||||
if(m_env.sel.iip)
|
||||
if(m_sel.iip)
|
||||
{
|
||||
// GSVector4 c = dscan.c;
|
||||
|
||||
|
@ -351,7 +353,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
|
||||
// if(!tme) c = c.srl16(7);
|
||||
|
||||
if(m_env.sel.tfx == TFX_NONE)
|
||||
if(m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
psrlw(xmm0, 7);
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ class GSSetupPrimCodeGenerator : public CodeGenerator
|
|||
util::Cpu m_cpu;
|
||||
|
||||
GSScanlineEnvironment& m_env;
|
||||
GSScanlineSelector m_sel;
|
||||
|
||||
struct {DWORD z:1, f:1, t:1, c:1;} m_en;
|
||||
|
||||
|
@ -46,5 +47,5 @@ class GSSetupPrimCodeGenerator : public CodeGenerator
|
|||
void Color();
|
||||
|
||||
public:
|
||||
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize);
|
||||
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize);
|
||||
};
|
|
@ -2193,12 +2193,12 @@ public:
|
|||
|
||||
GSVector4 abs() const
|
||||
{
|
||||
return GSVector4(_mm_abs_ps(m));
|
||||
return *this & cast(GSVector4i::x7fffffff());
|
||||
}
|
||||
|
||||
GSVector4 neg() const
|
||||
{
|
||||
return GSVector4(_mm_neg_ps(m));
|
||||
return *this ^ cast(GSVector4i::x80000000());
|
||||
}
|
||||
|
||||
GSVector4 rcp() const
|
||||
|
|
|
@ -82,7 +82,7 @@ IDB_LOGO10 BITMAP "res\\logo10.bmp"
|
|||
// Dialog
|
||||
//
|
||||
|
||||
IDD_CONFIG DIALOGEX 0, 0, 189, 231
|
||||
IDD_CONFIG DIALOGEX 0, 0, 189, 245
|
||||
STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
|
||||
CAPTION "Settings..."
|
||||
FONT 8, "MS Shell Dlg", 400, 0, 0x1
|
||||
|
@ -105,17 +105,19 @@ BEGIN
|
|||
EDITTEXT IDC_EDIT2,109,132,35,13,ES_AUTOHSCROLL | ES_NUMBER
|
||||
CONTROL "",IDC_SPIN2,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,133,135,11,14
|
||||
CONTROL "Native",IDC_CHECK1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,149,134,33,10
|
||||
CONTROL "Texture filtering",IDC_CHECK4,"Button",BS_AUTO3STATE | WS_TABSTOP,7,180,67,10
|
||||
CONTROL "Enable tv-out",IDC_CHECK3,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,167,57,10
|
||||
CONTROL "Wait vsync",IDC_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,193,51,10
|
||||
CONTROL "NLOOP hack",IDC_CHECK6,"Button",BS_AUTO3STATE | WS_TABSTOP,7,167,55,10
|
||||
CONTROL "Logarithmic Z",IDC_CHECK5,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,180,58,10
|
||||
CONTROL "Alpha correction (FBA)",IDC_CHECK7,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,193,102,10
|
||||
DEFPUSHBUTTON "OK",IDOK,43,210,50,14
|
||||
PUSHBUTTON "Cancel",IDCANCEL,96,210,50,14
|
||||
LTEXT "SW rend. threads:",IDC_STATIC,7,149,60,8
|
||||
EDITTEXT IDC_EDIT3,71,147,35,13,ES_AUTOHSCROLL | ES_NUMBER
|
||||
CONTROL "",IDC_SPIN3,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,150,11,14
|
||||
CONTROL "NLOOP hack",IDC_CHECK6,"Button",BS_AUTO3STATE | WS_TABSTOP,7,167,55,10
|
||||
CONTROL "Enable tv-out",IDC_CHECK3,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,167,57,10
|
||||
CONTROL "Texture filtering",IDC_CHECK4,"Button",BS_AUTO3STATE | WS_TABSTOP,7,180,67,10
|
||||
CONTROL "Logarithmic Z",IDC_CHECK5,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,180,58,10
|
||||
CONTROL "Wait vsync",IDC_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,193,51,10
|
||||
CONTROL "Alpha correction (FBA)",IDC_CHECK7,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,193,102,10
|
||||
CONTROL "Edge anti-aliasing (AA1, sw-mode only)",IDC_CHECK8,
|
||||
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,206,141,10
|
||||
DEFPUSHBUTTON "OK",IDOK,43,224,50,14
|
||||
PUSHBUTTON "Cancel",IDCANCEL,96,224,50,14
|
||||
END
|
||||
|
||||
IDD_CAPTURE DIALOGEX 0, 0, 279, 71
|
||||
|
@ -179,7 +181,7 @@ BEGIN
|
|||
VERTGUIDE, 80
|
||||
VERTGUIDE, 182
|
||||
TOPMARGIN, 7
|
||||
BOTTOMMARGIN, 224
|
||||
BOTTOMMARGIN, 238
|
||||
END
|
||||
|
||||
IDD_CAPTURE, DIALOG
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#define IDC_EDIT1 2009
|
||||
#define IDC_EDIT2 2010
|
||||
#define IDC_BUTTON1 2011
|
||||
#define IDC_CHECK8 2011
|
||||
#define IDC_BUTTON2 2012
|
||||
#define IDC_EDIT3 2012
|
||||
#define IDC_CUSTOM1 2013
|
||||
|
|
|
@ -47,19 +47,6 @@
|
|||
|
||||
const __m128 ps_3f800000 = _mm_castsi128_ps(_mm_set1_epi32(0x3f800000));
|
||||
const __m128 ps_4b000000 = _mm_castsi128_ps(_mm_set1_epi32(0x4b000000));
|
||||
const __m128 ps_7fffffff = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
|
||||
const __m128 ps_80000000 = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
|
||||
const __m128 ps_ffffffff = _mm_castsi128_ps(_mm_set1_epi32(0xffffffff));
|
||||
|
||||
__forceinline __m128 _mm_neg_ps(__m128 r)
|
||||
{
|
||||
return _mm_xor_ps(ps_80000000, r);
|
||||
}
|
||||
|
||||
__forceinline __m128 _mm_abs_ps(__m128 r)
|
||||
{
|
||||
return _mm_and_ps(ps_7fffffff, r);
|
||||
}
|
||||
|
||||
#define _MM_TRANSPOSE4_SI128(row0, row1, row2, row3) \
|
||||
{ \
|
||||
|
|
Loading…
Reference in New Issue