From 120971ec4ff176319e3706209fc40b0f0b5af866 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Mon, 9 Mar 2009 01:42:56 +0000 Subject: [PATCH] GSdx: Implemented edge anti-aliasing (aa1) for software mode, bios or ffx are good test subjects (not many other games use it). It's still a bit slow but could be improved a lot by not doing 4 pixels with sse for each single edge pixel, that's just a lot of unnecessary texture lookups. The bios config screen cubes are still bogus, gs_user on aa1 isn't too helpful... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@721 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GS.cpp | 1 + plugins/GSdx/GS.h | 26 -- plugins/GSdx/GSDrawScanline.cpp | 59 +-- plugins/GSdx/GSDrawScanline.h | 1 + plugins/GSdx/GSDrawScanlineCodeGenerator.cpp | 378 +++++++++++-------- plugins/GSdx/GSDrawScanlineCodeGenerator.h | 3 +- plugins/GSdx/GSRasterizer.cpp | 274 +++++++++++++- plugins/GSdx/GSRasterizer.h | 8 +- plugins/GSdx/GSRenderer.h | 8 + plugins/GSdx/GSRendererSW.h | 37 +- plugins/GSdx/GSScanlineEnvironment.h | 49 +-- plugins/GSdx/GSSettingsDlg.cpp | 4 + plugins/GSdx/GSSettingsDlg.h | 1 + plugins/GSdx/GSSetupPrimCodeGenerator.cpp | 28 +- plugins/GSdx/GSSetupPrimCodeGenerator.h | 3 +- plugins/GSdx/GSVector.h | 4 +- plugins/GSdx/GSdx.rc | 22 +- plugins/GSdx/resource.h | 1 + plugins/GSdx/sse.h | 13 - 19 files changed, 629 insertions(+), 291 deletions(-) diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 996454189a..95c4178b63 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -117,6 +117,7 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer) rs.m_filter = AfxGetApp()->GetProfileInt(_T("Settings"), _T("filter"), 1); rs.m_vsync = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("vsync"), FALSE); rs.m_nativeres = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE); + rs.m_aa1 = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("aa1"), FALSE); int threads = AfxGetApp()->GetProfileInt(_T("Settings"), _T("swthreads"), 1); diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 38cd049ce6..f7b1276795 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -685,32 +685,6 @@ REG64_(GIFReg, PABE) UINT32 _PAD2:32; REG_END -/* - -AA1 == 1 (for triangles, as tested on the real thing) - -C ABE A Ae Ao Aoe -0 0 c c c c -0 1 s c s* c -1 0 d d c c -1 1 d d s* c -2 0 f f ? ? -2 1 f f ? ? - -C = ALPHA::C -A = alpha used for blending -Ae = edge alpha used for blending -Ao = alpha to output -Aoe = edge alpha to output - -c = coverage -s = source alpha -d = destination alpha -f = fixed alpha (TODO: test with 0x80/2) -* = only if s != 0x80 (s == 0x80 => s == c, but what about s > 0x80? s or 0x80? TODO) - -*/ - REG64_(GIFReg, PRIM) UINT32 PRIM:3; UINT32 IIP:1; diff --git a/plugins/GSdx/GSDrawScanline.cpp b/plugins/GSdx/GSDrawScanline.cpp index d8727517e3..ccdd34f4b7 100644 --- a/plugins/GSdx/GSDrawScanline.cpp +++ b/plugins/GSdx/GSDrawScanline.cpp @@ -43,7 +43,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) const GSScanlineParam* p = (const GSScanlineParam*)data->param; - m_env.sel = p->sel; + m_sel = p->sel; m_env.vm = p->vm; m_env.fbr = p->fbo->row; @@ -60,11 +60,11 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) m_env.fga = GSVector4i((int)(env.FOGCOL.ai32[0] >> 8) & 0x00ff00ff); m_env.dimx = env.dimx; - if(m_env.sel.fpsm == 1) + if(m_sel.fpsm == 1) { m_env.fm |= GSVector4i::xff000000(); } - else if(m_env.sel.fpsm == 2) + else if(m_sel.fpsm == 2) { GSVector4i rb = m_env.fm & 0x00f800f8; GSVector4i ga = m_env.fm & 0x8000f800; @@ -72,29 +72,29 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) m_env.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000(); } - if(m_env.sel.zpsm == 1) + if(m_sel.zpsm == 1) { m_env.zm |= GSVector4i::xff000000(); } - else if(m_env.sel.zpsm == 2) + else if(m_sel.zpsm == 2) { m_env.zm |= GSVector4i::xffff0000(); } - if(m_env.sel.atst == ATST_LESS) + if(m_sel.atst == ATST_LESS) { - m_env.sel.atst = ATST_LEQUAL; + m_sel.atst = ATST_LEQUAL; m_env.aref -= GSVector4i::x00000001(); } - else if(m_env.sel.atst == ATST_GREATER) + else if(m_sel.atst == ATST_GREATER) { - m_env.sel.atst = ATST_GEQUAL; + m_sel.atst = ATST_GEQUAL; m_env.aref += GSVector4i::x00000001(); } - if(m_env.sel.tfx != TFX_NONE) + if(m_sel.tfx != TFX_NONE) { m_env.tex = p->tex; m_env.clut = p->clut; @@ -163,9 +163,20 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) // - f->ssl = m_ds.Lookup(m_env.sel); + f->ssl = m_ds.Lookup(m_sel); - if(m_env.sel.IsSolidRect()) + if(m_sel.aa1)// && (m_state->m_perfmon.GetFrame() & 0x40)) + { + GSScanlineSelector sel; + + sel.key = m_sel.key; + sel.zwrite = 0; + sel.edge = 1; + + f->ssle = m_ds.Lookup(sel); + } + + if(m_sel.IsSolidRect()) { f->sr = (DrawSolidRectPtr)&GSDrawScanline::DrawSolidRect; } @@ -176,14 +187,14 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) sel.key = 0; - sel.iip = m_env.sel.iip; - sel.tfx = m_env.sel.tfx; - sel.fst = m_env.sel.fst; - sel.fge = m_env.sel.fge; - sel.sprite = m_env.sel.sprite; - sel.fb = m_env.sel.fb; - sel.zb = m_env.sel.zb; - sel.zoverflow = m_env.sel.zoverflow; + sel.iip = m_sel.iip; + sel.tfx = m_sel.tfx; + sel.fst = m_sel.fst; + sel.fge = m_sel.fge; + sel.sprite = m_sel.sprite; + sel.fb = m_sel.fb; + sel.zb = m_sel.zb; + sel.zoverflow = m_sel.zoverflow; f->ssp = m_sp.Lookup(sel); } @@ -208,7 +219,7 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) { DWORD z = (DWORD)(float)v.p.z; - if(m_env.sel.zpsm != 2) + if(m_sel.zpsm != 2) { if(m == 0) { @@ -243,7 +254,7 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) c |= 0x80000000; } - if(m_env.sel.fpsm != 2) + if(m_sel.fpsm != 2) { if(m == 0) { @@ -353,7 +364,7 @@ GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap(GSScanlineEnvironment& env) GSSetupPrimCodeGenerator* GSDrawScanline::GSSetupPrimMap::Create(UINT64 key, void* ptr, size_t maxsize) { - return new GSSetupPrimCodeGenerator(m_env, ptr, maxsize); + return new GSSetupPrimCodeGenerator(m_env, key, ptr, maxsize); } // @@ -366,5 +377,5 @@ GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap(GSScanlineEnvironment& env) GSDrawScanlineCodeGenerator* GSDrawScanline::GSDrawScanlineMap::Create(UINT64 key, void* ptr, size_t maxsize) { - return new GSDrawScanlineCodeGenerator(m_env, ptr, maxsize); + return new GSDrawScanlineCodeGenerator(m_env, key, ptr, maxsize); } diff --git a/plugins/GSdx/GSDrawScanline.h b/plugins/GSdx/GSDrawScanline.h index c950c383af..2221707dc8 100644 --- a/plugins/GSdx/GSDrawScanline.h +++ b/plugins/GSdx/GSDrawScanline.h @@ -31,6 +31,7 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline { GSScanlineEnvironment m_env; + GSScanlineSelector m_sel; // diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp index 27f138adf9..c08aaaaa10 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp @@ -20,11 +20,12 @@ */ // TODO: x64 (use the extra regs to avoid spills of zs, zd, uf, vf, rb, ga and keep a few constants in the last two like aref or afix) +// TODO: for edges doing 4 pixels is wasteful (needed memory access * 4) #include "StdAfx.h" #include "GSDrawScanlineCodeGenerator.h" -GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize) +GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize) : CodeGenerator(maxsize, ptr) , m_env(env) { @@ -32,6 +33,8 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& #error TODO #endif + m_sel.key = key; + Generate(); } @@ -46,7 +49,10 @@ void GSDrawScanlineCodeGenerator::Generate() Init(params); - align(16); + if(!m_sel.edge) + { + align(16); + } L("loop"); @@ -60,7 +66,7 @@ L("loop"); // xmm6 = ga (!tme) // xmm7 = test - bool tme = m_env.sel.tfx != TFX_NONE; + bool tme = m_sel.tfx != TFX_NONE; TestZ(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3); @@ -98,12 +104,12 @@ L("loop"); // xmm6 = ga // xmm7 = test - if(m_env.sel.fwrite) + if(m_sel.fwrite) { movdqa(xmm3, xmmword[&m_env.fm]); } - if(m_env.sel.zwrite) + if(m_sel.zwrite) { movdqa(xmm4, xmmword[&m_env.zm]); } @@ -174,12 +180,12 @@ L("loop"); // fm |= test; // zm |= test; - if(m_env.sel.fwrite) + if(m_sel.fwrite) { por(xmm3, xmm7); } - if(m_env.sel.zwrite) + if(m_sel.zwrite) { por(xmm4, xmm7); } @@ -188,19 +194,19 @@ L("loop"); pcmpeqd(xmm1, xmm1); - if(m_env.sel.fwrite && m_env.sel.zwrite) + if(m_sel.fwrite && m_sel.zwrite) { movdqa(xmm0, xmm1); pcmpeqd(xmm1, xmm3); pcmpeqd(xmm0, xmm4); packssdw(xmm1, xmm0); } - else if(m_env.sel.fwrite) + else if(m_sel.fwrite) { pcmpeqd(xmm1, xmm3); packssdw(xmm1, xmm1); } - else if(m_env.sel.zwrite) + else if(m_sel.zwrite) { pcmpeqd(xmm1, xmm4); packssdw(xmm1, xmm1); @@ -253,12 +259,15 @@ L("step"); // if(steps <= 0) break; - test(ecx, ecx); - jle("exit", T_NEAR); + if(!m_sel.edge) + { + test(ecx, ecx); + jle("exit", T_NEAR); - Step(); + Step(); - jmp("loop", T_NEAR); + jmp("loop", T_NEAR); + } L("exit"); @@ -313,7 +322,7 @@ void GSDrawScanlineCodeGenerator::Init(int params) lea(edi, ptr[ebx * 2]); add(edi, dword[&m_env.fzbc]); - if(!m_env.sel.sprite && (m_env.sel.fwrite && m_env.sel.fge || m_env.sel.zb) || m_env.sel.fb && (m_env.sel.tfx != TFX_NONE || m_env.sel.iip)) + if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) { // edx = &m_env.d[skip] @@ -325,13 +334,13 @@ void GSDrawScanlineCodeGenerator::Init(int params) mov(ebx, dword[esp + _v]); } - if(!m_env.sel.sprite) + if(!m_sel.sprite) { - if(m_env.sel.fwrite && m_env.sel.fge || m_env.sel.zb) + if(m_sel.fwrite && m_sel.fge || m_sel.zb) { movaps(xmm0, xmmword[ebx + 16]); // v.p - if(m_env.sel.fwrite && m_env.sel.fge) + if(m_sel.fwrite && m_sel.fge) { // f = GSVector4i(vp).zzzzh().zzzz().add16(m_env.d[skip].f); @@ -343,7 +352,7 @@ void GSDrawScanlineCodeGenerator::Init(int params) movdqa(xmmword[&m_env.temp.f], xmm1); } - if(m_env.sel.zb) + if(m_sel.zb) { // z = vp.zzzz() + m_env.d[skip].z; @@ -356,19 +365,31 @@ void GSDrawScanlineCodeGenerator::Init(int params) } else { - if(m_env.sel.ztest) + if(m_sel.ztest) { movdqa(xmm0, xmmword[&m_env.p.z]); } } - if(m_env.sel.fb) + if(m_sel.fb) { - if(m_env.sel.tfx != TFX_NONE) + if(m_sel.edge) { movaps(xmm4, xmmword[ebx + 32]); // v.t - if(m_env.sel.fst) + cvttps2dq(xmm4, xmm4); + + pshufhw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); + pshufd(xmm4, xmm4, _MM_SHUFFLE(3, 3, 3, 3)); + + movdqa(xmmword[&m_env.temp.cov], xmm4); + } + + if(m_sel.tfx != TFX_NONE) + { + movaps(xmm4, xmmword[ebx + 32]); // v.t + + if(m_sel.fst) { // GSVector4i vti(vt); @@ -382,13 +403,13 @@ void GSDrawScanlineCodeGenerator::Init(int params) paddd(xmm2, xmmword[edx + 16 * 7]); - if(!m_env.sel.sprite) + if(!m_sel.sprite) { paddd(xmm3, xmmword[edx + 16 * 8]); } else { - if(m_env.sel.ltf) + if(m_sel.ltf) { movdqa(xmm4, xmm3); pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); @@ -428,9 +449,9 @@ void GSDrawScanlineCodeGenerator::Init(int params) } } - if(m_env.sel.tfx != TFX_DECAL) + if(m_sel.tfx != TFX_DECAL) { - if(m_env.sel.iip) + if(m_sel.iip) { // GSVector4i vc = GSVector4i(v.c); @@ -455,7 +476,7 @@ void GSDrawScanlineCodeGenerator::Init(int params) } else { - if(m_env.sel.tfx == TFX_NONE) + if(m_sel.tfx == TFX_NONE) { movdqa(xmm5, xmmword[&m_env.c.rb]); movdqa(xmm6, xmmword[&m_env.c.ga]); @@ -475,11 +496,11 @@ void GSDrawScanlineCodeGenerator::Step() add(edi, 8); - if(!m_env.sel.sprite) + if(!m_sel.sprite) { // z += m_env.d4.z; - if(m_env.sel.zb) + if(m_sel.zb) { movaps(xmm0, xmmword[&m_env.temp.z]); addps(xmm0, xmmword[&m_env.d4.z]); @@ -488,7 +509,7 @@ void GSDrawScanlineCodeGenerator::Step() // f = f.add16(m_env.d4.f); - if(m_env.sel.fwrite && m_env.sel.fge) + if(m_sel.fwrite && m_sel.fge) { movdqa(xmm1, xmmword[&m_env.temp.f]); paddw(xmm1, xmmword[&m_env.d4.f]); @@ -497,17 +518,17 @@ void GSDrawScanlineCodeGenerator::Step() } else { - if(m_env.sel.ztest) + if(m_sel.ztest) { movdqa(xmm0, xmmword[&m_env.p.z]); } } - if(m_env.sel.fb) + if(m_sel.fb) { - if(m_env.sel.tfx != TFX_NONE) + if(m_sel.tfx != TFX_NONE) { - if(m_env.sel.fst) + if(m_sel.fst) { // GSVector4i st = m_env.d4.st; @@ -520,7 +541,7 @@ void GSDrawScanlineCodeGenerator::Step() paddd(xmm2, xmmword[&m_env.temp.s]); movdqa(xmmword[&m_env.temp.s], xmm2); - if(!m_env.sel.sprite) + if(!m_sel.sprite) { pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); paddd(xmm3, xmmword[&m_env.temp.t]); @@ -561,9 +582,9 @@ void GSDrawScanlineCodeGenerator::Step() } } - if(m_env.sel.tfx != TFX_DECAL) + if(m_sel.tfx != TFX_DECAL) { - if(m_env.sel.iip) + if(m_sel.iip) { // GSVector4i c = m_env.d4.c; @@ -583,7 +604,7 @@ void GSDrawScanlineCodeGenerator::Step() } else { - if(m_env.sel.tfx == TFX_NONE) + if(m_sel.tfx == TFX_NONE) { movdqa(xmm5, xmmword[&m_env.c.rb]); movdqa(xmm6, xmmword[&m_env.c.ga]); @@ -604,7 +625,7 @@ void GSDrawScanlineCodeGenerator::Step() void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) { - if(!m_env.sel.zb) + if(!m_sel.zb) { return; } @@ -616,9 +637,9 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) // GSVector4i zs = zi; - if(!m_env.sel.sprite) + if(!m_sel.sprite) { - if(m_env.sel.zoverflow) + if(m_sel.zoverflow) { // zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); @@ -644,30 +665,30 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) cvttps2dq(xmm0, xmm0); } - if(m_env.sel.zwrite) + if(m_sel.zwrite) { movdqa(xmmword[&m_env.temp.zs], xmm0); } } - if(m_env.sel.ztest) + if(m_sel.ztest) { ReadPixel(xmm1, ebp); - if(m_env.sel.zwrite && m_env.sel.zpsm < 2) + if(m_sel.zwrite && m_sel.zpsm < 2) { movdqa(xmmword[&m_env.temp.zd], xmm1); } - // zd &= 0xffffffff >> m_env.sel.zpsm * 8; + // zd &= 0xffffffff >> m_sel.zpsm * 8; - if(m_env.sel.zpsm) + if(m_sel.zpsm) { - pslld(xmm1, m_env.sel.zpsm * 8); - psrld(xmm1, m_env.sel.zpsm * 8); + pslld(xmm1, m_sel.zpsm * 8); + psrld(xmm1, m_sel.zpsm * 8); } - if(m_env.sel.zoverflow || m_env.sel.zpsm == 0) + if(m_sel.zoverflow || m_sel.zpsm == 0) { // GSVector4i o = GSVector4i::x80000000(); @@ -683,7 +704,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) psubd(xmm1, xmm4); } - switch(m_env.sel.ztst) + switch(m_sel.ztst) { case ZTST_GEQUAL: // test |= zso < zdo; @@ -707,7 +728,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) void GSDrawScanlineCodeGenerator::SampleTexture() { - if(!m_env.sel.fb || m_env.sel.tfx == TFX_NONE) + if(!m_sel.fb || m_sel.tfx == TFX_NONE) { return; } @@ -716,14 +737,14 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // ebx = tex - if(!m_env.sel.fst) + if(!m_sel.fst) { // TODO: move these into Init/Step too? cvttps2dq(xmm2, xmm2); cvttps2dq(xmm3, xmm3); - if(m_env.sel.ltf) + if(m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -739,7 +760,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // xmm2 = u // xmm3 = v - if(m_env.sel.ltf) + if(m_sel.ltf) { // GSVector4i uf = u.xxzzlh().srl16(1); @@ -749,7 +770,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() psrlw(xmm0, 1); movdqa(xmmword[&m_env.temp.uf], xmm0); - if(!m_env.sel.sprite) + if(!m_sel.sprite) { // GSVector4i vf = v.xxzzlh().srl16(1); @@ -767,7 +788,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() psrad(xmm3, 16); packssdw(xmm2, xmm3); - if(m_env.sel.ltf) + if(m_sel.ltf) { // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); @@ -812,7 +833,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // xmm5, xmm6 = free // xmm7 = used - if(m_env.sel.ltf) + if(m_sel.ltf) { // GSVector4i y1 = uv1.uph16() << tw; // GSVector4i x1 = uv1.upl16(); @@ -989,10 +1010,10 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv) { // xmm0, xmm1, xmm4, xmm5, xmm6 = free - int wms_clamp = ((m_env.sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_env.sel.wmt + 1) >> 1) & 1; + int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; + int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - int region = ((m_env.sel.wms | m_env.sel.wmt) >> 1) & 1; + int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; if(wms_clamp == wmt_clamp) { @@ -1052,10 +1073,10 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1) { // xmm0, xmm1, xmm4, xmm5, xmm6 = free - int wms_clamp = ((m_env.sel.wms + 1) >> 1) & 1; - int wmt_clamp = ((m_env.sel.wmt + 1) >> 1) & 1; + int wms_clamp = ((m_sel.wms + 1) >> 1) & 1; + int wmt_clamp = ((m_sel.wmt + 1) >> 1) & 1; - int region = ((m_env.sel.wms | m_env.sel.wmt) >> 1) & 1; + int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; if(wms_clamp == wmt_clamp) { @@ -1149,18 +1170,18 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1) void GSDrawScanlineCodeGenerator::AlphaTFX() { - if(!m_env.sel.fb) + if(!m_sel.fb) { return; } - switch(m_env.sel.tfx) + switch(m_sel.tfx) { case TFX_MODULATE: // GSVector4i ga = iip ? gaf : m_env.c.ga; - movdqa(xmm4, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]); + movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); // gat = gat.modulate16<1>(ga).clamp8(); @@ -1170,7 +1191,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX() // if(!tcc) gat = gat.mix16(ga.srl16(7)); - if(!m_env.sel.tcc) + if(!m_sel.tcc) { psrlw(xmm4, 7); @@ -1187,14 +1208,14 @@ void GSDrawScanlineCodeGenerator::AlphaTFX() // GSVector4i ga = iip ? gaf : m_env.c.ga; - movdqa(xmm4, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]); + movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); movdqa(xmm2, xmm4); // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); psrlw(xmm4, 7); - if(m_env.sel.tcc) + if(m_sel.tcc) { paddusb(xmm4, xmm6); } @@ -1207,11 +1228,11 @@ void GSDrawScanlineCodeGenerator::AlphaTFX() // if(!tcc) gat = gat.mix16(ga.srl16(7)); - if(!m_env.sel.tcc) + if(!m_sel.tcc) { // GSVector4i ga = iip ? gaf : m_env.c.ga; - movdqa(xmm4, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]); + movdqa(xmm4, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); movdqa(xmm2, xmm4); psrlw(xmm4, 7); @@ -1225,33 +1246,81 @@ void GSDrawScanlineCodeGenerator::AlphaTFX() // gat = iip ? ga.srl16(7) : ga; - if(m_env.sel.iip) + if(m_sel.iip) { psrlw(xmm6, 7); } break; } + + if(m_sel.aa1) + { + // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha + + // FIXME: bios config screen cubes + + if(!m_sel.abe) + { + // a = cov + + if(m_sel.edge) + { + movdqa(xmm0, xmmword[&m_env.temp.cov]); + } + else + { + pcmpeqd(xmm0, xmm0); + psllw(xmm0, 15); + psrlw(xmm0, 8); + } + + mix16(xmm6, xmm0, xmm1); + } + else + { + // a = a == 0x80 ? cov : a + + pcmpeqd(xmm0, xmm0); + psllw(xmm0, 15); + psrlw(xmm0, 8); + + if(m_sel.edge) + { + movdqa(xmm1, xmmword[&m_env.temp.cov]); + } + else + { + movdqa(xmm1, xmm0); + } + + pcmpeqw(xmm0, xmm6); + psrld(xmm0, 16); + pslld(xmm0, 16); + + blend8(xmm6, xmm1); + } + } } void GSDrawScanlineCodeGenerator::TestAlpha() { - switch(m_env.sel.afail) + switch(m_sel.afail) { case AFAIL_FB_ONLY: - if(!m_env.sel.zwrite) return; + if(!m_sel.zwrite) return; break; case AFAIL_ZB_ONLY: - if(!m_env.sel.fwrite) return; + if(!m_sel.fwrite) return; break; case AFAIL_RGB_ONLY: - if(!m_env.sel.zwrite && m_env.sel.fpsm == 1) return; + if(!m_sel.zwrite && m_sel.fpsm == 1) return; break; } - switch(m_env.sel.atst) + switch(m_sel.atst) { case ATST_NEVER: // t = GSVector4i::xffffffff(); @@ -1295,7 +1364,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha() break; } - switch(m_env.sel.afail) + switch(m_sel.afail) { case AFAIL_KEEP: // test |= t; @@ -1326,12 +1395,12 @@ void GSDrawScanlineCodeGenerator::TestAlpha() void GSDrawScanlineCodeGenerator::ColorTFX() { - if(!m_env.sel.fwrite) + if(!m_sel.fwrite) { return; } - switch(m_env.sel.tfx) + switch(m_sel.tfx) { case TFX_MODULATE: @@ -1339,7 +1408,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX() // rbt = rbt.modulate16<1>(rb).clamp8(); - modulate16<1>(xmm5, xmmword[m_env.sel.iip ? &m_env.temp.rb : &m_env.c.rb]); + modulate16<1>(xmm5, xmmword[m_sel.iip ? &m_env.temp.rb : &m_env.c.rb]); clamp16(xmm5, xmm1); @@ -1352,11 +1421,11 @@ void GSDrawScanlineCodeGenerator::ColorTFX() case TFX_HIGHLIGHT: case TFX_HIGHLIGHT2: - if(m_env.sel.tfx == TFX_HIGHLIGHT2 && m_env.sel.tcc) + if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) { // GSVector4i ga = iip ? gaf : m_env.c.ga; - movdqa(xmm2, xmmword[m_env.sel.iip ? &m_env.temp.ga : &m_env.c.ga]); + movdqa(xmm2, xmmword[m_sel.iip ? &m_env.temp.ga : &m_env.c.ga]); } // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); @@ -1379,7 +1448,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX() // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); - modulate16<1>(xmm5, xmmword[m_env.sel.iip ? &m_env.temp.rb : &m_env.c.rb]); + modulate16<1>(xmm5, xmmword[m_sel.iip ? &m_env.temp.rb : &m_env.c.rb]); paddw(xmm5, xmm2); @@ -1391,7 +1460,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX() // rbt = iip ? rb.srl16(7) : rb; - if(m_env.sel.iip) + if(m_sel.iip) { psrlw(xmm5, 7); } @@ -1402,7 +1471,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX() void GSDrawScanlineCodeGenerator::Fog() { - if(!m_env.sel.fwrite || !m_env.sel.fge) + if(!m_sel.fwrite || !m_sel.fge) { return; } @@ -1410,7 +1479,7 @@ void GSDrawScanlineCodeGenerator::Fog() // rb = m_env.frb.lerp16<0>(rb, f); // ga = m_env.fga.lerp16<0>(ga, f).mix16(ga); - movdqa(xmm0, xmmword[!m_env.sel.sprite ? &m_env.temp.f : &m_env.p.f]); + movdqa(xmm0, xmmword[!m_sel.sprite ? &m_env.temp.f : &m_env.p.f]); movdqa(xmm1, xmm6); movdqa(xmm2, xmmword[&m_env.frb]); @@ -1424,7 +1493,7 @@ void GSDrawScanlineCodeGenerator::Fog() void GSDrawScanlineCodeGenerator::ReadFrame() { - if(!m_env.sel.fb) + if(!m_sel.fb) { return; } @@ -1434,7 +1503,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame() mov(ebx, dword[esi]); add(ebx, dword[edi]); - if(!m_env.sel.rfb) + if(!m_sel.rfb) { return; } @@ -1444,7 +1513,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame() void GSDrawScanlineCodeGenerator::TestDestAlpha() { - if(!m_env.sel.date || m_env.sel.fpsm != 0 && m_env.sel.fpsm != 2) + if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) { return; } @@ -1453,9 +1522,9 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha() movdqa(xmm1, xmm2); - if(m_env.sel.datm) + if(m_sel.datm) { - if(m_env.sel.fpsm == 2) + if(m_sel.fpsm == 2) { pxor(xmm0, xmm0); psrld(xmm1, 15); @@ -1470,7 +1539,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha() } else { - if(m_env.sel.fpsm == 2) + if(m_sel.fpsm == 2) { pslld(xmm1, 16); } @@ -1485,16 +1554,16 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha() void GSDrawScanlineCodeGenerator::WriteZBuf() { - if(!m_env.sel.zwrite) + if(!m_sel.zwrite) { return; } - movdqa(xmm1, xmmword[!m_env.sel.sprite ? &m_env.temp.zs : &m_env.p.z]); + movdqa(xmm1, xmmword[!m_sel.sprite ? &m_env.temp.zs : &m_env.p.z]); bool fast = false; - if(m_env.sel.ztest && m_env.sel.zpsm < 2) + if(m_sel.ztest && m_sel.zpsm < 2) { // zs = zs.blend8(zd, zm); @@ -1505,43 +1574,24 @@ void GSDrawScanlineCodeGenerator::WriteZBuf() fast = true; } - WritePixel(xmm1, xmm0, ebp, dh, fast, m_env.sel.zpsm); + WritePixel(xmm1, xmm0, ebp, dh, fast, m_sel.zpsm); } void GSDrawScanlineCodeGenerator::AlphaBlend() { - if(!m_env.sel.fwrite) - { - return; - } -/* - if(m_env.sel.aa1) - { - // hmm, the playstation logo does not look good... - - printf("aa1 %016I64x\n", m_env.sel.key); - - if(m_env.sel.fpsm != 1) // TODO: fm == 0xffxxxxxx - { - // a = 0x80 - - pcmpeqd(xmm0, xmm0); - psllw(xmm0, 15); - psrlw(xmm0, 8); - mix16(xmm6, xmm0, xmm1); - } - - return; - } -*/ - if(m_env.sel.abe == 255) + if(!m_sel.fwrite) { return; } - if((m_env.sel.abea != m_env.sel.abeb) && (m_env.sel.abea == 1 || m_env.sel.abeb == 1 || m_env.sel.abec == 1) || m_env.sel.abed == 1) + if(m_sel.abe == 0 && m_sel.aa1 == 0) { - switch(m_env.sel.fpsm) + return; + } + + if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) + { + switch(m_sel.fpsm) { case 0: case 1: @@ -1599,40 +1649,40 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() // xmm2, xmm3 = used // xmm4, xmm7 = free - if(m_env.sel.pabe || (m_env.sel.abea != m_env.sel.abeb) && (m_env.sel.abeb == 0 || m_env.sel.abed == 0)) + if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) { movdqa(xmm4, xmm5); } - if(m_env.sel.abea != m_env.sel.abeb) + if(m_sel.aba != m_sel.abb) { - // rb = c[abea * 2 + 0]; + // rb = c[aba * 2 + 0]; - switch(m_env.sel.abea) + switch(m_sel.aba) { case 0: break; case 1: movdqa(xmm5, xmm0); break; case 2: pxor(xmm5, xmm5); break; } - // rb = rb.sub16(c[abeb * 2 + 0]); + // rb = rb.sub16(c[abb * 2 + 0]); - switch(m_env.sel.abeb) + switch(m_sel.abb) { case 0: psubw(xmm5, xmm4); break; case 1: psubw(xmm5, xmm0); break; case 2: break; } - if(!(m_env.sel.fpsm == 1 && m_env.sel.abec == 1)) + if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) { - // GSVector4i a = abec < 2 ? c[abec * 2 + 1].yywwlh().sll16(7) : m_env.afix; + // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_env.afix; - switch(m_env.sel.abec) + switch(m_sel.abc) { case 0: case 1: - movdqa(xmm7, m_env.sel.abec ? xmm1 : xmm6); + movdqa(xmm7, m_sel.abc ? xmm1 : xmm6); pshuflw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); pshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); psllw(xmm7, 7); @@ -1647,9 +1697,9 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() modulate16<1>(xmm5, xmm7); } - // rb = rb.add16(c[abed * 2 + 0]); + // rb = rb.add16(c[abd * 2 + 0]); - switch(m_env.sel.abed) + switch(m_sel.abd) { case 0: paddw(xmm5, xmm4); break; case 1: paddw(xmm5, xmm0); break; @@ -1658,9 +1708,9 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() } else { - // rb = c[abed * 2 + 0]; + // rb = c[abd * 2 + 0]; - switch(m_env.sel.abed) + switch(m_sel.abd) { case 0: break; case 1: movdqa(xmm5, xmm0); break; @@ -1668,7 +1718,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() } } - if(m_env.sel.pabe) + if(m_sel.pabe) { // mask = (c[1] << 8).sra32(31); @@ -1690,11 +1740,11 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() movdqa(xmm4, xmm6); - if(m_env.sel.abea != m_env.sel.abeb) + if(m_sel.aba != m_sel.abb) { - // ga = c[abea * 2 + 1]; + // ga = c[aba * 2 + 1]; - switch(m_env.sel.abea) + switch(m_sel.aba) { case 0: break; case 1: movdqa(xmm6, xmm1); break; @@ -1703,23 +1753,23 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() // ga = ga.sub16(c[abeb * 2 + 1]); - switch(m_env.sel.abeb) + switch(m_sel.abb) { case 0: psubw(xmm6, xmm4); break; case 1: psubw(xmm6, xmm1); break; case 2: break; } - if(!(m_env.sel.fpsm == 1 && m_env.sel.abec == 1)) + if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // ga = ga.modulate16<1>(a); modulate16<1>(xmm6, xmm7); } - // ga = ga.add16(c[abed * 2 + 1]); + // ga = ga.add16(c[abd * 2 + 1]); - switch(m_env.sel.abed) + switch(m_sel.abd) { case 0: paddw(xmm6, xmm4); break; case 1: paddw(xmm6, xmm1); break; @@ -1728,9 +1778,9 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() } else { - // ga = c[abed * 2 + 1]; + // ga = c[abd * 2 + 1]; - switch(m_env.sel.abed) + switch(m_sel.abd) { case 0: break; case 1: movdqa(xmm6, xmm1); break; @@ -1738,7 +1788,13 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() } } - if(m_env.sel.pabe) + // xmm4 = src ga + // xmm5 = rb + // xmm6 = ga + // xmm2, xmm3 = used + // xmm0, xmm1, xmm7 = free + + if(m_sel.pabe) { if(!m_cpu.has(util::Cpu::tSSE41)) { @@ -1757,7 +1813,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() } else { - if(m_env.sel.fpsm != 1) // TODO: fm == 0xffxxxxxx + if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx { mix16(xmm6, xmm4, xmm7); } @@ -1768,12 +1824,12 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params) { const int _top = params + 4; - if(!m_env.sel.fwrite) + if(!m_sel.fwrite) { return; } - if(m_env.sel.colclamp == 0) + if(m_sel.colclamp == 0) { // c[0] &= 0x000000ff; // c[1] &= 0x000000ff; @@ -1784,7 +1840,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params) pand(xmm6, xmm7); } - if(m_env.sel.fpsm == 2 && m_env.sel.dthe) + if(m_sel.fpsm == 2 && m_sel.dthe) { mov(eax, dword[esp + _top]); and(eax, 3); @@ -1800,7 +1856,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params) punpckhwd(xmm7, xmm6); packuswb(xmm5, xmm7); - if(m_env.sel.fba && m_env.sel.fpsm != 1) + if(m_sel.fba && m_sel.fpsm != 1) { // fs |= 0x80000000; @@ -1809,7 +1865,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params) por(xmm5, xmm7); } - if(m_env.sel.fpsm == 2) + if(m_sel.fpsm == 2) { // GSVector4i rb = fs & 0x00f800f8; // GSVector4i ga = fs & 0x8000f800; @@ -1841,16 +1897,16 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params) por(xmm5, xmm7); } - if(m_env.sel.rfb) + if(m_sel.rfb) { // fs = fs.blend(fd, fm); blend(xmm5, xmm2, xmm3); // TODO: could be skipped in certain cases, depending on fpsm and fm } - bool fast = m_env.sel.rfb && m_env.sel.fpsm < 2; + bool fast = m_sel.rfb && m_sel.fpsm < 2; - WritePixel(xmm5, xmm0, ebx, dl, fast, m_env.sel.fpsm); + WritePixel(xmm5, xmm0, ebx, dl, fast, m_sel.fpsm); } void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr) @@ -1992,9 +2048,9 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uin if(i == 0) movd(eax, addr); else pextrd(eax, addr, i); - if(m_env.sel.tlu) movzx(eax, byte[ebx + eax]); + if(m_sel.tlu) movzx(eax, byte[ebx + eax]); - const Address& src = m_env.sel.tlu ? ptr[eax * 4 + (size_t)m_env.clut] : ptr[ebx + eax * 4]; + const Address& src = m_sel.tlu ? ptr[eax * 4 + (size_t)m_env.clut] : ptr[ebx + eax * 4]; if(i == 0) movd(dst, src); else pinsrd(dst, src, i); diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.h b/plugins/GSdx/GSDrawScanlineCodeGenerator.h index fe800af300..2803037200 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.h +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.h @@ -36,6 +36,7 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator util::Cpu m_cpu; GSScanlineEnvironment& m_env; + GSScanlineSelector m_sel; void Generate(); @@ -72,5 +73,5 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator void blendr(const Xmm& b, const Xmm& a, const Xmm& mask); public: - GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize); + GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize); }; \ No newline at end of file diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index 6710344c0d..1dcea7f61c 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -38,9 +38,10 @@ GSRasterizer::~GSRasterizer() void GSRasterizer::Draw(const GSRasterizerData* data) { - m_dsf.sr = NULL; m_dsf.ssl = NULL; + m_dsf.ssle = NULL; m_dsf.ssp = NULL; + m_dsf.sr = NULL; m_ds->BeginDraw(data, &m_dsf); @@ -111,6 +112,25 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor) GSVertexSW dv = v[1] - v[0]; GSVector4 dp = dv.p.abs(); + + if(m_dsf.ssle) + { + int i = (dp < dp.yxwz()).mask() & 1; // |x| <= |y| + + GSVertexSW dscan; + + dscan.p = GSVector4::zero(); + dscan.t = GSVector4::zero(); + dscan.c = GSVector4::zero(); + + m_dsf.ssp(v, dscan); + + DrawEdge(v[0], v[1], dv, scissor, i, 0); + DrawEdge(v[0], v[1], dv, scissor, i, 1); + + return; + } + GSVector4i dpi(dp); if(dpi.y == 0) @@ -197,6 +217,11 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc i = (aabb == bccb).mask() & 7; + if(m_dsf.ssle) + { + DrawTriangleEdge(v, scissor); + } + switch(i) { case 0: // a < b < c @@ -215,6 +240,37 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc } } +void GSRasterizer::DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor) +{ + GSVertexSW dv[3]; + + dv[0] = v[1] - v[0]; + dv[1] = v[2] - v[0]; + dv[2] = v[2] - v[1]; + + GSVector4 dx = dv[0].p.upl(dv[1].p).xyxy(dv[2].p); + GSVector4 dy = dv[0].p.upl(dv[1].p).zwyx(dv[2].p); + + GSVector4 a = dx.abs() < dy.abs(); // |x| <= |y| + GSVector4 b = dx < GSVector4::zero(); // x < 0 + GSVector4 c = dv[1].p * (dv[0].p / dv[1].p).yyyy() < dv[0].p; // longest.p.x < 0 + + int i = a.mask(); + int j = ((a | b) ^ c.xxxx()).mask() ^ 2; // evil + + GSVertexSW dscan; + + dscan.p = GSVector4::zero(); + dscan.t = GSVector4::zero(); + dscan.c = GSVector4::zero(); + + m_dsf.ssp(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small) + + DrawEdge(v[0], v[1], dv[0], scissor, i & 1, j & 1); + DrawEdge(v[0], v[2], dv[1], scissor, i & 2, j & 2); + DrawEdge(v[1], v[2], dv[2], scissor, i & 4, j & 4); +} + void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor) { GSVertexSW longest; @@ -559,6 +615,222 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis } } +void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side) +{ + // orientation: + // - true: |dv.p.y| > |dv.p.x| + // - false |dv.p.x| > |dv.p.y| + // side: + // - true: top/left edge + // - false: bottom/right edge + + // TODO: bit slow and too much duplicated code + // TODO: inner pre-step is still missing (hardly noticable) + + GSVector4 fscissor(scissor); + + GSVector4 lrtb = v0.p.upl(v1.p).ceil(); + + if(orientation) + { + GSVector4 tbmax = lrtb.maxv(fscissor.yyyy()); + GSVector4 tbmin = lrtb.minv(fscissor.wwww()); + + GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin)); + + int top, bottom; + + GSVertexSW edge, dedge; + + if((dv.p >= GSVector4::zero()).mask() & 2) + { + top = tbi.extract32<0>(); + bottom = tbi.extract32<3>(); + + if(top >= bottom) return; + + edge = v0; + dedge = dv / dv.p.yyyy(); + + edge += dedge * (tbmax.zzzz() - edge.p.yyyy()); + } + else + { + top = tbi.extract32<1>(); + bottom = tbi.extract32<2>(); + + if(top >= bottom) return; + + edge = v1; + dedge = dv / dv.p.yyyy(); + + edge += dedge * (tbmax.wwww() - edge.p.yyyy()); + } + + if(side) + { + while(1) + { + do + { + if((top % m_threads) == m_id) + { + GSVector4 p = edge.p.ceil(); + + if(((fscissor.xxxx() < p) & (p <= fscissor.zzzz())).mask() & 1) + { + GSVector4 coverage = (p - edge.p).xxxx(); + + edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w + + int x = GSVector4i(p).extract32<0>() - 1; + + m_stats.pixels++; + + m_dsf.ssle(x + 1, x, top, edge); + } + } + } + while(0); + + if(++top >= bottom) break; + + edge += dedge; + } + } + else + { + while(1) + { + do + { + if((top % m_threads) == m_id) + { + GSVector4 p = edge.p.floor(); + + if(((fscissor.xxxx() <= p) & (p < fscissor.zzzz())).mask() & 1) + { + GSVector4 coverage = (edge.p - p).xxxx(); + + edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w + + int x = GSVector4i(p).extract32<0>() + 1; + + m_stats.pixels++; + + m_dsf.ssle(x + 1, x, top, edge); + } + } + } + while(0); + + if(++top >= bottom) break; + + edge += dedge; + } + } + } + else + { + GSVector4 lrmax = lrtb.maxv(fscissor.xxxx()); + GSVector4 lrmin = lrtb.minv(fscissor.zzzz()); + + GSVector4i lri = GSVector4i(lrmax.xyxy(lrmin)); + + int left, right; + + GSVertexSW edge, dedge; + + if((dv.p >= GSVector4::zero()).mask() & 1) + { + left = lri.extract32<0>(); + right = lri.extract32<3>(); + + if(left >= right) return; + + edge = v0; + dedge = dv / dv.p.xxxx(); + + edge += dedge * (lrmax.xxxx() - edge.p.xxxx()); + } + else + { + left = lri.extract32<1>(); + right = lri.extract32<2>(); + + if(left >= right) return; + + edge = v1; + dedge = dv / dv.p.xxxx(); + + edge += dedge * (lrmax.yyyy() - edge.p.xxxx()); + } + + if(side) + { + while(1) + { + do + { + GSVector4 p = edge.p.ceil(); + + if(((fscissor.yyyy() < p) & (p <= fscissor.wwww())).mask() & 2) + { + int y = GSVector4i(p).extract32<1>() - 1; + + if((y % m_threads) == m_id) + { + GSVector4 coverage = (p - edge.p).yyyy(); + + edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w + + m_stats.pixels++; + + m_dsf.ssle(left + 1, left, y, edge); + } + } + } + while(0); + + if(++left >= right) break; + + edge += dedge; + } + } + else + { + while(1) + { + do + { + GSVector4 p = edge.p.floor(); + + if(((fscissor.yyyy() <= p) & (p < fscissor.wwww())).mask() & 2) + { + int y = GSVector4i(p).extract32<1>() + 1; + + if((y % m_threads) == m_id) + { + GSVector4 coverage = (edge.p - p).yyyy(); + + edge.t = edge.t.xyxy(edge.t.uph(coverage * 0x80)); // coverage => t.w + + m_stats.pixels++; + + m_dsf.ssle(left + 1, left, y, edge); + } + } + } + while(0); + + if(++left >= right) break; + + edge += dedge; + } + } + } +} + // GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, long* sync) diff --git a/plugins/GSdx/GSRasterizer.h b/plugins/GSdx/GSRasterizer.h index bbcf1e6619..0f24c794f2 100644 --- a/plugins/GSdx/GSRasterizer.h +++ b/plugins/GSdx/GSRasterizer.h @@ -48,15 +48,16 @@ public: class IDrawScanline { public: - typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v); typedef void (__fastcall *DrawScanlineStaticPtr)(int right, int left, int top, const GSVertexSW& v); typedef void (__fastcall *SetupPrimStaticPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan); + typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v); struct Functions { - DrawSolidRectPtr sr; // TODO DrawScanlineStaticPtr ssl; + DrawScanlineStaticPtr ssle; SetupPrimStaticPtr ssp; + DrawSolidRectPtr sr; // TODO }; virtual ~IDrawScanline() {} @@ -78,6 +79,7 @@ protected: void DrawPoint(const GSVertexSW* v, const GSVector4i& scissor); void DrawLine(const GSVertexSW* v, const GSVector4i& scissor); void DrawTriangle(const GSVertexSW* v, const GSVector4i& scissor); + void DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor); void DrawSprite(const GSVertexSW* v, const GSVector4i& scissor); void DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor); @@ -87,6 +89,8 @@ protected: __forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, GSVector4& r, const GSVector4& dr, const GSVertexSW& dscan, const GSVector4& scissor); __forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan, const GSVector4& scissor); + void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side); + public: GSRasterizer(IDrawScanline* ds, int id = 0, int threads = 0); virtual ~GSRasterizer(); diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index 6072b04596..aa53f39b1b 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -34,6 +34,7 @@ struct GSRendererSettings int m_filter; bool m_vsync; bool m_nativeres; + bool m_aa1; }; class GSRendererBase : public GSState, protected GSRendererSettings @@ -84,6 +85,12 @@ protected: m_osd = !m_osd; return true; } + + if(msg.wParam == VK_DELETE) + { + m_aa1 = !m_aa1; + return true; + } } return false; @@ -103,6 +110,7 @@ public: m_filter = rs.m_filter; m_vsync = rs.m_vsync; m_nativeres = rs.m_nativeres; + m_aa1 = rs.m_aa1; }; virtual bool Create(LPCTSTR title) = 0; diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index ecd37f0fe1..733b61cf86 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -262,11 +262,11 @@ protected: p.sel.zpsm = 3; p.sel.atst = ATST_ALWAYS; p.sel.tfx = TFX_NONE; - p.sel.abe = 255; + p.sel.ababcd = 255; p.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0; p.fm = context->FRAME.FBMSK; - p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 || PRIM->AA1 && primclass == GS_LINE_CLASS ? 0xffffffff : 0; + p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER) { @@ -449,22 +449,24 @@ protected: p.sel.datm = context->TEST.DATM; } - if(PRIM->ABE) + if(PRIM->ABE && !context->ALPHA.IsOpaque() || PRIM->AA1) { - if(!context->ALPHA.IsOpaque()) + p.sel.abe = PRIM->ABE; + p.sel.ababcd = context->ALPHA.ai32[0]; + + if(env.PABE.PABE) { - p.sel.abe = context->ALPHA.ai32[0]; - p.sel.pabe = env.PABE.PABE; + p.sel.pabe = 1; + } + + if(PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS)) + { + p.sel.aa1 = m_aa1 ? 1 : 0; } } - if(PRIM->AA1) - { - p.sel.aa1 = 1; - } - if(p.sel.date - || p.sel.abea == 1 || p.sel.abeb == 1 || p.sel.abec == 1 || p.sel.abed == 1 + || p.sel.aba == 1 || p.sel.abb == 1 || p.sel.abc == 1 || p.sel.abd == 1 || p.sel.atst != ATST_ALWAYS && p.sel.afail == AFAIL_RGB_ONLY || p.sel.fpsm == 0 && p.fm != 0 && p.fm != 0xffffffff || p.sel.fpsm == 1 && (p.fm & 0x00ffffff) != 0 && (p.fm & 0x00ffffff) != 0x00ffffff @@ -532,7 +534,7 @@ protected: GSRasterizerStats stats; m_rl.GetStats(stats); - + m_perfmon.Put(GSPerfMon::Draw, 1); m_perfmon.Put(GSPerfMon::Prim, stats.prims); m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels); @@ -576,6 +578,15 @@ protected: str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);} } + + if(0)//stats.ticks > 1000000) + { + printf("* [%I64d | %012I64x] ticks %I64d prims %d (%d) pixels %d (%d)\n", + m_perfmon.GetFrame(), p.sel.key, + stats.ticks, + stats.prims, stats.prims > 0 ? (int)(stats.ticks / stats.prims) : -1, + stats.pixels, stats.pixels > 0 ? (int)(stats.ticks / stats.pixels) : -1); + } } void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) diff --git a/plugins/GSdx/GSScanlineEnvironment.h b/plugins/GSdx/GSScanlineEnvironment.h index f01508d925..d959930197 100644 --- a/plugins/GSdx/GSScanlineEnvironment.h +++ b/plugins/GSdx/GSScanlineEnvironment.h @@ -41,34 +41,37 @@ union GSScanlineSelector DWORD tlu:1; // 18 DWORD fge:1; // 19 DWORD date:1; // 20 - DWORD abea:2; // 21 - DWORD abeb:2; // 23 - DWORD abec:2; // 25 - DWORD abed:2; // 27 - DWORD pabe:1; // 29 - DWORD rfb:1; // 30 - DWORD sprite:1; // 31 + DWORD abe:1; // 21 + DWORD aba:2; // 22 + DWORD abb:2; // 24 + DWORD abc:2; // 26 + DWORD abd:2; // 28 + DWORD pabe:1; // 30 + DWORD aa1:1; // 31 DWORD fwrite:1; // 32 DWORD ftest:1; // 33 - DWORD zwrite:1; // 34 - DWORD ztest:1; // 35 - DWORD wms:2; // 36 - DWORD wmt:2; // 38 - DWORD datm:1; // 40 - DWORD colclamp:1; // 41 - DWORD fba:1; // 42 - DWORD dthe:1; // 43 - DWORD zoverflow:1; // 44 (z max >= 0x80000000) - DWORD aa1:1; // 45 + DWORD rfb:1; // 34 + DWORD zwrite:1; // 35 + DWORD ztest:1; // 36 + DWORD zoverflow:1; // 37 (z max >= 0x80000000) + DWORD wms:2; // 38 + DWORD wmt:2; // 40 + DWORD datm:1; // 42 + DWORD colclamp:1; // 43 + DWORD fba:1; // 44 + DWORD dthe:1; // 45 + DWORD sprite:1; // 46 + DWORD edge:1; // 47 }; struct { - DWORD _pad1:21; - DWORD abe:8; - DWORD _pad2:3; + DWORD _pad1:22; + DWORD ababcd:8; + DWORD _pad2:2; DWORD fb:2; + DWORD _pad3:1; DWORD zb:2; }; @@ -88,7 +91,7 @@ union GSScanlineSelector return sprite && iip == 0 && tfx == TFX_NONE - && abe == 255 + && abe == 0 && ztst <= 1 && atst <= 1 && date == 0 @@ -114,8 +117,6 @@ __declspec(align(16)) struct GSScanlineParam __declspec(align(16)) struct GSScanlineEnvironment { - GSScanlineSelector sel; - void* vm; const void* tex; const DWORD* clut; @@ -140,5 +141,5 @@ __declspec(align(16)) struct GSScanlineEnvironment struct {GSVector4 z, stq; GSVector4i c, f, st;} d4; struct {GSVector4i rb, ga;} c; struct {GSVector4i z, f;} p; - struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf;} temp; + struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf, cov;} temp; }; diff --git a/plugins/GSdx/GSSettingsDlg.cpp b/plugins/GSdx/GSSettingsDlg.cpp index 5186208229..4d06bde333 100644 --- a/plugins/GSdx/GSSettingsDlg.cpp +++ b/plugins/GSdx/GSSettingsDlg.cpp @@ -75,6 +75,7 @@ GSSettingsDlg::GSSettingsDlg(CWnd* pParent /*=NULL*/) , m_vsync(FALSE) , m_logz(FALSE) , m_fba(TRUE) + , m_aa1(FALSE) { } @@ -115,6 +116,7 @@ void GSSettingsDlg::DoDataExchange(CDataExchange* pDX) DDX_Check(pDX, IDC_CHECK2, m_vsync); DDX_Check(pDX, IDC_CHECK5, m_logz); DDX_Check(pDX, IDC_CHECK7, m_fba); + DDX_Check(pDX, IDC_CHECK8, m_aa1); } BEGIN_MESSAGE_MAP(GSSettingsDlg, CDialog) @@ -220,6 +222,7 @@ BOOL GSSettingsDlg::OnInitDialog() m_vsync = !!pApp->GetProfileInt(_T("Settings"), _T("vsync"), FALSE); m_logz = !!pApp->GetProfileInt(_T("Settings"), _T("logz"), FALSE); m_fba = !!pApp->GetProfileInt(_T("Settings"), _T("fba"), TRUE); + m_aa1 = !!pApp->GetProfileInt(_T("Settings"), _T("aa1"), FALSE); m_resx.SetRange(512, 4096); m_resy.SetRange(512, 4096); @@ -283,6 +286,7 @@ void GSSettingsDlg::OnOK() pApp->WriteProfileInt(_T("Settings"), _T("vsync"), m_vsync); pApp->WriteProfileInt(_T("Settings"), _T("logz"), m_logz); pApp->WriteProfileInt(_T("Settings"), _T("fba"), m_fba); + pApp->WriteProfileInt(_T("Settings"), _T("aa1"), m_aa1); pApp->WriteProfileInt(_T("Settings"), _T("resx"), m_resx.GetPos()); pApp->WriteProfileInt(_T("Settings"), _T("resy"), m_resy.GetPos()); diff --git a/plugins/GSdx/GSSettingsDlg.h b/plugins/GSdx/GSSettingsDlg.h index 2cc4522dd9..845c53f898 100644 --- a/plugins/GSdx/GSSettingsDlg.h +++ b/plugins/GSdx/GSSettingsDlg.h @@ -60,6 +60,7 @@ public: BOOL m_vsync; BOOL m_logz; BOOL m_fba; + BOOL m_aa1; protected: virtual LRESULT DefWindowProc(UINT message, WPARAM wParam, LPARAM lParam); diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.cpp index 7350d1ed61..d44768d68f 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.cpp @@ -24,14 +24,16 @@ #include "StdAfx.h" #include "GSSetupPrimCodeGenerator.h" -GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize) +GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize) : CodeGenerator(maxsize, ptr) , m_env(env) { - m_en.z = m_env.sel.zb ? 1 : 0; - m_en.f = m_env.sel.fb && m_env.sel.fge ? 1 : 0; - m_en.t = m_env.sel.fb && m_env.sel.tfx != TFX_NONE ? 1 : 0; - m_en.c = m_env.sel.fb && m_env.sel.tfx != TFX_DECAL ? 1 : 0; + m_sel.key = key; + + m_en.z = m_sel.zb ? 1 : 0; + m_en.f = m_sel.fb && m_sel.fge ? 1 : 0; + m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0; + m_en.c = m_sel.fb && m_sel.tfx != TFX_DECAL ? 1 : 0; #if _M_AMD64 #error TODO @@ -44,7 +46,7 @@ void GSSetupPrimCodeGenerator::Generate() { const int params = 0; - if((m_en.z || m_en.f) && !m_env.sel.sprite || m_en.t || m_en.c && m_env.sel.iip) + if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip) { for(int i = 0; i < 5; i++) { @@ -68,7 +70,7 @@ void GSSetupPrimCodeGenerator::Depth() return; } - if(!m_env.sel.sprite) + if(!m_sel.sprite) { // GSVector4 t = dscan.p; @@ -148,7 +150,7 @@ void GSSetupPrimCodeGenerator::Depth() shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - if(m_env.sel.zoverflow) + if(m_sel.zoverflow) { // m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); @@ -193,7 +195,7 @@ void GSSetupPrimCodeGenerator::Texture() movaps(xmm1, xmm0); mulps(xmm1, xmm3); - if(m_env.sel.fst) + if(m_sel.fst) { // m_env.d4.st = GSVector4i(t * 4.0f); @@ -207,7 +209,7 @@ void GSSetupPrimCodeGenerator::Texture() movaps(xmmword[&m_env.d4.stq], xmm1); } - for(int j = 0, k = m_env.sel.fst ? 2 : 3; j < k; j++) + for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) { // GSVector4 ds = t.xxxx(); // GSVector4 dt = t.yyyy(); @@ -223,7 +225,7 @@ void GSSetupPrimCodeGenerator::Texture() movaps(xmm2, xmm1); mulps(xmm2, Xmm(4 + i)); - if(m_env.sel.fst) + if(m_sel.fst) { // m_env.d[i].si/ti = GSVector4i(v); @@ -257,7 +259,7 @@ void GSSetupPrimCodeGenerator::Color() return; } - if(m_env.sel.iip) + if(m_sel.iip) { // GSVector4 c = dscan.c; @@ -351,7 +353,7 @@ void GSSetupPrimCodeGenerator::Color() // if(!tme) c = c.srl16(7); - if(m_env.sel.tfx == TFX_NONE) + if(m_sel.tfx == TFX_NONE) { psrlw(xmm0, 7); } diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.h b/plugins/GSdx/GSSetupPrimCodeGenerator.h index 6607153546..f12913da7b 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.h +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.h @@ -36,6 +36,7 @@ class GSSetupPrimCodeGenerator : public CodeGenerator util::Cpu m_cpu; GSScanlineEnvironment& m_env; + GSScanlineSelector m_sel; struct {DWORD z:1, f:1, t:1, c:1;} m_en; @@ -46,5 +47,5 @@ class GSSetupPrimCodeGenerator : public CodeGenerator void Color(); public: - GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize); + GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, UINT64 key, void* ptr, size_t maxsize); }; \ No newline at end of file diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index 09375c6cef..1de9c74c16 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -2193,12 +2193,12 @@ public: GSVector4 abs() const { - return GSVector4(_mm_abs_ps(m)); + return *this & cast(GSVector4i::x7fffffff()); } GSVector4 neg() const { - return GSVector4(_mm_neg_ps(m)); + return *this ^ cast(GSVector4i::x80000000()); } GSVector4 rcp() const diff --git a/plugins/GSdx/GSdx.rc b/plugins/GSdx/GSdx.rc index 81c53bc620..750014a605 100644 --- a/plugins/GSdx/GSdx.rc +++ b/plugins/GSdx/GSdx.rc @@ -82,7 +82,7 @@ IDB_LOGO10 BITMAP "res\\logo10.bmp" // Dialog // -IDD_CONFIG DIALOGEX 0, 0, 189, 231 +IDD_CONFIG DIALOGEX 0, 0, 189, 245 STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU CAPTION "Settings..." FONT 8, "MS Shell Dlg", 400, 0, 0x1 @@ -105,17 +105,19 @@ BEGIN EDITTEXT IDC_EDIT2,109,132,35,13,ES_AUTOHSCROLL | ES_NUMBER CONTROL "",IDC_SPIN2,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,133,135,11,14 CONTROL "Native",IDC_CHECK1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,149,134,33,10 - CONTROL "Texture filtering",IDC_CHECK4,"Button",BS_AUTO3STATE | WS_TABSTOP,7,180,67,10 - CONTROL "Enable tv-out",IDC_CHECK3,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,167,57,10 - CONTROL "Wait vsync",IDC_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,193,51,10 - CONTROL "NLOOP hack",IDC_CHECK6,"Button",BS_AUTO3STATE | WS_TABSTOP,7,167,55,10 - CONTROL "Logarithmic Z",IDC_CHECK5,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,180,58,10 - CONTROL "Alpha correction (FBA)",IDC_CHECK7,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,193,102,10 - DEFPUSHBUTTON "OK",IDOK,43,210,50,14 - PUSHBUTTON "Cancel",IDCANCEL,96,210,50,14 LTEXT "SW rend. threads:",IDC_STATIC,7,149,60,8 EDITTEXT IDC_EDIT3,71,147,35,13,ES_AUTOHSCROLL | ES_NUMBER CONTROL "",IDC_SPIN3,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,150,11,14 + CONTROL "NLOOP hack",IDC_CHECK6,"Button",BS_AUTO3STATE | WS_TABSTOP,7,167,55,10 + CONTROL "Enable tv-out",IDC_CHECK3,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,167,57,10 + CONTROL "Texture filtering",IDC_CHECK4,"Button",BS_AUTO3STATE | WS_TABSTOP,7,180,67,10 + CONTROL "Logarithmic Z",IDC_CHECK5,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,180,58,10 + CONTROL "Wait vsync",IDC_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,193,51,10 + CONTROL "Alpha correction (FBA)",IDC_CHECK7,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,193,102,10 + CONTROL "Edge anti-aliasing (AA1, sw-mode only)",IDC_CHECK8, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,206,141,10 + DEFPUSHBUTTON "OK",IDOK,43,224,50,14 + PUSHBUTTON "Cancel",IDCANCEL,96,224,50,14 END IDD_CAPTURE DIALOGEX 0, 0, 279, 71 @@ -179,7 +181,7 @@ BEGIN VERTGUIDE, 80 VERTGUIDE, 182 TOPMARGIN, 7 - BOTTOMMARGIN, 224 + BOTTOMMARGIN, 238 END IDD_CAPTURE, DIALOG diff --git a/plugins/GSdx/resource.h b/plugins/GSdx/resource.h index a10f5e1c5f..4817fc5032 100644 --- a/plugins/GSdx/resource.h +++ b/plugins/GSdx/resource.h @@ -13,6 +13,7 @@ #define IDC_EDIT1 2009 #define IDC_EDIT2 2010 #define IDC_BUTTON1 2011 +#define IDC_CHECK8 2011 #define IDC_BUTTON2 2012 #define IDC_EDIT3 2012 #define IDC_CUSTOM1 2013 diff --git a/plugins/GSdx/sse.h b/plugins/GSdx/sse.h index 2c3b0f76a7..e334c5bb01 100644 --- a/plugins/GSdx/sse.h +++ b/plugins/GSdx/sse.h @@ -47,19 +47,6 @@ const __m128 ps_3f800000 = _mm_castsi128_ps(_mm_set1_epi32(0x3f800000)); const __m128 ps_4b000000 = _mm_castsi128_ps(_mm_set1_epi32(0x4b000000)); - const __m128 ps_7fffffff = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); - const __m128 ps_80000000 = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); - const __m128 ps_ffffffff = _mm_castsi128_ps(_mm_set1_epi32(0xffffffff)); - - __forceinline __m128 _mm_neg_ps(__m128 r) - { - return _mm_xor_ps(ps_80000000, r); - } - - __forceinline __m128 _mm_abs_ps(__m128 r) - { - return _mm_and_ps(ps_7fffffff, r); - } #define _MM_TRANSPOSE4_SI128(row0, row1, row2, row3) \ { \