diff --git a/plugins/GSdx/GPUDrawScanline.cpp b/plugins/GSdx/GPUDrawScanline.cpp index f47a80d01b..c05f820454 100644 --- a/plugins/GSdx/GPUDrawScanline.cpp +++ b/plugins/GSdx/GPUDrawScanline.cpp @@ -25,6 +25,7 @@ GPUDrawScanline::GPUDrawScanline(GPUState* state, int id) : m_state(state) , m_id(id) + , m_sp(m_env) , m_ds(m_env) { } @@ -73,96 +74,35 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) f->sr = NULL; // TODO - // - - DWORD sel = 0; + // doesn't need all bits => less functions generated - sel |= (data->primclass == GS_SPRITE_CLASS ? 1 : 0) << 0; - sel |= m_env.sel.tme << 1; - sel |= m_env.sel.iip << 2; + GPUScanlineSelector sel; - f->sp = m_sp.Lookup(sel); + sel.key = 0; + + sel.iip = m_env.sel.iip; + sel.tfx = m_env.sel.tfx; + sel.twin = m_env.sel.twin; + sel.sprite = m_env.sel.sprite; + + f->ssp = m_sp.Lookup(sel); } -template -void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) +void GPUDrawScanline::EndDraw(const GSRasterizerStats& stats) { - if(m_env.sel.tme && !m_env.sel.twin) - { - if(sprite) - { - GSVector4i t; - - t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001(); - t = t.ps32(t); - t = t.upl16(t); - - m_env.twin[2].u = t.xxxx(); - m_env.twin[2].v = t.yyyy(); - } - else - { - m_env.twin[2].u = GSVector4i::x00ff(); - m_env.twin[2].v = GSVector4i::x00ff(); - } - } - - GSVector4 ps0123 = GSVector4::ps0123(); - GSVector4 ps4567 = GSVector4::ps4567(); - - GSVector4 dt = dscan.t; - GSVector4 dc = dscan.c; - - GSVector4i dtc8 = GSVector4i(dt * 8.0f).ps32(GSVector4i(dc * 8.0f)); - - if(tme) - { - m_env.d8.st = dtc8.upl16(dtc8); - - m_env.d.s = GSVector4i(dt.xxxx() * ps0123).ps32(GSVector4i(dt.xxxx() * ps4567)); - m_env.d.t = GSVector4i(dt.yyyy() * ps0123).ps32(GSVector4i(dt.yyyy() * ps4567)); - } - - if(iip) - { - m_env.d8.c = dtc8.uph16(dtc8); - - m_env.d.r = GSVector4i(dc.xxxx() * ps0123).ps32(GSVector4i(dc.xxxx() * ps4567)); - m_env.d.g = GSVector4i(dc.yyyy() * ps0123).ps32(GSVector4i(dc.yyyy() * ps4567)); - m_env.d.b = GSVector4i(dc.zzzz() * ps0123).ps32(GSVector4i(dc.zzzz() * ps4567)); - } - else - { - // TODO: m_env.c.r/g/b = ... - } + m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame()); } // -GPUDrawScanline::GPUSetupPrimMap::GPUSetupPrimMap() +GPUDrawScanline::GPUSetupPrimMap::GPUSetupPrimMap(GPUScanlineEnvironment& env) + : m_env(env) { - #define InitSP_IIP(sprite, tme, iip) \ - m_default[sprite][tme][iip] = (SetupPrimPtr)&GPUDrawScanline::SetupPrim; \ - - #define InitSP_TME(sprite, tme) \ - InitSP_IIP(sprite, tme, 0) \ - InitSP_IIP(sprite, tme, 1) \ - - #define InitSP_SPRITE(sprite) \ - InitSP_TME(sprite, 0) \ - InitSP_TME(sprite, 1) \ - - InitSP_SPRITE(0); - InitSP_SPRITE(1); } -IDrawScanline::SetupPrimPtr GPUDrawScanline::GPUSetupPrimMap::GetDefaultFunction(DWORD key) +GPUSetupPrimCodeGenerator* GPUDrawScanline::GPUSetupPrimMap::Create(DWORD key, void* ptr, size_t maxsize) { - DWORD sprite = (key >> 0) & 1; - DWORD tme = (key >> 1) & 1; - DWORD iip = (key >> 2) & 1; - - return m_default[sprite][tme][iip]; + return new GPUSetupPrimCodeGenerator(m_env, ptr, maxsize); } // diff --git a/plugins/GSdx/GPUDrawScanline.h b/plugins/GSdx/GPUDrawScanline.h index d361dccfe4..9a39d87875 100644 --- a/plugins/GSdx/GPUDrawScanline.h +++ b/plugins/GSdx/GPUDrawScanline.h @@ -23,9 +23,10 @@ #include "GPUState.h" #include "GSRasterizer.h" -#include "GSAlignedClass.h" #include "GPUScanlineEnvironment.h" +#include "GPUSetupPrimCodeGenerator.h" #include "GPUDrawScanlineCodeGenerator.h" +#include "GSAlignedClass.h" class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline { @@ -33,20 +34,14 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline // - class GPUSetupPrimMap : public GSFunctionMap + class GPUSetupPrimMap : public GSCodeGeneratorFunctionMap { - SetupPrimPtr m_default[2][2][2]; + GPUScanlineEnvironment& m_env; public: - GPUSetupPrimMap(); - - SetupPrimPtr GetDefaultFunction(DWORD key); - }; - - GPUSetupPrimMap m_sp; - - template - void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan); + GPUSetupPrimMap(GPUScanlineEnvironment& env); + GPUSetupPrimCodeGenerator* Create(DWORD key, void* ptr, size_t maxsize); + } m_sp; // @@ -59,8 +54,6 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline GPUDrawScanlineCodeGenerator* Create(DWORD key, void* ptr, size_t maxsize); } m_ds; - void DrawScanline(int top, int left, int right, const GSVertexSW& v); - protected: GPUState* m_state; int m_id; @@ -72,6 +65,6 @@ public: // IDrawScanline void BeginDraw(const GSRasterizerData* data, Functions* f); - void EndDraw(const GSRasterizerStats& stats) {} - void PrintStats() {} + void EndDraw(const GSRasterizerStats& stats); + void PrintStats() {m_ds.PrintStats();} }; diff --git a/plugins/GSdx/GPUSetupPrimCodeGenerator.cpp b/plugins/GSdx/GPUSetupPrimCodeGenerator.cpp new file mode 100644 index 0000000000..4862cf8a17 --- /dev/null +++ b/plugins/GSdx/GPUSetupPrimCodeGenerator.cpp @@ -0,0 +1,224 @@ +/* + * Copyright (C) 2007-2009 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +// TODO: x64 + +#include "StdAfx.h" +#include "GSVertexSW.h" +#include "GPUSetupPrimCodeGenerator.h" + +GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize) + : CodeGenerator(maxsize, ptr) + , m_env(env) +{ + #if _M_AMD64 + #error TODO + #endif + + Generate(); +} + +void GPUSetupPrimCodeGenerator::Generate() +{ + const int params = 0; + + const int _vertices = params + 4; + const int _dscan = params + 8; + + mov(ecx, dword[esp + _vertices]); + mov(edx, dword[esp + _dscan]); + + if(m_env.sel.tme && !m_env.sel.twin) + { + pcmpeqd(xmm0, xmm0); + + if(m_env.sel.sprite) + { + // t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001(); + + cvttps2dq(xmm1, xmmword[ecx + sizeof(GSVertexSW) * 1 + 32]); + psrld(xmm1, 8); + psrld(xmm0, 31); + psubd(xmm1, xmm0); + + // t = t.ps32(t); + // t = t.upl16(t); + + packssdw(xmm1, xmm1); + punpcklwd(xmm1, xmm1); + + // m_env.twin[2].u = t.xxxx(); + // m_env.twin[2].v = t.yyyy(); + + pshufd(xmm2, xmm1, _MM_SHUFFLE(0, 0, 0, 0)); + pshufd(xmm3, xmm1, _MM_SHUFFLE(1, 1, 1, 1)); + + movdqa(xmmword[&m_env.twin[2].u], xmm2); + movdqa(xmmword[&m_env.twin[2].v], xmm3); + } + else + { + // TODO: not really needed + + // m_env.twin[2].u = GSVector4i::x00ff(); + // m_env.twin[2].v = GSVector4i::x00ff(); + + psrlw(xmm0, 8); + + movdqa(xmmword[&m_env.twin[2].u], xmm0); + movdqa(xmmword[&m_env.twin[2].v], xmm0); + } + } + + if(m_env.sel.tme || m_env.sel.iip && m_env.sel.tfx != 3) + { + for(int i = 0; i < 3; i++) + { + movaps(Xmm(5 + i), xmmword[&m_shift[i]]); + } + + // GSVector4 dt = dscan.t; + // GSVector4 dc = dscan.c; + + movaps(xmm4, xmmword[edx]); + movaps(xmm3, xmmword[edx + 32]); + + // GSVector4i dtc8 = GSVector4i(dt * 8.0f).ps32(GSVector4i(dc * 8.0f)); + + movaps(xmm1, xmm3); + mulps(xmm1, xmm5); + cvttps2dq(xmm1, xmm1); + movaps(xmm2, xmm4); + mulps(xmm2, xmm5); + cvttps2dq(xmm2, xmm2); + packssdw(xmm1, xmm2); + + if(m_env.sel.tme) + { + // m_env.d8.st = dtc8.upl16(dtc8); + + movdqa(xmm0, xmm1); + punpcklwd(xmm0, xmm0); + movdqa(xmmword[&m_env.d8.st], xmm0); + } + + if(m_env.sel.iip && m_env.sel.tfx != 3) + { + // m_env.d8.c = dtc8.uph16(dtc8); + + punpckhwd(xmm1, xmm1); + movdqa(xmmword[&m_env.d8.c], xmm1); + } + + // xmm3 = dt + // xmm4 = dc + // xmm6 = ps0123 + // xmm7 = ps4567 + // xmm0, xmm1, xmm2, xmm5 = free + + if(m_env.sel.tme) + { + // GSVector4 dtx = dt.xxxx(); + // GSVector4 dty = dt.yyyy(); + + movaps(xmm0, xmm3); + shufps(xmm3, xmm3, _MM_SHUFFLE(0, 0, 0, 0)); + shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); + + // m_env.d.s = GSVector4i(dtx * ps0123).ps32(GSVector4i(dtx * ps4567)); + + movaps(xmm1, xmm3); + mulps(xmm3, xmm6); + mulps(xmm1, xmm7); + cvttps2dq(xmm3, xmm3); + cvttps2dq(xmm1, xmm1); + packssdw(xmm3, xmm1); + movdqa(xmmword[&m_env.d.s], xmm3); + + // m_env.d.t = GSVector4i(dty * ps0123).ps32(GSVector4i(dty * ps4567)); + + movaps(xmm1, xmm0); + mulps(xmm0, xmm6); + mulps(xmm1, xmm7); + cvttps2dq(xmm0, xmm0); + cvttps2dq(xmm1, xmm1); + packssdw(xmm0, xmm1); + movdqa(xmmword[&m_env.d.t], xmm0); + } + + // xmm4 = dc + // xmm6 = ps0123 + // xmm7 = ps4567 + // xmm0, xmm1, zmm2, xmm3, xmm5 = free + + if(m_env.sel.iip && m_env.sel.tfx != 3) + { + // GSVector4 dcx = dc.xxxx(); + // GSVector4 dcy = dc.yyyy(); + // GSVector4 dcz = dc.zzzz(); + + movaps(xmm0, xmm4); + movaps(xmm1, xmm4); + shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); + shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); + shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); + + // m_env.d.r = GSVector4i(dcx * ps0123).ps32(GSVector4i(dcx * ps4567)); + + movaps(xmm2, xmm4); + mulps(xmm4, xmm6); + mulps(xmm2, xmm7); + cvttps2dq(xmm4, xmm4); + cvttps2dq(xmm2, xmm2); + packssdw(xmm4, xmm2); + movdqa(xmmword[&m_env.d.r], xmm4); + + // m_env.d.g = GSVector4i(dcy * ps0123).ps32(GSVector4i(dcy * ps4567)); + + movaps(xmm2, xmm0); + mulps(xmm0, xmm6); + mulps(xmm2, xmm7); + cvttps2dq(xmm0, xmm0); + cvttps2dq(xmm2, xmm2); + packssdw(xmm0, xmm2); + movdqa(xmmword[&m_env.d.g], xmm0); + + // m_env.d.b = GSVector4i(dcz * ps0123).ps32(GSVector4i(dcz * ps4567)); + + movaps(xmm2, xmm1); + mulps(xmm1, xmm6); + mulps(xmm2, xmm7); + cvttps2dq(xmm1, xmm1); + cvttps2dq(xmm2, xmm2); + packssdw(xmm1, xmm2); + movdqa(xmmword[&m_env.d.b], xmm1); + } + } + + ret(); +} + +const GSVector4 GPUSetupPrimCodeGenerator::m_shift[3] = +{ + GSVector4(8.0f, 8.0f, 8.0f, 8.0f), + GSVector4(0.0f, 1.0f, 2.0f, 3.0f), + GSVector4(4.0f, 5.0f, 6.0f, 7.0f), +}; \ No newline at end of file diff --git a/plugins/GSdx/GPUSetupPrimCodeGenerator.h b/plugins/GSdx/GPUSetupPrimCodeGenerator.h new file mode 100644 index 0000000000..a45c82c388 --- /dev/null +++ b/plugins/GSdx/GPUSetupPrimCodeGenerator.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2007-2009 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GPUScanlineEnvironment.h" +#include "xbyak/xbyak.h" +#include "xbyak/xbyak_util.h" + +using namespace Xbyak; + +class GPUSetupPrimCodeGenerator : public CodeGenerator +{ + void operator = (const GPUSetupPrimCodeGenerator&); + + static const GSVector4 m_shift[3]; + + util::Cpu m_cpu; + + GPUScanlineEnvironment& m_env; + + void Generate(); + +public: + GPUSetupPrimCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize); +}; \ No newline at end of file diff --git a/plugins/GSdx/GSDrawScanline.cpp b/plugins/GSdx/GSDrawScanline.cpp index 50d485a4ef..1bbf2c838f 100644 --- a/plugins/GSdx/GSDrawScanline.cpp +++ b/plugins/GSdx/GSDrawScanline.cpp @@ -170,10 +170,6 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) f->sr = (DrawSolidRectPtr)&GSDrawScanline::DrawSolidRect; } - // - - f->sp = (SetupPrimPtr)&GSDrawScanline::SetupPrim; - // doesn't need all bits => less functions generated GSScanlineSelector sel; @@ -189,9 +185,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) sel.zb = m_env.sel.zb; sel.zoverflow = m_env.sel.zoverflow; - m_spf = m_sp.Lookup(sel); - - f->ssp = m_spf; + f->ssp = m_sp.Lookup(sel); } void GSDrawScanline::EndDraw(const GSRasterizerStats& stats) @@ -199,11 +193,6 @@ void GSDrawScanline::EndDraw(const GSRasterizerStats& stats) m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame()); } -void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) -{ - m_spf(vertices, dscan); // TODO: call this directly from rasterizer -} - void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) { ASSERT(r.y >= 0); diff --git a/plugins/GSdx/GSDrawScanline.h b/plugins/GSdx/GSDrawScanline.h index 8423a63e68..c950c383af 100644 --- a/plugins/GSdx/GSDrawScanline.h +++ b/plugins/GSdx/GSDrawScanline.h @@ -43,10 +43,6 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline GSSetupPrimCodeGenerator* Create(UINT64 key, void* ptr, size_t maxsize); } m_sp; - SetupPrimStaticPtr m_spf; - - void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan); - // class GSDrawScanlineMap : public GSCodeGeneratorFunctionMap diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index 28d8f0fbb0..34bd98caa5 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -19,7 +19,7 @@ * */ -// TODO: skip scissor test when vtrace.p.min/max is inside the region +// TODO: JIT Draw* (flags: depth, texture, color (+iip), scissor) #include "StdAfx.h" #include "GSRasterizer.h" @@ -39,7 +39,6 @@ GSRasterizer::~GSRasterizer() void GSRasterizer::Draw(const GSRasterizerData* data) { m_dsf.sr = NULL; - m_dsf.sp = NULL; m_dsf.ssl = NULL; m_dsf.ssp = NULL; @@ -98,8 +97,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor) { if((p.y % m_threads) == m_id) { - (m_ds->*m_dsf.sp)(v, *v); - // TODO: (m_dsf.ssp)(v, *v); + m_dsf.ssp(v, *v); m_dsf.ssl(p.y, p.x, p.x + 1, *v); @@ -139,7 +137,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor) { GSVertexSW dscan = dv / dv.p.xxxx(); - (m_ds->*m_dsf.sp)(v, dscan); + m_dsf.ssp(v, dscan); l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y @@ -257,8 +255,7 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor) if(py > 0) l += dl * py; - (m_ds->*m_dsf.sp)(v, dscan); - // TODO: (m_dsf.ssp)(v, dscan); + m_dsf.ssp(v, dscan); DrawTriangleSection(top, bottom, l, dl, dscan, scissor); } @@ -305,8 +302,7 @@ void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor) if(py > 0) l += dl * py; - (m_ds->*m_dsf.sp)(v, dscan); - // TODO: (m_dsf.ssp)(v, dscan); + m_dsf.ssp(v, dscan); DrawTriangleSection(top, bottom, l, dl, dscan, scissor); } @@ -328,8 +324,7 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso GSVertexSW dscan = longest * longest.p.xxxx().rcp(); - (m_ds->*m_dsf.sp)(v, dscan); - // TODO: (m_dsf.ssp)(v, dscan); + m_dsf.ssp(v, dscan); GSVertexSW& l = v[0]; GSVector4 r = v[0].p; @@ -582,8 +577,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis if(scan.p.y < (float)top) scan.t += dedge.t * ((float)top - scan.p.y); if(scan.p.x < (float)left) scan.t += dscan.t * ((float)left - scan.p.x); - (m_ds->*m_dsf.sp)(v, dscan); - // TODO: (m_dsf.ssp)(v, dscan); + m_dsf.ssp(v, dscan); for(; top < bottom; top++, scan.t += dedge.t) { diff --git a/plugins/GSdx/GSRasterizer.h b/plugins/GSdx/GSRasterizer.h index 0f33ee4ee3..de26bb97bc 100644 --- a/plugins/GSdx/GSRasterizer.h +++ b/plugins/GSdx/GSRasterizer.h @@ -49,14 +49,12 @@ class IDrawScanline { public: typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v); - typedef void (IDrawScanline::*SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan); typedef void (*DrawScanlineStaticPtr)(int top, int left, int right, const GSVertexSW& v); typedef void (*SetupPrimStaticPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan); struct Functions { - DrawSolidRectPtr sr; - SetupPrimPtr sp; + DrawSolidRectPtr sr; // TODO DrawScanlineStaticPtr ssl; SetupPrimStaticPtr ssp; }; diff --git a/plugins/GSdx/GSdx_vs2008.vcproj b/plugins/GSdx/GSdx_vs2008.vcproj index 76f63357c8..d62abfced6 100644 --- a/plugins/GSdx/GSdx_vs2008.vcproj +++ b/plugins/GSdx/GSdx_vs2008.vcproj @@ -1071,6 +1071,10 @@ RelativePath=".\GPUSettingsDlg.cpp" > + + @@ -1617,6 +1621,10 @@ RelativePath=".\GPUSettingsDlg.h" > + +