mirror of https://github.com/PCSX2/pcsx2.git
GSdx: more JIT code and a little clean-up
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@484 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
e7536c1bbb
commit
e131e22ea6
File diff suppressed because it is too large
Load Diff
|
@ -24,6 +24,7 @@
|
|||
#include "GSState.h"
|
||||
#include "GSRasterizer.h"
|
||||
#include "GSScanlineEnvironment.h"
|
||||
#include "GSSetupPrimCodeGenerator.h"
|
||||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
#include "GSAlignedClass.h"
|
||||
|
||||
|
@ -31,74 +32,36 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
|||
{
|
||||
GSScanlineEnvironment m_env;
|
||||
|
||||
static const GSVector4 m_shift[4];
|
||||
/* static const GSVector4i m_test[8];
|
||||
|
||||
//
|
||||
|
||||
class GSDrawScanlineMap : public GSFunctionMap<DWORD, DrawScanlinePtr>
|
||||
class GSSetupPrimMap : public GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, UINT64, SetupPrimStaticPtr>
|
||||
{
|
||||
DrawScanlinePtr m_default[4][4][4][2];
|
||||
GSDrawScanline* m_ds;
|
||||
|
||||
public:
|
||||
GSDrawScanlineMap();
|
||||
GSSetupPrimMap(GSDrawScanline* ds);
|
||||
GSSetupPrimCodeGenerator* Create(UINT64 key);
|
||||
} m_sp;
|
||||
|
||||
DrawScanlinePtr GetDefaultFunction(DWORD key);
|
||||
SetupPrimStaticPtr m_spf;
|
||||
|
||||
void PrintStats();
|
||||
};
|
||||
|
||||
GSDrawScanlineMap m_ds;
|
||||
*/
|
||||
//
|
||||
|
||||
class GSSetupPrimMap : public GSFunctionMap<DWORD, SetupPrimPtr>
|
||||
{
|
||||
SetupPrimPtr m_default[2][2][2][2][2];
|
||||
|
||||
public:
|
||||
GSSetupPrimMap();
|
||||
|
||||
SetupPrimPtr GetDefaultFunction(DWORD key);
|
||||
};
|
||||
|
||||
GSSetupPrimMap m_sp;
|
||||
|
||||
template<DWORD zbe, DWORD fge, DWORD tme, DWORD fst, DWORD iip>
|
||||
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
|
||||
//
|
||||
|
||||
CRBMap<UINT64, GSDrawScanlineCodeGenerator*> m_dscg;
|
||||
class GSDrawScanlineMap : public GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, UINT64, DrawScanlineStaticPtr>
|
||||
{
|
||||
GSDrawScanline* m_ds;
|
||||
|
||||
public:
|
||||
GSDrawScanlineMap(GSDrawScanline* ds);
|
||||
GSDrawScanlineCodeGenerator* Create(UINT64 key);
|
||||
} m_ds;
|
||||
|
||||
DrawScanlineStaticPtr m_dsf;
|
||||
|
||||
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
|
||||
|
||||
/*
|
||||
//
|
||||
|
||||
__forceinline GSVector4i Wrap(const GSVector4i& t);
|
||||
|
||||
__forceinline void SampleTexture(DWORD ltf, DWORD tlu, const GSVector4i& u, const GSVector4i& v, GSVector4i* c);
|
||||
__forceinline void ColorTFX(DWORD iip, DWORD tfx, const GSVector4i& rbf, const GSVector4i& gaf, GSVector4i& rbt, GSVector4i& gat);
|
||||
__forceinline void AlphaTFX(DWORD iip, DWORD tfx, DWORD tcc, const GSVector4i& gaf, GSVector4i& gat);
|
||||
__forceinline void Fog(DWORD fge, const GSVector4i& f, GSVector4i& rb, GSVector4i& ga);
|
||||
__forceinline bool TestZ(DWORD zpsm, DWORD ztst, const GSVector4i& zs, const GSVector4i& zd, GSVector4i& test);
|
||||
__forceinline bool TestAlpha(DWORD atst, DWORD afail, const GSVector4i& ga, GSVector4i& fm, GSVector4i& zm, GSVector4i& test);
|
||||
__forceinline bool TestDestAlpha(DWORD fpsm, DWORD date, const GSVector4i& fd, GSVector4i& test);
|
||||
|
||||
__forceinline void ReadPixel(int psm, int addr, GSVector4i& c) const;
|
||||
__forceinline static void WritePixel(int psm, WORD* RESTRICT vm16, DWORD c);
|
||||
__forceinline void WriteFrame(int fpsm, int rfb, GSVector4i* c, const GSVector4i& fd, const GSVector4i& fm, int addr, int fzm);
|
||||
__forceinline void WriteZBuf(int zpsm, int ztst, const GSVector4i& z, const GSVector4i& zd, const GSVector4i& zm, int addr, int fzm);
|
||||
|
||||
template<DWORD fpsm, DWORD zpsm, DWORD ztst, DWORD iip>
|
||||
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
|
||||
|
||||
template<DWORD sel>
|
||||
void DrawScanlineEx(int top, int left, int right, const GSVertexSW& v);
|
||||
*/
|
||||
//
|
||||
|
||||
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
@ -124,5 +87,5 @@ public:
|
|||
|
||||
void BeginDraw(const GSRasterizerData* data, Functions* f);
|
||||
void EndDraw(const GSRasterizerStats& stats);
|
||||
void PrintStats() {/*m_ds.PrintStats();*/}
|
||||
void PrintStats() {m_ds.PrintStats();}
|
||||
};
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
*
|
||||
*/
|
||||
|
||||
// TODO: test without sse41
|
||||
// TODO: x64
|
||||
|
||||
#include "StdAfx.h"
|
||||
|
@ -220,7 +219,6 @@ L("@@");
|
|||
}
|
||||
|
||||
packssdw(xmm0, xmm1);
|
||||
packssdw(xmm0, xmm0); // TODO: not really needed...
|
||||
pmovmskb(edx, xmm0);
|
||||
not(edx);
|
||||
|
||||
|
@ -440,35 +438,38 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
}
|
||||
}
|
||||
|
||||
if(m_env.sel.iip)
|
||||
if(m_env.sel.tfx != TFX_DECAL)
|
||||
{
|
||||
// GSVector4i vc = GSVector4i(v.c);
|
||||
|
||||
cvttps2dq(xmm6, xmmword[ebx]); // v.c
|
||||
|
||||
// vc = vc.upl16(vc.zwxy());
|
||||
|
||||
pshufd(xmm5, xmm6, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
punpcklwd(xmm6, xmm5);
|
||||
|
||||
// rb = vc.xxxx().add16(m_env.d[skip].rb);
|
||||
// ga = vc.zzzz().add16(m_env.d[skip].ga);
|
||||
|
||||
pshufd(xmm5, xmm6, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
paddw(xmm5, xmmword[eax + 16 * 4]);
|
||||
paddw(xmm6, xmmword[eax + 16 * 5]);
|
||||
|
||||
movdqa(xmmword[&m_env.temp.rb], xmm5);
|
||||
movdqa(xmmword[&m_env.temp.ga], xmm6);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.tfx == TFX_NONE)
|
||||
if(m_env.sel.iip)
|
||||
{
|
||||
movdqa(xmm5, xmmword[&m_env.c.rb]);
|
||||
movdqa(xmm6, xmmword[&m_env.c.ga]);
|
||||
// GSVector4i vc = GSVector4i(v.c);
|
||||
|
||||
cvttps2dq(xmm6, xmmword[ebx]); // v.c
|
||||
|
||||
// vc = vc.upl16(vc.zwxy());
|
||||
|
||||
pshufd(xmm5, xmm6, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
punpcklwd(xmm6, xmm5);
|
||||
|
||||
// rb = vc.xxxx().add16(m_env.d[skip].rb);
|
||||
// ga = vc.zzzz().add16(m_env.d[skip].ga);
|
||||
|
||||
pshufd(xmm5, xmm6, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
paddw(xmm5, xmmword[eax + 16 * 4]);
|
||||
paddw(xmm6, xmmword[eax + 16 * 5]);
|
||||
|
||||
movdqa(xmmword[&m_env.temp.rb], xmm5);
|
||||
movdqa(xmmword[&m_env.temp.ga], xmm6);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.tfx == TFX_NONE)
|
||||
{
|
||||
movdqa(xmm5, xmmword[&m_env.c.rb]);
|
||||
movdqa(xmm6, xmmword[&m_env.c.ga]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -566,30 +567,33 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
}
|
||||
|
||||
if(m_env.sel.iip)
|
||||
if(m_env.sel.tfx != TFX_DECAL)
|
||||
{
|
||||
// GSVector4i c = m_env.d4.c;
|
||||
|
||||
// rb = rb.add16(c.xxxx());
|
||||
// ga = ga.add16(c.yyyy());
|
||||
|
||||
movdqa(xmm7, xmmword[&m_env.d4.c]);
|
||||
|
||||
pshufd(xmm5, xmm7, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm6, xmm7, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
paddw(xmm5, xmmword[&m_env.temp.rb]);
|
||||
paddw(xmm6, xmmword[&m_env.temp.ga]);
|
||||
|
||||
movdqa(xmmword[&m_env.temp.rb], xmm5);
|
||||
movdqa(xmmword[&m_env.temp.ga], xmm6);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.tfx == TFX_NONE)
|
||||
if(m_env.sel.iip)
|
||||
{
|
||||
movdqa(xmm5, xmmword[&m_env.c.rb]);
|
||||
movdqa(xmm6, xmmword[&m_env.c.ga]);
|
||||
// GSVector4i c = m_env.d4.c;
|
||||
|
||||
// rb = rb.add16(c.xxxx());
|
||||
// ga = ga.add16(c.yyyy());
|
||||
|
||||
movdqa(xmm7, xmmword[&m_env.d4.c]);
|
||||
|
||||
pshufd(xmm5, xmm7, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm6, xmm7, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
paddw(xmm5, xmmword[&m_env.temp.rb]);
|
||||
paddw(xmm6, xmmword[&m_env.temp.ga]);
|
||||
|
||||
movdqa(xmmword[&m_env.temp.rb], xmm5);
|
||||
movdqa(xmmword[&m_env.temp.ga], xmm6);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.tfx == TFX_NONE)
|
||||
{
|
||||
movdqa(xmm5, xmmword[&m_env.c.rb]);
|
||||
movdqa(xmm6, xmmword[&m_env.c.ga]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -620,19 +624,27 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
|
||||
if(!m_env.sel.sprite)
|
||||
{
|
||||
static float half = 0.5f;
|
||||
if(m_env.sel.zoverflow)
|
||||
{
|
||||
static float half = 0.5f;
|
||||
|
||||
movss(temp1, dword[&half]);
|
||||
shufps(temp1, temp1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
mulps(temp1, xmm0);
|
||||
cvttps2dq(temp1, temp1);
|
||||
movss(temp1, dword[&half]);
|
||||
shufps(temp1, temp1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
mulps(temp1, xmm0);
|
||||
cvttps2dq(temp1, temp1);
|
||||
pslld(temp1, 1);
|
||||
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
pcmpeqd(temp1, temp1);
|
||||
psrld(temp1, 31);
|
||||
pand(temp1, xmm0);
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
pcmpeqd(temp2, temp2);
|
||||
psrld(temp2, 31);
|
||||
pand(xmm0, temp2);
|
||||
|
||||
por(xmm0, temp1);
|
||||
por(xmm0, temp1);
|
||||
}
|
||||
else
|
||||
{
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
|
||||
if(m_env.sel.zwrite)
|
||||
{
|
||||
|
@ -649,26 +661,29 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
movdqa(xmmword[&m_env.temp.zd], temp1);
|
||||
}
|
||||
|
||||
// GSVector4i o = GSVector4i::x80000000();
|
||||
|
||||
pcmpeqd(temp2, temp2);
|
||||
pslld(temp2, 31);
|
||||
|
||||
// GSVector4i zso = zs - o;
|
||||
|
||||
psubd(xmm0, temp2);
|
||||
|
||||
// zpsm == 0: GSVector4i zdo = zd - o;
|
||||
// zpsm == 1: GSVector4i zdo = (zd & GSVector4i::x00ffffff()) - o;
|
||||
// zpsm == 2: GSVector4i zdo = (zd & GSVector4i::x0000ffff()) - o;
|
||||
// zd &= 0xffffffff >> m_env.sel.zpsm * 8;
|
||||
|
||||
if(m_env.sel.zpsm)
|
||||
{
|
||||
pslld(xmm1, m_env.sel.zpsm * 8);
|
||||
psrld(xmm1, m_env.sel.zpsm * 8);
|
||||
pslld(temp1, m_env.sel.zpsm * 8);
|
||||
psrld(temp1, m_env.sel.zpsm * 8);
|
||||
}
|
||||
|
||||
psubd(temp1, temp2);
|
||||
if(m_env.sel.zoverflow || m_env.sel.zpsm == 0)
|
||||
{
|
||||
// GSVector4i o = GSVector4i::x80000000();
|
||||
|
||||
pcmpeqd(temp2, temp2);
|
||||
pslld(temp2, 31);
|
||||
|
||||
// GSVector4i zso = zs - o;
|
||||
|
||||
psubd(xmm0, temp2);
|
||||
|
||||
// GSVector4i zdo = zd - o;
|
||||
|
||||
psubd(temp1, temp2);
|
||||
}
|
||||
|
||||
switch(m_env.sel.ztst)
|
||||
{
|
||||
|
@ -688,7 +703,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
break;
|
||||
}
|
||||
|
||||
alltrue(xmm7, eax, "step");
|
||||
alltrue();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -701,6 +716,9 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
mov(ebx, dword[&m_env.tex]);
|
||||
|
||||
// ebx = tex
|
||||
// edx = clut
|
||||
|
||||
if(!m_env.sel.fst)
|
||||
{
|
||||
// GSVector4 w = q.rcp();
|
||||
|
@ -790,7 +808,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// GSVector4i x0 = uv0.upl16();
|
||||
|
||||
pxor(xmm0, xmm0);
|
||||
movd(xmm1, dword[&m_env.tw]);
|
||||
movd(xmm1, ptr[&m_env.tw]);
|
||||
|
||||
movdqa(xmm4, xmm2);
|
||||
punpckhwd(xmm2, xmm0);
|
||||
|
@ -810,16 +828,16 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// GSVector4i y1 = uv1.uph16() << tw;
|
||||
// GSVector4i x1 = uv1.upl16();
|
||||
|
||||
movdqa(xmm5, xmm3);
|
||||
movdqa(xmm6, xmm3);
|
||||
punpckhwd(xmm3, xmm0);
|
||||
punpcklwd(xmm5, xmm0);
|
||||
punpcklwd(xmm6, xmm0);
|
||||
pslld(xmm3, xmm1);
|
||||
|
||||
// xmm2 = y0
|
||||
// xmm3 = y1
|
||||
// xmm4 = x0
|
||||
// xmm5 = x1
|
||||
// xmm0, xmm1, xmm6 = free
|
||||
// xmm6 = x1
|
||||
// xmm0, xmm5, xmm6 = free
|
||||
// xmm7 = used
|
||||
|
||||
// GSVector4i addr00 = y0 + x0;
|
||||
|
@ -827,19 +845,19 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// GSVector4i addr10 = y1 + x0;
|
||||
// GSVector4i addr11 = y1 + x1;
|
||||
|
||||
movdqa(xmm0, xmm2);
|
||||
paddd(xmm2, xmm5);
|
||||
movdqa(xmm5, xmm2);
|
||||
paddd(xmm5, xmm4);
|
||||
paddd(xmm2, xmm6);
|
||||
|
||||
movdqa(xmm0, xmm3);
|
||||
paddd(xmm0, xmm4);
|
||||
paddd(xmm3, xmm6);
|
||||
|
||||
movdqa(xmm1, xmm3);
|
||||
paddd(xmm3, xmm5);
|
||||
paddd(xmm1, xmm4);
|
||||
|
||||
// xmm0 = addr00
|
||||
// xmm5 = addr00
|
||||
// xmm2 = addr01
|
||||
// xmm1 = addr10
|
||||
// xmm0 = addr10
|
||||
// xmm3 = addr11
|
||||
// xmm4, xmm5, xmm6 = free
|
||||
// xmm1, xmm4, xmm6 = free
|
||||
// xmm7 = used
|
||||
|
||||
// c00 = addr00.gather32_32((const DWORD/BYTE*)tex[, clut]);
|
||||
|
@ -847,118 +865,113 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// c10 = addr10.gather32_32((const DWORD/BYTE*)tex[, clut]);
|
||||
// c11 = addr11.gather32_32((const DWORD/BYTE*)tex[, clut]);
|
||||
|
||||
ReadTexel(xmm5, xmm0, ebx, xmm4, xmm6);
|
||||
ReadTexel(xmm6, xmm5, xmm1, xmm4);
|
||||
|
||||
// xmm0, xmm4, xmm6 = free
|
||||
// xmm2, xmm5, xmm1 = free
|
||||
|
||||
ReadTexel(xmm6, xmm2, ebx, xmm0, xmm4);
|
||||
ReadTexel(xmm4, xmm2, xmm5, xmm1);
|
||||
|
||||
// xmm0, xmm2, xmm4 = free
|
||||
// xmm0, xmm2, xmm5 = free
|
||||
|
||||
ReadTexel(xmm4, xmm1, ebx, xmm2, xmm0);
|
||||
ReadTexel(xmm1, xmm0, xmm2, xmm5);
|
||||
|
||||
// xmm0, xmm1, xmm2 = free
|
||||
// xmm3, xmm0, xmm2 = free
|
||||
|
||||
ReadTexel(xmm0, xmm3, ebx, xmm1, xmm2);
|
||||
ReadTexel(xmm5, xmm3, xmm0, xmm2);
|
||||
|
||||
// xmm5 = c00
|
||||
// xmm6 = c01
|
||||
// xmm4 = c10
|
||||
// xmm0 = c11
|
||||
// xmm1, xmm2, xmm3 = free
|
||||
// xmm6 = c00
|
||||
// xmm4 = c01
|
||||
// xmm1 = c10
|
||||
// xmm5 = c11
|
||||
// xmm0, xmm2, xmm3 = free
|
||||
// xmm7 = used
|
||||
|
||||
movdqa(xmm1, xmmword[&m_env.temp.uf]);
|
||||
movdqa(xmm0, xmmword[&m_env.temp.uf]);
|
||||
|
||||
// GSVector4i rb00 = c00 & mask;
|
||||
// GSVector4i ga00 = (c00 >> 8) & mask;
|
||||
|
||||
movdqa(xmm2, xmm5);
|
||||
movdqa(xmm2, xmm6);
|
||||
psllw(xmm2, 8);
|
||||
psrlw(xmm2, 8);
|
||||
psrlw(xmm5, 8);
|
||||
psrlw(xmm6, 8);
|
||||
|
||||
// GSVector4i rb01 = c01 & mask;
|
||||
// GSVector4i ga01 = (c01 >> 8) & mask;
|
||||
|
||||
movdqa(xmm3, xmm6);
|
||||
movdqa(xmm3, xmm4);
|
||||
psllw(xmm3, 8);
|
||||
psrlw(xmm3, 8);
|
||||
psrlw(xmm6, 8);
|
||||
psrlw(xmm4, 8);
|
||||
|
||||
// xmm1 = uf
|
||||
// xmm0 = uf
|
||||
// xmm2 = rb00
|
||||
// xmm3 = rb01
|
||||
// xmm5 = ga00
|
||||
// xmm6 = ga01
|
||||
// xmm4 = c10
|
||||
// xmm0 = c11
|
||||
// xmm6 = ga00
|
||||
// xmm4 = ga01
|
||||
// xmm1 = c10
|
||||
// xmm5 = c11
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb01, uf);
|
||||
// ga00 = ga00.lerp16<0>(ga01, uf);
|
||||
|
||||
lerp16<0>(xmm3, xmm2, xmm1);
|
||||
lerp16<0>(xmm6, xmm5, xmm1);
|
||||
lerp16<0>(xmm3, xmm2, xmm0);
|
||||
lerp16<0>(xmm4, xmm6, xmm0);
|
||||
|
||||
// xmm1 = uf
|
||||
// xmm0 = uf
|
||||
// xmm3 = rb00
|
||||
// xmm6 = ga00
|
||||
// xmm4 = c10
|
||||
// xmm0 = c11
|
||||
// xmm2, xmm5 = free
|
||||
// xmm4 = ga00
|
||||
// xmm1 = c10
|
||||
// xmm5 = c11
|
||||
// xmm2, xmm6 = free
|
||||
// xmm7 = used
|
||||
|
||||
// GSVector4i rb10 = c10 & mask;
|
||||
// GSVector4i rb11 = c11 & mask;
|
||||
|
||||
movdqa(xmm2, xmm4);
|
||||
psllw(xmm4, 8);
|
||||
psrlw(xmm4, 8);
|
||||
movdqa(xmm2, xmm1);
|
||||
psllw(xmm1, 8);
|
||||
psrlw(xmm1, 8);
|
||||
psrlw(xmm2, 8);
|
||||
|
||||
// GSVector4i ga10 = (c10 >> 8) & mask;
|
||||
// GSVector4i ga11 = (c11 >> 8) & mask;
|
||||
|
||||
movdqa(xmm5, xmm0);
|
||||
psllw(xmm0, 8);
|
||||
psrlw(xmm0, 8);
|
||||
movdqa(xmm6, xmm5);
|
||||
psllw(xmm5, 8);
|
||||
psrlw(xmm5, 8);
|
||||
psrlw(xmm6, 8);
|
||||
|
||||
// xmm1 = uf
|
||||
// xmm0 = uf
|
||||
// xmm3 = rb00
|
||||
// xmm6 = ga00
|
||||
// xmm4 = rb10
|
||||
// xmm0 = rb11
|
||||
// xmm4 = ga00
|
||||
// xmm1 = rb10
|
||||
// xmm5 = rb11
|
||||
// xmm2 = ga10
|
||||
// xmm5 = ga11
|
||||
// xmm6 = ga11
|
||||
// xmm7 = used
|
||||
|
||||
// rb10 = rb10.lerp16<0>(rb11, uf);
|
||||
// ga10 = ga10.lerp16<0>(ga11, uf);
|
||||
|
||||
lerp16<0>(xmm0, xmm4, xmm1);
|
||||
lerp16<0>(xmm5, xmm2, xmm1);
|
||||
lerp16<0>(xmm5, xmm1, xmm0);
|
||||
lerp16<0>(xmm6, xmm2, xmm0);
|
||||
|
||||
// xmm3 = rb00
|
||||
// xmm6 = ga00
|
||||
// xmm0 = rb10
|
||||
// xmm5 = ga10
|
||||
// xmm1, xmm2, xmm4 = free
|
||||
// xmm4 = ga00
|
||||
// xmm5 = rb10
|
||||
// xmm6 = ga10
|
||||
// xmm0, xmm1, xmm2 = free
|
||||
// xmm7 = used
|
||||
|
||||
// rb00 = rb00.lerp16<0>(rb10, vf);
|
||||
// ga00 = ga00.lerp16<0>(ga10, vf);
|
||||
|
||||
movdqa(xmm1, xmmword[&m_env.temp.vf]);
|
||||
movdqa(xmm0, xmmword[&m_env.temp.vf]);
|
||||
|
||||
lerp16<0>(xmm0, xmm3, xmm1);
|
||||
lerp16<0>(xmm5, xmm6, xmm1);
|
||||
|
||||
// TODO: make rb/ga end up in xmm5/xmm6
|
||||
|
||||
movdqa(xmm6, xmm5);
|
||||
movdqa(xmm5, xmm0);
|
||||
lerp16<0>(xmm5, xmm3, xmm0);
|
||||
lerp16<0>(xmm6, xmm4, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -968,7 +981,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// c00 = addr00.gather32_32((const DWORD/BYTE*)tex[, clut]);
|
||||
|
||||
ReadTexel(xmm5, xmm2, ebx, xmm0, xmm1);
|
||||
ReadTexel(xmm5, xmm2, xmm0, xmm1);
|
||||
|
||||
// GSVector4i mask = GSVector4i::x00ff();
|
||||
|
||||
|
@ -1248,7 +1261,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
|
|||
case AFAIL_KEEP:
|
||||
// test |= t;
|
||||
por(xmm7, xmm1);
|
||||
alltrue(xmm7, eax, "step");
|
||||
alltrue();
|
||||
break;
|
||||
|
||||
case AFAIL_FB_ONLY:
|
||||
|
@ -1428,7 +1441,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
|
||||
por(xmm7, xmm1);
|
||||
|
||||
alltrue(xmm7, eax, "step");
|
||||
alltrue();
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteZBuf()
|
||||
|
@ -1451,12 +1464,12 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
// if(fzm & 0x30) GSVector4i::storel(&vm16[addr + 0], zs);
|
||||
// if(fzm & 0xc0) GSVector4i::storeh(&vm16[addr + 8], zs);
|
||||
|
||||
test(dl, 0x30);
|
||||
test(dh, 0x0f);
|
||||
je("wz30");
|
||||
movq(qword[ebp * 2 + (size_t)m_env.vm], xmm1);
|
||||
L("wz30");
|
||||
|
||||
test(dl, 0xc0);
|
||||
test(dh, 0xf0);
|
||||
je("wzc0");
|
||||
movhps(qword[ebp * 2 + (size_t)m_env.vm + 8 * 2], xmm1);
|
||||
L("wzc0");
|
||||
|
@ -1468,22 +1481,22 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
// if(fzm & 0x40) WritePixel(zpsm, &vm16[addr + 8], zs.extract32<2>());
|
||||
// if(fzm & 0x80) WritePixel(zpsm, &vm16[addr + 10], zs.extract32<3>());
|
||||
|
||||
test(dl, 0x10);
|
||||
test(dh, 0x03);
|
||||
je("wz10");
|
||||
WritePixel(xmm1, xmm0, ebp, 0, m_env.sel.zpsm);
|
||||
L("wz10");
|
||||
|
||||
test(dl, 0x20);
|
||||
test(dh, 0x0c);
|
||||
je("wz20");
|
||||
WritePixel(xmm1, xmm0, ebp, 1, m_env.sel.zpsm);
|
||||
L("wz20");
|
||||
|
||||
test(dl, 0x40);
|
||||
test(dh, 0x30);
|
||||
je("wz40");
|
||||
WritePixel(xmm1, xmm0, ebp, 2, m_env.sel.zpsm);
|
||||
L("wz40");
|
||||
|
||||
test(dl, 0x80);
|
||||
test(dh, 0xc0);
|
||||
je("wz80");
|
||||
WritePixel(xmm1, xmm0, ebp, 3, m_env.sel.zpsm);
|
||||
L("wz80");
|
||||
|
@ -1597,7 +1610,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
psllw(xmm7, 7);
|
||||
break;
|
||||
case 2:
|
||||
movdqa(xmm7, xmmword[&m_env.afix2]);
|
||||
movdqa(xmm7, xmmword[&m_env.afix]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1627,7 +1640,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
}
|
||||
|
||||
if(0)//m_env.sel.pabe)
|
||||
if(m_env.sel.pabe)
|
||||
{
|
||||
// mask = (c[1] << 8).sra32(31);
|
||||
|
||||
|
@ -1697,7 +1710,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
}
|
||||
|
||||
if(0)//m_env.sel.pabe)
|
||||
if(m_env.sel.pabe)
|
||||
{
|
||||
if(!m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
|
@ -1718,13 +1731,6 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
{
|
||||
mix16(xmm6, xmm4, xmm7);
|
||||
}
|
||||
|
||||
if(m_env.sel.pabe)
|
||||
{
|
||||
printf("PABE\n");
|
||||
Sleep(1000);
|
||||
MessageBeep(-1);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteFrame(int params)
|
||||
|
@ -1816,12 +1822,12 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
|
|||
// if(fzm & 0x03) GSVector4i::storel(&vm16[addr + 0], fs);
|
||||
// if(fzm & 0x0c) GSVector4i::storeh(&vm16[addr + 8], fs);
|
||||
|
||||
test(dl, 0x03);
|
||||
test(dl, 0x0f);
|
||||
je("wf03");
|
||||
movq(qword[ebx * 2 + (size_t)m_env.vm], xmm5);
|
||||
L("wf03");
|
||||
|
||||
test(dl, 0x0c);
|
||||
test(dl, 0xf0);
|
||||
je("wf0c");
|
||||
movhps(qword[ebx * 2 + (size_t)m_env.vm + 8 * 2], xmm5);
|
||||
L("wf0c");
|
||||
|
@ -1833,22 +1839,22 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
|
|||
// if(fzm & 0x04) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>());
|
||||
// if(fzm & 0x08) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>());
|
||||
|
||||
test(dl, 0x01);
|
||||
test(dl, 0x03);
|
||||
je("wf01");
|
||||
WritePixel(xmm5, xmm0, ebx, 0, m_env.sel.fpsm);
|
||||
L("wf01");
|
||||
|
||||
test(dl, 0x02);
|
||||
test(dl, 0x0c);
|
||||
je("wf02");
|
||||
WritePixel(xmm5, xmm0, ebx, 1, m_env.sel.fpsm);
|
||||
L("wf02");
|
||||
|
||||
test(dl, 0x04);
|
||||
test(dl, 0x30);
|
||||
je("wf04");
|
||||
WritePixel(xmm5, xmm0, ebx, 2, m_env.sel.fpsm);
|
||||
L("wf04");
|
||||
|
||||
test(dl, 0x08);
|
||||
test(dl, 0xc0);
|
||||
je("wf08");
|
||||
WritePixel(xmm5, xmm0, ebx, 3, m_env.sel.fpsm);
|
||||
L("wf08");
|
||||
|
@ -1911,26 +1917,26 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Xmm& temp, co
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, const Xmm& temp1, const Xmm& temp2)
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, const Xmm& temp1, const Xmm& temp2)
|
||||
{
|
||||
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
ReadTexel(dst, addr, base, 0);
|
||||
ReadTexel(dst, addr, base, 1);
|
||||
ReadTexel(dst, addr, base, 2);
|
||||
ReadTexel(dst, addr, base, 3);
|
||||
ReadTexel(dst, addr, 0);
|
||||
ReadTexel(dst, addr, 1);
|
||||
ReadTexel(dst, addr, 2);
|
||||
ReadTexel(dst, addr, 3);
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadTexel(dst, addr, base, 0);
|
||||
ReadTexel(dst, addr, 0);
|
||||
psrldq(addr, 4); // shuffle instead? (1 2 3 0 ~ rotation)
|
||||
ReadTexel(temp1, addr, base, 0);
|
||||
ReadTexel(temp1, addr, 0);
|
||||
psrldq(addr, 4);
|
||||
punpckldq(dst, temp1);
|
||||
|
||||
ReadTexel(temp1, addr, base, 0);
|
||||
ReadTexel(temp1, addr, 0);
|
||||
psrldq(addr, 4);
|
||||
ReadTexel(temp2, addr, base, 0);
|
||||
ReadTexel(temp2, addr, 0);
|
||||
// psrldq(addr, 4);
|
||||
punpckldq(temp1, temp2);
|
||||
|
||||
|
@ -1938,7 +1944,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, con
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, uint8 i)
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
|
||||
{
|
||||
if(!m_cpu.has(util::Cpu::tSSE41) && i > 0)
|
||||
{
|
||||
|
@ -1948,14 +1954,9 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, con
|
|||
if(i == 0) movd(eax, addr);
|
||||
else pextrd(eax, addr, i);
|
||||
|
||||
const Address& src = m_env.sel.tlu
|
||||
? dword[eax * 4 + (size_t)m_env.clut]
|
||||
: dword[base + eax * 4];
|
||||
if(m_env.sel.tlu) movzx(eax, byte[ebx + eax]);
|
||||
|
||||
if(m_env.sel.tlu)
|
||||
{
|
||||
movzx(eax, byte[base + eax]);
|
||||
}
|
||||
const Address& src = m_env.sel.tlu ? ptr[eax * 4 + (size_t)m_env.clut] : ptr[ebx + eax * 4];
|
||||
|
||||
if(i == 0) movd(dst, src);
|
||||
else pinsrd(dst, src, i);
|
||||
|
@ -2014,11 +2015,11 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::alltrue(const Xmm& a, const Reg32& temp, LPCTSTR label)
|
||||
void GSDrawScanlineCodeGenerator::alltrue()
|
||||
{
|
||||
pmovmskb(temp, a);
|
||||
cmp(temp, 0xffff);
|
||||
je(label, T_NEAR);
|
||||
pmovmskb(eax, xmm7);
|
||||
cmp(eax, 0xffff);
|
||||
je("step", T_NEAR);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b)
|
||||
|
|
|
@ -57,14 +57,14 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
|
|||
|
||||
void ReadPixel(const Xmm& dst, const Reg32& addr);
|
||||
void WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, uint8 i, int psm);
|
||||
void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, const Xmm& temp1, const Xmm& temp2);
|
||||
void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, uint8 i);
|
||||
void ReadTexel(const Xmm& dst, const Xmm& addr, const Xmm& temp1, const Xmm& temp2);
|
||||
void ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i);
|
||||
|
||||
template<int shift> void modulate16(const Xmm& a, const Operand& f);
|
||||
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Xmm& f);
|
||||
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
|
||||
void clamp16(const Xmm& a, const Xmm& temp);
|
||||
void alltrue(const Xmm& a, const Reg32& temp, LPCTSTR label);
|
||||
void alltrue();
|
||||
void blend8(const Xmm& a, const Xmm& b);
|
||||
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
|
||||
void blend8r(const Xmm& b, const Xmm& a);
|
||||
|
|
|
@ -82,9 +82,9 @@ public:
|
|||
dimx[1] = GSVector4i(DIMX.DM00, 0, DIMX.DM01, 0, DIMX.DM02, 0, DIMX.DM03, 0);
|
||||
dimx[0] = dimx[1].xxzzlh();
|
||||
dimx[3] = GSVector4i(DIMX.DM10, 0, DIMX.DM11, 0, DIMX.DM12, 0, DIMX.DM13, 0),
|
||||
dimx[2] = dimx[2].xxzzlh();
|
||||
dimx[2] = dimx[3].xxzzlh();
|
||||
dimx[5] = GSVector4i(DIMX.DM20, 0, DIMX.DM21, 0, DIMX.DM22, 0, DIMX.DM23, 0),
|
||||
dimx[4] = dimx[4].xxzzlh();
|
||||
dimx[4] = dimx[5].xxzzlh();
|
||||
dimx[7] = GSVector4i(DIMX.DM30, 0, DIMX.DM31, 0, DIMX.DM32, 0, DIMX.DM33, 0),
|
||||
dimx[6] = dimx[7].xxzzlh();
|
||||
}
|
||||
|
|
|
@ -149,8 +149,8 @@ public:
|
|||
__int64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
|
||||
__int64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
|
||||
|
||||
printf("[%08x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n",
|
||||
key, !m_map.Lookup(key) ? '*' : ' ',
|
||||
printf("[%012I64x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n",
|
||||
(UINT64)key, !m_map.Lookup(key) ? '*' : ' ',
|
||||
(float)(tpf * 10000 / 50000000) / 100,
|
||||
(float)(tpf * 10000 / ttpf) / 100,
|
||||
p->frames, p->pixels,
|
||||
|
@ -159,3 +159,43 @@ public:
|
|||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class CG, class KEY, class VALUE>
|
||||
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
|
||||
{
|
||||
CRBMap<UINT64, CG*> m_cgmap;
|
||||
|
||||
protected:
|
||||
virtual CG* Create(KEY key) = 0;
|
||||
|
||||
public:
|
||||
GSCodeGeneratorFunctionMap()
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~GSCodeGeneratorFunctionMap()
|
||||
{
|
||||
POSITION pos = m_cgmap.GetHeadPosition();
|
||||
|
||||
while(pos)
|
||||
{
|
||||
delete m_cgmap.GetNextValue(pos);
|
||||
}
|
||||
}
|
||||
|
||||
VALUE GetDefaultFunction(KEY key)
|
||||
{
|
||||
CG* cg = NULL;
|
||||
|
||||
if(!m_cgmap.Lookup(key, cg))
|
||||
{
|
||||
cg = Create(key);
|
||||
|
||||
ASSERT(cg);
|
||||
|
||||
m_cgmap.SetAt(key, cg);
|
||||
}
|
||||
|
||||
return (VALUE)cg->getCode();
|
||||
}
|
||||
};
|
||||
|
|
|
@ -39,6 +39,8 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
|
|||
m_dsf.sl = NULL;
|
||||
m_dsf.sr = NULL;
|
||||
m_dsf.sp = NULL;
|
||||
m_dsf.ssl = NULL;
|
||||
m_dsf.ssp = NULL;
|
||||
|
||||
m_ds->BeginDraw(data, &m_dsf);
|
||||
|
||||
|
@ -96,8 +98,10 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
|
|||
if((p.y % m_threads) == m_id)
|
||||
{
|
||||
(m_ds->*m_dsf.sp)(v, *v);
|
||||
// TODO: (m_dsf.ssp)(v, *v);
|
||||
|
||||
(m_ds->*m_dsf.sl)(p.y, p.x, p.x + 1, *v);
|
||||
// TODO: (m_dsf.ssl)(p.y, p.x, p.x + 1, *v);
|
||||
|
||||
m_stats.pixels++;
|
||||
}
|
||||
|
@ -254,6 +258,7 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
|
|||
if(py > 0) l += dl * py;
|
||||
|
||||
(m_ds->*m_dsf.sp)(v, dscan);
|
||||
// TODO: (m_dsf.ssp)(v, dscan);
|
||||
|
||||
DrawTriangleSection(top, bottom, l, dl, dscan, scissor);
|
||||
}
|
||||
|
@ -301,6 +306,7 @@ void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
|
|||
if(py > 0) l += dl * py;
|
||||
|
||||
(m_ds->*m_dsf.sp)(v, dscan);
|
||||
// TODO: (m_dsf.ssp)(v, dscan);
|
||||
|
||||
DrawTriangleSection(top, bottom, l, dl, dscan, scissor);
|
||||
}
|
||||
|
@ -323,6 +329,7 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso
|
|||
GSVertexSW dscan = longest * longest.p.xxxx().rcp();
|
||||
|
||||
(m_ds->*m_dsf.sp)(v, dscan);
|
||||
// TODO: (m_dsf.ssp)(v, dscan);
|
||||
|
||||
GSVertexSW& l = v[0];
|
||||
GSVector4 r = v[0].p;
|
||||
|
@ -434,6 +441,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
}
|
||||
|
||||
(m_ds->*m_dsf.sl)(top, left, right, scan);
|
||||
// TODO: (m_dsf.ssl)(top, left, right, scan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -485,6 +493,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
}
|
||||
|
||||
(m_ds->*m_dsf.sl)(top, left, right, scan);
|
||||
// TODO: (m_dsf.ssl)(top, left, right, scan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -568,12 +577,14 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
|
|||
if(scan.p.x < (float)left) scan.t += dscan.t * ((float)left - scan.p.x);
|
||||
|
||||
(m_ds->*m_dsf.sp)(v, dscan);
|
||||
// TODO: (m_dsf.ssp)(v, dscan);
|
||||
|
||||
for(; top < bottom; top++, scan.t += dedge.t)
|
||||
{
|
||||
if((top % m_threads) == m_id)
|
||||
{
|
||||
(m_ds->*m_dsf.sl)(top, left, right, scan);
|
||||
// TODO: (m_dsf.ssl)(top, left, right, scan);
|
||||
|
||||
m_stats.pixels += right - left;
|
||||
}
|
||||
|
|
|
@ -55,12 +55,15 @@ public:
|
|||
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
|
||||
typedef void (IDrawScanline::*SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
typedef void (*DrawScanlineStaticPtr)(int top, int left, int right, const GSVertexSW& v);
|
||||
typedef void (*SetupPrimStaticPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
|
||||
struct Functions
|
||||
{
|
||||
DrawScanlinePtr sl;
|
||||
DrawSolidRectPtr sr;
|
||||
SetupPrimPtr sp;
|
||||
DrawScanlineStaticPtr ssl;
|
||||
SetupPrimStaticPtr ssp;
|
||||
};
|
||||
|
||||
virtual ~IDrawScanline() {}
|
||||
|
|
|
@ -488,6 +488,7 @@ protected:
|
|||
{
|
||||
p.sel.zpsm = GSUtil::EncodePSM(context->ZBUF.PSM);
|
||||
p.sel.ztst = ztest ? context->TEST.ZTST : 1;
|
||||
p.sel.zoverflow = GSVector4i(m_vtrace.m_max.p).z == 0x80000000;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -59,6 +59,7 @@ union GSScanlineSelector
|
|||
DWORD colclamp:1; // 39
|
||||
DWORD fba:1; // 40
|
||||
DWORD dthe:1; // 41
|
||||
DWORD zoverflow:1; // 42 (z max >= 0x80000000)
|
||||
};
|
||||
|
||||
struct
|
||||
|
@ -130,11 +131,8 @@ __declspec(align(16)) struct GSScanlineEnvironment
|
|||
|
||||
GSVector4i fm, zm;
|
||||
struct {GSVector4i min, max, mask, invmask;} t; // [u] x 4 [v] x 4
|
||||
GSVector4i datm;
|
||||
GSVector4i colclamp;
|
||||
GSVector4i fba;
|
||||
GSVector4i aref;
|
||||
GSVector4i afix, afix2;
|
||||
GSVector4i afix;
|
||||
GSVector4i frb, fga;
|
||||
|
||||
struct {GSVector4 z, s, t, q; GSVector4i rb, ga, f, si, ti, _pad[7];} d[4];
|
||||
|
|
|
@ -0,0 +1,383 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
// TODO: x64
|
||||
|
||||
#include "StdAfx.h"
|
||||
#include "GSSetupPrimCodeGenerator.h"
|
||||
|
||||
// Binds the generator to the given scanline environment, works out from its
// selector which setup stages are required (m_en) and emits the routine
// right away by calling Generate().
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env)
	: CodeGenerator(DEFAULT_MAX_CODE_SIZE, 0)
	, m_env(env)
{
	// the emitter below reads its arguments through esp and uses 32-bit registers only

	#if _M_AMD64
	#error TODO
	#endif

	// fog, texture and color setup are all conditional on sel.fb

	const bool fb = m_env.sel.fb != 0;

	m_en.z = (m_env.sel.zb != 0) ? 1 : 0;
	m_en.f = (fb && m_env.sel.fge) ? 1 : 0;
	m_en.t = (fb && m_env.sel.tfx != TFX_NONE) ? 1 : 0;
	m_en.c = (fb && m_env.sel.tfx != TFX_DECAL) ? 1 : 0;

	Generate();
}
|
||||
|
||||
void GSSetupPrimCodeGenerator::Generate()
|
||||
{
|
||||
const int params = 0;
|
||||
|
||||
const int _vertices = params + 4;
|
||||
const int _dscan = params + 8;
|
||||
|
||||
mov(ecx, dword[esp + _vertices]);
|
||||
mov(edx, dword[esp + _dscan]);
|
||||
|
||||
if((m_en.z || m_en.f) && !m_env.sel.sprite || m_en.t || m_en.c && m_env.sel.iip)
|
||||
{
|
||||
for(int i = 0; i < 5; i++)
|
||||
{
|
||||
movaps(Xmm(3 + i), xmmword[&m_shift[i]]);
|
||||
}
|
||||
}
|
||||
|
||||
Depth();
|
||||
|
||||
Texture();
|
||||
|
||||
Color();
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
void GSSetupPrimCodeGenerator::Depth()
|
||||
{
|
||||
if(!m_en.z && !m_en.f)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if(!m_env.sel.sprite)
|
||||
{
|
||||
// GSVector4 t = dscan.p;
|
||||
|
||||
movaps(xmm0, xmmword[edx + 16]);
|
||||
|
||||
if(m_en.f)
|
||||
{
|
||||
// GSVector4 df = p.wwww();
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
// m_env.d4.f = GSVector4i(df * 4.0f).xxzzlh();
|
||||
|
||||
movaps(xmm2, xmm1);
|
||||
mulps(xmm2, xmm3);
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(xmmword[&m_env.d4.f], xmm2);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// m_env.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
movaps(xmm2, xmm1);
|
||||
mulps(xmm2, Xmm(4 + i));
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(xmmword[&m_env.d[i].f], xmm2);
|
||||
}
|
||||
}
|
||||
|
||||
if(m_en.z)
|
||||
{
|
||||
// GSVector4 dz = p.zzzz();
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
// m_env.d4.z = dz * 4.0f;
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
mulps(xmm1, xmm3);
|
||||
movdqa(xmmword[&m_env.d4.z], xmm1);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// m_env.d[i].z = dz * m_shift[i];
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
mulps(xmm1, Xmm(4 + i));
|
||||
movdqa(xmmword[&m_env.d[i].z], xmm1);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector4 p = vertices[0].p;
|
||||
|
||||
movaps(xmm0, xmmword[ecx + 16]);
|
||||
|
||||
if(m_en.f)
|
||||
{
|
||||
// m_env.p.f = GSVector4i(p).zzzzh().zzzz();
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
movdqa(xmmword[&m_env.p.f], xmm1);
|
||||
}
|
||||
|
||||
if(m_en.z)
|
||||
{
|
||||
// GSVector4 z = p.zzzz();
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
if(m_env.sel.zoverflow)
|
||||
{
|
||||
// m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
|
||||
static const float half = 0.5f;
|
||||
|
||||
movss(xmm1, dword[&half]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
mulps(xmm1, xmm0);
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
pslld(xmm1, 1);
|
||||
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
pcmpeqd(xmm2, xmm2);
|
||||
psrld(xmm2, 31);
|
||||
pand(xmm0, xmm2);
|
||||
|
||||
por(xmm0, xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.p.z = GSVector4i(z);
|
||||
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
|
||||
movdqa(xmmword[&m_env.p.z], xmm0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSSetupPrimCodeGenerator::Texture()
|
||||
{
|
||||
if(!m_en.t)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// GSVector4 t = dscan.t;
|
||||
|
||||
movaps(xmm0, xmmword[edx + 32]);
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
mulps(xmm1, xmm3);
|
||||
|
||||
if(m_env.sel.fst)
|
||||
{
|
||||
// m_env.d4.st = GSVector4i(t * 4.0f);
|
||||
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
movdqa(xmmword[&m_env.d4.st], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.d4.stq = t * 4.0f;
|
||||
|
||||
movaps(xmmword[&m_env.d4.stq], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_env.sel.fst ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
// GSVector4 dq = t.zzzz();
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// GSVector4 v = ds/dt * m_shift[i];
|
||||
|
||||
movaps(xmm2, xmm1);
|
||||
mulps(xmm2, Xmm(4 + i));
|
||||
|
||||
if(m_env.sel.fst)
|
||||
{
|
||||
// m_env.d[i].si/ti = GSVector4i(v);
|
||||
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: movdqa(xmmword[&m_env.d[i].si], xmm2); break;
|
||||
case 1: movdqa(xmmword[&m_env.d[i].ti], xmm2); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.d[i].s/t/q = v;
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: movaps(xmmword[&m_env.d[i].s], xmm2); break;
|
||||
case 1: movaps(xmmword[&m_env.d[i].t], xmm2); break;
|
||||
case 2: movaps(xmmword[&m_env.d[i].q], xmm2); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSSetupPrimCodeGenerator::Color()
|
||||
{
|
||||
if(!m_en.c)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if(m_env.sel.iip)
|
||||
{
|
||||
// GSVector4 c = dscan.c;
|
||||
|
||||
movaps(xmm0, xmmword[edx]);
|
||||
movaps(xmm1, xmm0);
|
||||
|
||||
// m_env.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
||||
|
||||
movaps(xmm2, xmm0);
|
||||
mulps(xmm2, xmm3);
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0));
|
||||
packssdw(xmm2, xmm2);
|
||||
movdqa(xmmword[&m_env.d4.c], xmm2);
|
||||
|
||||
// xmm3 is not needed anymore
|
||||
|
||||
// GSVector4 dr = c.xxxx();
|
||||
// GSVector4 db = c.zzzz();
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
|
||||
|
||||
movaps(xmm2, xmm0);
|
||||
mulps(xmm2, Xmm(4 + i));
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
packssdw(xmm2, xmm2);
|
||||
|
||||
// GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
|
||||
|
||||
movaps(xmm3, xmm1);
|
||||
mulps(xmm3, Xmm(4 + i));
|
||||
cvttps2dq(xmm3, xmm3);
|
||||
packssdw(xmm3, xmm3);
|
||||
|
||||
// m_env.d[i].rb = r.upl16(b);
|
||||
|
||||
punpcklwd(xmm2, xmm3);
|
||||
movdqa(xmmword[&m_env.d[i].rb], xmm2);
|
||||
}
|
||||
|
||||
// GSVector4 c = dscan.c;
|
||||
|
||||
movaps(xmm0, xmmword[edx]); // not enough regs, have to reload it
|
||||
movaps(xmm1, xmm0);
|
||||
|
||||
// GSVector4 dg = c.yyyy();
|
||||
// GSVector4 da = c.wwww();
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
|
||||
|
||||
movaps(xmm2, xmm0);
|
||||
mulps(xmm2, Xmm(4 + i));
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
packssdw(xmm2, xmm2);
|
||||
|
||||
// GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
|
||||
|
||||
movaps(xmm3, xmm1);
|
||||
mulps(xmm3, Xmm(4 + i));
|
||||
cvttps2dq(xmm3, xmm3);
|
||||
packssdw(xmm3, xmm3);
|
||||
|
||||
// m_env.d[i].ga = g.upl16(a);
|
||||
|
||||
punpcklwd(xmm2, xmm3);
|
||||
movdqa(xmmword[&m_env.d[i].ga], xmm2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector4i c = GSVector4i(vertices[0].c);
|
||||
|
||||
movaps(xmm0, xmmword[ecx]);
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
|
||||
// c = c.upl16(c.zwxy());
|
||||
|
||||
movdqa(xmm1, xmm0);
|
||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
punpcklwd(xmm0, xmm1);
|
||||
|
||||
// if(!tme) c = c.srl16(7);
|
||||
|
||||
if(m_env.sel.tfx == TFX_NONE)
|
||||
{
|
||||
psrlw(xmm0, 7);
|
||||
}
|
||||
|
||||
// m_env.c.rb = c.xxxx();
|
||||
// m_env.c.ga = c.zzzz();
|
||||
|
||||
movdqa(xmm1, xmm0);
|
||||
pshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
movdqa(xmmword[&m_env.c.rb], xmm0);
|
||||
movdqa(xmmword[&m_env.c.ga], xmm1);
|
||||
}
|
||||
}
|
||||
|
||||
// Constants the generated code multiplies the per-pixel steps by.
// Generate() loads row n into register xmm(3 + n).
const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] =
{
	GSVector4(4.0f, 4.0f, 4.0f, 4.0f),    // xmm3: used for the d4.* values
	GSVector4(0.0f, 1.0f, 2.0f, 3.0f),    // xmm4: used for d[0].*
	GSVector4(-1.0f, 0.0f, 1.0f, 2.0f),   // xmm5: used for d[1].*
	GSVector4(-2.0f, -1.0f, 0.0f, 1.0f),  // xmm6: used for d[2].*
	GSVector4(-3.0f, -2.0f, -1.0f, 0.0f)  // xmm7: used for d[3].*
};
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GSScanlineEnvironment.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
class GSSetupPrimCodeGenerator : public CodeGenerator
|
||||
{
|
||||
void operator = (const GSSetupPrimCodeGenerator&);
|
||||
|
||||
static const GSVector4 m_shift[5];
|
||||
|
||||
util::Cpu m_cpu;
|
||||
|
||||
GSScanlineEnvironment& m_env;
|
||||
|
||||
struct {DWORD z:1, f:1, t:1, c:1;} m_en;
|
||||
|
||||
void Generate();
|
||||
|
||||
void Depth();
|
||||
void Texture();
|
||||
void Color();
|
||||
|
||||
public:
|
||||
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env);
|
||||
};
|
|
@ -1327,6 +1327,10 @@
|
|||
RelativePath=".\GSSettingsDlg.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSSetupPrimCodeGenerator.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSState.cpp"
|
||||
>
|
||||
|
@ -1737,6 +1741,10 @@
|
|||
RelativePath=".\GSSettingsDlg.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSSetupPrimCodeGenerator.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSState.h"
|
||||
>
|
||||
|
|
|
@ -1202,7 +1202,7 @@ protected:
|
|||
}
|
||||
void movd(const Mmx& mmx, const Address& addr)
|
||||
{
|
||||
if (mmx.isXMM()) db(0x66);
|
||||
ASSERT(!addr.isBit(32)); // don't use dword ptr, bogus, won't output 0x66 for xmm dest op
|
||||
opModM(addr, Reg(mmx.getIdx(), Operand::REG, mmx.getBit() / 8), 0x0F, B01101110);
|
||||
}
|
||||
void movd(const Mmx& mmx, const Reg32& reg)
|
||||
|
|
Loading…
Reference in New Issue