GSdx: more JIT code and a little clean-up

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@484 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-02-13 09:28:51 +00:00
parent e7536c1bbb
commit e131e22ea6
14 changed files with 751 additions and 2739 deletions

File diff suppressed because it is too large Load Diff

View File

@ -24,6 +24,7 @@
#include "GSState.h" #include "GSState.h"
#include "GSRasterizer.h" #include "GSRasterizer.h"
#include "GSScanlineEnvironment.h" #include "GSScanlineEnvironment.h"
#include "GSSetupPrimCodeGenerator.h"
#include "GSDrawScanlineCodeGenerator.h" #include "GSDrawScanlineCodeGenerator.h"
#include "GSAlignedClass.h" #include "GSAlignedClass.h"
@ -31,74 +32,36 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
{ {
GSScanlineEnvironment m_env; GSScanlineEnvironment m_env;
static const GSVector4 m_shift[4];
/* static const GSVector4i m_test[8];
// //
class GSDrawScanlineMap : public GSFunctionMap<DWORD, DrawScanlinePtr> class GSSetupPrimMap : public GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, UINT64, SetupPrimStaticPtr>
{ {
DrawScanlinePtr m_default[4][4][4][2]; GSDrawScanline* m_ds;
public: public:
GSDrawScanlineMap(); GSSetupPrimMap(GSDrawScanline* ds);
GSSetupPrimCodeGenerator* Create(UINT64 key);
} m_sp;
DrawScanlinePtr GetDefaultFunction(DWORD key); SetupPrimStaticPtr m_spf;
void PrintStats();
};
GSDrawScanlineMap m_ds;
*/
//
class GSSetupPrimMap : public GSFunctionMap<DWORD, SetupPrimPtr>
{
SetupPrimPtr m_default[2][2][2][2][2];
public:
GSSetupPrimMap();
SetupPrimPtr GetDefaultFunction(DWORD key);
};
GSSetupPrimMap m_sp;
template<DWORD zbe, DWORD fge, DWORD tme, DWORD fst, DWORD iip>
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan); void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
// //
CRBMap<UINT64, GSDrawScanlineCodeGenerator*> m_dscg; class GSDrawScanlineMap : public GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, UINT64, DrawScanlineStaticPtr>
{
GSDrawScanline* m_ds;
public:
GSDrawScanlineMap(GSDrawScanline* ds);
GSDrawScanlineCodeGenerator* Create(UINT64 key);
} m_ds;
DrawScanlineStaticPtr m_dsf; DrawScanlineStaticPtr m_dsf;
void DrawScanline(int top, int left, int right, const GSVertexSW& v); void DrawScanline(int top, int left, int right, const GSVertexSW& v);
/*
//
__forceinline GSVector4i Wrap(const GSVector4i& t);
__forceinline void SampleTexture(DWORD ltf, DWORD tlu, const GSVector4i& u, const GSVector4i& v, GSVector4i* c);
__forceinline void ColorTFX(DWORD iip, DWORD tfx, const GSVector4i& rbf, const GSVector4i& gaf, GSVector4i& rbt, GSVector4i& gat);
__forceinline void AlphaTFX(DWORD iip, DWORD tfx, DWORD tcc, const GSVector4i& gaf, GSVector4i& gat);
__forceinline void Fog(DWORD fge, const GSVector4i& f, GSVector4i& rb, GSVector4i& ga);
__forceinline bool TestZ(DWORD zpsm, DWORD ztst, const GSVector4i& zs, const GSVector4i& zd, GSVector4i& test);
__forceinline bool TestAlpha(DWORD atst, DWORD afail, const GSVector4i& ga, GSVector4i& fm, GSVector4i& zm, GSVector4i& test);
__forceinline bool TestDestAlpha(DWORD fpsm, DWORD date, const GSVector4i& fd, GSVector4i& test);
__forceinline void ReadPixel(int psm, int addr, GSVector4i& c) const;
__forceinline static void WritePixel(int psm, WORD* RESTRICT vm16, DWORD c);
__forceinline void WriteFrame(int fpsm, int rfb, GSVector4i* c, const GSVector4i& fd, const GSVector4i& fm, int addr, int fzm);
__forceinline void WriteZBuf(int zpsm, int ztst, const GSVector4i& z, const GSVector4i& zd, const GSVector4i& zm, int addr, int fzm);
template<DWORD fpsm, DWORD zpsm, DWORD ztst, DWORD iip>
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
template<DWORD sel>
void DrawScanlineEx(int top, int left, int right, const GSVertexSW& v);
*/
// //
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v); void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
@ -124,5 +87,5 @@ public:
void BeginDraw(const GSRasterizerData* data, Functions* f); void BeginDraw(const GSRasterizerData* data, Functions* f);
void EndDraw(const GSRasterizerStats& stats); void EndDraw(const GSRasterizerStats& stats);
void PrintStats() {/*m_ds.PrintStats();*/} void PrintStats() {m_ds.PrintStats();}
}; };

View File

@ -19,7 +19,6 @@
* *
*/ */
// TODO: test without sse41
// TODO: x64 // TODO: x64
#include "StdAfx.h" #include "StdAfx.h"
@ -220,7 +219,6 @@ L("@@");
} }
packssdw(xmm0, xmm1); packssdw(xmm0, xmm1);
packssdw(xmm0, xmm0); // TODO: not really needed...
pmovmskb(edx, xmm0); pmovmskb(edx, xmm0);
not(edx); not(edx);
@ -440,6 +438,8 @@ void GSDrawScanlineCodeGenerator::Init(int params)
} }
} }
if(m_env.sel.tfx != TFX_DECAL)
{
if(m_env.sel.iip) if(m_env.sel.iip)
{ {
// GSVector4i vc = GSVector4i(v.c); // GSVector4i vc = GSVector4i(v.c);
@ -473,6 +473,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
} }
} }
} }
}
void GSDrawScanlineCodeGenerator::Step() void GSDrawScanlineCodeGenerator::Step()
{ {
@ -566,6 +567,8 @@ void GSDrawScanlineCodeGenerator::Step()
} }
} }
if(m_env.sel.tfx != TFX_DECAL)
{
if(m_env.sel.iip) if(m_env.sel.iip)
{ {
// GSVector4i c = m_env.d4.c; // GSVector4i c = m_env.d4.c;
@ -593,6 +596,7 @@ void GSDrawScanlineCodeGenerator::Step()
} }
} }
} }
}
// test = m_test[7 + (steps & (steps >> 31))]; // test = m_test[7 + (steps & (steps >> 31))];
@ -619,6 +623,8 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
// GSVector4i zs = sprite ? zi : (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); // GSVector4i zs = sprite ? zi : (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
if(!m_env.sel.sprite) if(!m_env.sel.sprite)
{
if(m_env.sel.zoverflow)
{ {
static float half = 0.5f; static float half = 0.5f;
@ -626,13 +632,19 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
shufps(temp1, temp1, _MM_SHUFFLE(0, 0, 0, 0)); shufps(temp1, temp1, _MM_SHUFFLE(0, 0, 0, 0));
mulps(temp1, xmm0); mulps(temp1, xmm0);
cvttps2dq(temp1, temp1); cvttps2dq(temp1, temp1);
pslld(temp1, 1);
cvttps2dq(xmm0, xmm0); cvttps2dq(xmm0, xmm0);
pcmpeqd(temp1, temp1); pcmpeqd(temp2, temp2);
psrld(temp1, 31); psrld(temp2, 31);
pand(temp1, xmm0); pand(xmm0, temp2);
por(xmm0, temp1); por(xmm0, temp1);
}
else
{
cvttps2dq(xmm0, xmm0);
}
if(m_env.sel.zwrite) if(m_env.sel.zwrite)
{ {
@ -649,6 +661,16 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
movdqa(xmmword[&m_env.temp.zd], temp1); movdqa(xmmword[&m_env.temp.zd], temp1);
} }
// zd &= 0xffffffff >> m_env.sel.zpsm * 8;
if(m_env.sel.zpsm)
{
pslld(temp1, m_env.sel.zpsm * 8);
psrld(temp1, m_env.sel.zpsm * 8);
}
if(m_env.sel.zoverflow || m_env.sel.zpsm == 0)
{
// GSVector4i o = GSVector4i::x80000000(); // GSVector4i o = GSVector4i::x80000000();
pcmpeqd(temp2, temp2); pcmpeqd(temp2, temp2);
@ -658,17 +680,10 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
psubd(xmm0, temp2); psubd(xmm0, temp2);
// zpsm == 0: GSVector4i zdo = zd - o; // GSVector4i zdo = zd - o;
// zpsm == 1: GSVector4i zdo = (zd & GSVector4i::x00ffffff()) - o;
// zpsm == 2: GSVector4i zdo = (zd & GSVector4i::x0000ffff()) - o;
if(m_env.sel.zpsm)
{
pslld(xmm1, m_env.sel.zpsm * 8);
psrld(xmm1, m_env.sel.zpsm * 8);
}
psubd(temp1, temp2); psubd(temp1, temp2);
}
switch(m_env.sel.ztst) switch(m_env.sel.ztst)
{ {
@ -688,7 +703,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
break; break;
} }
alltrue(xmm7, eax, "step"); alltrue();
} }
} }
@ -701,6 +716,9 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
mov(ebx, dword[&m_env.tex]); mov(ebx, dword[&m_env.tex]);
// ebx = tex
// edx = clut
if(!m_env.sel.fst) if(!m_env.sel.fst)
{ {
// GSVector4 w = q.rcp(); // GSVector4 w = q.rcp();
@ -790,7 +808,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i x0 = uv0.upl16(); // GSVector4i x0 = uv0.upl16();
pxor(xmm0, xmm0); pxor(xmm0, xmm0);
movd(xmm1, dword[&m_env.tw]); movd(xmm1, ptr[&m_env.tw]);
movdqa(xmm4, xmm2); movdqa(xmm4, xmm2);
punpckhwd(xmm2, xmm0); punpckhwd(xmm2, xmm0);
@ -810,16 +828,16 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i y1 = uv1.uph16() << tw; // GSVector4i y1 = uv1.uph16() << tw;
// GSVector4i x1 = uv1.upl16(); // GSVector4i x1 = uv1.upl16();
movdqa(xmm5, xmm3); movdqa(xmm6, xmm3);
punpckhwd(xmm3, xmm0); punpckhwd(xmm3, xmm0);
punpcklwd(xmm5, xmm0); punpcklwd(xmm6, xmm0);
pslld(xmm3, xmm1); pslld(xmm3, xmm1);
// xmm2 = y0 // xmm2 = y0
// xmm3 = y1 // xmm3 = y1
// xmm4 = x0 // xmm4 = x0
// xmm5 = x1 // xmm6 = x1
// xmm0, xmm1, xmm6 = free // xmm0, xmm5, xmm6 = free
// xmm7 = used // xmm7 = used
// GSVector4i addr00 = y0 + x0; // GSVector4i addr00 = y0 + x0;
@ -827,19 +845,19 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i addr10 = y1 + x0; // GSVector4i addr10 = y1 + x0;
// GSVector4i addr11 = y1 + x1; // GSVector4i addr11 = y1 + x1;
movdqa(xmm0, xmm2); movdqa(xmm5, xmm2);
paddd(xmm2, xmm5); paddd(xmm5, xmm4);
paddd(xmm2, xmm6);
movdqa(xmm0, xmm3);
paddd(xmm0, xmm4); paddd(xmm0, xmm4);
paddd(xmm3, xmm6);
movdqa(xmm1, xmm3); // xmm5 = addr00
paddd(xmm3, xmm5);
paddd(xmm1, xmm4);
// xmm0 = addr00
// xmm2 = addr01 // xmm2 = addr01
// xmm1 = addr10 // xmm0 = addr10
// xmm3 = addr11 // xmm3 = addr11
// xmm4, xmm5, xmm6 = free // xmm1, xmm4, xmm6 = free
// xmm7 = used // xmm7 = used
// c00 = addr00.gather32_32((const DWORD/BYTE*)tex[, clut]); // c00 = addr00.gather32_32((const DWORD/BYTE*)tex[, clut]);
@ -847,118 +865,113 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// c10 = addr10.gather32_32((const DWORD/BYTE*)tex[, clut]); // c10 = addr10.gather32_32((const DWORD/BYTE*)tex[, clut]);
// c11 = addr11.gather32_32((const DWORD/BYTE*)tex[, clut]); // c11 = addr11.gather32_32((const DWORD/BYTE*)tex[, clut]);
ReadTexel(xmm5, xmm0, ebx, xmm4, xmm6); ReadTexel(xmm6, xmm5, xmm1, xmm4);
// xmm0, xmm4, xmm6 = free // xmm2, xmm5, xmm1 = free
ReadTexel(xmm6, xmm2, ebx, xmm0, xmm4); ReadTexel(xmm4, xmm2, xmm5, xmm1);
// xmm0, xmm2, xmm4 = free // xmm0, xmm2, xmm5 = free
ReadTexel(xmm4, xmm1, ebx, xmm2, xmm0); ReadTexel(xmm1, xmm0, xmm2, xmm5);
// xmm0, xmm1, xmm2 = free // xmm3, xmm0, xmm2 = free
ReadTexel(xmm0, xmm3, ebx, xmm1, xmm2); ReadTexel(xmm5, xmm3, xmm0, xmm2);
// xmm5 = c00 // xmm6 = c00
// xmm6 = c01 // xmm4 = c01
// xmm4 = c10 // xmm1 = c10
// xmm0 = c11 // xmm5 = c11
// xmm1, xmm2, xmm3 = free // xmm0, xmm2, xmm3 = free
// xmm7 = used // xmm7 = used
movdqa(xmm1, xmmword[&m_env.temp.uf]); movdqa(xmm0, xmmword[&m_env.temp.uf]);
// GSVector4i rb00 = c00 & mask; // GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask; // GSVector4i ga00 = (c00 >> 8) & mask;
movdqa(xmm2, xmm5); movdqa(xmm2, xmm6);
psllw(xmm2, 8); psllw(xmm2, 8);
psrlw(xmm2, 8); psrlw(xmm2, 8);
psrlw(xmm5, 8); psrlw(xmm6, 8);
// GSVector4i rb01 = c01 & mask; // GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask; // GSVector4i ga01 = (c01 >> 8) & mask;
movdqa(xmm3, xmm6); movdqa(xmm3, xmm4);
psllw(xmm3, 8); psllw(xmm3, 8);
psrlw(xmm3, 8); psrlw(xmm3, 8);
psrlw(xmm6, 8); psrlw(xmm4, 8);
// xmm1 = uf // xmm0 = uf
// xmm2 = rb00 // xmm2 = rb00
// xmm3 = rb01 // xmm3 = rb01
// xmm5 = ga00 // xmm6 = ga00
// xmm6 = ga01 // xmm4 = ga01
// xmm4 = c10 // xmm1 = c10
// xmm0 = c11 // xmm5 = c11
// xmm7 = used // xmm7 = used
// rb00 = rb00.lerp16<0>(rb01, uf); // rb00 = rb00.lerp16<0>(rb01, uf);
// ga00 = ga00.lerp16<0>(ga01, uf); // ga00 = ga00.lerp16<0>(ga01, uf);
lerp16<0>(xmm3, xmm2, xmm1); lerp16<0>(xmm3, xmm2, xmm0);
lerp16<0>(xmm6, xmm5, xmm1); lerp16<0>(xmm4, xmm6, xmm0);
// xmm1 = uf // xmm0 = uf
// xmm3 = rb00 // xmm3 = rb00
// xmm6 = ga00 // xmm4 = ga00
// xmm4 = c10 // xmm1 = c10
// xmm0 = c11 // xmm5 = c11
// xmm2, xmm5 = free // xmm2, xmm6 = free
// xmm7 = used // xmm7 = used
// GSVector4i rb10 = c10 & mask; // GSVector4i rb10 = c10 & mask;
// GSVector4i rb11 = c11 & mask; // GSVector4i rb11 = c11 & mask;
movdqa(xmm2, xmm4); movdqa(xmm2, xmm1);
psllw(xmm4, 8); psllw(xmm1, 8);
psrlw(xmm4, 8); psrlw(xmm1, 8);
psrlw(xmm2, 8); psrlw(xmm2, 8);
// GSVector4i ga10 = (c10 >> 8) & mask; // GSVector4i ga10 = (c10 >> 8) & mask;
// GSVector4i ga11 = (c11 >> 8) & mask; // GSVector4i ga11 = (c11 >> 8) & mask;
movdqa(xmm5, xmm0); movdqa(xmm6, xmm5);
psllw(xmm0, 8); psllw(xmm5, 8);
psrlw(xmm0, 8);
psrlw(xmm5, 8); psrlw(xmm5, 8);
psrlw(xmm6, 8);
// xmm1 = uf // xmm0 = uf
// xmm3 = rb00 // xmm3 = rb00
// xmm6 = ga00 // xmm4 = ga00
// xmm4 = rb10 // xmm1 = rb10
// xmm0 = rb11 // xmm5 = rb11
// xmm2 = ga10 // xmm2 = ga10
// xmm5 = ga11 // xmm6 = ga11
// xmm7 = used // xmm7 = used
// rb10 = rb10.lerp16<0>(rb11, uf); // rb10 = rb10.lerp16<0>(rb11, uf);
// ga10 = ga10.lerp16<0>(ga11, uf); // ga10 = ga10.lerp16<0>(ga11, uf);
lerp16<0>(xmm0, xmm4, xmm1); lerp16<0>(xmm5, xmm1, xmm0);
lerp16<0>(xmm5, xmm2, xmm1); lerp16<0>(xmm6, xmm2, xmm0);
// xmm3 = rb00 // xmm3 = rb00
// xmm6 = ga00 // xmm4 = ga00
// xmm0 = rb10 // xmm5 = rb10
// xmm5 = ga10 // xmm6 = ga10
// xmm1, xmm2, xmm4 = free // xmm0, xmm1, xmm2 = free
// xmm7 = used // xmm7 = used
// rb00 = rb00.lerp16<0>(rb10, vf); // rb00 = rb00.lerp16<0>(rb10, vf);
// ga00 = ga00.lerp16<0>(ga10, vf); // ga00 = ga00.lerp16<0>(ga10, vf);
movdqa(xmm1, xmmword[&m_env.temp.vf]); movdqa(xmm0, xmmword[&m_env.temp.vf]);
lerp16<0>(xmm0, xmm3, xmm1); lerp16<0>(xmm5, xmm3, xmm0);
lerp16<0>(xmm5, xmm6, xmm1); lerp16<0>(xmm6, xmm4, xmm0);
// TODO: make rb/ga end up in xmm5/xmm6
movdqa(xmm6, xmm5);
movdqa(xmm5, xmm0);
} }
else else
{ {
@ -968,7 +981,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// c00 = addr00.gather32_32((const DWORD/BYTE*)tex[, clut]); // c00 = addr00.gather32_32((const DWORD/BYTE*)tex[, clut]);
ReadTexel(xmm5, xmm2, ebx, xmm0, xmm1); ReadTexel(xmm5, xmm2, xmm0, xmm1);
// GSVector4i mask = GSVector4i::x00ff(); // GSVector4i mask = GSVector4i::x00ff();
@ -1248,7 +1261,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
case AFAIL_KEEP: case AFAIL_KEEP:
// test |= t; // test |= t;
por(xmm7, xmm1); por(xmm7, xmm1);
alltrue(xmm7, eax, "step"); alltrue();
break; break;
case AFAIL_FB_ONLY: case AFAIL_FB_ONLY:
@ -1428,7 +1441,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
por(xmm7, xmm1); por(xmm7, xmm1);
alltrue(xmm7, eax, "step"); alltrue();
} }
void GSDrawScanlineCodeGenerator::WriteZBuf() void GSDrawScanlineCodeGenerator::WriteZBuf()
@ -1451,12 +1464,12 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
// if(fzm & 0x30) GSVector4i::storel(&vm16[addr + 0], zs); // if(fzm & 0x30) GSVector4i::storel(&vm16[addr + 0], zs);
// if(fzm & 0xc0) GSVector4i::storeh(&vm16[addr + 8], zs); // if(fzm & 0xc0) GSVector4i::storeh(&vm16[addr + 8], zs);
test(dl, 0x30); test(dh, 0x0f);
je("wz30"); je("wz30");
movq(qword[ebp * 2 + (size_t)m_env.vm], xmm1); movq(qword[ebp * 2 + (size_t)m_env.vm], xmm1);
L("wz30"); L("wz30");
test(dl, 0xc0); test(dh, 0xf0);
je("wzc0"); je("wzc0");
movhps(qword[ebp * 2 + (size_t)m_env.vm + 8 * 2], xmm1); movhps(qword[ebp * 2 + (size_t)m_env.vm + 8 * 2], xmm1);
L("wzc0"); L("wzc0");
@ -1468,22 +1481,22 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
// if(fzm & 0x40) WritePixel(zpsm, &vm16[addr + 8], zs.extract32<2>()); // if(fzm & 0x40) WritePixel(zpsm, &vm16[addr + 8], zs.extract32<2>());
// if(fzm & 0x80) WritePixel(zpsm, &vm16[addr + 10], zs.extract32<3>()); // if(fzm & 0x80) WritePixel(zpsm, &vm16[addr + 10], zs.extract32<3>());
test(dl, 0x10); test(dh, 0x03);
je("wz10"); je("wz10");
WritePixel(xmm1, xmm0, ebp, 0, m_env.sel.zpsm); WritePixel(xmm1, xmm0, ebp, 0, m_env.sel.zpsm);
L("wz10"); L("wz10");
test(dl, 0x20); test(dh, 0x0c);
je("wz20"); je("wz20");
WritePixel(xmm1, xmm0, ebp, 1, m_env.sel.zpsm); WritePixel(xmm1, xmm0, ebp, 1, m_env.sel.zpsm);
L("wz20"); L("wz20");
test(dl, 0x40); test(dh, 0x30);
je("wz40"); je("wz40");
WritePixel(xmm1, xmm0, ebp, 2, m_env.sel.zpsm); WritePixel(xmm1, xmm0, ebp, 2, m_env.sel.zpsm);
L("wz40"); L("wz40");
test(dl, 0x80); test(dh, 0xc0);
je("wz80"); je("wz80");
WritePixel(xmm1, xmm0, ebp, 3, m_env.sel.zpsm); WritePixel(xmm1, xmm0, ebp, 3, m_env.sel.zpsm);
L("wz80"); L("wz80");
@ -1597,7 +1610,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
psllw(xmm7, 7); psllw(xmm7, 7);
break; break;
case 2: case 2:
movdqa(xmm7, xmmword[&m_env.afix2]); movdqa(xmm7, xmmword[&m_env.afix]);
break; break;
} }
@ -1627,7 +1640,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
} }
} }
if(0)//m_env.sel.pabe) if(m_env.sel.pabe)
{ {
// mask = (c[1] << 8).sra32(31); // mask = (c[1] << 8).sra32(31);
@ -1697,7 +1710,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
} }
} }
if(0)//m_env.sel.pabe) if(m_env.sel.pabe)
{ {
if(!m_cpu.has(util::Cpu::tSSE41)) if(!m_cpu.has(util::Cpu::tSSE41))
{ {
@ -1718,13 +1731,6 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
{ {
mix16(xmm6, xmm4, xmm7); mix16(xmm6, xmm4, xmm7);
} }
if(m_env.sel.pabe)
{
printf("PABE\n");
Sleep(1000);
MessageBeep(-1);
}
} }
void GSDrawScanlineCodeGenerator::WriteFrame(int params) void GSDrawScanlineCodeGenerator::WriteFrame(int params)
@ -1816,12 +1822,12 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
// if(fzm & 0x03) GSVector4i::storel(&vm16[addr + 0], fs); // if(fzm & 0x03) GSVector4i::storel(&vm16[addr + 0], fs);
// if(fzm & 0x0c) GSVector4i::storeh(&vm16[addr + 8], fs); // if(fzm & 0x0c) GSVector4i::storeh(&vm16[addr + 8], fs);
test(dl, 0x03); test(dl, 0x0f);
je("wf03"); je("wf03");
movq(qword[ebx * 2 + (size_t)m_env.vm], xmm5); movq(qword[ebx * 2 + (size_t)m_env.vm], xmm5);
L("wf03"); L("wf03");
test(dl, 0x0c); test(dl, 0xf0);
je("wf0c"); je("wf0c");
movhps(qword[ebx * 2 + (size_t)m_env.vm + 8 * 2], xmm5); movhps(qword[ebx * 2 + (size_t)m_env.vm + 8 * 2], xmm5);
L("wf0c"); L("wf0c");
@ -1833,22 +1839,22 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
// if(fzm & 0x04) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>()); // if(fzm & 0x04) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>());
// if(fzm & 0x08) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>()); // if(fzm & 0x08) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>());
test(dl, 0x01); test(dl, 0x03);
je("wf01"); je("wf01");
WritePixel(xmm5, xmm0, ebx, 0, m_env.sel.fpsm); WritePixel(xmm5, xmm0, ebx, 0, m_env.sel.fpsm);
L("wf01"); L("wf01");
test(dl, 0x02); test(dl, 0x0c);
je("wf02"); je("wf02");
WritePixel(xmm5, xmm0, ebx, 1, m_env.sel.fpsm); WritePixel(xmm5, xmm0, ebx, 1, m_env.sel.fpsm);
L("wf02"); L("wf02");
test(dl, 0x04); test(dl, 0x30);
je("wf04"); je("wf04");
WritePixel(xmm5, xmm0, ebx, 2, m_env.sel.fpsm); WritePixel(xmm5, xmm0, ebx, 2, m_env.sel.fpsm);
L("wf04"); L("wf04");
test(dl, 0x08); test(dl, 0xc0);
je("wf08"); je("wf08");
WritePixel(xmm5, xmm0, ebx, 3, m_env.sel.fpsm); WritePixel(xmm5, xmm0, ebx, 3, m_env.sel.fpsm);
L("wf08"); L("wf08");
@ -1911,26 +1917,26 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Xmm& temp, co
} }
} }
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, const Xmm& temp1, const Xmm& temp2) void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, const Xmm& temp1, const Xmm& temp2)
{ {
if(m_cpu.has(util::Cpu::tSSE41)) if(m_cpu.has(util::Cpu::tSSE41))
{ {
ReadTexel(dst, addr, base, 0); ReadTexel(dst, addr, 0);
ReadTexel(dst, addr, base, 1); ReadTexel(dst, addr, 1);
ReadTexel(dst, addr, base, 2); ReadTexel(dst, addr, 2);
ReadTexel(dst, addr, base, 3); ReadTexel(dst, addr, 3);
} }
else else
{ {
ReadTexel(dst, addr, base, 0); ReadTexel(dst, addr, 0);
psrldq(addr, 4); // shuffle instead? (1 2 3 0 ~ rotation) psrldq(addr, 4); // shuffle instead? (1 2 3 0 ~ rotation)
ReadTexel(temp1, addr, base, 0); ReadTexel(temp1, addr, 0);
psrldq(addr, 4); psrldq(addr, 4);
punpckldq(dst, temp1); punpckldq(dst, temp1);
ReadTexel(temp1, addr, base, 0); ReadTexel(temp1, addr, 0);
psrldq(addr, 4); psrldq(addr, 4);
ReadTexel(temp2, addr, base, 0); ReadTexel(temp2, addr, 0);
// psrldq(addr, 4); // psrldq(addr, 4);
punpckldq(temp1, temp2); punpckldq(temp1, temp2);
@ -1938,7 +1944,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, con
} }
} }
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, uint8 i) void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
{ {
if(!m_cpu.has(util::Cpu::tSSE41) && i > 0) if(!m_cpu.has(util::Cpu::tSSE41) && i > 0)
{ {
@ -1948,14 +1954,9 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, con
if(i == 0) movd(eax, addr); if(i == 0) movd(eax, addr);
else pextrd(eax, addr, i); else pextrd(eax, addr, i);
const Address& src = m_env.sel.tlu if(m_env.sel.tlu) movzx(eax, byte[ebx + eax]);
? dword[eax * 4 + (size_t)m_env.clut]
: dword[base + eax * 4];
if(m_env.sel.tlu) const Address& src = m_env.sel.tlu ? ptr[eax * 4 + (size_t)m_env.clut] : ptr[ebx + eax * 4];
{
movzx(eax, byte[base + eax]);
}
if(i == 0) movd(dst, src); if(i == 0) movd(dst, src);
else pinsrd(dst, src, i); else pinsrd(dst, src, i);
@ -2014,11 +2015,11 @@ void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
} }
} }
void GSDrawScanlineCodeGenerator::alltrue(const Xmm& a, const Reg32& temp, LPCTSTR label) void GSDrawScanlineCodeGenerator::alltrue()
{ {
pmovmskb(temp, a); pmovmskb(eax, xmm7);
cmp(temp, 0xffff); cmp(eax, 0xffff);
je(label, T_NEAR); je("step", T_NEAR);
} }
void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b) void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b)

View File

@ -57,14 +57,14 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
void ReadPixel(const Xmm& dst, const Reg32& addr); void ReadPixel(const Xmm& dst, const Reg32& addr);
void WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, uint8 i, int psm); void WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, uint8 i, int psm);
void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, const Xmm& temp1, const Xmm& temp2); void ReadTexel(const Xmm& dst, const Xmm& addr, const Xmm& temp1, const Xmm& temp2);
void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, uint8 i); void ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i);
template<int shift> void modulate16(const Xmm& a, const Operand& f); template<int shift> void modulate16(const Xmm& a, const Operand& f);
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Xmm& f); template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Xmm& f);
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp); void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
void clamp16(const Xmm& a, const Xmm& temp); void clamp16(const Xmm& a, const Xmm& temp);
void alltrue(const Xmm& a, const Reg32& temp, LPCTSTR label); void alltrue();
void blend8(const Xmm& a, const Xmm& b); void blend8(const Xmm& a, const Xmm& b);
void blend(const Xmm& a, const Xmm& b, const Xmm& mask); void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
void blend8r(const Xmm& b, const Xmm& a); void blend8r(const Xmm& b, const Xmm& a);

View File

@ -82,9 +82,9 @@ public:
dimx[1] = GSVector4i(DIMX.DM00, 0, DIMX.DM01, 0, DIMX.DM02, 0, DIMX.DM03, 0); dimx[1] = GSVector4i(DIMX.DM00, 0, DIMX.DM01, 0, DIMX.DM02, 0, DIMX.DM03, 0);
dimx[0] = dimx[1].xxzzlh(); dimx[0] = dimx[1].xxzzlh();
dimx[3] = GSVector4i(DIMX.DM10, 0, DIMX.DM11, 0, DIMX.DM12, 0, DIMX.DM13, 0), dimx[3] = GSVector4i(DIMX.DM10, 0, DIMX.DM11, 0, DIMX.DM12, 0, DIMX.DM13, 0),
dimx[2] = dimx[2].xxzzlh(); dimx[2] = dimx[3].xxzzlh();
dimx[5] = GSVector4i(DIMX.DM20, 0, DIMX.DM21, 0, DIMX.DM22, 0, DIMX.DM23, 0), dimx[5] = GSVector4i(DIMX.DM20, 0, DIMX.DM21, 0, DIMX.DM22, 0, DIMX.DM23, 0),
dimx[4] = dimx[4].xxzzlh(); dimx[4] = dimx[5].xxzzlh();
dimx[7] = GSVector4i(DIMX.DM30, 0, DIMX.DM31, 0, DIMX.DM32, 0, DIMX.DM33, 0), dimx[7] = GSVector4i(DIMX.DM30, 0, DIMX.DM31, 0, DIMX.DM32, 0, DIMX.DM33, 0),
dimx[6] = dimx[7].xxzzlh(); dimx[6] = dimx[7].xxzzlh();
} }

View File

@ -149,8 +149,8 @@ public:
__int64 tpf = p->frames > 0 ? p->ticks / p->frames : 0; __int64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
__int64 ppf = p->frames > 0 ? p->pixels / p->frames : 0; __int64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
printf("[%08x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n", printf("[%012I64x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n",
key, !m_map.Lookup(key) ? '*' : ' ', (UINT64)key, !m_map.Lookup(key) ? '*' : ' ',
(float)(tpf * 10000 / 50000000) / 100, (float)(tpf * 10000 / 50000000) / 100,
(float)(tpf * 10000 / ttpf) / 100, (float)(tpf * 10000 / ttpf) / 100,
p->frames, p->pixels, p->frames, p->pixels,
@ -159,3 +159,43 @@ public:
} }
} }
}; };
// Function map whose default functions come from code generator objects.
// One generator (CG) is created per key on first request, kept for the lifetime
// of the map, and its generated code is handed out cast to VALUE.
template<class CG, class KEY, class VALUE>
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
{
	// Generators created so far, indexed by key; deleted in the destructor.
	CRBMap<UINT64, CG*> m_cgmap;

protected:
	// Builds the code generator for the given key; supplied by the derived map.
	virtual CG* Create(KEY key) = 0;

public:
	GSCodeGeneratorFunctionMap()
	{
	}

	// Destroys every generator that was created through GetDefaultFunction().
	virtual ~GSCodeGeneratorFunctionMap()
	{
		for(POSITION p = m_cgmap.GetHeadPosition(); p != NULL; )
		{
			delete m_cgmap.GetNextValue(p);
		}
	}

	// Returns the generated code for key, creating and caching the generator
	// the first time the key is seen.
	VALUE GetDefaultFunction(KEY key)
	{
		CG* generator = NULL;

		if(m_cgmap.Lookup(key, generator))
		{
			return (VALUE)generator->getCode();
		}

		generator = Create(key);

		ASSERT(generator);

		m_cgmap.SetAt(key, generator);

		return (VALUE)generator->getCode();
	}
};

View File

@ -39,6 +39,8 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
m_dsf.sl = NULL; m_dsf.sl = NULL;
m_dsf.sr = NULL; m_dsf.sr = NULL;
m_dsf.sp = NULL; m_dsf.sp = NULL;
m_dsf.ssl = NULL;
m_dsf.ssp = NULL;
m_ds->BeginDraw(data, &m_dsf); m_ds->BeginDraw(data, &m_dsf);
@ -96,8 +98,10 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
if((p.y % m_threads) == m_id) if((p.y % m_threads) == m_id)
{ {
(m_ds->*m_dsf.sp)(v, *v); (m_ds->*m_dsf.sp)(v, *v);
// TODO: (m_dsf.ssp)(v, *v);
(m_ds->*m_dsf.sl)(p.y, p.x, p.x + 1, *v); (m_ds->*m_dsf.sl)(p.y, p.x, p.x + 1, *v);
// TODO: (m_dsf.ssl)(p.y, p.x, p.x + 1, *v);
m_stats.pixels++; m_stats.pixels++;
} }
@ -254,6 +258,7 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
if(py > 0) l += dl * py; if(py > 0) l += dl * py;
(m_ds->*m_dsf.sp)(v, dscan); (m_ds->*m_dsf.sp)(v, dscan);
// TODO: (m_dsf.ssp)(v, dscan);
DrawTriangleSection(top, bottom, l, dl, dscan, scissor); DrawTriangleSection(top, bottom, l, dl, dscan, scissor);
} }
@ -301,6 +306,7 @@ void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
if(py > 0) l += dl * py; if(py > 0) l += dl * py;
(m_ds->*m_dsf.sp)(v, dscan); (m_ds->*m_dsf.sp)(v, dscan);
// TODO: (m_dsf.ssp)(v, dscan);
DrawTriangleSection(top, bottom, l, dl, dscan, scissor); DrawTriangleSection(top, bottom, l, dl, dscan, scissor);
} }
@ -323,6 +329,7 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso
GSVertexSW dscan = longest * longest.p.xxxx().rcp(); GSVertexSW dscan = longest * longest.p.xxxx().rcp();
(m_ds->*m_dsf.sp)(v, dscan); (m_ds->*m_dsf.sp)(v, dscan);
// TODO: (m_dsf.ssp)(v, dscan);
GSVertexSW& l = v[0]; GSVertexSW& l = v[0];
GSVector4 r = v[0].p; GSVector4 r = v[0].p;
@ -434,6 +441,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
} }
(m_ds->*m_dsf.sl)(top, left, right, scan); (m_ds->*m_dsf.sl)(top, left, right, scan);
// TODO: (m_dsf.ssl)(top, left, right, scan);
} }
} }
} }
@ -485,6 +493,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
} }
(m_ds->*m_dsf.sl)(top, left, right, scan); (m_ds->*m_dsf.sl)(top, left, right, scan);
// TODO: (m_dsf.ssl)(top, left, right, scan);
} }
} }
} }
@ -568,12 +577,14 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
if(scan.p.x < (float)left) scan.t += dscan.t * ((float)left - scan.p.x); if(scan.p.x < (float)left) scan.t += dscan.t * ((float)left - scan.p.x);
(m_ds->*m_dsf.sp)(v, dscan); (m_ds->*m_dsf.sp)(v, dscan);
// TODO: (m_dsf.ssp)(v, dscan);
for(; top < bottom; top++, scan.t += dedge.t) for(; top < bottom; top++, scan.t += dedge.t)
{ {
if((top % m_threads) == m_id) if((top % m_threads) == m_id)
{ {
(m_ds->*m_dsf.sl)(top, left, right, scan); (m_ds->*m_dsf.sl)(top, left, right, scan);
// TODO: (m_dsf.ssl)(top, left, right, scan);
m_stats.pixels += right - left; m_stats.pixels += right - left;
} }

View File

@ -55,12 +55,15 @@ public:
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v); typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
typedef void (IDrawScanline::*SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan); typedef void (IDrawScanline::*SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
typedef void (*DrawScanlineStaticPtr)(int top, int left, int right, const GSVertexSW& v); typedef void (*DrawScanlineStaticPtr)(int top, int left, int right, const GSVertexSW& v);
typedef void (*SetupPrimStaticPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
struct Functions struct Functions
{ {
DrawScanlinePtr sl; DrawScanlinePtr sl;
DrawSolidRectPtr sr; DrawSolidRectPtr sr;
SetupPrimPtr sp; SetupPrimPtr sp;
DrawScanlineStaticPtr ssl;
SetupPrimStaticPtr ssp;
}; };
virtual ~IDrawScanline() {} virtual ~IDrawScanline() {}

View File

@ -488,6 +488,7 @@ protected:
{ {
p.sel.zpsm = GSUtil::EncodePSM(context->ZBUF.PSM); p.sel.zpsm = GSUtil::EncodePSM(context->ZBUF.PSM);
p.sel.ztst = ztest ? context->TEST.ZTST : 1; p.sel.ztst = ztest ? context->TEST.ZTST : 1;
p.sel.zoverflow = GSVector4i(m_vtrace.m_max.p).z == 0x80000000;
} }
} }

View File

@ -59,6 +59,7 @@ union GSScanlineSelector
DWORD colclamp:1; // 39 DWORD colclamp:1; // 39
DWORD fba:1; // 40 DWORD fba:1; // 40
DWORD dthe:1; // 41 DWORD dthe:1; // 41
DWORD zoverflow:1; // 42 (z max >= 0x80000000)
}; };
struct struct
@ -130,11 +131,8 @@ __declspec(align(16)) struct GSScanlineEnvironment
GSVector4i fm, zm; GSVector4i fm, zm;
struct {GSVector4i min, max, mask, invmask;} t; // [u] x 4 [v] x 4 struct {GSVector4i min, max, mask, invmask;} t; // [u] x 4 [v] x 4
GSVector4i datm;
GSVector4i colclamp;
GSVector4i fba;
GSVector4i aref; GSVector4i aref;
GSVector4i afix, afix2; GSVector4i afix;
GSVector4i frb, fga; GSVector4i frb, fga;
struct {GSVector4 z, s, t, q; GSVector4i rb, ga, f, si, ti, _pad[7];} d[4]; struct {GSVector4 z, s, t, q; GSVector4i rb, ga, f, si, ti, _pad[7];} d[4];

View File

@ -0,0 +1,383 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
// TODO: x64
#include "StdAfx.h"
#include "GSSetupPrimCodeGenerator.h"
// Captures the scanline environment, derives which interpolants (z, fog,
// texture, color) the selector requires, and generates the code right away.
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env)
	: CodeGenerator(DEFAULT_MAX_CODE_SIZE, 0)
	, m_env(env)
{
	#if _M_AMD64
	#error TODO
	#endif

	// Fog, texture and color are only enabled when sel.fb is set.
	const bool fb = m_env.sel.fb != 0;

	m_en.z = m_env.sel.zb ? 1 : 0;
	m_en.f = (fb && m_env.sel.fge) ? 1 : 0;
	m_en.t = (fb && m_env.sel.tfx != TFX_NONE) ? 1 : 0;
	m_en.c = (fb && m_env.sel.tfx != TFX_DECAL) ? 1 : 0;

	Generate();
}
// Emits the complete routine: loads the two pointer arguments from the stack,
// preloads the m_shift constants when a later section will use them, then
// appends the depth, texture and color sections and the final return.
void GSSetupPrimCodeGenerator::Generate()
{
	// Offsets of the arguments relative to esp
	const int params = 0;
	const int _vertices = params + 4;
	const int _dscan = params + 8;

	// ecx = vertices
	// edx = dscan
	mov(ecx, dword[esp + _vertices]);
	mov(edx, dword[esp + _dscan]);

	const bool depthSteps = (m_en.z || m_en.f) && !m_env.sel.sprite;
	const bool colorSteps = m_en.c && m_env.sel.iip;

	if(depthSteps || m_en.t || colorSteps)
	{
		// xmm3..xmm7 = m_shift[0..4]
		for(int reg = 3; reg <= 7; reg++)
		{
			movaps(Xmm(reg), xmmword[&m_shift[reg - 3]]);
		}
	}

	Depth();

	Texture();

	Color();

	ret();
}
// Emits the code that prepares the depth (z) and fog (f) values.
//
// Expects the registers set up by Generate(): ecx = vertices, edx = dscan,
// xmm3 = m_shift[0] (4.0f in every lane), xmm4..xmm7 = m_shift[1..4].
// Uses xmm0..xmm2 as scratch. Emits nothing when neither z nor fog is enabled.
//
// Non-sprite primitives: the steps m_env.d4.{f,z} and m_env.d[0..3].{f,z}
// are derived from dscan.p (fog in lane w, z in lane z).
// Sprites: only m_env.p.{f,z} are written, taken from vertices[0].p.
//
// Float results are stored with movaps (as Texture() does); movdqa is kept
// for integer results only. Both store the same 16 bytes, movaps simply has
// the shorter encoding and matches the data type.
void GSSetupPrimCodeGenerator::Depth()
{
	if(!m_en.z && !m_en.f)
	{
		return;
	}

	if(!m_env.sel.sprite)
	{
		// GSVector4 p = dscan.p;

		movaps(xmm0, xmmword[edx + 16]);

		if(m_en.f)
		{
			// GSVector4 df = p.wwww();

			movaps(xmm1, xmm0);
			shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));

			// m_env.d4.f = GSVector4i(df * 4.0f).xxzzlh();

			movaps(xmm2, xmm1);
			mulps(xmm2, xmm3);
			cvttps2dq(xmm2, xmm2);
			pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
			pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
			movdqa(xmmword[&m_env.d4.f], xmm2);

			for(int i = 0; i < 4; i++)
			{
				// m_env.d[i].f = GSVector4i(df * m_shift[i + 1]).xxzzlh();
				// (xmm4 + i holds m_shift[i + 1])

				movaps(xmm2, xmm1);
				mulps(xmm2, Xmm(4 + i));
				cvttps2dq(xmm2, xmm2);
				pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
				pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
				movdqa(xmmword[&m_env.d[i].f], xmm2);
			}
		}

		if(m_en.z)
		{
			// GSVector4 dz = p.zzzz();

			shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));

			// m_env.d4.z = dz * 4.0f;

			movaps(xmm1, xmm0);
			mulps(xmm1, xmm3);
			movaps(xmmword[&m_env.d4.z], xmm1);

			for(int i = 0; i < 4; i++)
			{
				// m_env.d[i].z = dz * m_shift[i + 1];

				movaps(xmm1, xmm0);
				mulps(xmm1, Xmm(4 + i));
				movaps(xmmword[&m_env.d[i].z], xmm1);
			}
		}
	}
	else
	{
		// GSVector4 p = vertices[0].p;

		movaps(xmm0, xmmword[ecx + 16]);

		if(m_en.f)
		{
			// m_env.p.f = GSVector4i(p).zzzzh().zzzz();
			// (broadcasts the low 16 bits of the converted w lane to every word)

			movaps(xmm1, xmm0);
			cvttps2dq(xmm1, xmm1);
			pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
			pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
			movdqa(xmmword[&m_env.p.f], xmm1);
		}

		if(m_en.z)
		{
			// GSVector4 z = p.zzzz();

			shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));

			if(m_env.sel.zoverflow)
			{
				// m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());

				static const float half = 0.5f;

				movss(xmm1, dword[&half]);
				shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
				mulps(xmm1, xmm0);
				cvttps2dq(xmm1, xmm1);
				pslld(xmm1, 1);

				cvttps2dq(xmm0, xmm0);

				// xmm2 = 0x00000001 in every lane (all ones shifted right by 31)

				pcmpeqd(xmm2, xmm2);
				psrld(xmm2, 31);
				pand(xmm0, xmm2);

				por(xmm0, xmm1);
			}
			else
			{
				// m_env.p.z = GSVector4i(z);

				cvttps2dq(xmm0, xmm0);
			}

			movdqa(xmmword[&m_env.p.z], xmm0);
		}
	}
}
// Emits the code that prepares the texture coordinate steps from dscan.t.
//
// Expects edx = dscan, xmm3 = m_shift[0], xmm4..xmm7 = m_shift[1..4];
// uses xmm0..xmm2 as scratch. Emits nothing when texturing is disabled.
//
// With sel.fst the s/t steps are converted to integers (d4.st, d[i].si/ti);
// otherwise s/t/q stay floating point (d4.stq, d[i].s/t/q).
void GSSetupPrimCodeGenerator::Texture()
{
	if(m_en.t)
	{
		const bool integerSteps = m_env.sel.fst != 0;
		const int components = integerSteps ? 2 : 3;

		// xmm0 = dscan.t, xmm1 = dscan.t * 4.0f

		movaps(xmm0, xmmword[edx + 32]);
		movaps(xmm1, xmm0);
		mulps(xmm1, xmm3);

		if(integerSteps)
		{
			// m_env.d4.st = GSVector4i(t * 4.0f);

			cvttps2dq(xmm1, xmm1);
			movdqa(xmmword[&m_env.d4.st], xmm1);
		}
		else
		{
			// m_env.d4.stq = t * 4.0f;

			movaps(xmmword[&m_env.d4.stq], xmm1);
		}

		for(int comp = 0; comp < components; comp++)
		{
			// xmm1 = component 'comp' of t (0: s, 1: t, 2: q) in every lane

			movaps(xmm1, xmm0);
			shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(comp, comp, comp, comp));

			for(int n = 0; n < 4; n++)
			{
				// xmm2 = component * m_shift[n + 1]

				movaps(xmm2, xmm1);
				mulps(xmm2, Xmm(4 + n));

				if(integerSteps)
				{
					cvttps2dq(xmm2, xmm2);

					if(comp == 0)
					{
						movdqa(xmmword[&m_env.d[n].si], xmm2);
					}
					else if(comp == 1)
					{
						movdqa(xmmword[&m_env.d[n].ti], xmm2);
					}
				}
				else
				{
					if(comp == 0)
					{
						movaps(xmmword[&m_env.d[n].s], xmm2);
					}
					else if(comp == 1)
					{
						movaps(xmmword[&m_env.d[n].t], xmm2);
					}
					else if(comp == 2)
					{
						movaps(xmmword[&m_env.d[n].q], xmm2);
					}
				}
			}
		}
	}
}
void GSSetupPrimCodeGenerator::Color()
{
if(!m_en.c)
{
return;
}
if(m_env.sel.iip)
{
// GSVector4 c = dscan.c;
movaps(xmm0, xmmword[edx]);
movaps(xmm1, xmm0);
// m_env.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
movaps(xmm2, xmm0);
mulps(xmm2, xmm3);
cvttps2dq(xmm2, xmm2);
pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0));
packssdw(xmm2, xmm2);
movdqa(xmmword[&m_env.d4.c], xmm2);
// xmm3 is not needed anymore
// GSVector4 dr = c.xxxx();
// GSVector4 db = c.zzzz();
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
for(int i = 0; i < 4; i++)
{
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
movaps(xmm2, xmm0);
mulps(xmm2, Xmm(4 + i));
cvttps2dq(xmm2, xmm2);
packssdw(xmm2, xmm2);
// GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
movaps(xmm3, xmm1);
mulps(xmm3, Xmm(4 + i));
cvttps2dq(xmm3, xmm3);
packssdw(xmm3, xmm3);
// m_env.d[i].rb = r.upl16(b);
punpcklwd(xmm2, xmm3);
movdqa(xmmword[&m_env.d[i].rb], xmm2);
}
// GSVector4 c = dscan.c;
movaps(xmm0, xmmword[edx]); // not enough regs, have to reload it
movaps(xmm1, xmm0);
// GSVector4 dg = c.yyyy();
// GSVector4 da = c.wwww();
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
for(int i = 0; i < 4; i++)
{
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
movaps(xmm2, xmm0);
mulps(xmm2, Xmm(4 + i));
cvttps2dq(xmm2, xmm2);
packssdw(xmm2, xmm2);
// GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
movaps(xmm3, xmm1);
mulps(xmm3, Xmm(4 + i));
cvttps2dq(xmm3, xmm3);
packssdw(xmm3, xmm3);
// m_env.d[i].ga = g.upl16(a);
punpcklwd(xmm2, xmm3);
movdqa(xmmword[&m_env.d[i].ga], xmm2);
}
}
else
{
// GSVector4i c = GSVector4i(vertices[0].c);
movaps(xmm0, xmmword[ecx]);
cvttps2dq(xmm0, xmm0);
// c = c.upl16(c.zwxy());
movdqa(xmm1, xmm0);
pshufd(xmm1, xmm1, _MM_SHUFFLE(1, 0, 3, 2));
punpcklwd(xmm0, xmm1);
// if(!tme) c = c.srl16(7);
if(m_env.sel.tfx == TFX_NONE)
{
psrlw(xmm0, 7);
}
// m_env.c.rb = c.xxxx();
// m_env.c.ga = c.zzzz();
movdqa(xmm1, xmm0);
pshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
movdqa(xmmword[&m_env.c.rb], xmm0);
movdqa(xmmword[&m_env.c.ga], xmm1);
}
}
// Per-lane multipliers that the generated code applies to the dscan deltas.
// Generate() loads entry [i] into xmm(3 + i):
//   [0]      -> xmm3: 4.0f in every lane, used for the m_env.d4.* values
//   [1]..[4] -> xmm4..xmm7: used for m_env.d[0]..m_env.d[3] respectively
// Note that m_env.d[i] is therefore computed with m_shift[i + 1], not m_shift[i].
const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] =
{
GSVector4(4.0f, 4.0f, 4.0f, 4.0f),
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
GSVector4(-1.0f, 0.0f, 1.0f, 2.0f),
GSVector4(-2.0f, -1.0f, 0.0f, 1.0f),
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
};

View File

@ -0,0 +1,50 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSScanlineEnvironment.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
using namespace Xbyak;
// Xbyak-based code generator for the primitive setup routine.
// The constructor reads the selector in the given GSScanlineEnvironment and
// emits code that takes two stack arguments (vertices, dscan) and writes the
// depth/fog, texture and color values into that same environment.
// TODO: x64 (the constructor currently #errors out under _M_AMD64)
class GSSetupPrimCodeGenerator : public CodeGenerator
{
// private assignment operator; no definition appears in the .cpp, so
// assigning one generator to another is not supported
void operator = (const GSSetupPrimCodeGenerator&);
// step multipliers; entry [i] is loaded into xmm(3 + i) by Generate()
static const GSVector4 m_shift[5];
// NOTE(review): not referenced by any of this class's methods - presumably
// reserved for CPU feature checks; confirm before relying on or removing it
util::Cpu m_cpu;
// environment whose selector (sel) is read at generation time and whose
// fields the generated code writes through absolute addresses
GSScanlineEnvironment& m_env;
// which values need setting up: z = depth, f = fog, t = texture, c = color
struct {DWORD z:1, f:1, t:1, c:1;} m_en;
// emits the whole routine; called once from the constructor
void Generate();
// emit the individual sections; each returns without emitting anything
// when its value is disabled in m_en
void Depth();
void Texture();
void Color();
public:
// env is held by reference and must outlive the generator and its code
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env);
};

View File

@ -1327,6 +1327,10 @@
RelativePath=".\GSSettingsDlg.cpp" RelativePath=".\GSSettingsDlg.cpp"
> >
</File> </File>
<File
RelativePath=".\GSSetupPrimCodeGenerator.cpp"
>
</File>
<File <File
RelativePath=".\GSState.cpp" RelativePath=".\GSState.cpp"
> >
@ -1737,6 +1741,10 @@
RelativePath=".\GSSettingsDlg.h" RelativePath=".\GSSettingsDlg.h"
> >
</File> </File>
<File
RelativePath=".\GSSetupPrimCodeGenerator.h"
>
</File>
<File <File
RelativePath=".\GSState.h" RelativePath=".\GSState.h"
> >

View File

@ -1202,7 +1202,7 @@ protected:
} }
void movd(const Mmx& mmx, const Address& addr) void movd(const Mmx& mmx, const Address& addr)
{ {
if (mmx.isXMM()) db(0x66); ASSERT(!addr.isBit(32)); // don't use dword ptr, bogus, won't output 0x66 for xmm dest op
opModM(addr, Reg(mmx.getIdx(), Operand::REG, mmx.getBit() / 8), 0x0F, B01101110); opModM(addr, Reg(mmx.getIdx(), Operand::REG, mmx.getBit() / 8), 0x0F, B01101110);
} }
void movd(const Mmx& mmx, const Reg32& reg) void movd(const Mmx& mmx, const Reg32& reg)