GSdx: upgraded the ps1 renderer to use runtime generated code, too.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@535 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-02-19 13:13:20 +00:00
parent 29ea3c8ebc
commit 4907dbda42
27 changed files with 3524 additions and 3050 deletions

View File

@ -25,6 +25,7 @@
GPUDrawScanline::GPUDrawScanline(GPUState* state, int id) GPUDrawScanline::GPUDrawScanline(GPUState* state, int id)
: m_state(state) : m_state(state)
, m_id(id) , m_id(id)
, m_ds(m_env)
{ {
} }
@ -40,7 +41,8 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.sel = p->sel; m_env.sel = p->sel;
m_env.mem = &m_state->m_mem; m_env.vm = m_state->m_mem.GetPixelAddress(0, 0);
m_env.fbw = 10 + m_state->m_mem.GetScale().cx;
if(m_env.sel.tme) if(m_env.sel.tme)
{ {
@ -54,24 +56,25 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
u = ~(env.TWIN.TWW << 3) & 0xff; u = ~(env.TWIN.TWW << 3) & 0xff;
v = ~(env.TWIN.TWH << 3) & 0xff; v = ~(env.TWIN.TWH << 3) & 0xff;
m_env.u[0] = GSVector4i((u << 16) | u); m_env.twin[0].u = GSVector4i((u << 16) | u);
m_env.v[0] = GSVector4i((v << 16) | v); m_env.twin[0].v = GSVector4i((v << 16) | v);
u = env.TWIN.TWX << 3; u = env.TWIN.TWX << 3;
v = env.TWIN.TWY << 3; v = env.TWIN.TWY << 3;
m_env.u[1] = GSVector4i((u << 16) | u) & ~m_env.u[0]; m_env.twin[1].u = GSVector4i((u << 16) | u) & ~m_env.twin[0].u;
m_env.v[1] = GSVector4i((v << 16) | v) & ~m_env.v[0]; m_env.twin[1].v = GSVector4i((v << 16) | v) & ~m_env.twin[0].v;
} }
} }
m_env.a = GSVector4i(env.PRIM.ABE ? 0xffffffff : 0); //
m_env.md = GSVector4i(env.STATUS.MD ? 0x80008000 : 0);
f->sl = m_ds.Lookup(m_env.sel); f->ssl = m_ds.Lookup(m_env.sel);
f->sr = NULL; // TODO f->sr = NULL; // TODO
//
DWORD sel = 0; DWORD sel = 0;
sel |= (data->primclass == GS_SPRITE_CLASS ? 1 : 0) << 0; sel |= (data->primclass == GS_SPRITE_CLASS ? 1 : 0) << 0;
@ -94,13 +97,13 @@ void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& ds
t = t.ps32(t); t = t.ps32(t);
t = t.upl16(t); t = t.upl16(t);
m_env.u[2] = t.xxxx(); m_env.twin[2].u = t.xxxx();
m_env.v[2] = t.yyyy(); m_env.twin[2].v = t.yyyy();
} }
else else
{ {
m_env.u[2] = GSVector4i::x00ff(); m_env.twin[2].u = GSVector4i::x00ff();
m_env.v[2] = GSVector4i::x00ff(); m_env.twin[2].v = GSVector4i::x00ff();
} }
} }
@ -114,770 +117,24 @@ void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& ds
if(tme) if(tme)
{ {
m_env.dst8 = dtc8.upl16(dtc8); m_env.d8.st = dtc8.upl16(dtc8);
m_env.ds = GSVector4i(dt.xxxx() * ps0123).ps32(GSVector4i(dt.xxxx() * ps4567)); m_env.d.s = GSVector4i(dt.xxxx() * ps0123).ps32(GSVector4i(dt.xxxx() * ps4567));
m_env.dt = GSVector4i(dt.yyyy() * ps0123).ps32(GSVector4i(dt.yyyy() * ps4567)); m_env.d.t = GSVector4i(dt.yyyy() * ps0123).ps32(GSVector4i(dt.yyyy() * ps4567));
} }
if(iip) if(iip)
{ {
m_env.dc8 = dtc8.uph16(dtc8); m_env.d8.c = dtc8.uph16(dtc8);
m_env.dr = GSVector4i(dc.xxxx() * ps0123).ps32(GSVector4i(dc.xxxx() * ps4567)); m_env.d.r = GSVector4i(dc.xxxx() * ps0123).ps32(GSVector4i(dc.xxxx() * ps4567));
m_env.dg = GSVector4i(dc.yyyy() * ps0123).ps32(GSVector4i(dc.yyyy() * ps4567)); m_env.d.g = GSVector4i(dc.yyyy() * ps0123).ps32(GSVector4i(dc.yyyy() * ps4567));
m_env.db = GSVector4i(dc.zzzz() * ps0123).ps32(GSVector4i(dc.zzzz() * ps4567)); m_env.d.b = GSVector4i(dc.zzzz() * ps0123).ps32(GSVector4i(dc.zzzz() * ps4567));
}
}
// Fetches the texels for eight pixels and unpacks them into per-channel vectors.
//
// ltf  - non-zero: four-tap bilinear fetch; zero: single-tap fetch
// tlu  - non-zero: gather through the CLUT (tex is read as bytes); zero: gather 16-bit texels directly
// twin - non-zero: apply the texture window (mask m_env.u/v[0], then add m_env.u/v[1]);
//        zero: limit the coordinates with m_env.u/v[2]
// test - in/out skip mask; lanes that sample a blank (all-zero) texel are OR-ed into it
// s, t - texture coordinates; the code treats the high byte of each 16-bit lane as the
//        integer part and the low byte as the fraction
// c    - out: c[0], c[1], c[2] receive the 5-bit r, g, b fields moved up to bits 3..7
//        (interpolated on the bilinear path), c[3] receives a lane mask derived from texel bit 15
void GPUDrawScanline::SampleTexture(DWORD ltf, DWORD tlu, DWORD twin, GSVector4i& test, const GSVector4i& s, const GSVector4i& t, GSVector4i* c)
{
	const void* RESTRICT tex = m_env.tex;
	const WORD* RESTRICT clut = m_env.clut;

	if(ltf)
	{
		GSVector4i u = s.sub16(GSVector4i(0x00200020)); // - 0.125f
		GSVector4i v = t.sub16(GSVector4i(0x00200020)); // - 0.125f

		// integer texel coordinates of the top-left tap and of its right/bottom neighbours

		GSVector4i u0 = u.srl16(8);
		GSVector4i v0 = v.srl16(8);

		GSVector4i u1 = u0.add16(GSVector4i::x0001());
		GSVector4i v1 = v0.add16(GSVector4i::x0001());

		// fractional parts, used as the interpolation weights below

		GSVector4i uf = (u & GSVector4i::x00ff()) << 7;
		GSVector4i vf = (v & GSVector4i::x00ff()) << 7;

		if(twin)
		{
			u0 = (u0 & m_env.u[0]).add16(m_env.u[1]);
			v0 = (v0 & m_env.v[0]).add16(m_env.v[1]);
			u1 = (u1 & m_env.u[0]).add16(m_env.u[1]);
			v1 = (v1 & m_env.v[0]).add16(m_env.v[1]);
		}
		else
		{
			u0 = u0.min_i16(m_env.u[2]);
			v0 = v0.min_i16(m_env.v[2]);
			u1 = u1.min_i16(m_env.u[2]);
			v1 = v1.min_i16(m_env.v[2]);
		}

		// v in the high byte, u in the low byte of each 16-bit address

		GSVector4i addr00 = v0.sll16(8) | u0;
		GSVector4i addr01 = v0.sll16(8) | u1;
		GSVector4i addr10 = v1.sll16(8) | u0;
		GSVector4i addr11 = v1.sll16(8) | u1;

		GSVector4i c00, c01, c10, c11;

		if(tlu)
		{
			c00 = addr00.gather16_16((const BYTE*)tex, clut);
			c01 = addr01.gather16_16((const BYTE*)tex, clut);
			c10 = addr10.gather16_16((const BYTE*)tex, clut);
			c11 = addr11.gather16_16((const BYTE*)tex, clut);
		}
		else
		{
			// the bottom row must come from addr10/addr11 (v1), exactly like the CLUT path above;
			// reading addr00/addr01 again would interpolate the top row with itself

			c00 = addr00.gather16_16((const WORD*)tex);
			c01 = addr01.gather16_16((const WORD*)tex);
			c10 = addr10.gather16_16((const WORD*)tex);
			c11 = addr11.gather16_16((const WORD*)tex);
		}

		// per channel: unpack the four taps, blend horizontally with uf, then vertically with vf

		GSVector4i r00 = (c00 & 0x001f001f) << 3;
		GSVector4i r01 = (c01 & 0x001f001f) << 3;
		GSVector4i r10 = (c10 & 0x001f001f) << 3;
		GSVector4i r11 = (c11 & 0x001f001f) << 3;

		r00 = r00.lerp16<0>(r01, uf);
		r10 = r10.lerp16<0>(r11, uf);
		c[0] = r00.lerp16<0>(r10, vf);

		GSVector4i g00 = (c00 & 0x03e003e0) >> 2;
		GSVector4i g01 = (c01 & 0x03e003e0) >> 2;
		GSVector4i g10 = (c10 & 0x03e003e0) >> 2;
		GSVector4i g11 = (c11 & 0x03e003e0) >> 2;

		g00 = g00.lerp16<0>(g01, uf);
		g10 = g10.lerp16<0>(g11, uf);
		c[1] = g00.lerp16<0>(g10, vf);

		GSVector4i b00 = (c00 & 0x7c007c00) >> 7;
		GSVector4i b01 = (c01 & 0x7c007c00) >> 7;
		GSVector4i b10 = (c10 & 0x7c007c00) >> 7;
		GSVector4i b11 = (c11 & 0x7c007c00) >> 7;

		b00 = b00.lerp16<0>(b01, uf);
		b10 = b10.lerp16<0>(b11, uf);
		c[2] = b00.lerp16<0>(b10, vf);

		GSVector4i a00 = (c00 & 0x80008000) >> 8;
		GSVector4i a01 = (c01 & 0x80008000) >> 8;
		GSVector4i a10 = (c10 & 0x80008000) >> 8;
		GSVector4i a11 = (c11 & 0x80008000) >> 8;

		a00 = a00.lerp16<0>(a01, uf);
		a10 = a10.lerp16<0>(a11, uf);

		// any non-zero interpolated bit-15 contribution turns the whole lane on

		c[3] = a00.lerp16<0>(a10, vf).gt16(GSVector4i::zero());

		// mask out blank pixels (not perfect)

		test |=
			c[0].eq16(GSVector4i::zero()) &
			c[1].eq16(GSVector4i::zero()) &
			c[2].eq16(GSVector4i::zero()) &
			c[3].eq16(GSVector4i::zero());
	}
	else
	{
		GSVector4i u = s.srl16(8);
		GSVector4i v = t.srl16(8);

		if(twin)
		{
			u = (u & m_env.u[0]).add16(m_env.u[1]);
			v = (v & m_env.v[0]).add16(m_env.v[1]);
		}
		else
		{
			u = u.min_i16(m_env.u[2]);
			v = v.min_i16(m_env.v[2]);
		}

		GSVector4i addr = v.sll16(8) | u;

		GSVector4i c00;

		if(tlu)
		{
			c00 = addr.gather16_16((const BYTE*)tex, clut);
		}
		else
		{
			c00 = addr.gather16_16((const WORD*)tex);
		}

		test |= c00.eq16(GSVector4i::zero()); // mask out blank pixels

		c[0] = (c00 & 0x001f001f) << 3;
		c[1] = (c00 & 0x03e003e0) >> 2;
		c[2] = (c00 & 0x7c007c00) >> 7;
		c[3] = c00.sra16(15);
	}
}
// Combines the interpolated vertex colour (r, g, b) with the colour already held in c.
//
// tfx - 0 or 1: c[0..2] is replaced by the vertex colour shifted right by 7
//       2:      c[0..2] is modulated by the vertex colour and clamped (tme | tge)
//       3:      c is left untouched (decal, tme only)
//       Any other value is declared unreachable to the compiler.
// c   - in/out colour channels; c[3] is never modified here
void GPUDrawScanline::ColorTFX(DWORD tfx, const GSVector4i& r, const GSVector4i& g, const GSVector4i& b, GSVector4i* c)
{
	if(tfx <= 1)
	{
		// no texture contribution (tfx = 0 or tfx = tge)

		c[0] = r.srl16(7);
		c[1] = g.srl16(7);
		c[2] = b.srl16(7);
	}
	else if(tfx == 2)
	{
		// modulate (tfx = tme | tge)

		c[0] = c[0].modulate16<1>(r).clamp8();
		c[1] = c[1].modulate16<1>(g).clamp8();
		c[2] = c[2].modulate16<1>(b).clamp8();
	}
	else if(tfx != 3)
	{
		// tfx is a two-bit field, nothing else can occur

		__assume(0);
	}
}
// Blends the source colour in c with eight destination pixels d.
//
// abr - blend equation, applied independently to each channel:
//       0: average of destination and source
//       1: destination + source, saturating
//       2: destination - source, saturating
//       3: destination + source / 4, saturating
//       Any other value is declared unreachable to the compiler.
// tme - non-zero: the blended value is selected per pixel through the mask in c[3]
//       (blend8 presumably takes the blended value where the mask is set - confirm against GSVector4i);
//       zero: every pixel takes the blended value and c[3] is cleared
// d   - destination pixels as read from the frame buffer (5:5:5 fields in each 16-bit lane)
// c   - in/out: c[0..2] hold r, g, b; c[3] holds the per-pixel mask
void GPUDrawScanline::AlphaBlend(UINT32 abr, UINT32 tme, const GSVector4i& d, GSVector4i* c)
{
	// expand the destination fields to the same bit positions as the source channels

	GSVector4i r = (d & 0x001f001f) << 3;
	GSVector4i g = (d & 0x03e003e0) >> 2;
	GSVector4i b = (d & 0x7c007c00) >> 7;

	switch(abr)
	{
	case 0:
		// each destination channel is averaged with its own source channel
		r = r.avg8(c[0]);
		g = g.avg8(c[1]);
		b = b.avg8(c[2]);
		break;
	case 1:
		r = r.addus8(c[0]);
		g = g.addus8(c[1]);
		b = b.addus8(c[2]);
		break;
	case 2:
		r = r.subus8(c[0]);
		g = g.subus8(c[1]);
		b = b.subus8(c[2]);
		break;
	case 3:
		r = r.addus8(c[0].srl16(2));
		g = g.addus8(c[1].srl16(2));
		b = b.addus8(c[2].srl16(2));
		break;
	default:
		__assume(0);
	}

	if(tme) // per pixel
	{
		c[0] = c[0].blend8(r, c[3]);
		c[1] = c[1].blend8(g, c[3]);
		c[2] = c[2].blend8(b, c[3]);
	}
	else
	{
		c[0] = r;
		c[1] = g;
		c[2] = b;
		c[3] = GSVector4i::zero();
	}
}
// Packs the colour channels back into 16-bit pixels and stores the unmasked ones.
//
// fb     - destination of the first pixel
// test   - skip mask; pixel i is stored only when test.u16[i] is zero
// c      - c[0..2] = r, g, b (bits 3..7 are kept), c[3] = flag (bit 7 is kept and lands in bit 15)
// pixels - number of pixels to consider; the first pixel is always examined, even if pixels < 1
void GPUDrawScanline::WriteFrame(WORD* RESTRICT fb, const GSVector4i& test, const GSVector4i* c, int pixels)
{
	GSVector4i red = (c[0] & 0x00f800f8) >> 3;
	GSVector4i green = (c[1] & 0x00f800f8) << 2;
	GSVector4i blue = (c[2] & 0x00f800f8) << 7;
	GSVector4i flag = (c[3] & 0x00800080) << 8;

	// m_env.md is OR-ed into every pixel that gets written

	GSVector4i packed = red | green | blue | flag | m_env.md;

	for(int i = 0;;)
	{
		if(test.u16[i] == 0)
		{
			fb[i] = packed.u16[i];
		}

		if(++i >= pixels)
		{
			break;
		}
	}
}
//
// Dither offsets added to the colour channels when sel.dtd is set.
// Each row holds one line of a 4x4 pattern repeated four times; DrawScanline picks the row
// with (top & 3) and starts an unaligned 8-lane load at column (left & 3), so the repetition
// keeps that load inside the row for every starting phase.
__declspec(align(16)) static WORD s_dither[4][16] =
{
{7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1},
{2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4},
{1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7},
{4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2},
};
void GPUDrawScanline::DrawScanline(int top, int left, int right, const GSVertexSW& v)
{
GSVector4i s, t;
GSVector4i r, g, b;
if(m_env.sel.tme)
{
GSVector4i vt = GSVector4i(v.t).xxzzl();
s = vt.xxxx().add16(m_env.ds);
t = vt.yyyy().add16(m_env.dt);
}
GSVector4i vc = GSVector4i(v.c).xxzzlh();
r = vc.xxxx();
g = vc.yyyy();
b = vc.zzzz();
if(m_env.sel.iip)
{
r = r.add16(m_env.dr);
g = g.add16(m_env.dg);
b = b.add16(m_env.db);
}
GSVector4i dither;
if(m_env.sel.dtd)
{
dither = GSVector4i::load<false>(&s_dither[top & 3][left & 3]);
}
int steps = right - left;
WORD* fb = m_env.mem->GetPixelAddress(left, top);
while(1)
{
do
{
int pixels = GSVector4i::min_i16(steps, 8);
GSVector4i test = GSVector4i::zero();
GSVector4i d = GSVector4i::zero();
if(m_env.sel.rfb) // me | abe
{
d = GSVector4i::load<false>(fb);
if(m_env.sel.me)
{
test = d.sra16(15);
if(test.alltrue())
{
continue;
}
}
}
GSVector4i c[4];
if(m_env.sel.tme)
{
SampleTexture(m_env.sel.ltf, m_env.sel.tlu, m_env.sel.twin, test, s, t, c);
}
ColorTFX(m_env.sel.tfx, r, g, b, c);
if(m_env.sel.abe)
{
AlphaBlend(m_env.sel.abr, m_env.sel.tme, d, c);
}
if(m_env.sel.dtd)
{
c[0] = c[0].addus8(dither);
c[1] = c[1].addus8(dither);
c[2] = c[2].addus8(dither);
}
WriteFrame(fb, test, c, pixels);
}
while(0);
if(steps <= 8) break;
steps -= 8;
fb += 8;
if(m_env.sel.tme)
{
GSVector4i dst8 = m_env.dst8;
s = s.add16(dst8.xxxx());
t = t.add16(dst8.yyyy());
}
if(m_env.sel.iip)
{
GSVector4i dc8 = m_env.dc8;
r = r.add16(dc8.xxxx());
g = g.add16(dc8.yyyy());
b = b.add16(dc8.zzzz());
}
}
}
template<DWORD sel>
void GPUDrawScanline::DrawScanlineEx(int top, int left, int right, const GSVertexSW& v)
{
DWORD iip = (sel >> 0) & 1;
DWORD me = (sel >> 1) & 1;
DWORD abe = (sel >> 2) & 1;
DWORD abr = (sel >> 3) & 3;
// DWORD tge = (sel >> 5) & 1;
DWORD tme = (sel >> 6) & 1;
DWORD twin = (sel >> 7) & 1;
DWORD rfb = (sel >> 1) & 3;
DWORD tfx = (sel >> 5) & 3;
GSVector4i s, t;
GSVector4i r, g, b;
if(tme)
{
GSVector4i vt = GSVector4i(v.t).xxzzl();
s = vt.xxxx().add16(m_env.ds);
t = vt.yyyy().add16(m_env.dt);
}
GSVector4i vc = GSVector4i(v.c).xxzzlh();
r = vc.xxxx();
g = vc.yyyy();
b = vc.zzzz();
if(iip)
{
r = r.add16(m_env.dr);
g = g.add16(m_env.dg);
b = b.add16(m_env.db);
}
GSVector4i dither;
if(m_env.sel.dtd)
{
dither = GSVector4i::load<false>(&s_dither[top & 3][left & 3]);
}
int steps = right - left;
WORD* fb = m_env.mem->GetPixelAddress(left, top);
while(1)
{
do
{
int pixels = GSVector4i::min_i16(steps, 8);
GSVector4i test = GSVector4i::zero();
GSVector4i d = GSVector4i::zero();
if(rfb) // me | abe
{
d = GSVector4i::load<false>(fb);
if(me)
{
test = d.sra16(15);
if(test.alltrue())
{
continue;
}
}
}
GSVector4i c[4];
if(tme)
{
SampleTexture(m_env.sel.ltf, m_env.sel.tlu, twin, test, s, t, c);
}
ColorTFX(tfx, r, g, b, c);
if(abe)
{
AlphaBlend(abr, tme, d, c);
}
if(m_env.sel.dtd)
{
c[0] = c[0].addus8(dither);
c[1] = c[1].addus8(dither);
c[2] = c[2].addus8(dither);
}
WriteFrame(fb, test, c, pixels);
}
while(0);
if(steps <= 8) break;
steps -= 8;
fb += 8;
if(tme)
{
GSVector4i dst8 = m_env.dst8;
s = s.add16(dst8.xxxx());
t = t.add16(dst8.yyyy());
}
if(iip)
{
GSVector4i dc8 = m_env.dc8;
r = r.add16(dc8.xxxx());
g = g.add16(dc8.yyyy());
b = b.add16(dc8.zzzz());
}
}
}
// Fills the default-function table. Every slot first gets the generic DrawScanline; when
// FAST_DRAWSCANLINE is defined, slot n (0x00..0xff) is then pointed at DrawScanlineEx<n>.
GPUDrawScanline::GPUDrawScanlineMap::GPUDrawScanlineMap()
{
	for(int slot = 0; slot < countof(m_default); slot++)
	{
		m_default[slot] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanline;
	}

#ifdef FAST_DRAWSCANLINE

	// A template argument has to be a constant, so the 256 assignments are spelled out by
	// the preprocessor: one macro per slot, one macro per row of sixteen slots.

	#define GPU_DS_SLOT(n) m_default[(n)] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<(n)>;

	#define GPU_DS_ROW(base) \
		GPU_DS_SLOT((base) + 0x0) GPU_DS_SLOT((base) + 0x1) GPU_DS_SLOT((base) + 0x2) GPU_DS_SLOT((base) + 0x3) \
		GPU_DS_SLOT((base) + 0x4) GPU_DS_SLOT((base) + 0x5) GPU_DS_SLOT((base) + 0x6) GPU_DS_SLOT((base) + 0x7) \
		GPU_DS_SLOT((base) + 0x8) GPU_DS_SLOT((base) + 0x9) GPU_DS_SLOT((base) + 0xa) GPU_DS_SLOT((base) + 0xb) \
		GPU_DS_SLOT((base) + 0xc) GPU_DS_SLOT((base) + 0xd) GPU_DS_SLOT((base) + 0xe) GPU_DS_SLOT((base) + 0xf)

	GPU_DS_ROW(0x00)
	GPU_DS_ROW(0x10)
	GPU_DS_ROW(0x20)
	GPU_DS_ROW(0x30)
	GPU_DS_ROW(0x40)
	GPU_DS_ROW(0x50)
	GPU_DS_ROW(0x60)
	GPU_DS_ROW(0x70)
	GPU_DS_ROW(0x80)
	GPU_DS_ROW(0x90)
	GPU_DS_ROW(0xa0)
	GPU_DS_ROW(0xb0)
	GPU_DS_ROW(0xc0)
	GPU_DS_ROW(0xd0)
	GPU_DS_ROW(0xe0)
	GPU_DS_ROW(0xf0)

	#undef GPU_DS_ROW
	#undef GPU_DS_SLOT

#endif
}
// Returns the scanline routine stored in m_default for the given selector key.
// The key is wrapped in a GPUScanlineSelector, whose conversion to DWORD yields key & 0xff,
// so only the low eight selector bits choose the table slot.
IDrawScanline::DrawScanlinePtr GPUDrawScanline::GPUDrawScanlineMap::GetDefaultFunction(DWORD key)
{
GPUScanlineSelector sel;
sel.key = key;
// indexing goes through GPUScanlineSelector::operator DWORD()
return m_default[sel];
} }
// //
@ -908,3 +165,14 @@ IDrawScanline::SetupPrimPtr GPUDrawScanline::GPUSetupPrimMap::GetDefaultFunction
return m_default[sprite][tme][iip]; return m_default[sprite][tme][iip];
} }
//
// Binds the map to the scanline environment; the reference is kept in m_env and later
// handed to every code generator made by Create().
GPUDrawScanline::GPUDrawScanlineMap::GPUDrawScanlineMap(GPUScanlineEnvironment& env) : m_env(env)
{
}
// Allocates a code generator for one scanline variant.
//
// key     - selector key of the requested variant; not used here
//           (presumably the generator reads the selector from m_env - confirm)
// ptr     - forwarded to the generator together with maxsize
// maxsize - forwarded to the generator together with ptr
//
// Returns a generator allocated with new; it is not released in this function.
GPUDrawScanlineCodeGenerator* GPUDrawScanline::GPUDrawScanlineMap::Create(DWORD key, void* ptr, size_t maxsize)
{
	GPUDrawScanlineCodeGenerator* generator = new GPUDrawScanlineCodeGenerator(m_env, ptr, maxsize);

	return generator;
}

View File

@ -24,62 +24,8 @@
#include "GPUState.h" #include "GPUState.h"
#include "GSRasterizer.h" #include "GSRasterizer.h"
#include "GSAlignedClass.h" #include "GSAlignedClass.h"
#include "GPUScanlineEnvironment.h"
union GPUScanlineSelector #include "GPUDrawScanlineCodeGenerator.h"
{
struct
{
DWORD iip:1; // 0
DWORD me:1; // 1
DWORD abe:1; // 2
DWORD abr:2; // 3
DWORD tge:1; // 5
DWORD tme:1; // 6
DWORD twin:1; // 7
DWORD tlu:1; // 8
DWORD dtd:1; // 9
DWORD ltf:1; // 10
// DWORD dte:1: // 11
};
struct
{
DWORD _pad1:1; // 0
DWORD rfb:2; // 1
DWORD _pad2:2; // 3
DWORD tfx:2; // 5
};
DWORD key;
operator DWORD() {return key & 0xff;}
};
__declspec(align(16)) struct GPUScanlineEnvironment
{
GPUScanlineSelector sel;
GPULocalMemory* mem;
const void* tex;
const WORD* clut;
GSVector4i u[3];
GSVector4i v[3];
GSVector4i a;
GSVector4i md; // similar to gs fba
GSVector4i ds, dt, dst8;
GSVector4i dr, dg, db, dc8;
};
__declspec(align(16)) struct GPUScanlineParam
{
GPUScanlineSelector sel;
const void* tex;
const WORD* clut;
};
class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
{ {
@ -87,20 +33,6 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
// //
class GPUDrawScanlineMap : public GSFunctionMap<DWORD, DrawScanlinePtr>
{
DrawScanlinePtr m_default[256];
public:
GPUDrawScanlineMap();
DrawScanlinePtr GetDefaultFunction(DWORD key);
};
GPUDrawScanlineMap m_ds;
//
class GPUSetupPrimMap : public GSFunctionMap<DWORD, SetupPrimPtr> class GPUSetupPrimMap : public GSFunctionMap<DWORD, SetupPrimPtr>
{ {
SetupPrimPtr m_default[2][2][2]; SetupPrimPtr m_default[2][2][2];
@ -113,23 +45,22 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
GPUSetupPrimMap m_sp; GPUSetupPrimMap m_sp;
//
template<DWORD sprite, DWORD tme, DWORD iip> template<DWORD sprite, DWORD tme, DWORD iip>
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan); void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
// //
__forceinline void SampleTexture(DWORD ltf, DWORD tlu, DWORD twin, GSVector4i& test, const GSVector4i& s, const GSVector4i& t, GSVector4i* c); class GPUDrawScanlineMap : public GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, DWORD, DrawScanlineStaticPtr>
__forceinline void ColorTFX(DWORD tfx, const GSVector4i& r, const GSVector4i& g, const GSVector4i& b, GSVector4i* c); {
__forceinline void AlphaBlend(UINT32 abr, UINT32 tme, const GSVector4i& d, GSVector4i* c); GPUScanlineEnvironment& m_env;
__forceinline void WriteFrame(WORD* RESTRICT fb, const GSVector4i& test, const GSVector4i* c, int pixels);
public:
GPUDrawScanlineMap(GPUScanlineEnvironment& env);
GPUDrawScanlineCodeGenerator* Create(DWORD key, void* ptr, size_t maxsize);
} m_ds;
void DrawScanline(int top, int left, int right, const GSVertexSW& v); void DrawScanline(int top, int left, int right, const GSVertexSW& v);
template<DWORD sel>
void DrawScanlineEx(int top, int left, int right, const GSVertexSW& v);
protected: protected:
GPUState* m_state; GPUState* m_state;
int m_id; int m_id;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,63 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GPUScanlineEnvironment.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
using namespace Xbyak;
// Xbyak-based code generator for the PS1 (GPU) scanline drawing routine.
// It is constructed with a GPUScanlineEnvironment reference plus a destination
// buffer (ptr, maxsize).
// NOTE(review): the private methods below are presumably the individual
// emission stages of the scanline routine, named after the step they emit;
// their bodies are not visible in this header - confirm against the .cpp.
class GPUDrawScanlineCodeGenerator : public CodeGenerator
{
// Declared private with no definition visible here - presumably to forbid assignment.
void operator = (const GPUDrawScanlineCodeGenerator&);
// Static lookup tables, defined outside this header.
static const GSVector4i m_test[8];
static const WORD m_dither[4][16];
util::Cpu m_cpu;
// Environment shared with the owning GPUDrawScanline; held by reference.
GPUScanlineEnvironment& m_env;
void Generate();
void Init(int params);
void Step();
void TestMask();
void SampleTexture();
void ColorTFX();
void AlphaBlend();
void Dither();
void WriteFrame();
void ReadTexel(const Xmm& dst, const Xmm& addr);
// Small emission helpers operating on XMM registers.
template<int shift> void modulate16(const Xmm& a, const Operand& f);
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Operand& f);
void clamp16(const Xmm& a, const Xmm& zero);
void alltrue();
void blend8(const Xmm& a, const Xmm& b);
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
public:
// env: environment the generated code refers to; ptr/maxsize: output buffer for the generated code.
GPUDrawScanlineCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize);
};

View File

@ -89,8 +89,6 @@ const WORD* GPULocalMemory::GetCLUT(int tp, int cx, int cy)
WORD* src = GetPixelAddressScaled(cx << 4, cy); WORD* src = GetPixelAddressScaled(cx << 4, cy);
WORD* dst = m_clut.buff; WORD* dst = m_clut.buff;
// TODO: at normal horizontal resolution just return src
if(m_scale.cx == 0) if(m_scale.cx == 0)
{ {
memcpy(dst, src, (tp == 0 ? 16 : 256) * 2); memcpy(dst, src, (tp == 0 ? 16 : 256) * 2);

View File

@ -128,17 +128,22 @@ protected:
p.sel.key = 0; p.sel.key = 0;
p.sel.iip = env.PRIM.IIP; p.sel.iip = env.PRIM.IIP;
p.sel.me = env.STATUS.ME; p.sel.me = env.STATUS.ME;
if(env.PRIM.ABE)
{
p.sel.abe = env.PRIM.ABE; p.sel.abe = env.PRIM.ABE;
p.sel.abr = env.STATUS.ABR; p.sel.abr = env.STATUS.ABR;
}
p.sel.tge = env.PRIM.TGE; p.sel.tge = env.PRIM.TGE;
p.sel.tme = env.PRIM.TME;
p.sel.tlu = env.STATUS.TP < 2;
p.sel.twin = (env.TWIN.ai32 & 0xfffff) != 0;
p.sel.dtd = m_dither ? env.STATUS.DTD : 0;
p.sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0;
if(env.PRIM.TME) if(env.PRIM.TME)
{ {
p.sel.tme = env.PRIM.TME;
p.sel.tlu = env.STATUS.TP < 2;
p.sel.twin = (env.TWIN.ai32 & 0xfffff) != 0;
p.sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0;
const void* t = m_mem.GetTexture(env.STATUS.TP, env.STATUS.TX, env.STATUS.TY); const void* t = m_mem.GetTexture(env.STATUS.TP, env.STATUS.TX, env.STATUS.TY);
if(!t) {ASSERT(0); return;} if(!t) {ASSERT(0); return;}
@ -147,6 +152,10 @@ protected:
p.clut = m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y); p.clut = m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y);
} }
p.sel.dtd = m_dither ? env.STATUS.DTD : 0;
p.sel.md = env.STATUS.MD;
p.sel.sprite = env.PRIM.TYPE == GPU_SPRITE;
// //
GSRasterizerData data; GSRasterizerData data;

View File

@ -0,0 +1,82 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSVector.h"
#include "GPULocalMemory.h"
// Packed set of render-state flags that selects one scanline routine.
// The flags are overlaid on the single DWORD `key`, which is what the function
// map is looked up with (via the conversion operator below).
// The trailing comment on each field is its starting bit position.
union GPUScanlineSelector
{
// Individual state flags.
struct
{
DWORD iip:1; // 0
DWORD me:1; // 1
DWORD abe:1; // 2
DWORD abr:2; // 3
DWORD tge:1; // 5
DWORD tme:1; // 6
DWORD twin:1; // 7
DWORD tlu:1; // 8
DWORD dtd:1; // 9
DWORD ltf:1; // 10
DWORD md:1; // 11
DWORD sprite:1; // 12
};
// Alternate view of the same bits: per the annotated positions, rfb spans
// bits 1-2 (me + abe) and tfx spans bits 5-6 (tge + tme).
struct
{
DWORD _pad1:1; // 0
DWORD rfb:2; // 1
DWORD _pad2:2; // 3
DWORD tfx:2; // 5
};
// Raw value of all flags combined.
DWORD key;
// Converts to the full key (no masking).
operator DWORD() {return key;}
};
// Per-draw parameters handed from the renderer to the scanline drawer:
// the state selector plus the texture and CLUT pointers the renderer fills in
// when texturing is enabled.
__declspec(align(16)) struct GPUScanlineParam
{
GPUScanlineSelector sel;
const void* tex; // texture data (set only when texturing is enabled)
const WORD* clut; // colour lookup table as 16-bit entries
};
// State block shared between GPUDrawScanline and the generated scanline code.
// The code generators keep a reference to this structure.
__declspec(align(16)) struct GPUScanlineEnvironment
{
GPUScanlineSelector sel;
// GPULocalMemory* mem; // TODO: obsolete
void* vm; // base address of video memory (pixel address of 0,0)
const void* tex;
const WORD* clut;
DWORD fbw; // 10 + m_scale.cx
// GSVector4i md; // similar to gs fba
// Texture window: BeginDraw fills [0] with the inverted window masks and [1]
// with the window offsets masked by ~[0]. [2] is not written in the visible code.
struct {GSVector4i u, v;} twin[3];
// NOTE(review): d / d8 / temp are presumably per-pixel deltas, 8-pixel step
// deltas and scratch storage for the generated code - confirm in the generator.
struct {GSVector4i s, t, r, g, b, _pad[3];} d;
struct {GSVector4i st, c;} d8;
// NOTE(review): fields are listed r, b, g here but r, g, b in `d`; harmless if
// they are only ever addressed by name - confirm.
struct {GSVector4i s, t, r, b, g, uf, vf, dither, fd, test;} temp;
};

View File

@ -0,0 +1,71 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "StdAfx.h"
#include "GSCodeBuffer.h"
// Creates an empty buffer pool. No memory is allocated until the first
// GetBuffer() call; blocksize is the size of each block allocated later.
// Initializers are listed in member declaration order, which is the order
// the compiler applies them in.
GSCodeBuffer::GSCodeBuffer(size_t blocksize)
	: m_blocksize(blocksize)
	, m_pos(0)
	, m_reserved(0)
	, m_ptr(NULL)
{
}
// Releases every block that was recorded in m_buffers.
GSCodeBuffer::~GSCodeBuffer()
{
	while(!m_buffers.IsEmpty())
	{
		void* block = m_buffers.RemoveHead();

		VirtualFree(block, 0, MEM_RELEASE);
	}
}
// Reserves room for up to `size` bytes of generated code and returns a pointer
// to it. The request is rounded up to a multiple of 16 bytes. A new
// executable block of m_blocksize bytes is allocated when there is no current
// block or the current one cannot hold the request.
//
// Only one reservation may be outstanding at a time: each successful call must
// be followed by ReleaseBuffer() with the number of bytes actually used.
//
// Returns NULL, leaving the pool unchanged and nothing reserved, when the
// request cannot fit into a block or when the allocation fails.
void* GSCodeBuffer::GetBuffer(size_t size)
{
	ASSERT(size < m_blocksize);
	ASSERT(m_reserved == 0);

	size = (size + 15) & ~15;

	if(size > m_blocksize)
	{
		// the asserts vanish in release builds; never hand out a block smaller than requested

		return NULL;
	}

	if(m_ptr == NULL || m_pos + size > m_blocksize)
	{
		BYTE* block = (BYTE*)VirtualAlloc(NULL, m_blocksize, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);

		if(block == NULL)
		{
			// keep the current block and the block list intact instead of recording a NULL entry

			return NULL;
		}

		m_ptr = block;
		m_pos = 0;

		m_buffers.AddTail(m_ptr);
	}

	BYTE* ptr = &m_ptr[m_pos];

	m_reserved = size;

	return ptr;
}
// Ends the reservation made by GetBuffer(): `size` is the number of bytes that
// were actually used (at most the reserved amount). The write position moves
// past them, rounded up to the next 16-byte boundary.
void GSCodeBuffer::ReleaseBuffer(size_t size)
{
	ASSERT(size <= m_reserved);

	const size_t end = m_pos + size;

	m_pos = (end + 15) & ~15;
	m_reserved = 0;
}

View File

@ -0,0 +1,37 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
// Pool of executable memory for run-time generated code.
// Memory is obtained in blocks of m_blocksize bytes; callers reserve space with
// GetBuffer(), write their code, then report the bytes used via ReleaseBuffer().
// All blocks are freed in the destructor.
class GSCodeBuffer
{
CAtlList<void*> m_buffers; // every block allocated so far
size_t m_blocksize; // size of each block in bytes
size_t m_pos, m_reserved; // write offset inside the current block / bytes of the outstanding reservation (0 = none)
BYTE* m_ptr; // current block, NULL before the first allocation
public:
GSCodeBuffer(size_t blocksize = 4096 * 64); // 256k
virtual ~GSCodeBuffer();
// Reserves space for up to `size` bytes; pair each call with ReleaseBuffer().
void* GetBuffer(size_t size);
// Commits `size` bytes of the current reservation.
void ReleaseBuffer(size_t size);
};

View File

@ -109,6 +109,7 @@ CRC::Game CRC::m_games[] =
{0x9E98B8AE, IkkiTousen, JP, false}, {0x9E98B8AE, IkkiTousen, JP, false},
{0xD6385328, GodOfWar, US, false}, {0xD6385328, GodOfWar, US, false},
{0xFB0E6D72, GodOfWar, EU, false}, {0xFB0E6D72, GodOfWar, EU, false},
{0xEB001875, GodOfWar, EU, false},
{0xA61A4C6D, GodOfWar, Unknown, false}, {0xA61A4C6D, GodOfWar, Unknown, false},
{0xE23D532B, GodOfWar, Unknown, false}, {0xE23D532B, GodOfWar, Unknown, false},
{0x2F123FD8, GodOfWar2, RU, false}, {0x2F123FD8, GodOfWar2, RU, false},

View File

@ -26,8 +26,8 @@
GSDrawScanline::GSDrawScanline(GSState* state, int id) GSDrawScanline::GSDrawScanline(GSState* state, int id)
: m_state(state) : m_state(state)
, m_id(id) , m_id(id)
, m_sp(this) , m_sp(m_env)
, m_ds(this) , m_ds(m_env)
{ {
memset(&m_env, 0, sizeof(m_env)); memset(&m_env, 0, sizeof(m_env));
} }
@ -163,13 +163,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
// //
f->sl = (DrawScanlinePtr)&GSDrawScanline::DrawScanline; f->ssl = m_ds.Lookup(m_env.sel);
m_dsf = m_ds.Lookup(m_env.sel);
f->ssl = m_dsf;
//
if(m_env.sel.IsSolidRect()) if(m_env.sel.IsSolidRect())
{ {
@ -207,16 +201,7 @@ void GSDrawScanline::EndDraw(const GSRasterizerStats& stats)
void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
{ {
// TODO: call this directly from rasterizer m_spf(vertices, dscan); // TODO: call this directly from rasterizer
m_spf(vertices, dscan);
}
void GSDrawScanline::DrawScanline(int top, int left, int right, const GSVertexSW& v)
{
// TODO: call this directly from rasterizer
m_dsf(top, left, right, v);
} }
void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
@ -371,24 +356,24 @@ void GSDrawScanline::FillBlock(const GSVector4i* row, int* col, const GSVector4i
// //
GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap(GSDrawScanline* ds) GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap(GSScanlineEnvironment& env)
: m_ds(ds) : m_env(env)
{ {
} }
GSSetupPrimCodeGenerator* GSDrawScanline::GSSetupPrimMap::Create(UINT64 key) GSSetupPrimCodeGenerator* GSDrawScanline::GSSetupPrimMap::Create(UINT64 key, void* ptr, size_t maxsize)
{ {
return new GSSetupPrimCodeGenerator(m_ds->m_env); return new GSSetupPrimCodeGenerator(m_env, ptr, maxsize);
} }
// //
GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap(GSDrawScanline* ds) GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap(GSScanlineEnvironment& env)
: m_ds(ds) : m_env(env)
{ {
} }
GSDrawScanlineCodeGenerator* GSDrawScanline::GSDrawScanlineMap::Create(UINT64 key) GSDrawScanlineCodeGenerator* GSDrawScanline::GSDrawScanlineMap::Create(UINT64 key, void* ptr, size_t maxsize)
{ {
return new GSDrawScanlineCodeGenerator(m_ds->m_env); return new GSDrawScanlineCodeGenerator(m_env, ptr, maxsize);
} }

View File

@ -36,11 +36,11 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
class GSSetupPrimMap : public GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, UINT64, SetupPrimStaticPtr> class GSSetupPrimMap : public GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, UINT64, SetupPrimStaticPtr>
{ {
GSDrawScanline* m_ds; GSScanlineEnvironment& m_env;
public: public:
GSSetupPrimMap(GSDrawScanline* ds); GSSetupPrimMap(GSScanlineEnvironment& env);
GSSetupPrimCodeGenerator* Create(UINT64 key); GSSetupPrimCodeGenerator* Create(UINT64 key, void* ptr, size_t maxsize);
} m_sp; } m_sp;
SetupPrimStaticPtr m_spf; SetupPrimStaticPtr m_spf;
@ -51,17 +51,13 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
class GSDrawScanlineMap : public GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, UINT64, DrawScanlineStaticPtr> class GSDrawScanlineMap : public GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, UINT64, DrawScanlineStaticPtr>
{ {
GSDrawScanline* m_ds; GSScanlineEnvironment& m_env;
public: public:
GSDrawScanlineMap(GSDrawScanline* ds); GSDrawScanlineMap(GSScanlineEnvironment& env);
GSDrawScanlineCodeGenerator* Create(UINT64 key); GSDrawScanlineCodeGenerator* Create(UINT64 key, void* ptr, size_t maxsize);
} m_ds; } m_ds;
DrawScanlineStaticPtr m_dsf;
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
// //
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v); void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);

View File

@ -24,8 +24,8 @@
#include "StdAfx.h" #include "StdAfx.h"
#include "GSDrawScanlineCodeGenerator.h" #include "GSDrawScanlineCodeGenerator.h"
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env) GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize)
: CodeGenerator(DEFAULT_MAX_CODE_SIZE, 0) : CodeGenerator(maxsize, ptr)
, m_env(env) , m_env(env)
{ {
#if _M_AMD64 #if _M_AMD64
@ -391,6 +391,8 @@ void GSDrawScanlineCodeGenerator::Init(int params)
paddd(xmm3, xmmword[eax + 16 * 8]); paddd(xmm3, xmmword[eax + 16 * 8]);
} }
else else
{
if(m_env.sel.ltf)
{ {
movdqa(xmm4, xmm3); movdqa(xmm4, xmm3);
pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
@ -398,6 +400,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
psrlw(xmm4, 1); psrlw(xmm4, 1);
movdqa(xmmword[&m_env.temp.vf], xmm4); movdqa(xmmword[&m_env.temp.vf], xmm4);
} }
}
movdqa(xmmword[&m_env.temp.s], xmm2); movdqa(xmmword[&m_env.temp.s], xmm2);
movdqa(xmmword[&m_env.temp.t], xmm3); movdqa(xmmword[&m_env.temp.t], xmm3);
@ -918,14 +921,14 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// xmm7 = used // xmm7 = used
// GSVector4i rb10 = c10 & mask; // GSVector4i rb10 = c10 & mask;
// GSVector4i rb11 = c11 & mask; // GSVector4i ga10 = (c10 >> 8) & mask;
movdqa(xmm2, xmm1); movdqa(xmm2, xmm1);
psllw(xmm1, 8); psllw(xmm1, 8);
psrlw(xmm1, 8); psrlw(xmm1, 8);
psrlw(xmm2, 8); psrlw(xmm2, 8);
// GSVector4i ga10 = (c10 >> 8) & mask; // GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask; // GSVector4i ga11 = (c11 >> 8) & mask;
movdqa(xmm6, xmm5); movdqa(xmm6, xmm5);
@ -1511,7 +1514,28 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
void GSDrawScanlineCodeGenerator::AlphaBlend() void GSDrawScanlineCodeGenerator::AlphaBlend()
{ {
if(!m_env.sel.fwrite || m_env.sel.abe == 255) if(!m_env.sel.fwrite)
{
return;
}
/*
if(m_env.sel.aa1)
{
printf("aa1 %016I64x\n", m_env.sel.key);
if(m_env.sel.fpsm != 1) // TODO: fm == 0xffxxxxxx
{
// a = 0x80
pcmpeqd(xmm0, xmm0);
psllw(xmm0, 15);
mix16(xmm6, xmm0, xmm1);
}
return;
}
*/
if(m_env.sel.abe == 255)
{ {
return; return;
} }
@ -1734,7 +1758,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
} }
else else
{ {
if(m_env.sel.fpsm != 1) // TODO: fpsm == 0 && fm == 0xffxxxxxx if(m_env.sel.fpsm != 1) // TODO: fm == 0xffxxxxxx
{ {
mix16(xmm6, xmm4, xmm7); mix16(xmm6, xmm4, xmm7);
} }

View File

@ -72,5 +72,5 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask); void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
public: public:
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env); GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize);
}; };

View File

@ -160,13 +160,18 @@ public:
} }
}; };
#include "GSCodeBuffer.h"
template<class CG, class KEY, class VALUE> template<class CG, class KEY, class VALUE>
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE> class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
{ {
CRBMap<UINT64, CG*> m_cgmap; CRBMap<UINT64, CG*> m_cgmap;
GSCodeBuffer m_cb;
enum {MAX_SIZE = 4096};
protected: protected:
virtual CG* Create(KEY key) = 0; virtual CG* Create(KEY key, void* ptr, size_t maxsize = MAX_SIZE) = 0;
public: public:
GSCodeGeneratorFunctionMap() GSCodeGeneratorFunctionMap()
@ -189,10 +194,14 @@ public:
if(!m_cgmap.Lookup(key, cg)) if(!m_cgmap.Lookup(key, cg))
{ {
cg = Create(key); void* ptr = m_cb.GetBuffer(MAX_SIZE);
cg = Create(key, ptr, MAX_SIZE);
ASSERT(cg); ASSERT(cg);
m_cb.ReleaseBuffer(cg->getSize());
m_cgmap.SetAt(key, cg); m_cgmap.SetAt(key, cg);
} }

View File

@ -38,7 +38,6 @@ GSRasterizer::~GSRasterizer()
void GSRasterizer::Draw(const GSRasterizerData* data) void GSRasterizer::Draw(const GSRasterizerData* data)
{ {
m_dsf.sl = NULL;
m_dsf.sr = NULL; m_dsf.sr = NULL;
m_dsf.sp = NULL; m_dsf.sp = NULL;
m_dsf.ssl = NULL; m_dsf.ssl = NULL;
@ -102,8 +101,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
(m_ds->*m_dsf.sp)(v, *v); (m_ds->*m_dsf.sp)(v, *v);
// TODO: (m_dsf.ssp)(v, *v); // TODO: (m_dsf.ssp)(v, *v);
(m_ds->*m_dsf.sl)(p.y, p.x, p.x + 1, *v); m_dsf.ssl(p.y, p.x, p.x + 1, *v);
// TODO: (m_dsf.ssl)(p.y, p.x, p.x + 1, *v);
m_stats.pixels++; m_stats.pixels++;
} }
@ -420,6 +418,10 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
int left = lr.extract32<0>(); int left = lr.extract32<0>();
int right = lr.extract32<2>(); int right = lr.extract32<2>();
// TODO:
// left coverage = l.p.ceil().x - l.p.x
// right coverage = r.ceil() - r
if(left < scissor.x) left = scissor.x; if(left < scissor.x) left = scissor.x;
if(right > scissor.z) right = scissor.z; if(right > scissor.z) right = scissor.z;
@ -442,8 +444,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
scan = l; scan = l;
} }
(m_ds->*m_dsf.sl)(top, left, right, scan); m_dsf.ssl(top, left, right, scan);
// TODO: (m_dsf.ssl)(top, left, right, scan);
} }
} }
} }
@ -472,6 +473,10 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
int left = lr.extract32<0>(); int left = lr.extract32<0>();
int right = lr.extract32<1>(); int right = lr.extract32<1>();
// TODO:
// left coverage = l.p.ceil().x - l.p.x
// right coverage = l.p.ceil().y - l.p.y
if(left < scissor.x) left = scissor.x; if(left < scissor.x) left = scissor.x;
if(right > scissor.z) right = scissor.z; if(right > scissor.z) right = scissor.z;
@ -494,8 +499,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
scan = l; scan = l;
} }
(m_ds->*m_dsf.sl)(top, left, right, scan); m_dsf.ssl(top, left, right, scan);
// TODO: (m_dsf.ssl)(top, left, right, scan);
} }
} }
} }
@ -585,8 +589,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
{ {
if((top % m_threads) == m_id) if((top % m_threads) == m_id)
{ {
(m_ds->*m_dsf.sl)(top, left, right, scan); m_dsf.ssl(top, left, right, scan);
// TODO: (m_dsf.ssl)(top, left, right, scan);
m_stats.pixels += right - left; m_stats.pixels += right - left;
} }

View File

@ -25,9 +25,6 @@
#include "GSVertexSW.h" #include "GSVertexSW.h"
#include "GSFunctionMap.h" #include "GSFunctionMap.h"
//
#define FAST_DRAWSCANLINE
__declspec(align(16)) class GSRasterizerData __declspec(align(16)) class GSRasterizerData
{ {
public: public:
@ -51,7 +48,6 @@ public:
class IDrawScanline class IDrawScanline
{ {
public: public:
typedef void (IDrawScanline::*DrawScanlinePtr)(int top, int left, int right, const GSVertexSW& v);
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v); typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
typedef void (IDrawScanline::*SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan); typedef void (IDrawScanline::*SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
typedef void (*DrawScanlineStaticPtr)(int top, int left, int right, const GSVertexSW& v); typedef void (*DrawScanlineStaticPtr)(int top, int left, int right, const GSVertexSW& v);
@ -59,7 +55,6 @@ public:
struct Functions struct Functions
{ {
DrawScanlinePtr sl;
DrawSolidRectPtr sr; DrawSolidRectPtr sr;
SetupPrimPtr sp; SetupPrimPtr sp;
DrawScanlineStaticPtr ssl; DrawScanlineStaticPtr ssl;

View File

@ -60,7 +60,8 @@ protected:
m_reset = false; m_reset = false;
} }
// if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats(); //
if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats();
} }
void ResetDevice() void ResetDevice()
@ -266,7 +267,7 @@ protected:
p.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0; p.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0;
p.fm = context->FRAME.FBMSK; p.fm = context->FRAME.FBMSK;
p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 || PRIM->AA1 && primclass == GS_LINE_CLASS ? 0xffffffff : 0;
if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER) if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
{ {
@ -460,7 +461,7 @@ protected:
if(PRIM->AA1) if(PRIM->AA1)
{ {
// TODO: automatic alpha blending (ABE=1, A=0 B=1 C=0 D=1) p.sel.aa1 = 1;
} }
if(p.sel.date if(p.sel.date
@ -494,10 +495,10 @@ protected:
void Draw() void Draw()
{ {
m_vtrace.Update(m_vertices, m_count);
GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(PRIM->PRIM); GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(PRIM->PRIM);
m_vtrace.Update(m_vertices, m_count, primclass, PRIM->IIP, PRIM->TME, m_context->TEX0.TFX);
GSScanlineParam p; GSScanlineParam p;
GetScanlineParam(p, primclass); GetScanlineParam(p, primclass);
@ -796,6 +797,17 @@ public:
break; break;
} }
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
// are in line or just two of them are the same (cross product == 0)
GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz();
test |= tmp == tmp.yxwz();
break;
}
if(test.mask() & 3) if(test.mask() & 3)
{ {
return; return;
@ -818,7 +830,7 @@ public:
break; break;
} }
if(m_count >= 3 && m_count < 30) if(m_count < 30 && m_count >= 3)
{ {
GSVertexSW* v = &m_vertices[m_count - 3]; GSVertexSW* v = &m_vertices[m_count - 3];

View File

@ -60,6 +60,7 @@ union GSScanlineSelector
DWORD fba:1; // 42 DWORD fba:1; // 42
DWORD dthe:1; // 43 DWORD dthe:1; // 43
DWORD zoverflow:1; // 44 (z max >= 0x80000000) DWORD zoverflow:1; // 44 (z max >= 0x80000000)
DWORD aa1:1; // 45
}; };
struct struct

View File

@ -24,8 +24,8 @@
#include "StdAfx.h" #include "StdAfx.h"
#include "GSSetupPrimCodeGenerator.h" #include "GSSetupPrimCodeGenerator.h"
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env) GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize)
: CodeGenerator(DEFAULT_MAX_CODE_SIZE, 0) : CodeGenerator(maxsize, ptr)
, m_env(env) , m_env(env)
{ {
m_en.z = m_env.sel.zb ? 1 : 0; m_en.z = m_env.sel.zb ? 1 : 0;

View File

@ -46,5 +46,5 @@ class GSSetupPrimCodeGenerator : public CodeGenerator
void Color(); void Color();
public: public:
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env); GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize);
}; };

View File

@ -2208,17 +2208,52 @@ public:
GSVector4 rcpnr() const GSVector4 rcpnr() const
{ {
return GSVector4(_mm_rcpnr_ps(m)); GSVector4 v = rcp();
return (v + v) - (v * v) * *this;
}
enum RoundMode {NearestInt = 8, NegInf = 9, PosInf = 10};
// Rounds each lane to an integral value according to `mode` (a RoundMode value).
// With SSE4.1 available the mode is passed straight to _mm_round_ps; otherwise
// an emulation handling NearestInt, NegInf and PosInf is used.
template<int mode> GSVector4 round() const
{
#if _M_SSE >= 0x401
return GSVector4(_mm_round_ps(m, mode));
#else
GSVector4 a = *this;
// b = (bits of a selected by ps_80000000) | ps_4b000000
// NOTE(review): presumably ps_80000000 is the sign mask and ps_4b000000 is
// 2^23 as a float, so b is a sign-matched 2^23 and (a + b - b) drops the
// fractional bits using the current FPU rounding mode - confirm the constants.
GSVector4 b = (a & GSVector4(ps_80000000)) | GSVector4(ps_4b000000);
b = a + b - b;
if((mode & 7) == (NegInf & 7))
{
// rounded up past a: step back by ps_3f800000 (presumably 1.0f) in those lanes
return b - ((a < b) & GSVector4(ps_3f800000));
}
if((mode & 7) == (PosInf & 7))
{
// rounded down below a: step forward by ps_3f800000 in those lanes
return b + ((a > b) & GSVector4(ps_3f800000));
}
ASSERT((mode & 7) == (NearestInt & 7)); // other modes aren't implemented
return b;
#endif
}
GSVector4 floor() const GSVector4 floor() const
{ {
return GSVector4(_mm_floor_ps(m)); return round<NegInf>();
} }
GSVector4 ceil() const GSVector4 ceil() const
{ {
return GSVector4(_mm_ceil_ps(m)); return round<PosInf>();
} }
GSVector4 mod2x(const GSVector4& f, const int scale = 256) const GSVector4 mod2x(const GSVector4& f, const int scale = 256) const

168
plugins/GSdx/GSVertexSW.cpp Normal file
View File

@ -0,0 +1,168 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "stdafx.h"
#include "GSVertexSW.h"
using namespace Xbyak;
// Emits, into the caller-supplied buffer, a routine that scans a vertex array and
// stores the per-component minimum and maximum of colour (c), position (p) and
// texture coordinate (t) into the min/max vertices passed by the caller.
// Arguments are read from the stack at esp+4..esp+16 in the order
// (v, count, min, max). Only 32-bit x86 is supported.
//
// key layout: bits 0-1 primclass, bit 2 iip, bit 3 tme, bits 4-5 tfx.
// Colour is skipped when texturing is on and tfx == TFX_DECAL; texture
// coordinates are skipped when tme is 0. Skipped components are not written to
// min/max at all, so the caller's previous values stay in place.
//
// NOTE: the loop tests the counter after the body, so the first n vertices are
// always read, even if count is 0.
GSVertexTrace::GSVertexTraceCodeGenerator::GSVertexTraceCodeGenerator(DWORD key, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
{
#if _M_AMD64
#error TODO
#endif
const int params = 0;
// unpack the key
DWORD primclass = (key >> 0) & 3;
DWORD iip = (key >> 2) & 1;
DWORD tme = (key >> 3) & 1;
DWORD tfx = (key >> 4) & 3;
DWORD color = !(tme && tfx == TFX_DECAL);
// n = vertices per primitive; the loop is unrolled n times
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// stack offsets of the four arguments (esp+0 holds the return address)
const int _v = params + 4;
const int _count = params + 8;
const int _min = params + 12;
const int _max = params + 16;
//
// the generated code loads these by absolute address, hence static storage
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
// xmm0 = FLT_MAX in all lanes, xmm1 = -FLT_MAX in all lanes
movss(xmm0, xmmword[&fmax]);
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
movss(xmm1, xmmword[&fmin]);
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
// min.c = FLT_MAX;
// max.c = -FLT_MAX;
movaps(xmm2, xmm0);
movaps(xmm3, xmm1);
}
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movaps(xmm4, xmm0);
movaps(xmm5, xmm1);
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm0);
movaps(xmm7, xmm1);
}
// for(int i = 0; i < count; i += step) {
// edx = current vertex pointer, ecx = remaining vertex count
mov(edx, dword[esp + _v]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
for(int j = 0; j < n; j++)
{
// when iip is 0 only the last vertex of each primitive contributes its colour
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW)]);
minps(xmm2, xmm0);
maxps(xmm3, xmm0);
}
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 16]);
minps(xmm4, xmm0);
maxps(xmm5, xmm0);
if(tme)
{
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 32]);
minps(xmm6, xmm0);
maxps(xmm7, xmm0);
}
}
// advance by one primitive and loop while vertices remain
add(edx, n * sizeof(GSVertexSW));
sub(ecx, n);
jg("loop");
// }
// write back the accumulated results: eax = &min, edx = &max
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
movaps(xmmword[eax], xmm2);
movaps(xmmword[edx], xmm3);
}
movaps(xmmword[eax + 16], xmm4);
movaps(xmmword[edx + 16], xmm5);
if(tme)
{
movaps(xmmword[eax + 32], xmm6);
movaps(xmmword[edx + 32], xmm7);
}
ret();
}

View File

@ -214,8 +214,27 @@ __forceinline GSVertexSW operator / (const GSVertexSW& v, float f)
return v0; return v0;
} }
__declspec(align(16)) struct GSVertexTrace #include "GSFunctionMap.h"
#include "xbyak/xbyak.h"
__declspec(align(16)) class GSVertexTrace
{ {
// Xbyak code generator for the vertex min/max routine.
// All code is emitted by the constructor.
class GSVertexTraceCodeGenerator : public Xbyak::CodeGenerator
{
public:
// key: packed primitive/shading/texturing state; ptr/maxsize: output buffer for the generated code.
GSVertexTraceCodeGenerator(DWORD key, void* ptr, size_t maxsize);
};
typedef void (*VertexTracePtr)(const GSVertexSW* v, int count, GSVertexSW& min, GSVertexSW& max);
// Function map from a DWORD key to a generated VertexTracePtr routine.
// Create() is the factory hook that builds the generator for a given key.
class GSVertexTraceMap : public GSCodeGeneratorFunctionMap<GSVertexTraceCodeGenerator, DWORD, VertexTracePtr>
{
public:
GSVertexTraceMap() {}
// Builds a generator for `key` that emits into [ptr, ptr + maxsize); allocated with new.
GSVertexTraceCodeGenerator* Create(DWORD key, void* ptr, size_t maxsize) {return new GSVertexTraceCodeGenerator(key, ptr, maxsize);}
} m_map;
public:
GSVertexSW m_min, m_max; GSVertexSW m_min, m_max;
union union
@ -225,16 +244,28 @@ __declspec(align(16)) struct GSVertexTrace
struct {DWORD xyzf:4, stq:4, rgba:4;}; struct {DWORD xyzf:4, stq:4, rgba:4;};
} m_eq; } m_eq;
// Recomputes m_min / m_max over `count` vertices using the generated routine
// selected by (primclass, iip, tme, tfx), then refreshes m_eq with the
// per-lane "min == max" masks of position (bits 0-3), texture coordinate
// (bits 4-7) and colour (bits 8-11).
void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass, DWORD iip, DWORD tme, DWORD tfx)
{
	// tfx is left out of the key when texturing is off

	DWORD key = primclass;

	key |= iip << 2;
	key |= tme << 3;
	key |= (tme ? tfx : 0) << 4;

	m_map.Lookup(key)(v, count, m_min, m_max);

	m_eq.value = (m_min.p == m_max.p).mask();
	m_eq.value |= (m_min.t == m_max.t).mask() << 4;
	m_eq.value |= (m_min.c == m_max.c).mask() << 8;
}
/*
*/
void Update(const GSVertexSW* v, int count) void Update(const GSVertexSW* v, int count)
{ {
GSVertexSW min, max; GSVertexSW min, max;
min.p = v[0].p;
max.p = v[0].p;
min.t = v[0].t;
max.t = v[0].t;
min.c = v[0].c; min.c = v[0].c;
max.c = v[0].c; max.c = v[0].c;
min.t = v[0].t;
max.t = v[0].t;
min.p = v[0].p;
max.p = v[0].p;
for(int i = 1; i < count; i++) for(int i = 1; i < count; i++)
{ {

File diff suppressed because it is too large Load Diff

View File

@ -1,263 +0,0 @@
/**
*** Copyright (C) 1985-2007 Intel Corporation. All rights reserved.
***
*** The information and source code contained herein is the exclusive
*** property of Intel Corporation and may not be disclosed, examined
*** or reproduced in whole or in part without explicit written authorization
*** from the company.
***
**/
/*
* smmintrin.h
*
* Principal header file for Intel(R) Core(TM) 2 Duo processor
* SSE4.1 intrinsics
*/
// Gsdx Note: This header file has been "borrowed" from the MSVC install and bugfixed to
// allow for proper code compilation. The original version of the header includes semicolons
// after several macros defined below, which causes compiler errors when using them in
// inline object construction situations. -- Air
#pragma once
#ifndef __midl
#ifndef _INCLUDED_SMM
#define _INCLUDED_SMM
#if defined(_M_CEE_PURE)
#error ERROR: EMM intrinsics not supported in the pure mode!
#else
#include <tmmintrin.h>
/*
 * Rounding mode macros
 *
 * The first group selects the rounding direction (0x00..0x03) or the current
 * direction (0x04); the second group selects whether exceptions are raised
 * (0x00) or suppressed (0x08).
 *
 * The composite macros below OR one value from each group together. Each
 * expansion is wrapped in parentheses so that it stays a single value when
 * used next to operators that bind tighter than '|' (for example '&', '*'
 * or '==').
 */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC 0x08
#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
/*
* MACRO functions for ceil/floor intrinsics
*
* Each wrapper forwards its arguments to the matching _mm_round_* intrinsic and
* supplies _MM_FROUND_CEIL or _MM_FROUND_FLOOR as the rounding-mode argument.
*/
#define _mm_ceil_pd(val) _mm_round_pd((val), _MM_FROUND_CEIL)
#define _mm_ceil_sd(dst, val) _mm_round_sd((dst), (val), _MM_FROUND_CEIL)
#define _mm_floor_pd(val) _mm_round_pd((val), _MM_FROUND_FLOOR)
#define _mm_floor_sd(dst, val) _mm_round_sd((dst), (val), _MM_FROUND_FLOOR)
#define _mm_ceil_ps(val) _mm_round_ps((val), _MM_FROUND_CEIL)
#define _mm_ceil_ss(dst, val) _mm_round_ss((dst), (val), _MM_FROUND_CEIL)
#define _mm_floor_ps(val) _mm_round_ps((val), _MM_FROUND_FLOOR)
#define _mm_floor_ss(dst, val) _mm_round_ss((dst), (val), _MM_FROUND_FLOOR)
// Alias for _mm_testz_si128, keeping the (mask, val) argument order.
#define _mm_test_all_zeros(mask, val) _mm_testz_si128((mask), (val))
/*
* MACRO functions for packed integer 128-bit comparison intrinsics.
*/
// Compares val with itself (_mm_cmpeq_epi32) to obtain an all-ones operand and
// hands both to _mm_testc_si128.
#define _mm_test_all_ones(val) \
_mm_testc_si128((val), _mm_cmpeq_epi32((val),(val)))
// Alias for _mm_testnzc_si128, keeping the (mask, val) argument order.
#define _mm_test_mix_ones_zeros(mask, val) _mm_testnzc_si128((mask), (val))
#if __cplusplus
extern "C" {
#endif
// Integer blend instructions - select data from 2 sources
// using constant/variable mask
extern __m128i _mm_blend_epi16 (__m128i v1, __m128i v2,
const int mask);
extern __m128i _mm_blendv_epi8 (__m128i v1, __m128i v2, __m128i mask);
// Float single precision blend instructions - select data
// from 2 sources using constant/variable mask
extern __m128 _mm_blend_ps (__m128 v1, __m128 v2, const int mask);
extern __m128 _mm_blendv_ps(__m128 v1, __m128 v2, __m128 v3);
// Float double precision blend instructions - select data
// from 2 sources using constant/variable mask
extern __m128d _mm_blend_pd (__m128d v1, __m128d v2, const int mask);
extern __m128d _mm_blendv_pd(__m128d v1, __m128d v2, __m128d v3);
// Dot product instructions with mask-defined summing and zeroing
// of result's parts
extern __m128 _mm_dp_ps(__m128 val1, __m128 val2, const int mask);
extern __m128d _mm_dp_pd(__m128d val1, __m128d val2, const int mask);
// Packed integer 64-bit comparison, zeroing or filling with ones
// corresponding parts of result
extern __m128i _mm_cmpeq_epi64(__m128i val1, __m128i val2);
// Min/max packed integer instructions
extern __m128i _mm_min_epi8 (__m128i val1, __m128i val2);
extern __m128i _mm_max_epi8 (__m128i val1, __m128i val2);
extern __m128i _mm_min_epu16(__m128i val1, __m128i val2);
extern __m128i _mm_max_epu16(__m128i val1, __m128i val2);
extern __m128i _mm_min_epi32(__m128i val1, __m128i val2);
extern __m128i _mm_max_epi32(__m128i val1, __m128i val2);
extern __m128i _mm_min_epu32(__m128i val1, __m128i val2);
extern __m128i _mm_max_epu32(__m128i val1, __m128i val2);
// Packed integer 32-bit multiplication with truncation
// of upper halves of results
extern __m128i _mm_mullo_epi32(__m128i a, __m128i b);
// Packed integer 32-bit multiplication of 2 pairs of operands
// producing two 64-bit results
extern __m128i _mm_mul_epi32(__m128i a, __m128i b);
// Packed integer 128-bit bitwise comparison.
// return 1 if (val 'and' mask) == 0
extern int _mm_testz_si128(__m128i mask, __m128i val);
// Packed integer 128-bit bitwise comparison.
// return 1 if (val 'and_not' mask) == 0
extern int _mm_testc_si128(__m128i mask, __m128i val);
// Packed integer 128-bit bitwise comparison
// ZF = ((val 'and' mask) == 0) CF = ((val 'and_not' mask) == 0)
// return 1 if both ZF and CF are 0
extern int _mm_testnzc_si128(__m128i mask, __m128i s2);
// Insert single precision float into packed single precision
// array element selected by index.
// The bits [7-6] of the 3d parameter define src index,
// the bits [5-4] define dst index, and bits [3-0] define zeroing
// mask for dst
extern __m128 _mm_insert_ps(__m128 dst, __m128 src, const int ndx);
// Helper macro to create ndx-parameter value for _mm_insert_ps
#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) \
(((srcField)<<6) | ((dstField)<<4) | (zeroMask))
// Extract binary representation of single precision float from
// packed single precision array element selected by index
extern int _mm_extract_ps(__m128 src, const int ndx);
// Extract single precision float from packed single precision
// array element selected by index into dest
#define _MM_EXTRACT_FLOAT(dest, src, ndx) \
*((int*)&(dest)) = _mm_extract_ps((src), (ndx))
// Extract specified single precision float element
// into the lower part of __m128.
// The expansion is a plain expression with no trailing semicolon, so the macro
// can be used inside initializers, function arguments and larger expressions.
// The insert index is built with _MM_MK_INSERTPS_NDX(num, 0, 0x0e): source
// element `num`, destination element 0, zero mask 0x0e.
#define _MM_PICK_OUT_PS(src, num) \
	_mm_insert_ps(_mm_setzero_ps(), (src), \
		_MM_MK_INSERTPS_NDX((num), 0, 0x0e))
// Insert integer into packed integer array element
// selected by index
extern __m128i _mm_insert_epi8 (__m128i dst, int s, const int ndx);
extern __m128i _mm_insert_epi32(__m128i dst, int s, const int ndx);
#if defined(_M_X64)
extern __m128i _mm_insert_epi64(__m128i dst, __int64 s, const int ndx);
#endif
// Extract integer from packed integer array element
// selected by index
extern int _mm_extract_epi8 (__m128i src, const int ndx);
extern int _mm_extract_epi32(__m128i src, const int ndx);
#if defined(_M_X64)
extern __int64 _mm_extract_epi64(__m128i src, const int ndx);
#endif
// Horizontal packed word minimum and its index in
// result[15:0] and result[18:16] respectively
extern __m128i _mm_minpos_epu16(__m128i shortValues);
// Packed/single float double precision rounding
extern __m128d _mm_round_pd(__m128d val, int iRoundMode);
extern __m128d _mm_round_sd(__m128d dst, __m128d val, int iRoundMode);
// Packed/single float single precision rounding
extern __m128 _mm_round_ps(__m128 val, int iRoundMode);
extern __m128 _mm_round_ss(__m128 dst, __m128 val, int iRoundMode);
// Packed integer sign-extension
extern __m128i _mm_cvtepi8_epi32 (__m128i byteValues);
extern __m128i _mm_cvtepi16_epi32(__m128i shortValues);
extern __m128i _mm_cvtepi8_epi64 (__m128i byteValues);
extern __m128i _mm_cvtepi32_epi64(__m128i intValues);
extern __m128i _mm_cvtepi16_epi64(__m128i shortValues);
extern __m128i _mm_cvtepi8_epi16 (__m128i byteValues);
// Packed integer zero-extension
extern __m128i _mm_cvtepu8_epi32 (__m128i byteValues);
extern __m128i _mm_cvtepu16_epi32(__m128i shortValues);
extern __m128i _mm_cvtepu8_epi64 (__m128i shortValues);
extern __m128i _mm_cvtepu32_epi64(__m128i intValues);
extern __m128i _mm_cvtepu16_epi64(__m128i shortValues);
extern __m128i _mm_cvtepu8_epi16 (__m128i byteValues);
// Pack 8 double words from 2 operands into 8 words of result
// with unsigned saturation
extern __m128i _mm_packus_epi32(__m128i val1, __m128i val2);
// Sum absolute 8-bit integer difference of adjacent groups of 4 byte
// integers in operands. Starting offsets within operands are
// determined by mask
extern __m128i _mm_mpsadbw_epu8(__m128i s1, __m128i s2, const int msk);
/*
* Load double quadword using non-temporal aligned hint
*/
extern __m128i _mm_stream_load_si128(__m128i* v1);
#if defined __cplusplus
}; /* End "C" */
#endif /* __cplusplus */
#endif /* defined(_M_CEE_PURE) */
#endif
#endif /* _INCLUDED_SMM */

View File

@ -73,14 +73,6 @@
(row3) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0xDD)); \ (row3) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0xDD)); \
} }
// Reciprocal of r: starts from the _mm_rcp_ps estimate t and returns
// (t + t) - (t * t) * r, i.e. one refinement step on top of the estimate.
__forceinline __m128 _mm_rcpnr_ps(__m128 r)
{
	const __m128 estimate = _mm_rcp_ps(r);
	const __m128 doubled = _mm_add_ps(estimate, estimate);
	const __m128 squared_times_r = _mm_mul_ps(_mm_mul_ps(estimate, estimate), r);

	return _mm_sub_ps(doubled, squared_times_r);
}
#else #else
#error TODO: GSVector4 and GSRasterizer needs SSE2 #error TODO: GSVector4 and GSRasterizer needs SSE2
@ -99,29 +91,6 @@
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401
#include "smmintrin_gsdx.h" #include <smmintrin.h>
#else
// Fallback rounding for builds without SSE4.1: adds and then subtracts a bias
// that carries the sign bits of x.
// NOTE(review): ps_80000000 / ps_4b000000 are defined elsewhere; presumably the
// sign-bit mask and the 2^23 float bit pattern in every lane - confirm.
__forceinline __m128 _mm_round_ps(__m128 x)
{
	const __m128 sign_of_x = _mm_and_ps(ps_80000000, x);
	const __m128 signed_bias = _mm_or_ps(sign_of_x, ps_4b000000);
	const __m128 biased = _mm_add_ps(x, signed_bias);

	return _mm_sub_ps(biased, signed_bias);
}
// Fallback floor for builds without SSE4.1: rounds with _mm_round_ps, then
// subtracts ps_3f800000 in the lanes where x is less than the rounded value.
// NOTE(review): ps_3f800000 is defined elsewhere; presumably 1.0f per lane - confirm.
__forceinline __m128 _mm_floor_ps(__m128 x)
{
	const __m128 rounded = _mm_round_ps(x);
	const __m128 went_above = _mm_cmplt_ps(x, rounded);
	const __m128 adjustment = _mm_and_ps(went_above, ps_3f800000);

	return _mm_sub_ps(rounded, adjustment);
}
// Fallback ceil for builds without SSE4.1: rounds with _mm_round_ps, then adds
// ps_3f800000 in the lanes where x is greater than the rounded value.
// NOTE(review): ps_3f800000 is defined elsewhere; presumably 1.0f per lane - confirm.
__forceinline __m128 _mm_ceil_ps(__m128 x)
{
	const __m128 rounded = _mm_round_ps(x);
	const __m128 went_below = _mm_cmpgt_ps(x, rounded);
	const __m128 adjustment = _mm_and_ps(went_below, ps_3f800000);

	return _mm_add_ps(rounded, adjustment);
}
#endif #endif