mirror of https://github.com/PCSX2/pcsx2.git
GSdx: upgraded the ps1 renderer to use runtime generated code, too.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@535 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
29ea3c8ebc
commit
4907dbda42
|
@ -25,6 +25,7 @@
|
|||
// Constructs a scanline drawer: stores the GPU state pointer and the numeric id,
// and constructs m_ds with a reference to this object's m_env.
GPUDrawScanline::GPUDrawScanline(GPUState* state, int id)
|
||||
: m_state(state)
|
||||
, m_id(id)
|
||||
, m_ds(m_env)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -40,7 +41,8 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
|
||||
m_env.sel = p->sel;
|
||||
|
||||
m_env.mem = &m_state->m_mem;
|
||||
m_env.vm = m_state->m_mem.GetPixelAddress(0, 0);
|
||||
m_env.fbw = 10 + m_state->m_mem.GetScale().cx;
|
||||
|
||||
if(m_env.sel.tme)
|
||||
{
|
||||
|
@ -54,24 +56,25 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
u = ~(env.TWIN.TWW << 3) & 0xff;
|
||||
v = ~(env.TWIN.TWH << 3) & 0xff;
|
||||
|
||||
m_env.u[0] = GSVector4i((u << 16) | u);
|
||||
m_env.v[0] = GSVector4i((v << 16) | v);
|
||||
m_env.twin[0].u = GSVector4i((u << 16) | u);
|
||||
m_env.twin[0].v = GSVector4i((v << 16) | v);
|
||||
|
||||
u = env.TWIN.TWX << 3;
|
||||
v = env.TWIN.TWY << 3;
|
||||
|
||||
m_env.u[1] = GSVector4i((u << 16) | u) & ~m_env.u[0];
|
||||
m_env.v[1] = GSVector4i((v << 16) | v) & ~m_env.v[0];
|
||||
m_env.twin[1].u = GSVector4i((u << 16) | u) & ~m_env.twin[0].u;
|
||||
m_env.twin[1].v = GSVector4i((v << 16) | v) & ~m_env.twin[0].v;
|
||||
}
|
||||
}
|
||||
|
||||
m_env.a = GSVector4i(env.PRIM.ABE ? 0xffffffff : 0);
|
||||
m_env.md = GSVector4i(env.STATUS.MD ? 0x80008000 : 0);
|
||||
//
|
||||
|
||||
f->sl = m_ds.Lookup(m_env.sel);
|
||||
f->ssl = m_ds.Lookup(m_env.sel);
|
||||
|
||||
f->sr = NULL; // TODO
|
||||
|
||||
//
|
||||
|
||||
DWORD sel = 0;
|
||||
|
||||
sel |= (data->primclass == GS_SPRITE_CLASS ? 1 : 0) << 0;
|
||||
|
@ -94,13 +97,13 @@ void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& ds
|
|||
t = t.ps32(t);
|
||||
t = t.upl16(t);
|
||||
|
||||
m_env.u[2] = t.xxxx();
|
||||
m_env.v[2] = t.yyyy();
|
||||
m_env.twin[2].u = t.xxxx();
|
||||
m_env.twin[2].v = t.yyyy();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_env.u[2] = GSVector4i::x00ff();
|
||||
m_env.v[2] = GSVector4i::x00ff();
|
||||
m_env.twin[2].u = GSVector4i::x00ff();
|
||||
m_env.twin[2].v = GSVector4i::x00ff();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -114,770 +117,24 @@ void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& ds
|
|||
|
||||
if(tme)
|
||||
{
|
||||
m_env.dst8 = dtc8.upl16(dtc8);
|
||||
m_env.d8.st = dtc8.upl16(dtc8);
|
||||
|
||||
m_env.ds = GSVector4i(dt.xxxx() * ps0123).ps32(GSVector4i(dt.xxxx() * ps4567));
|
||||
m_env.dt = GSVector4i(dt.yyyy() * ps0123).ps32(GSVector4i(dt.yyyy() * ps4567));
|
||||
m_env.d.s = GSVector4i(dt.xxxx() * ps0123).ps32(GSVector4i(dt.xxxx() * ps4567));
|
||||
m_env.d.t = GSVector4i(dt.yyyy() * ps0123).ps32(GSVector4i(dt.yyyy() * ps4567));
|
||||
}
|
||||
|
||||
if(iip)
|
||||
{
|
||||
m_env.dc8 = dtc8.uph16(dtc8);
|
||||
m_env.d8.c = dtc8.uph16(dtc8);
|
||||
|
||||
m_env.dr = GSVector4i(dc.xxxx() * ps0123).ps32(GSVector4i(dc.xxxx() * ps4567));
|
||||
m_env.dg = GSVector4i(dc.yyyy() * ps0123).ps32(GSVector4i(dc.yyyy() * ps4567));
|
||||
m_env.db = GSVector4i(dc.zzzz() * ps0123).ps32(GSVector4i(dc.zzzz() * ps4567));
|
||||
}
|
||||
}
|
||||
// Fetches one texel per 16-bit lane and splits it into colour channels.
//
// ltf   - non-zero selects the filtered path (four taps blended with lerp16),
//         zero selects a single tap per lane.
// tlu   - non-zero: texels are fetched through m_env.clut (gather16_16 with a
//         BYTE texture and a WORD lookup table); zero: fetched directly as WORDs.
// twin  - non-zero: coordinates are masked with m_env.u[0]/v[0] and offset by
//         m_env.u[1]/v[1]; zero: coordinates are limited with min_i16 against
//         m_env.u[2]/v[2].
// test  - in/out lane mask; lanes whose sampled texel is blank (all zero) are
//         OR-ed in so the caller can skip them.
// s, t  - per-lane coordinates; the integer part is taken from the high byte.
// c     - output, four vectors: c[0..2] receive the colour channels with each
//         5-bit field moved to bits 3..7, c[3] receives a mask derived from bit 15.
void GPUDrawScanline::SampleTexture(DWORD ltf, DWORD tlu, DWORD twin, GSVector4i& test, const GSVector4i& s, const GSVector4i& t, GSVector4i* c)
{
	const void* RESTRICT tex = m_env.tex;
	const WORD* RESTRICT clut = m_env.clut;

	if(ltf)
	{
		GSVector4i u = s.sub16(GSVector4i(0x00200020)); // - 0.125f
		GSVector4i v = t.sub16(GSVector4i(0x00200020)); // - 0.125f

		// integer texel position and its right/bottom neighbour

		GSVector4i u0 = u.srl16(8);
		GSVector4i v0 = v.srl16(8);

		GSVector4i u1 = u0.add16(GSVector4i::x0001());
		GSVector4i v1 = v0.add16(GSVector4i::x0001());

		// low byte of the coordinate, shifted up to serve as the lerp16 weight

		GSVector4i uf = (u & GSVector4i::x00ff()) << 7;
		GSVector4i vf = (v & GSVector4i::x00ff()) << 7;

		if(twin)
		{
			u0 = (u0 & m_env.u[0]).add16(m_env.u[1]);
			v0 = (v0 & m_env.v[0]).add16(m_env.v[1]);
			u1 = (u1 & m_env.u[0]).add16(m_env.u[1]);
			v1 = (v1 & m_env.v[0]).add16(m_env.v[1]);
		}
		else
		{
			u0 = u0.min_i16(m_env.u[2]);
			v0 = v0.min_i16(m_env.v[2]);
			u1 = u1.min_i16(m_env.u[2]);
			v1 = v1.min_i16(m_env.v[2]);
		}

		// addrVU: v in the high byte, u in the low byte

		GSVector4i addr00 = v0.sll16(8) | u0;
		GSVector4i addr01 = v0.sll16(8) | u1;
		GSVector4i addr10 = v1.sll16(8) | u0;
		GSVector4i addr11 = v1.sll16(8) | u1;

		GSVector4i c00, c01, c10, c11;

		if(tlu)
		{
			c00 = addr00.gather16_16((const BYTE*)tex, clut);
			c01 = addr01.gather16_16((const BYTE*)tex, clut);
			c10 = addr10.gather16_16((const BYTE*)tex, clut);
			c11 = addr11.gather16_16((const BYTE*)tex, clut);
		}
		else
		{
			c00 = addr00.gather16_16((const WORD*)tex);
			c01 = addr01.gather16_16((const WORD*)tex);

			// the bottom row must be read from the v1 addresses, exactly as in
			// the lookup-table branch above; reading addr00/addr01 again would
			// make the vertical blend a no-op

			c10 = addr10.gather16_16((const WORD*)tex);
			c11 = addr11.gather16_16((const WORD*)tex);
		}

		GSVector4i r00 = (c00 & 0x001f001f) << 3;
		GSVector4i r01 = (c01 & 0x001f001f) << 3;
		GSVector4i r10 = (c10 & 0x001f001f) << 3;
		GSVector4i r11 = (c11 & 0x001f001f) << 3;

		r00 = r00.lerp16<0>(r01, uf);
		r10 = r10.lerp16<0>(r11, uf);
		c[0] = r00.lerp16<0>(r10, vf);

		GSVector4i g00 = (c00 & 0x03e003e0) >> 2;
		GSVector4i g01 = (c01 & 0x03e003e0) >> 2;
		GSVector4i g10 = (c10 & 0x03e003e0) >> 2;
		GSVector4i g11 = (c11 & 0x03e003e0) >> 2;

		g00 = g00.lerp16<0>(g01, uf);
		g10 = g10.lerp16<0>(g11, uf);
		c[1] = g00.lerp16<0>(g10, vf);

		GSVector4i b00 = (c00 & 0x7c007c00) >> 7;
		GSVector4i b01 = (c01 & 0x7c007c00) >> 7;
		GSVector4i b10 = (c10 & 0x7c007c00) >> 7;
		GSVector4i b11 = (c11 & 0x7c007c00) >> 7;

		b00 = b00.lerp16<0>(b01, uf);
		b10 = b10.lerp16<0>(b11, uf);
		c[2] = b00.lerp16<0>(b10, vf);

		GSVector4i a00 = (c00 & 0x80008000) >> 8;
		GSVector4i a01 = (c01 & 0x80008000) >> 8;
		GSVector4i a10 = (c10 & 0x80008000) >> 8;
		GSVector4i a11 = (c11 & 0x80008000) >> 8;

		a00 = a00.lerp16<0>(a01, uf);
		a10 = a10.lerp16<0>(a11, uf);
		c[3] = a00.lerp16<0>(a10, vf).gt16(GSVector4i::zero());

		// mask out blank pixels (not perfect)

		test |=
			c[0].eq16(GSVector4i::zero()) &
			c[1].eq16(GSVector4i::zero()) &
			c[2].eq16(GSVector4i::zero()) &
			c[3].eq16(GSVector4i::zero());
	}
	else
	{
		GSVector4i u = s.srl16(8);
		GSVector4i v = t.srl16(8);

		if(twin)
		{
			u = (u & m_env.u[0]).add16(m_env.u[1]);
			v = (v & m_env.v[0]).add16(m_env.v[1]);
		}
		else
		{
			u = u.min_i16(m_env.u[2]);
			v = v.min_i16(m_env.v[2]);
		}

		GSVector4i addr = v.sll16(8) | u;

		GSVector4i c00;

		if(tlu)
		{
			c00 = addr.gather16_16((const BYTE*)tex, clut);
		}
		else
		{
			c00 = addr.gather16_16((const WORD*)tex);
		}

		test |= c00.eq16(GSVector4i::zero()); // mask out blank pixels

		c[0] = (c00 & 0x001f001f) << 3;
		c[1] = (c00 & 0x03e003e0) >> 2;
		c[2] = (c00 & 0x7c007c00) >> 7;
		c[3] = c00.sra16(15);
	}
}
|
||||
|
||||
// Combines the interpolated colour (r, g, b) with the colour already held in c[0..2].
//
// tfx 0, 1 - c is overwritten with the interpolated colour shifted right by 7.
// tfx 2    - c is multiplied by the interpolated colour (modulate16<1>) and clamped.
// tfx 3    - c is left untouched.
// Any other value is declared impossible to the compiler.
void GPUDrawScanline::ColorTFX(DWORD tfx, const GSVector4i& r, const GSVector4i& g, const GSVector4i& b, GSVector4i* c)
{
	if(tfx == 0 || tfx == 1)
	{
		// no texture contribution: output is the vertex colour only

		c[0] = r.srl16(7);
		c[1] = g.srl16(7);
		c[2] = b.srl16(7);

		return;
	}

	if(tfx == 2)
	{
		// texture colour scaled by the vertex colour, channel by channel

		c[0] = c[0].modulate16<1>(r).clamp8();
		c[1] = c[1].modulate16<1>(g).clamp8();
		c[2] = c[2].modulate16<1>(b).clamp8();

		return;
	}

	if(tfx == 3)
	{
		// texture colour passes through unchanged

		return;
	}

	__assume(0);
}
|
||||
|
||||
// Blends the source colour in c[0..2] with the destination pixels in d.
//
// abr - blend mode:
//         0: avg8 of destination and source,
//         1: addus8 of destination and source,
//         2: subus8 of source from destination,
//         3: addus8 of destination and source shifted right by 2.
// tme - non-zero: the blended value is applied per lane through blend8 using
//       c[3] as the selector; zero: the blended value replaces c[0..2] in every
//       lane and c[3] is cleared.
// d   - destination pixels, one packed 16-bit value per lane; the three 5-bit
//       fields are moved to bits 3..7 before blending.
// c   - in/out, four vectors (three colour channels followed by the mask).
void GPUDrawScanline::AlphaBlend(UINT32 abr, UINT32 tme, const GSVector4i& d, GSVector4i* c)
{
	GSVector4i r = (d & 0x001f001f) << 3;
	GSVector4i g = (d & 0x03e003e0) >> 2;
	GSVector4i b = (d & 0x7c007c00) >> 7;

	switch(abr)
	{
	case 0:
		// each destination channel is averaged with the matching source
		// channel (previously green and blue were averaged with red)
		r = r.avg8(c[0]);
		g = g.avg8(c[1]);
		b = b.avg8(c[2]);
		break;
	case 1:
		r = r.addus8(c[0]);
		g = g.addus8(c[1]);
		b = b.addus8(c[2]);
		break;
	case 2:
		r = r.subus8(c[0]);
		g = g.subus8(c[1]);
		b = b.subus8(c[2]);
		break;
	case 3:
		r = r.addus8(c[0].srl16(2));
		g = g.addus8(c[1].srl16(2));
		b = b.addus8(c[2].srl16(2));
		break;
	default:
		__assume(0);
	}

	if(tme) // per pixel
	{
		c[0] = c[0].blend8(r, c[3]);
		c[1] = c[1].blend8(g, c[3]);
		c[2] = c[2].blend8(b, c[3]);
	}
	else
	{
		c[0] = r;
		c[1] = g;
		c[2] = b;
		c[3] = GSVector4i::zero();
	}
}
|
||||
|
||||
// Packs the four channel vectors in c into 16-bit pixels and stores them.
//
// fb     - destination for the first pixel of the group.
// test   - lane mask; a pixel is stored only where its lane is zero.
// c      - c[0..2] contribute bits 3..7 of each lane to bit positions 0, 5 and 10,
//          c[3] contributes bit 7 to bit 15; m_env.md is OR-ed on top.
// pixels - number of lanes to consider; the first lane is always examined,
//          even if pixels is less than one.
void GPUDrawScanline::WriteFrame(WORD* RESTRICT fb, const GSVector4i& test, const GSVector4i* c, int pixels)
{
	GSVector4i packed = (c[0] & 0x00f800f8) >> 3;

	packed = packed | ((c[1] & 0x00f800f8) << 2);
	packed = packed | ((c[2] & 0x00f800f8) << 7);
	packed = packed | ((c[3] & 0x00800080) << 8);
	packed = packed | m_env.md;

	for(int lane = 0; ; )
	{
		if(test.u16[lane] == 0)
		{
			fb[lane] = packed.u16[lane];
		}

		if(++lane >= pixels)
		{
			break;
		}
	}
}
|
||||
|
||||
//
|
||||
|
||||
// Dither offsets, 16-byte aligned. Four rows of sixteen WORDs; every row is a
// four-value pattern repeated four times. The scanline routines index it as
// s_dither[top & 3][left & 3] and load a vector starting at that element.
__declspec(align(16)) static WORD s_dither[4][16] =
|
||||
{
|
||||
{7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1},
|
||||
{2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4},
|
||||
{1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7},
|
||||
{4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2},
|
||||
};
|
||||
|
||||
void GPUDrawScanline::DrawScanline(int top, int left, int right, const GSVertexSW& v)
|
||||
{
|
||||
GSVector4i s, t;
|
||||
GSVector4i r, g, b;
|
||||
|
||||
if(m_env.sel.tme)
|
||||
{
|
||||
GSVector4i vt = GSVector4i(v.t).xxzzl();
|
||||
|
||||
s = vt.xxxx().add16(m_env.ds);
|
||||
t = vt.yyyy().add16(m_env.dt);
|
||||
}
|
||||
|
||||
GSVector4i vc = GSVector4i(v.c).xxzzlh();
|
||||
|
||||
r = vc.xxxx();
|
||||
g = vc.yyyy();
|
||||
b = vc.zzzz();
|
||||
|
||||
if(m_env.sel.iip)
|
||||
{
|
||||
r = r.add16(m_env.dr);
|
||||
g = g.add16(m_env.dg);
|
||||
b = b.add16(m_env.db);
|
||||
}
|
||||
|
||||
GSVector4i dither;
|
||||
|
||||
if(m_env.sel.dtd)
|
||||
{
|
||||
dither = GSVector4i::load<false>(&s_dither[top & 3][left & 3]);
|
||||
}
|
||||
|
||||
int steps = right - left;
|
||||
|
||||
WORD* fb = m_env.mem->GetPixelAddress(left, top);
|
||||
|
||||
while(1)
|
||||
{
|
||||
do
|
||||
{
|
||||
int pixels = GSVector4i::min_i16(steps, 8);
|
||||
|
||||
GSVector4i test = GSVector4i::zero();
|
||||
|
||||
GSVector4i d = GSVector4i::zero();
|
||||
|
||||
if(m_env.sel.rfb) // me | abe
|
||||
{
|
||||
d = GSVector4i::load<false>(fb);
|
||||
|
||||
if(m_env.sel.me)
|
||||
{
|
||||
test = d.sra16(15);
|
||||
|
||||
if(test.alltrue())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i c[4];
|
||||
|
||||
if(m_env.sel.tme)
|
||||
{
|
||||
SampleTexture(m_env.sel.ltf, m_env.sel.tlu, m_env.sel.twin, test, s, t, c);
|
||||
}
|
||||
|
||||
ColorTFX(m_env.sel.tfx, r, g, b, c);
|
||||
|
||||
if(m_env.sel.abe)
|
||||
{
|
||||
AlphaBlend(m_env.sel.abr, m_env.sel.tme, d, c);
|
||||
}
|
||||
|
||||
if(m_env.sel.dtd)
|
||||
{
|
||||
c[0] = c[0].addus8(dither);
|
||||
c[1] = c[1].addus8(dither);
|
||||
c[2] = c[2].addus8(dither);
|
||||
}
|
||||
|
||||
WriteFrame(fb, test, c, pixels);
|
||||
}
|
||||
while(0);
|
||||
|
||||
if(steps <= 8) break;
|
||||
|
||||
steps -= 8;
|
||||
|
||||
fb += 8;
|
||||
|
||||
if(m_env.sel.tme)
|
||||
{
|
||||
GSVector4i dst8 = m_env.dst8;
|
||||
|
||||
s = s.add16(dst8.xxxx());
|
||||
t = t.add16(dst8.yyyy());
|
||||
}
|
||||
|
||||
if(m_env.sel.iip)
|
||||
{
|
||||
GSVector4i dc8 = m_env.dc8;
|
||||
|
||||
r = r.add16(dc8.xxxx());
|
||||
g = g.add16(dc8.yyyy());
|
||||
b = b.add16(dc8.zzzz());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<DWORD sel>
|
||||
void GPUDrawScanline::DrawScanlineEx(int top, int left, int right, const GSVertexSW& v)
|
||||
{
|
||||
DWORD iip = (sel >> 0) & 1;
|
||||
DWORD me = (sel >> 1) & 1;
|
||||
DWORD abe = (sel >> 2) & 1;
|
||||
DWORD abr = (sel >> 3) & 3;
|
||||
// DWORD tge = (sel >> 5) & 1;
|
||||
DWORD tme = (sel >> 6) & 1;
|
||||
DWORD twin = (sel >> 7) & 1;
|
||||
DWORD rfb = (sel >> 1) & 3;
|
||||
DWORD tfx = (sel >> 5) & 3;
|
||||
|
||||
GSVector4i s, t;
|
||||
GSVector4i r, g, b;
|
||||
|
||||
if(tme)
|
||||
{
|
||||
GSVector4i vt = GSVector4i(v.t).xxzzl();
|
||||
|
||||
s = vt.xxxx().add16(m_env.ds);
|
||||
t = vt.yyyy().add16(m_env.dt);
|
||||
}
|
||||
|
||||
GSVector4i vc = GSVector4i(v.c).xxzzlh();
|
||||
|
||||
r = vc.xxxx();
|
||||
g = vc.yyyy();
|
||||
b = vc.zzzz();
|
||||
|
||||
if(iip)
|
||||
{
|
||||
r = r.add16(m_env.dr);
|
||||
g = g.add16(m_env.dg);
|
||||
b = b.add16(m_env.db);
|
||||
}
|
||||
|
||||
GSVector4i dither;
|
||||
|
||||
if(m_env.sel.dtd)
|
||||
{
|
||||
dither = GSVector4i::load<false>(&s_dither[top & 3][left & 3]);
|
||||
}
|
||||
|
||||
int steps = right - left;
|
||||
|
||||
WORD* fb = m_env.mem->GetPixelAddress(left, top);
|
||||
|
||||
while(1)
|
||||
{
|
||||
do
|
||||
{
|
||||
int pixels = GSVector4i::min_i16(steps, 8);
|
||||
|
||||
GSVector4i test = GSVector4i::zero();
|
||||
|
||||
GSVector4i d = GSVector4i::zero();
|
||||
|
||||
if(rfb) // me | abe
|
||||
{
|
||||
d = GSVector4i::load<false>(fb);
|
||||
|
||||
if(me)
|
||||
{
|
||||
test = d.sra16(15);
|
||||
|
||||
if(test.alltrue())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i c[4];
|
||||
|
||||
if(tme)
|
||||
{
|
||||
SampleTexture(m_env.sel.ltf, m_env.sel.tlu, twin, test, s, t, c);
|
||||
}
|
||||
|
||||
ColorTFX(tfx, r, g, b, c);
|
||||
|
||||
if(abe)
|
||||
{
|
||||
AlphaBlend(abr, tme, d, c);
|
||||
}
|
||||
|
||||
if(m_env.sel.dtd)
|
||||
{
|
||||
c[0] = c[0].addus8(dither);
|
||||
c[1] = c[1].addus8(dither);
|
||||
c[2] = c[2].addus8(dither);
|
||||
}
|
||||
|
||||
WriteFrame(fb, test, c, pixels);
|
||||
}
|
||||
while(0);
|
||||
|
||||
if(steps <= 8) break;
|
||||
|
||||
steps -= 8;
|
||||
|
||||
fb += 8;
|
||||
|
||||
if(tme)
|
||||
{
|
||||
GSVector4i dst8 = m_env.dst8;
|
||||
|
||||
s = s.add16(dst8.xxxx());
|
||||
t = t.add16(dst8.yyyy());
|
||||
}
|
||||
|
||||
if(iip)
|
||||
{
|
||||
GSVector4i dc8 = m_env.dc8;
|
||||
|
||||
r = r.add16(dc8.xxxx());
|
||||
g = g.add16(dc8.yyyy());
|
||||
b = b.add16(dc8.zzzz());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GPUDrawScanline::GPUDrawScanlineMap::GPUDrawScanlineMap()
|
||||
{
|
||||
for(int i = 0; i < countof(m_default); i++)
|
||||
{
|
||||
m_default[i] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanline;
|
||||
}
|
||||
|
||||
#ifdef FAST_DRAWSCANLINE
|
||||
|
||||
m_default[0x00] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x00>;
|
||||
m_default[0x01] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x01>;
|
||||
m_default[0x02] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x02>;
|
||||
m_default[0x03] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x03>;
|
||||
m_default[0x04] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x04>;
|
||||
m_default[0x05] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x05>;
|
||||
m_default[0x06] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x06>;
|
||||
m_default[0x07] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x07>;
|
||||
m_default[0x08] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x08>;
|
||||
m_default[0x09] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x09>;
|
||||
m_default[0x0a] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x0a>;
|
||||
m_default[0x0b] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x0b>;
|
||||
m_default[0x0c] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x0c>;
|
||||
m_default[0x0d] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x0d>;
|
||||
m_default[0x0e] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x0e>;
|
||||
m_default[0x0f] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x0f>;
|
||||
m_default[0x10] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x10>;
|
||||
m_default[0x11] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x11>;
|
||||
m_default[0x12] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x12>;
|
||||
m_default[0x13] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x13>;
|
||||
m_default[0x14] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x14>;
|
||||
m_default[0x15] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x15>;
|
||||
m_default[0x16] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x16>;
|
||||
m_default[0x17] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x17>;
|
||||
m_default[0x18] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x18>;
|
||||
m_default[0x19] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x19>;
|
||||
m_default[0x1a] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x1a>;
|
||||
m_default[0x1b] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x1b>;
|
||||
m_default[0x1c] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x1c>;
|
||||
m_default[0x1d] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x1d>;
|
||||
m_default[0x1e] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x1e>;
|
||||
m_default[0x1f] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x1f>;
|
||||
m_default[0x20] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x20>;
|
||||
m_default[0x21] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x21>;
|
||||
m_default[0x22] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x22>;
|
||||
m_default[0x23] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x23>;
|
||||
m_default[0x24] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x24>;
|
||||
m_default[0x25] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x25>;
|
||||
m_default[0x26] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x26>;
|
||||
m_default[0x27] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x27>;
|
||||
m_default[0x28] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x28>;
|
||||
m_default[0x29] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x29>;
|
||||
m_default[0x2a] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x2a>;
|
||||
m_default[0x2b] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x2b>;
|
||||
m_default[0x2c] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x2c>;
|
||||
m_default[0x2d] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x2d>;
|
||||
m_default[0x2e] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x2e>;
|
||||
m_default[0x2f] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x2f>;
|
||||
m_default[0x30] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x30>;
|
||||
m_default[0x31] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x31>;
|
||||
m_default[0x32] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x32>;
|
||||
m_default[0x33] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x33>;
|
||||
m_default[0x34] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x34>;
|
||||
m_default[0x35] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x35>;
|
||||
m_default[0x36] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x36>;
|
||||
m_default[0x37] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x37>;
|
||||
m_default[0x38] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x38>;
|
||||
m_default[0x39] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x39>;
|
||||
m_default[0x3a] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x3a>;
|
||||
m_default[0x3b] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x3b>;
|
||||
m_default[0x3c] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x3c>;
|
||||
m_default[0x3d] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x3d>;
|
||||
m_default[0x3e] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x3e>;
|
||||
m_default[0x3f] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x3f>;
|
||||
m_default[0x40] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x40>;
|
||||
m_default[0x41] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x41>;
|
||||
m_default[0x42] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x42>;
|
||||
m_default[0x43] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x43>;
|
||||
m_default[0x44] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x44>;
|
||||
m_default[0x45] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x45>;
|
||||
m_default[0x46] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x46>;
|
||||
m_default[0x47] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x47>;
|
||||
m_default[0x48] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x48>;
|
||||
m_default[0x49] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x49>;
|
||||
m_default[0x4a] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x4a>;
|
||||
m_default[0x4b] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x4b>;
|
||||
m_default[0x4c] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x4c>;
|
||||
m_default[0x4d] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x4d>;
|
||||
m_default[0x4e] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x4e>;
|
||||
m_default[0x4f] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x4f>;
|
||||
m_default[0x50] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x50>;
|
||||
m_default[0x51] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x51>;
|
||||
m_default[0x52] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x52>;
|
||||
m_default[0x53] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x53>;
|
||||
m_default[0x54] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x54>;
|
||||
m_default[0x55] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x55>;
|
||||
m_default[0x56] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x56>;
|
||||
m_default[0x57] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x57>;
|
||||
m_default[0x58] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x58>;
|
||||
m_default[0x59] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x59>;
|
||||
m_default[0x5a] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x5a>;
|
||||
m_default[0x5b] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x5b>;
|
||||
m_default[0x5c] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x5c>;
|
||||
m_default[0x5d] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x5d>;
|
||||
m_default[0x5e] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x5e>;
|
||||
m_default[0x5f] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x5f>;
|
||||
m_default[0x60] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x60>;
|
||||
m_default[0x61] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x61>;
|
||||
m_default[0x62] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x62>;
|
||||
m_default[0x63] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x63>;
|
||||
m_default[0x64] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x64>;
|
||||
m_default[0x65] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x65>;
|
||||
m_default[0x66] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x66>;
|
||||
m_default[0x67] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x67>;
|
||||
m_default[0x68] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x68>;
|
||||
m_default[0x69] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x69>;
|
||||
m_default[0x6a] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x6a>;
|
||||
m_default[0x6b] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x6b>;
|
||||
m_default[0x6c] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x6c>;
|
||||
m_default[0x6d] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x6d>;
|
||||
m_default[0x6e] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x6e>;
|
||||
m_default[0x6f] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x6f>;
|
||||
m_default[0x70] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x70>;
|
||||
m_default[0x71] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x71>;
|
||||
m_default[0x72] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x72>;
|
||||
m_default[0x73] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x73>;
|
||||
m_default[0x74] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x74>;
|
||||
m_default[0x75] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x75>;
|
||||
m_default[0x76] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x76>;
|
||||
m_default[0x77] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x77>;
|
||||
m_default[0x78] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x78>;
|
||||
m_default[0x79] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x79>;
|
||||
m_default[0x7a] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x7a>;
|
||||
m_default[0x7b] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x7b>;
|
||||
m_default[0x7c] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x7c>;
|
||||
m_default[0x7d] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x7d>;
|
||||
m_default[0x7e] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x7e>;
|
||||
m_default[0x7f] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x7f>;
|
||||
m_default[0x80] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x80>;
|
||||
m_default[0x81] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x81>;
|
||||
m_default[0x82] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x82>;
|
||||
m_default[0x83] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x83>;
|
||||
m_default[0x84] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x84>;
|
||||
m_default[0x85] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x85>;
|
||||
m_default[0x86] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x86>;
|
||||
m_default[0x87] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x87>;
|
||||
m_default[0x88] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x88>;
|
||||
m_default[0x89] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x89>;
|
||||
m_default[0x8a] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x8a>;
|
||||
m_default[0x8b] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x8b>;
|
||||
m_default[0x8c] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x8c>;
|
||||
m_default[0x8d] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x8d>;
|
||||
m_default[0x8e] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x8e>;
|
||||
m_default[0x8f] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x8f>;
|
||||
m_default[0x90] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x90>;
|
||||
m_default[0x91] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x91>;
|
||||
m_default[0x92] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x92>;
|
||||
m_default[0x93] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x93>;
|
||||
m_default[0x94] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x94>;
|
||||
m_default[0x95] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x95>;
|
||||
m_default[0x96] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x96>;
|
||||
m_default[0x97] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x97>;
|
||||
m_default[0x98] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x98>;
|
||||
m_default[0x99] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x99>;
|
||||
m_default[0x9a] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x9a>;
|
||||
m_default[0x9b] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x9b>;
|
||||
m_default[0x9c] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x9c>;
|
||||
m_default[0x9d] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x9d>;
|
||||
m_default[0x9e] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x9e>;
|
||||
m_default[0x9f] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0x9f>;
|
||||
m_default[0xa0] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xa0>;
|
||||
m_default[0xa1] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xa1>;
|
||||
m_default[0xa2] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xa2>;
|
||||
m_default[0xa3] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xa3>;
|
||||
m_default[0xa4] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xa4>;
|
||||
m_default[0xa5] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xa5>;
|
||||
m_default[0xa6] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xa6>;
|
||||
m_default[0xa7] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xa7>;
|
||||
m_default[0xa8] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xa8>;
|
||||
m_default[0xa9] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xa9>;
|
||||
m_default[0xaa] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xaa>;
|
||||
m_default[0xab] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xab>;
|
||||
m_default[0xac] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xac>;
|
||||
m_default[0xad] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xad>;
|
||||
m_default[0xae] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xae>;
|
||||
m_default[0xaf] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xaf>;
|
||||
m_default[0xb0] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xb0>;
|
||||
m_default[0xb1] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xb1>;
|
||||
m_default[0xb2] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xb2>;
|
||||
m_default[0xb3] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xb3>;
|
||||
m_default[0xb4] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xb4>;
|
||||
m_default[0xb5] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xb5>;
|
||||
m_default[0xb6] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xb6>;
|
||||
m_default[0xb7] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xb7>;
|
||||
m_default[0xb8] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xb8>;
|
||||
m_default[0xb9] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xb9>;
|
||||
m_default[0xba] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xba>;
|
||||
m_default[0xbb] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xbb>;
|
||||
m_default[0xbc] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xbc>;
|
||||
m_default[0xbd] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xbd>;
|
||||
m_default[0xbe] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xbe>;
|
||||
m_default[0xbf] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xbf>;
|
||||
m_default[0xc0] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xc0>;
|
||||
m_default[0xc1] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xc1>;
|
||||
m_default[0xc2] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xc2>;
|
||||
m_default[0xc3] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xc3>;
|
||||
m_default[0xc4] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xc4>;
|
||||
m_default[0xc5] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xc5>;
|
||||
m_default[0xc6] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xc6>;
|
||||
m_default[0xc7] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xc7>;
|
||||
m_default[0xc8] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xc8>;
|
||||
m_default[0xc9] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xc9>;
|
||||
m_default[0xca] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xca>;
|
||||
m_default[0xcb] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xcb>;
|
||||
m_default[0xcc] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xcc>;
|
||||
m_default[0xcd] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xcd>;
|
||||
m_default[0xce] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xce>;
|
||||
m_default[0xcf] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xcf>;
|
||||
m_default[0xd0] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xd0>;
|
||||
m_default[0xd1] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xd1>;
|
||||
m_default[0xd2] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xd2>;
|
||||
m_default[0xd3] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xd3>;
|
||||
m_default[0xd4] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xd4>;
|
||||
m_default[0xd5] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xd5>;
|
||||
m_default[0xd6] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xd6>;
|
||||
m_default[0xd7] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xd7>;
|
||||
m_default[0xd8] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xd8>;
|
||||
m_default[0xd9] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xd9>;
|
||||
m_default[0xda] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xda>;
|
||||
m_default[0xdb] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xdb>;
|
||||
m_default[0xdc] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xdc>;
|
||||
m_default[0xdd] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xdd>;
|
||||
m_default[0xde] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xde>;
|
||||
m_default[0xdf] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xdf>;
|
||||
m_default[0xe0] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xe0>;
|
||||
m_default[0xe1] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xe1>;
|
||||
m_default[0xe2] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xe2>;
|
||||
m_default[0xe3] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xe3>;
|
||||
m_default[0xe4] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xe4>;
|
||||
m_default[0xe5] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xe5>;
|
||||
m_default[0xe6] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xe6>;
|
||||
m_default[0xe7] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xe7>;
|
||||
m_default[0xe8] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xe8>;
|
||||
m_default[0xe9] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xe9>;
|
||||
m_default[0xea] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xea>;
|
||||
m_default[0xeb] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xeb>;
|
||||
m_default[0xec] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xec>;
|
||||
m_default[0xed] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xed>;
|
||||
m_default[0xee] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xee>;
|
||||
m_default[0xef] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xef>;
|
||||
m_default[0xf0] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xf0>;
|
||||
m_default[0xf1] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xf1>;
|
||||
m_default[0xf2] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xf2>;
|
||||
m_default[0xf3] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xf3>;
|
||||
m_default[0xf4] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xf4>;
|
||||
m_default[0xf5] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xf5>;
|
||||
m_default[0xf6] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xf6>;
|
||||
m_default[0xf7] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xf7>;
|
||||
m_default[0xf8] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xf8>;
|
||||
m_default[0xf9] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xf9>;
|
||||
m_default[0xfa] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xfa>;
|
||||
m_default[0xfb] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xfb>;
|
||||
m_default[0xfc] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xfc>;
|
||||
m_default[0xfd] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xfd>;
|
||||
m_default[0xfe] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xfe>;
|
||||
m_default[0xff] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineEx<0xff>;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns the entry of m_default that corresponds to the given selector key.
// The key is wrapped in a GPUScanlineSelector and the selector's DWORD
// conversion supplies the table index.
IDrawScanline::DrawScanlinePtr GPUDrawScanline::GPUDrawScanlineMap::GetDefaultFunction(DWORD key)
{
	GPUScanlineSelector selector;

	selector.key = key;

	// Goes through GPUScanlineSelector::operator DWORD(), exactly as indexing
	// with the selector object itself would.
	const DWORD index = selector;

	return m_default[index];
}
|
||||
|
||||
//
|
||||
|
@ -908,3 +165,14 @@ IDrawScanline::SetupPrimPtr GPUDrawScanline::GPUSetupPrimMap::GetDefaultFunction
|
|||
return m_default[sprite][tme][iip];
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
// Binds the map to a scanline environment. Only the reference is stored;
// Create() forwards it to each code generator it constructs.
GPUDrawScanline::GPUDrawScanlineMap::GPUDrawScanlineMap(GPUScanlineEnvironment& env)
	: m_env(env)
{
	// nothing else to set up
}
|
||||
|
||||
// Constructs a code generator that works on the bound environment and is given
// the caller-provided buffer (ptr, maxsize).
//
// key is not forwarded to the generator; only m_env, ptr and maxsize are.
// The returned object is allocated with new and handed to the caller.
GPUDrawScanlineCodeGenerator* GPUDrawScanline::GPUDrawScanlineMap::Create(DWORD key, void* ptr, size_t maxsize)
{
	GPUDrawScanlineCodeGenerator* generator = new GPUDrawScanlineCodeGenerator(m_env, ptr, maxsize);

	return generator;
}
|
||||
|
|
|
@ -24,62 +24,8 @@
|
|||
#include "GPUState.h"
|
||||
#include "GSRasterizer.h"
|
||||
#include "GSAlignedClass.h"
|
||||
|
||||
union GPUScanlineSelector
|
||||
{
|
||||
struct
|
||||
{
|
||||
DWORD iip:1; // 0
|
||||
DWORD me:1; // 1
|
||||
DWORD abe:1; // 2
|
||||
DWORD abr:2; // 3
|
||||
DWORD tge:1; // 5
|
||||
DWORD tme:1; // 6
|
||||
DWORD twin:1; // 7
|
||||
DWORD tlu:1; // 8
|
||||
DWORD dtd:1; // 9
|
||||
DWORD ltf:1; // 10
|
||||
// DWORD dte:1: // 11
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
DWORD _pad1:1; // 0
|
||||
DWORD rfb:2; // 1
|
||||
DWORD _pad2:2; // 3
|
||||
DWORD tfx:2; // 5
|
||||
};
|
||||
|
||||
DWORD key;
|
||||
|
||||
operator DWORD() {return key & 0xff;}
|
||||
};
|
||||
|
||||
__declspec(align(16)) struct GPUScanlineEnvironment
|
||||
{
|
||||
GPUScanlineSelector sel;
|
||||
|
||||
GPULocalMemory* mem;
|
||||
const void* tex;
|
||||
const WORD* clut;
|
||||
|
||||
GSVector4i u[3];
|
||||
GSVector4i v[3];
|
||||
|
||||
GSVector4i a;
|
||||
GSVector4i md; // similar to gs fba
|
||||
|
||||
GSVector4i ds, dt, dst8;
|
||||
GSVector4i dr, dg, db, dc8;
|
||||
};
|
||||
|
||||
__declspec(align(16)) struct GPUScanlineParam
|
||||
{
|
||||
GPUScanlineSelector sel;
|
||||
|
||||
const void* tex;
|
||||
const WORD* clut;
|
||||
};
|
||||
#include "GPUScanlineEnvironment.h"
|
||||
#include "GPUDrawScanlineCodeGenerator.h"
|
||||
|
||||
class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
||||
{
|
||||
|
@ -87,20 +33,6 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
|||
|
||||
//
|
||||
|
||||
class GPUDrawScanlineMap : public GSFunctionMap<DWORD, DrawScanlinePtr>
|
||||
{
|
||||
DrawScanlinePtr m_default[256];
|
||||
|
||||
public:
|
||||
GPUDrawScanlineMap();
|
||||
|
||||
DrawScanlinePtr GetDefaultFunction(DWORD key);
|
||||
};
|
||||
|
||||
GPUDrawScanlineMap m_ds;
|
||||
|
||||
//
|
||||
|
||||
class GPUSetupPrimMap : public GSFunctionMap<DWORD, SetupPrimPtr>
|
||||
{
|
||||
SetupPrimPtr m_default[2][2][2];
|
||||
|
@ -113,23 +45,22 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
|||
|
||||
GPUSetupPrimMap m_sp;
|
||||
|
||||
//
|
||||
|
||||
template<DWORD sprite, DWORD tme, DWORD iip>
|
||||
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
|
||||
//
|
||||
|
||||
__forceinline void SampleTexture(DWORD ltf, DWORD tlu, DWORD twin, GSVector4i& test, const GSVector4i& s, const GSVector4i& t, GSVector4i* c);
|
||||
__forceinline void ColorTFX(DWORD tfx, const GSVector4i& r, const GSVector4i& g, const GSVector4i& b, GSVector4i* c);
|
||||
__forceinline void AlphaBlend(UINT32 abr, UINT32 tme, const GSVector4i& d, GSVector4i* c);
|
||||
__forceinline void WriteFrame(WORD* RESTRICT fb, const GSVector4i& test, const GSVector4i* c, int pixels);
|
||||
class GPUDrawScanlineMap : public GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, DWORD, DrawScanlineStaticPtr>
|
||||
{
|
||||
GPUScanlineEnvironment& m_env;
|
||||
|
||||
public:
|
||||
GPUDrawScanlineMap(GPUScanlineEnvironment& env);
|
||||
GPUDrawScanlineCodeGenerator* Create(DWORD key, void* ptr, size_t maxsize);
|
||||
} m_ds;
|
||||
|
||||
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
|
||||
|
||||
template<DWORD sel>
|
||||
void DrawScanlineEx(int top, int left, int right, const GSVertexSW& v);
|
||||
|
||||
protected:
|
||||
GPUState* m_state;
|
||||
int m_id;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GPUScanlineEnvironment.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
class GPUDrawScanlineCodeGenerator : public CodeGenerator
|
||||
{
|
||||
void operator = (const GPUDrawScanlineCodeGenerator&);
|
||||
|
||||
static const GSVector4i m_test[8];
|
||||
static const WORD m_dither[4][16];
|
||||
|
||||
util::Cpu m_cpu;
|
||||
|
||||
GPUScanlineEnvironment& m_env;
|
||||
|
||||
void Generate();
|
||||
|
||||
void Init(int params);
|
||||
void Step();
|
||||
void TestMask();
|
||||
void SampleTexture();
|
||||
void ColorTFX();
|
||||
void AlphaBlend();
|
||||
void Dither();
|
||||
void WriteFrame();
|
||||
|
||||
void ReadTexel(const Xmm& dst, const Xmm& addr);
|
||||
|
||||
template<int shift> void modulate16(const Xmm& a, const Operand& f);
|
||||
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Operand& f);
|
||||
void clamp16(const Xmm& a, const Xmm& zero);
|
||||
void alltrue();
|
||||
void blend8(const Xmm& a, const Xmm& b);
|
||||
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
|
||||
|
||||
public:
|
||||
GPUDrawScanlineCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize);
|
||||
};
|
|
@ -89,8 +89,6 @@ const WORD* GPULocalMemory::GetCLUT(int tp, int cx, int cy)
|
|||
WORD* src = GetPixelAddressScaled(cx << 4, cy);
|
||||
WORD* dst = m_clut.buff;
|
||||
|
||||
// TODO: at normal horizontal resolution just return src
|
||||
|
||||
if(m_scale.cx == 0)
|
||||
{
|
||||
memcpy(dst, src, (tp == 0 ? 16 : 256) * 2);
|
||||
|
|
|
@ -128,17 +128,22 @@ protected:
|
|||
p.sel.key = 0;
|
||||
p.sel.iip = env.PRIM.IIP;
|
||||
p.sel.me = env.STATUS.ME;
|
||||
|
||||
if(env.PRIM.ABE)
|
||||
{
|
||||
p.sel.abe = env.PRIM.ABE;
|
||||
p.sel.abr = env.STATUS.ABR;
|
||||
}
|
||||
|
||||
p.sel.tge = env.PRIM.TGE;
|
||||
p.sel.tme = env.PRIM.TME;
|
||||
p.sel.tlu = env.STATUS.TP < 2;
|
||||
p.sel.twin = (env.TWIN.ai32 & 0xfffff) != 0;
|
||||
p.sel.dtd = m_dither ? env.STATUS.DTD : 0;
|
||||
p.sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0;
|
||||
|
||||
if(env.PRIM.TME)
|
||||
{
|
||||
p.sel.tme = env.PRIM.TME;
|
||||
p.sel.tlu = env.STATUS.TP < 2;
|
||||
p.sel.twin = (env.TWIN.ai32 & 0xfffff) != 0;
|
||||
p.sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0;
|
||||
|
||||
const void* t = m_mem.GetTexture(env.STATUS.TP, env.STATUS.TX, env.STATUS.TY);
|
||||
|
||||
if(!t) {ASSERT(0); return;}
|
||||
|
@ -147,6 +152,10 @@ protected:
|
|||
p.clut = m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y);
|
||||
}
|
||||
|
||||
p.sel.dtd = m_dither ? env.STATUS.DTD : 0;
|
||||
p.sel.md = env.STATUS.MD;
|
||||
p.sel.sprite = env.PRIM.TYPE == GPU_SPRITE;
|
||||
|
||||
//
|
||||
|
||||
GSRasterizerData data;
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GSVector.h"
|
||||
#include "GPULocalMemory.h"
|
||||
|
||||
union GPUScanlineSelector
|
||||
{
|
||||
struct
|
||||
{
|
||||
DWORD iip:1; // 0
|
||||
DWORD me:1; // 1
|
||||
DWORD abe:1; // 2
|
||||
DWORD abr:2; // 3
|
||||
DWORD tge:1; // 5
|
||||
DWORD tme:1; // 6
|
||||
DWORD twin:1; // 7
|
||||
DWORD tlu:1; // 8
|
||||
DWORD dtd:1; // 9
|
||||
DWORD ltf:1; // 10
|
||||
DWORD md:1; // 11
|
||||
DWORD sprite:1; // 12
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
DWORD _pad1:1; // 0
|
||||
DWORD rfb:2; // 1
|
||||
DWORD _pad2:2; // 3
|
||||
DWORD tfx:2; // 5
|
||||
};
|
||||
|
||||
DWORD key;
|
||||
|
||||
operator DWORD() {return key;}
|
||||
};
|
||||
|
||||
// Parameters for one draw: the variant selector plus the texture and CLUT
// pointers that go with it.
__declspec(align(16)) struct GPUScanlineParam
{
	GPUScanlineSelector sel;

	const void* tex;  // texture data
	const WORD* clut; // colour look-up table entries
};
|
||||
|
||||
// State shared between the renderer and the scanline code generator.
// Member order is kept exactly as declared originally.
__declspec(align(16)) struct GPUScanlineEnvironment
{
	GPUScanlineSelector sel;

	// GPULocalMemory* mem; // TODO: obsolete

	void* vm;
	const void* tex;
	const WORD* clut;
	DWORD fbw; // 10 + m_scale.cx

	// GSVector4i md; // similar to gs fba

	// Texture window values, one u/v pair per slot.
	struct
	{
		GSVector4i u, v;
	}
	twin[3];

	// Values named after texture coordinates (s, t) and colour channels (r, g, b).
	// NOTE(review): presumably per-pixel deltas - confirm against SetupPrim.
	struct
	{
		GSVector4i s, t, r, g, b;
		GSVector4i _pad[3];
	}
	d;

	struct
	{
		GSVector4i st, c;
	}
	d8;

	// Scratch slots (note the r, b, g order).
	struct
	{
		GSVector4i s, t, r, b, g, uf, vf, dither, fd, test;
	}
	temp;
};
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "StdAfx.h"
|
||||
#include "GSCodeBuffer.h"
|
||||
|
||||
// Creates an empty buffer manager. No memory is allocated here; the first
// block is obtained by GetBuffer().
//
// blocksize: size in bytes of each block requested from the OS.
//
// Initializers are written in the order the members are declared in the class.
GSCodeBuffer::GSCodeBuffer(size_t blocksize)
	: m_blocksize(blocksize)
	, m_pos(0)
	, m_reserved(0)
	, m_ptr(NULL)
{
}
|
||||
|
||||
// Releases every block recorded in m_buffers.
GSCodeBuffer::~GSCodeBuffer()
{
	for(;;)
	{
		if(m_buffers.IsEmpty())
		{
			break;
		}

		void* block = m_buffers.RemoveHead();

		VirtualFree(block, 0, MEM_RELEASE);
	}
}
|
||||
|
||||
void* GSCodeBuffer::GetBuffer(size_t size)
|
||||
{
|
||||
ASSERT(size < m_blocksize);
|
||||
ASSERT(m_reserved == 0);
|
||||
|
||||
size = (size + 15) & ~15;
|
||||
|
||||
if(m_ptr == NULL || m_pos + size > m_blocksize)
|
||||
{
|
||||
m_ptr = (BYTE*)VirtualAlloc(NULL, m_blocksize, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
|
||||
|
||||
m_pos = 0;
|
||||
|
||||
m_buffers.AddTail(m_ptr);
|
||||
}
|
||||
|
||||
BYTE* ptr = &m_ptr[m_pos];
|
||||
|
||||
m_reserved = size;
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void GSCodeBuffer::ReleaseBuffer(size_t size)
|
||||
{
|
||||
ASSERT(size <= m_reserved);
|
||||
|
||||
m_pos = ((m_pos + size) + 15) & ~15;
|
||||
|
||||
m_reserved = 0;
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
class GSCodeBuffer
|
||||
{
|
||||
CAtlList<void*> m_buffers;
|
||||
size_t m_blocksize;
|
||||
size_t m_pos, m_reserved;
|
||||
BYTE* m_ptr;
|
||||
|
||||
public:
|
||||
GSCodeBuffer(size_t blocksize = 4096 * 64); // 256k
|
||||
virtual ~GSCodeBuffer();
|
||||
|
||||
void* GetBuffer(size_t size);
|
||||
void ReleaseBuffer(size_t size);
|
||||
};
|
|
@ -109,6 +109,7 @@ CRC::Game CRC::m_games[] =
|
|||
{0x9E98B8AE, IkkiTousen, JP, false},
|
||||
{0xD6385328, GodOfWar, US, false},
|
||||
{0xFB0E6D72, GodOfWar, EU, false},
|
||||
{0xEB001875, GodOfWar, EU, false},
|
||||
{0xA61A4C6D, GodOfWar, Unknown, false},
|
||||
{0xE23D532B, GodOfWar, Unknown, false},
|
||||
{0x2F123FD8, GodOfWar2, RU, false},
|
||||
|
|
|
@ -26,8 +26,8 @@
|
|||
GSDrawScanline::GSDrawScanline(GSState* state, int id)
|
||||
: m_state(state)
|
||||
, m_id(id)
|
||||
, m_sp(this)
|
||||
, m_ds(this)
|
||||
, m_sp(m_env)
|
||||
, m_ds(m_env)
|
||||
{
|
||||
memset(&m_env, 0, sizeof(m_env));
|
||||
}
|
||||
|
@ -163,13 +163,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
|
||||
//
|
||||
|
||||
f->sl = (DrawScanlinePtr)&GSDrawScanline::DrawScanline;
|
||||
|
||||
m_dsf = m_ds.Lookup(m_env.sel);
|
||||
|
||||
f->ssl = m_dsf;
|
||||
|
||||
//
|
||||
f->ssl = m_ds.Lookup(m_env.sel);
|
||||
|
||||
if(m_env.sel.IsSolidRect())
|
||||
{
|
||||
|
@ -207,16 +201,7 @@ void GSDrawScanline::EndDraw(const GSRasterizerStats& stats)
|
|||
|
||||
void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
|
||||
{
|
||||
// TODO: call this directly from rasterizer
|
||||
|
||||
m_spf(vertices, dscan);
|
||||
}
|
||||
|
||||
void GSDrawScanline::DrawScanline(int top, int left, int right, const GSVertexSW& v)
|
||||
{
|
||||
// TODO: call this directly from rasterizer
|
||||
|
||||
m_dsf(top, left, right, v);
|
||||
m_spf(vertices, dscan); // TODO: call this directly from rasterizer
|
||||
}
|
||||
|
||||
void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
|
||||
|
@ -371,24 +356,24 @@ void GSDrawScanline::FillBlock(const GSVector4i* row, int* col, const GSVector4i
|
|||
|
||||
//
|
||||
|
||||
GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap(GSDrawScanline* ds)
|
||||
: m_ds(ds)
|
||||
GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap(GSScanlineEnvironment& env)
|
||||
: m_env(env)
|
||||
{
|
||||
}
|
||||
|
||||
GSSetupPrimCodeGenerator* GSDrawScanline::GSSetupPrimMap::Create(UINT64 key)
|
||||
GSSetupPrimCodeGenerator* GSDrawScanline::GSSetupPrimMap::Create(UINT64 key, void* ptr, size_t maxsize)
|
||||
{
|
||||
return new GSSetupPrimCodeGenerator(m_ds->m_env);
|
||||
return new GSSetupPrimCodeGenerator(m_env, ptr, maxsize);
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap(GSDrawScanline* ds)
|
||||
: m_ds(ds)
|
||||
GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap(GSScanlineEnvironment& env)
|
||||
: m_env(env)
|
||||
{
|
||||
}
|
||||
|
||||
GSDrawScanlineCodeGenerator* GSDrawScanline::GSDrawScanlineMap::Create(UINT64 key)
|
||||
GSDrawScanlineCodeGenerator* GSDrawScanline::GSDrawScanlineMap::Create(UINT64 key, void* ptr, size_t maxsize)
|
||||
{
|
||||
return new GSDrawScanlineCodeGenerator(m_ds->m_env);
|
||||
return new GSDrawScanlineCodeGenerator(m_env, ptr, maxsize);
|
||||
}
|
||||
|
|
|
@ -36,11 +36,11 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
|||
|
||||
class GSSetupPrimMap : public GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, UINT64, SetupPrimStaticPtr>
|
||||
{
|
||||
GSDrawScanline* m_ds;
|
||||
GSScanlineEnvironment& m_env;
|
||||
|
||||
public:
|
||||
GSSetupPrimMap(GSDrawScanline* ds);
|
||||
GSSetupPrimCodeGenerator* Create(UINT64 key);
|
||||
GSSetupPrimMap(GSScanlineEnvironment& env);
|
||||
GSSetupPrimCodeGenerator* Create(UINT64 key, void* ptr, size_t maxsize);
|
||||
} m_sp;
|
||||
|
||||
SetupPrimStaticPtr m_spf;
|
||||
|
@ -51,17 +51,13 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
|||
|
||||
class GSDrawScanlineMap : public GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, UINT64, DrawScanlineStaticPtr>
|
||||
{
|
||||
GSDrawScanline* m_ds;
|
||||
GSScanlineEnvironment& m_env;
|
||||
|
||||
public:
|
||||
GSDrawScanlineMap(GSDrawScanline* ds);
|
||||
GSDrawScanlineCodeGenerator* Create(UINT64 key);
|
||||
GSDrawScanlineMap(GSScanlineEnvironment& env);
|
||||
GSDrawScanlineCodeGenerator* Create(UINT64 key, void* ptr, size_t maxsize);
|
||||
} m_ds;
|
||||
|
||||
DrawScanlineStaticPtr m_dsf;
|
||||
|
||||
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
|
||||
|
||||
//
|
||||
|
||||
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
|
|
@ -24,8 +24,8 @@
|
|||
#include "StdAfx.h"
|
||||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
|
||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env)
|
||||
: CodeGenerator(DEFAULT_MAX_CODE_SIZE, 0)
|
||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
, m_env(env)
|
||||
{
|
||||
#if _M_AMD64
|
||||
|
@ -391,6 +391,8 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
paddd(xmm3, xmmword[eax + 16 * 8]);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.ltf)
|
||||
{
|
||||
movdqa(xmm4, xmm3);
|
||||
pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
|
@ -398,6 +400,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
psrlw(xmm4, 1);
|
||||
movdqa(xmmword[&m_env.temp.vf], xmm4);
|
||||
}
|
||||
}
|
||||
|
||||
movdqa(xmmword[&m_env.temp.s], xmm2);
|
||||
movdqa(xmmword[&m_env.temp.t], xmm3);
|
||||
|
@ -918,14 +921,14 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm7 = used
|
||||
|
||||
// GSVector4i rb10 = c10 & mask;
|
||||
// GSVector4i rb11 = c11 & mask;
|
||||
// GSVector4i ga10 = (c10 >> 8) & mask;
|
||||
|
||||
movdqa(xmm2, xmm1);
|
||||
psllw(xmm1, 8);
|
||||
psrlw(xmm1, 8);
|
||||
psrlw(xmm2, 8);
|
||||
|
||||
// GSVector4i ga10 = (c10 >> 8) & mask;
|
||||
// GSVector4i rb11 = c11 & mask;
|
||||
// GSVector4i ga11 = (c11 >> 8) & mask;
|
||||
|
||||
movdqa(xmm6, xmm5);
|
||||
|
@ -1511,7 +1514,28 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::AlphaBlend()
|
||||
{
|
||||
if(!m_env.sel.fwrite || m_env.sel.abe == 255)
|
||||
if(!m_env.sel.fwrite)
|
||||
{
|
||||
return;
|
||||
}
|
||||
/*
|
||||
if(m_env.sel.aa1)
|
||||
{
|
||||
printf("aa1 %016I64x\n", m_env.sel.key);
|
||||
|
||||
if(m_env.sel.fpsm != 1) // TODO: fm == 0xffxxxxxx
|
||||
{
|
||||
// a = 0x80
|
||||
|
||||
pcmpeqd(xmm0, xmm0);
|
||||
psllw(xmm0, 15);
|
||||
mix16(xmm6, xmm0, xmm1);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
*/
|
||||
if(m_env.sel.abe == 255)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -1734,7 +1758,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.fpsm != 1) // TODO: fpsm == 0 && fm == 0xffxxxxxx
|
||||
if(m_env.sel.fpsm != 1) // TODO: fm == 0xffxxxxxx
|
||||
{
|
||||
mix16(xmm6, xmm4, xmm7);
|
||||
}
|
||||
|
|
|
@ -72,5 +72,5 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
|
|||
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
|
||||
|
||||
public:
|
||||
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env);
|
||||
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize);
|
||||
};
|
|
@ -160,13 +160,18 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
#include "GSCodeBuffer.h"
|
||||
|
||||
template<class CG, class KEY, class VALUE>
|
||||
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
|
||||
{
|
||||
CRBMap<UINT64, CG*> m_cgmap;
|
||||
GSCodeBuffer m_cb;
|
||||
|
||||
enum {MAX_SIZE = 4096};
|
||||
|
||||
protected:
|
||||
virtual CG* Create(KEY key) = 0;
|
||||
virtual CG* Create(KEY key, void* ptr, size_t maxsize = MAX_SIZE) = 0;
|
||||
|
||||
public:
|
||||
GSCodeGeneratorFunctionMap()
|
||||
|
@ -189,10 +194,14 @@ public:
|
|||
|
||||
if(!m_cgmap.Lookup(key, cg))
|
||||
{
|
||||
cg = Create(key);
|
||||
void* ptr = m_cb.GetBuffer(MAX_SIZE);
|
||||
|
||||
cg = Create(key, ptr, MAX_SIZE);
|
||||
|
||||
ASSERT(cg);
|
||||
|
||||
m_cb.ReleaseBuffer(cg->getSize());
|
||||
|
||||
m_cgmap.SetAt(key, cg);
|
||||
}
|
||||
|
||||
|
|
|
@ -38,7 +38,6 @@ GSRasterizer::~GSRasterizer()
|
|||
|
||||
void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||
{
|
||||
m_dsf.sl = NULL;
|
||||
m_dsf.sr = NULL;
|
||||
m_dsf.sp = NULL;
|
||||
m_dsf.ssl = NULL;
|
||||
|
@ -102,8 +101,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
|
|||
(m_ds->*m_dsf.sp)(v, *v);
|
||||
// TODO: (m_dsf.ssp)(v, *v);
|
||||
|
||||
(m_ds->*m_dsf.sl)(p.y, p.x, p.x + 1, *v);
|
||||
// TODO: (m_dsf.ssl)(p.y, p.x, p.x + 1, *v);
|
||||
m_dsf.ssl(p.y, p.x, p.x + 1, *v);
|
||||
|
||||
m_stats.pixels++;
|
||||
}
|
||||
|
@ -420,6 +418,10 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
int left = lr.extract32<0>();
|
||||
int right = lr.extract32<2>();
|
||||
|
||||
// TODO:
|
||||
// left coverage = l.p.ceil().x - l.p.x
|
||||
// right coverage = r.ceil() - r
|
||||
|
||||
if(left < scissor.x) left = scissor.x;
|
||||
if(right > scissor.z) right = scissor.z;
|
||||
|
||||
|
@ -442,8 +444,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
scan = l;
|
||||
}
|
||||
|
||||
(m_ds->*m_dsf.sl)(top, left, right, scan);
|
||||
// TODO: (m_dsf.ssl)(top, left, right, scan);
|
||||
m_dsf.ssl(top, left, right, scan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -472,6 +473,10 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
int left = lr.extract32<0>();
|
||||
int right = lr.extract32<1>();
|
||||
|
||||
// TODO:
|
||||
// left coverage = l.p.ceil().x - l.p.x
|
||||
// right coverage = l.p.ceil().y - l.p.y
|
||||
|
||||
if(left < scissor.x) left = scissor.x;
|
||||
if(right > scissor.z) right = scissor.z;
|
||||
|
||||
|
@ -494,8 +499,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
scan = l;
|
||||
}
|
||||
|
||||
(m_ds->*m_dsf.sl)(top, left, right, scan);
|
||||
// TODO: (m_dsf.ssl)(top, left, right, scan);
|
||||
m_dsf.ssl(top, left, right, scan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -585,8 +589,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
|
|||
{
|
||||
if((top % m_threads) == m_id)
|
||||
{
|
||||
(m_ds->*m_dsf.sl)(top, left, right, scan);
|
||||
// TODO: (m_dsf.ssl)(top, left, right, scan);
|
||||
m_dsf.ssl(top, left, right, scan);
|
||||
|
||||
m_stats.pixels += right - left;
|
||||
}
|
||||
|
|
|
@ -25,9 +25,6 @@
|
|||
#include "GSVertexSW.h"
|
||||
#include "GSFunctionMap.h"
|
||||
|
||||
//
|
||||
#define FAST_DRAWSCANLINE
|
||||
|
||||
__declspec(align(16)) class GSRasterizerData
|
||||
{
|
||||
public:
|
||||
|
@ -51,7 +48,6 @@ public:
|
|||
class IDrawScanline
|
||||
{
|
||||
public:
|
||||
typedef void (IDrawScanline::*DrawScanlinePtr)(int top, int left, int right, const GSVertexSW& v);
|
||||
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
|
||||
typedef void (IDrawScanline::*SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
typedef void (*DrawScanlineStaticPtr)(int top, int left, int right, const GSVertexSW& v);
|
||||
|
@ -59,7 +55,6 @@ public:
|
|||
|
||||
struct Functions
|
||||
{
|
||||
DrawScanlinePtr sl;
|
||||
DrawSolidRectPtr sr;
|
||||
SetupPrimPtr sp;
|
||||
DrawScanlineStaticPtr ssl;
|
||||
|
|
|
@ -60,7 +60,8 @@ protected:
|
|||
m_reset = false;
|
||||
}
|
||||
|
||||
// if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats();
|
||||
//
|
||||
if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats();
|
||||
}
|
||||
|
||||
void ResetDevice()
|
||||
|
@ -266,7 +267,7 @@ protected:
|
|||
p.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0;
|
||||
|
||||
p.fm = context->FRAME.FBMSK;
|
||||
p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 || PRIM->AA1 && primclass == GS_LINE_CLASS ? 0xffffffff : 0;
|
||||
|
||||
if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
|
||||
{
|
||||
|
@ -460,7 +461,7 @@ protected:
|
|||
|
||||
if(PRIM->AA1)
|
||||
{
|
||||
// TODO: automatic alpha blending (ABE=1, A=0 B=1 C=0 D=1)
|
||||
p.sel.aa1 = 1;
|
||||
}
|
||||
|
||||
if(p.sel.date
|
||||
|
@ -494,10 +495,10 @@ protected:
|
|||
|
||||
void Draw()
|
||||
{
|
||||
m_vtrace.Update(m_vertices, m_count);
|
||||
|
||||
GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(PRIM->PRIM);
|
||||
|
||||
m_vtrace.Update(m_vertices, m_count, primclass, PRIM->IIP, PRIM->TME, m_context->TEX0.TFX);
|
||||
|
||||
GSScanlineParam p;
|
||||
|
||||
GetScanlineParam(p, primclass);
|
||||
|
@ -796,6 +797,17 @@ public:
|
|||
break;
|
||||
}
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
// are in line or just two of them are the same (cross product == 0)
|
||||
GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz();
|
||||
test |= tmp == tmp.yxwz();
|
||||
break;
|
||||
}
|
||||
|
||||
if(test.mask() & 3)
|
||||
{
|
||||
return;
|
||||
|
@ -818,7 +830,7 @@ public:
|
|||
break;
|
||||
}
|
||||
|
||||
if(m_count >= 3 && m_count < 30)
|
||||
if(m_count < 30 && m_count >= 3)
|
||||
{
|
||||
GSVertexSW* v = &m_vertices[m_count - 3];
|
||||
|
||||
|
|
|
@ -60,6 +60,7 @@ union GSScanlineSelector
|
|||
DWORD fba:1; // 42
|
||||
DWORD dthe:1; // 43
|
||||
DWORD zoverflow:1; // 44 (z max >= 0x80000000)
|
||||
DWORD aa1:1; // 45
|
||||
};
|
||||
|
||||
struct
|
||||
|
|
|
@ -24,8 +24,8 @@
|
|||
#include "StdAfx.h"
|
||||
#include "GSSetupPrimCodeGenerator.h"
|
||||
|
||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env)
|
||||
: CodeGenerator(DEFAULT_MAX_CODE_SIZE, 0)
|
||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
, m_env(env)
|
||||
{
|
||||
m_en.z = m_env.sel.zb ? 1 : 0;
|
||||
|
|
|
@ -46,5 +46,5 @@ class GSSetupPrimCodeGenerator : public CodeGenerator
|
|||
void Color();
|
||||
|
||||
public:
|
||||
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env);
|
||||
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, void* ptr, size_t maxsize);
|
||||
};
|
|
@ -2208,17 +2208,52 @@ public:
|
|||
|
||||
GSVector4 rcpnr() const
|
||||
{
|
||||
return GSVector4(_mm_rcpnr_ps(m));
|
||||
GSVector4 v = rcp();
|
||||
|
||||
return (v + v) - (v * v) * *this;
|
||||
}
|
||||
|
||||
// Rounding modes accepted by round<mode>(). The value is forwarded unchanged to
// _mm_round_ps on SSE4.1 builds; the emulated path only inspects the low three
// bits (mode & 7), i.e. 0 = nearest, 1 = toward -inf, 2 = toward +inf.
// NOTE(review): bit 3 (8) presumably corresponds to _MM_FROUND_NO_EXC - confirm.
enum RoundMode {NearestInt = 8, NegInf = 9, PosInf = 10};
|
||||
|
||||
// Rounds every lane to an integral value using the direction selected by `mode`
// (one of RoundMode). SSE4.1 builds hand the mode straight to _mm_round_ps;
// older targets emulate it: round to nearest first, then step one unit back or
// forward for the lanes where the nearest value overshot the requested direction.
template<int mode> GSVector4 round() const
{
#if _M_SSE >= 0x401

	return GSVector4(_mm_round_ps(m, mode));

#else

	GSVector4 src = *this;

	// presumably ps_80000000 is the sign-bit mask and ps_4b000000 the float 2^23,
	// giving a sign-matched constant whose add/subtract drops the fraction bits
	GSVector4 magic = (src & GSVector4(ps_80000000)) | GSVector4(ps_4b000000);

	GSVector4 nearest = src + magic - magic;

	switch(mode & 7)
	{
	case NegInf & 7:
		// nearest went above the input: step down by ps_3f800000 in those lanes
		return nearest - ((src < nearest) & GSVector4(ps_3f800000));

	case PosInf & 7:
		// nearest went below the input: step up by ps_3f800000 in those lanes
		return nearest + ((src > nearest) & GSVector4(ps_3f800000));

	default:
		ASSERT((mode & 7) == (NearestInt & 7)); // other modes aren't implemented
		return nearest;
	}

#endif
}
|
||||
|
||||
GSVector4 floor() const
|
||||
{
|
||||
return GSVector4(_mm_floor_ps(m));
|
||||
return round<NegInf>();
|
||||
}
|
||||
|
||||
GSVector4 ceil() const
|
||||
{
|
||||
return GSVector4(_mm_ceil_ps(m));
|
||||
return round<PosInf>();
|
||||
}
|
||||
|
||||
GSVector4 mod2x(const GSVector4& f, const int scale = 256) const
|
||||
|
|
|
@ -0,0 +1,168 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSVertexSW.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Emits, through Xbyak, a 32-bit x86 routine specialised for `key` that walks a
// vertex array and stores the component-wise minimum and maximum.
//
// key layout (as decoded below): bits 0-1 primitive class, bit 2 iip,
// bit 3 tme, bits 4-5 tfx.
//
// The generated routine reads four stack arguments (vertex pointer, count,
// min output, max output) at esp+4 .. esp+16 and addresses each vertex at byte
// offsets 0 (c), 16 (p) and 32 (t). Colour is skipped for decal texturing and
// texture coordinates are skipped when texturing is off; the corresponding
// output fields are then left untouched.
GSVertexTrace::GSVertexTraceCodeGenerator::GSVertexTraceCodeGenerator(DWORD key, void* ptr, size_t maxsize)
	: CodeGenerator(maxsize, ptr)
{
#if _M_AMD64
#error TODO
#endif

	// unpack the selector

	const DWORD primclass = key & 3;
	const DWORD iip = (key >> 2) & 1;
	const DWORD tme = (key >> 3) & 1;
	const DWORD tfx = (key >> 4) & 3;

	const bool color = !tme || tfx != TFX_DECAL;

	// vertices per primitive; points (and anything unrecognised) use 1

	int n = 1;

	if(primclass == GS_LINE_CLASS || primclass == GS_SPRITE_CLASS)
	{
		n = 2;
	}
	else if(primclass == GS_TRIANGLE_CLASS)
	{
		n = 3;
	}

	// stack offsets of the routine's arguments

	const int arg_base = 0;

	const int arg_v = arg_base + 4;
	const int arg_count = arg_base + 8;
	const int arg_min = arg_base + 12;
	const int arg_max = arg_base + 16;

	// the generated code loads these by address, so they must have static storage

	static const float s_lowest = -FLT_MAX;
	static const float s_highest = FLT_MAX;

	// xmm0 = FLT_MAX in every lane, xmm1 = -FLT_MAX in every lane

	movss(xmm0, xmmword[&s_highest]);
	shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));

	movss(xmm1, xmmword[&s_lowest]);
	shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));

	if(color)
	{
		// running colour min (xmm2) / max (xmm3)

		movaps(xmm2, xmm0);
		movaps(xmm3, xmm1);
	}

	// running position min (xmm4) / max (xmm5)

	movaps(xmm4, xmm0);
	movaps(xmm5, xmm1);

	if(tme)
	{
		// running texcoord min (xmm6) / max (xmm7)

		movaps(xmm6, xmm0);
		movaps(xmm7, xmm1);
	}

	// edx = current vertex, ecx = vertices left

	mov(edx, dword[esp + arg_v]);
	mov(ecx, dword[esp + arg_count]);

	align(16);

	// one pass handles a whole primitive (n vertices); the count is tested at the
	// bottom, so the body always executes at least once

	L("loop");

	for(int j = 0; j < n; j++)
	{
		const size_t vertex = j * sizeof(GSVertexSW);

		// without iip only the last vertex of the primitive contributes its colour

		if(color && (iip || j == n - 1))
		{
			movaps(xmm0, xmmword[edx + vertex]);

			minps(xmm2, xmm0);
			maxps(xmm3, xmm0);
		}

		movaps(xmm0, xmmword[edx + vertex + 16]);

		minps(xmm4, xmm0);
		maxps(xmm5, xmm0);

		if(tme)
		{
			movaps(xmm0, xmmword[edx + vertex + 32]);

			minps(xmm6, xmm0);
			maxps(xmm7, xmm0);
		}
	}

	add(edx, n * sizeof(GSVertexSW));
	sub(ecx, n);

	jg("loop");

	// write the results back through the min / max arguments

	mov(eax, dword[esp + arg_min]);
	mov(edx, dword[esp + arg_max]);

	if(color)
	{
		movaps(xmmword[eax], xmm2);
		movaps(xmmword[edx], xmm3);
	}

	movaps(xmmword[eax + 16], xmm4);
	movaps(xmmword[edx + 16], xmm5);

	if(tme)
	{
		movaps(xmmword[eax + 32], xmm6);
		movaps(xmmword[edx + 32], xmm7);
	}

	ret();
}
|
|
@ -214,8 +214,27 @@ __forceinline GSVertexSW operator / (const GSVertexSW& v, float f)
|
|||
return v0;
|
||||
}
|
||||
|
||||
__declspec(align(16)) struct GSVertexTrace
|
||||
#include "GSFunctionMap.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
|
||||
__declspec(align(16)) class GSVertexTrace
|
||||
{
|
||||
class GSVertexTraceCodeGenerator : public Xbyak::CodeGenerator
|
||||
{
|
||||
public:
|
||||
GSVertexTraceCodeGenerator(DWORD key, void* ptr, size_t maxsize);
|
||||
};
|
||||
|
||||
typedef void (*VertexTracePtr)(const GSVertexSW* v, int count, GSVertexSW& min, GSVertexSW& max);
|
||||
|
||||
// Function map keyed by a DWORD selector; each entry is a VertexTracePtr produced
// by a GSVertexTraceCodeGenerator. m_map is the single instance held by the
// enclosing class.
class GSVertexTraceMap : public GSCodeGeneratorFunctionMap<GSVertexTraceCodeGenerator, DWORD, VertexTracePtr>
|
||||
{
|
||||
public:
|
||||
GSVertexTraceMap() {}
|
||||
// Builds the generator for `key`, emitting into the buffer [ptr, ptr + maxsize).
// The object is allocated with new; NOTE(review): presumably the base map owns
// and deletes it - confirm in GSFunctionMap.h.
GSVertexTraceCodeGenerator* Create(DWORD key, void* ptr, size_t maxsize) {return new GSVertexTraceCodeGenerator(key, ptr, maxsize);}
|
||||
} m_map;
|
||||
|
||||
public:
|
||||
GSVertexSW m_min, m_max;
|
||||
|
||||
union
|
||||
|
@ -225,16 +244,28 @@ __declspec(align(16)) struct GSVertexTrace
|
|||
struct {DWORD xyzf:4, stq:4, rgba:4;};
|
||||
} m_eq;
|
||||
|
||||
void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass, DWORD iip, DWORD tme, DWORD tfx)
|
||||
{
|
||||
if(!tme) tfx = 0;
|
||||
|
||||
DWORD key = primclass | (iip << 2) | (tme << 3) | (tfx << 4);
|
||||
|
||||
m_map.Lookup(key)(v, count, m_min, m_max);
|
||||
|
||||
m_eq.value = (m_min.p == m_max.p).mask() | ((m_min.t == m_max.t).mask() << 4) | ((m_min.c == m_max.c).mask() << 8);
|
||||
}
|
||||
/*
|
||||
*/
|
||||
void Update(const GSVertexSW* v, int count)
|
||||
{
|
||||
GSVertexSW min, max;
|
||||
|
||||
min.p = v[0].p;
|
||||
max.p = v[0].p;
|
||||
min.t = v[0].t;
|
||||
max.t = v[0].t;
|
||||
min.c = v[0].c;
|
||||
max.c = v[0].c;
|
||||
min.t = v[0].t;
|
||||
max.t = v[0].t;
|
||||
min.p = v[0].p;
|
||||
max.p = v[0].p;
|
||||
|
||||
for(int i = 1; i < count; i++)
|
||||
{
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,263 +0,0 @@
|
|||
/**
|
||||
*** Copyright (C) 1985-2007 Intel Corporation. All rights reserved.
|
||||
***
|
||||
*** The information and source code contained herein is the exclusive
|
||||
*** property of Intel Corporation and may not be disclosed, examined
|
||||
*** or reproduced in whole or in part without explicit written authorization
|
||||
*** from the company.
|
||||
***
|
||||
**/
|
||||
|
||||
/*
|
||||
* smmintrin.h
|
||||
*
|
||||
* Principal header file for Intel(R) Core(TM) 2 Duo processor
|
||||
* SSE4.1 intrinsics
|
||||
*/
|
||||
|
||||
// Gsdx Note: This header file has been "borrowed" from the MSVC install and bugfixed to
|
||||
// allow for proper code compilation. The original version of the header includes semicolons
|
||||
// after several macros defined below, which causes compiler errors when using them in
|
||||
// inline object construction situations. -- Air
|
||||
|
||||
#pragma once
|
||||
#ifndef __midl
|
||||
#ifndef _INCLUDED_SMM
|
||||
#define _INCLUDED_SMM
|
||||
|
||||
#if defined(_M_CEE_PURE)
|
||||
#error ERROR: EMM intrinsics not supported in the pure mode!
|
||||
#else
|
||||
|
||||
#include <tmmintrin.h>
|
||||
|
||||
|
||||
/*
|
||||
* Rounding mode macros
|
||||
*/
|
||||
|
||||
// Rounding direction (low bits of the rounding immediate)
#define _MM_FROUND_TO_NEAREST_INT    0x00
#define _MM_FROUND_TO_NEG_INF        0x01
#define _MM_FROUND_TO_POS_INF        0x02
#define _MM_FROUND_TO_ZERO           0x03
#define _MM_FROUND_CUR_DIRECTION     0x04

// Exception behaviour flag
#define _MM_FROUND_RAISE_EXC         0x00
#define _MM_FROUND_NO_EXC            0x08

// Composite modes. Each expansion is parenthesised so that the macro behaves as
// a single value next to operators that bind tighter than '|' (==, &, *, ...).
#define _MM_FROUND_NINT       (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR      (_MM_FROUND_TO_NEG_INF     | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL       (_MM_FROUND_TO_POS_INF     | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC      (_MM_FROUND_TO_ZERO        | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT       (_MM_FROUND_CUR_DIRECTION  | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT  (_MM_FROUND_CUR_DIRECTION  | _MM_FROUND_NO_EXC)
|
||||
|
||||
/*
|
||||
* MACRO functions for ceil/floor intrinsics
|
||||
*/
|
||||
|
||||
#define _mm_ceil_pd(val) _mm_round_pd((val), _MM_FROUND_CEIL)
|
||||
#define _mm_ceil_sd(dst, val) _mm_round_sd((dst), (val), _MM_FROUND_CEIL)
|
||||
|
||||
#define _mm_floor_pd(val) _mm_round_pd((val), _MM_FROUND_FLOOR)
|
||||
#define _mm_floor_sd(dst, val) _mm_round_sd((dst), (val), _MM_FROUND_FLOOR)
|
||||
|
||||
#define _mm_ceil_ps(val) _mm_round_ps((val), _MM_FROUND_CEIL)
|
||||
#define _mm_ceil_ss(dst, val) _mm_round_ss((dst), (val), _MM_FROUND_CEIL)
|
||||
|
||||
#define _mm_floor_ps(val) _mm_round_ps((val), _MM_FROUND_FLOOR)
|
||||
#define _mm_floor_ss(dst, val) _mm_round_ss((dst), (val), _MM_FROUND_FLOOR)
|
||||
|
||||
#define _mm_test_all_zeros(mask, val) _mm_testz_si128((mask), (val))
|
||||
|
||||
/*
|
||||
* MACRO functions for packed integer 128-bit comparison intrinsics.
|
||||
*/
|
||||
|
||||
#define _mm_test_all_ones(val) \
|
||||
_mm_testc_si128((val), _mm_cmpeq_epi32((val),(val)))
|
||||
|
||||
#define _mm_test_mix_ones_zeros(mask, val) _mm_testnzc_si128((mask), (val))
|
||||
|
||||
#if __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Integer blend instructions - select data from 2 sources
|
||||
// using constant/variable mask
|
||||
|
||||
extern __m128i _mm_blend_epi16 (__m128i v1, __m128i v2,
|
||||
const int mask);
|
||||
extern __m128i _mm_blendv_epi8 (__m128i v1, __m128i v2, __m128i mask);
|
||||
|
||||
// Float single precision blend instructions - select data
|
||||
// from 2 sources using constant/variable mask
|
||||
|
||||
extern __m128 _mm_blend_ps (__m128 v1, __m128 v2, const int mask);
|
||||
extern __m128 _mm_blendv_ps(__m128 v1, __m128 v2, __m128 v3);
|
||||
|
||||
// Float double precision blend instructions - select data
|
||||
// from 2 sources using constant/variable mask
|
||||
|
||||
extern __m128d _mm_blend_pd (__m128d v1, __m128d v2, const int mask);
|
||||
extern __m128d _mm_blendv_pd(__m128d v1, __m128d v2, __m128d v3);
|
||||
|
||||
// Dot product instructions with mask-defined summing and zeroing
|
||||
// of result's parts
|
||||
|
||||
extern __m128 _mm_dp_ps(__m128 val1, __m128 val2, const int mask);
|
||||
extern __m128d _mm_dp_pd(__m128d val1, __m128d val2, const int mask);
|
||||
|
||||
// Packed integer 64-bit comparison, zeroing or filling with ones
|
||||
// corresponding parts of result
|
||||
|
||||
extern __m128i _mm_cmpeq_epi64(__m128i val1, __m128i val2);
|
||||
|
||||
// Min/max packed integer instructions
|
||||
|
||||
extern __m128i _mm_min_epi8 (__m128i val1, __m128i val2);
|
||||
extern __m128i _mm_max_epi8 (__m128i val1, __m128i val2);
|
||||
|
||||
extern __m128i _mm_min_epu16(__m128i val1, __m128i val2);
|
||||
extern __m128i _mm_max_epu16(__m128i val1, __m128i val2);
|
||||
|
||||
extern __m128i _mm_min_epi32(__m128i val1, __m128i val2);
|
||||
extern __m128i _mm_max_epi32(__m128i val1, __m128i val2);
|
||||
extern __m128i _mm_min_epu32(__m128i val1, __m128i val2);
|
||||
extern __m128i _mm_max_epu32(__m128i val1, __m128i val2);
|
||||
|
||||
// Packed integer 32-bit multiplication with truncation
|
||||
// of upper halves of results
|
||||
|
||||
extern __m128i _mm_mullo_epi32(__m128i a, __m128i b);
|
||||
|
||||
// Packed integer 32-bit multiplication of 2 pairs of operands
|
||||
// producing two 64-bit results
|
||||
|
||||
extern __m128i _mm_mul_epi32(__m128i a, __m128i b);
|
||||
|
||||
// Packed integer 128-bit bitwise comparison.
|
||||
// return 1 if (val 'and' mask) == 0
|
||||
|
||||
extern int _mm_testz_si128(__m128i mask, __m128i val);
|
||||
|
||||
// Packed integer 128-bit bitwise comparison.
|
||||
// return 1 if (val 'and_not' mask) == 0
|
||||
|
||||
extern int _mm_testc_si128(__m128i mask, __m128i val);
|
||||
|
||||
// Packed integer 128-bit bitwise comparison
|
||||
// ZF = ((val 'and' mask) == 0) CF = ((val 'and_not' mask) == 0)
|
||||
// return 1 if both ZF and CF are 0
|
||||
|
||||
extern int _mm_testnzc_si128(__m128i mask, __m128i s2);
|
||||
|
||||
// Insert single precision float into packed single precision
|
||||
// array element selected by index.
|
||||
// The bits [7-6] of the 3d parameter define src index,
|
||||
// the bits [5-4] define dst index, and bits [3-0] define zeroing
|
||||
// mask for dst
|
||||
|
||||
extern __m128 _mm_insert_ps(__m128 dst, __m128 src, const int ndx);
|
||||
|
||||
// Helper macro to create ndx-parameter value for _mm_insert_ps
|
||||
|
||||
#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) \
|
||||
(((srcField)<<6) | ((dstField)<<4) | (zeroMask))
|
||||
|
||||
// Extract binary representation of single precision float from
|
||||
// packed single precision array element selected by index
|
||||
|
||||
extern int _mm_extract_ps(__m128 src, const int ndx);
|
||||
|
||||
// Extract single precision float from packed single precision
|
||||
// array element selected by index into dest
|
||||
|
||||
#define _MM_EXTRACT_FLOAT(dest, src, ndx) \
|
||||
*((int*)&(dest)) = _mm_extract_ps((src), (ndx))
|
||||
|
||||
// Extract specified single precision float element
|
||||
// into the lower part of __m128
|
||||
|
||||
// Expands to a single expression with no trailing semicolon, so it can be used
// as a function argument or inside a larger expression.
#define _MM_PICK_OUT_PS(src, num) \
            _mm_insert_ps(_mm_setzero_ps(), (src), \
                          _MM_MK_INSERTPS_NDX((num), 0, 0x0e))
|
||||
|
||||
// Insert integer into packed integer array element
|
||||
// selected by index
|
||||
|
||||
extern __m128i _mm_insert_epi8 (__m128i dst, int s, const int ndx);
|
||||
extern __m128i _mm_insert_epi32(__m128i dst, int s, const int ndx);
|
||||
|
||||
#if defined(_M_X64)
|
||||
extern __m128i _mm_insert_epi64(__m128i dst, __int64 s, const int ndx);
|
||||
#endif
|
||||
// Extract integer from packed integer array element
|
||||
// selected by index
|
||||
|
||||
extern int _mm_extract_epi8 (__m128i src, const int ndx);
|
||||
extern int _mm_extract_epi32(__m128i src, const int ndx);
|
||||
|
||||
#if defined(_M_X64)
|
||||
extern __int64 _mm_extract_epi64(__m128i src, const int ndx);
|
||||
#endif
|
||||
|
||||
// Horizontal packed word minimum and its index in
|
||||
// result[15:0] and result[18:16] respectively
|
||||
|
||||
extern __m128i _mm_minpos_epu16(__m128i shortValues);
|
||||
|
||||
// Packed/single float double precision rounding
|
||||
|
||||
extern __m128d _mm_round_pd(__m128d val, int iRoundMode);
|
||||
extern __m128d _mm_round_sd(__m128d dst, __m128d val, int iRoundMode);
|
||||
|
||||
// Packed/single float single precision rounding
|
||||
|
||||
extern __m128 _mm_round_ps(__m128 val, int iRoundMode);
|
||||
extern __m128 _mm_round_ss(__m128 dst, __m128 val, int iRoundMode);
|
||||
|
||||
// Packed integer sign-extension
|
||||
|
||||
extern __m128i _mm_cvtepi8_epi32 (__m128i byteValues);
|
||||
extern __m128i _mm_cvtepi16_epi32(__m128i shortValues);
|
||||
extern __m128i _mm_cvtepi8_epi64 (__m128i byteValues);
|
||||
extern __m128i _mm_cvtepi32_epi64(__m128i intValues);
|
||||
extern __m128i _mm_cvtepi16_epi64(__m128i shortValues);
|
||||
extern __m128i _mm_cvtepi8_epi16 (__m128i byteValues);
|
||||
|
||||
// Packed integer zero-extension
|
||||
|
||||
extern __m128i _mm_cvtepu8_epi32 (__m128i byteValues);
|
||||
extern __m128i _mm_cvtepu16_epi32(__m128i shortValues);
|
||||
extern __m128i _mm_cvtepu8_epi64 (__m128i shortValues);
|
||||
extern __m128i _mm_cvtepu32_epi64(__m128i intValues);
|
||||
extern __m128i _mm_cvtepu16_epi64(__m128i shortValues);
|
||||
extern __m128i _mm_cvtepu8_epi16 (__m128i byteValues);
|
||||
|
||||
|
||||
// Pack 8 double words from 2 operands into 8 words of result
|
||||
// with unsigned saturation
|
||||
|
||||
extern __m128i _mm_packus_epi32(__m128i val1, __m128i val2);
|
||||
|
||||
// Sum absolute 8-bit integer difference of adjacent groups of 4 byte
|
||||
// integers in operands. Starting offsets within operands are
|
||||
// determined by mask
|
||||
|
||||
extern __m128i _mm_mpsadbw_epu8(__m128i s1, __m128i s2, const int msk);
|
||||
|
||||
/*
|
||||
* Load double quadword using non-temporal aligned hint
|
||||
*/
|
||||
|
||||
extern __m128i _mm_stream_load_si128(__m128i* v1);
|
||||
|
||||
#if defined __cplusplus
|
||||
}; /* End "C" */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* defined(_M_CEE_PURE) */
|
||||
|
||||
#endif
|
||||
#endif /* _INCLUDED_SMM */
|
|
@ -73,14 +73,6 @@
|
|||
(row3) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0xDD)); \
|
||||
}
|
||||
|
||||
// Reciprocal with one Newton-Raphson refinement of the hardware estimate:
// given t = rcp(r), returns (t + t) - (t * t) * r.
__forceinline __m128 _mm_rcpnr_ps(__m128 r)
{
	const __m128 estimate = _mm_rcp_ps(r);

	const __m128 doubled = _mm_add_ps(estimate, estimate);
	const __m128 correction = _mm_mul_ps(_mm_mul_ps(estimate, estimate), r);

	return _mm_sub_ps(doubled, correction);
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#error TODO: GSVector4 and GSRasterizer needs SSE2
|
||||
|
@ -99,29 +91,6 @@
|
|||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
#include "smmintrin_gsdx.h"
|
||||
|
||||
#else
|
||||
|
||||
// Pre-SSE4.1 fallback: combines the bits of x selected by ps_80000000 with
// ps_4b000000, then adds that value to x and subtracts it again.
// NOTE(review): presumably sign mask + 2^23, i.e. round-to-nearest for |x| < 2^23 - confirm.
__forceinline __m128 _mm_round_ps(__m128 x)
{
	const __m128 sign = _mm_and_ps(ps_80000000, x);
	const __m128 magic = _mm_or_ps(sign, ps_4b000000);

	const __m128 shifted = _mm_add_ps(x, magic);

	return _mm_sub_ps(shifted, magic);
}
|
||||
|
||||
// Pre-SSE4.1 fallback: takes the _mm_round_ps result and subtracts ps_3f800000
// in the lanes where that result is greater than x.
__forceinline __m128 _mm_floor_ps(__m128 x)
{
	const __m128 nearest = _mm_round_ps(x);

	// all-ones in lanes where x < nearest
	const __m128 overshoot = _mm_cmplt_ps(x, nearest);

	const __m128 step = _mm_and_ps(overshoot, ps_3f800000);

	return _mm_sub_ps(nearest, step);
}
|
||||
|
||||
// Pre-SSE4.1 fallback: takes the _mm_round_ps result and adds ps_3f800000
// in the lanes where that result is less than x.
__forceinline __m128 _mm_ceil_ps(__m128 x)
{
	const __m128 nearest = _mm_round_ps(x);

	// all-ones in lanes where x > nearest
	const __m128 undershoot = _mm_cmpgt_ps(x, nearest);

	const __m128 step = _mm_and_ps(undershoot, ps_3f800000);

	return _mm_add_ps(nearest, step);
}
|
||||
#include <smmintrin.h>
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue