GSdx: fixing the broken things...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5047 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2012-01-06 00:17:52 +00:00
parent a8e8e6e80f
commit 49f3aee099
16 changed files with 243 additions and 160 deletions

View File

@ -884,6 +884,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
/* /*
vector<uint8> buff;
bool exit = false; bool exit = false;
int round = 0; int round = 0;

View File

@ -35,7 +35,7 @@ public:
enum counter_t enum counter_t
{ {
Frame, Prim, PrimNotRendered, Draw, Swizzle, Unswizzle, Fillrate, Quad, Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad,
CounterLast, CounterLast,
}; };

View File

@ -333,7 +333,7 @@ void GSRenderer::VSync(int field)
theApp.m_gs_interlace[m_interlace].name.c_str(), theApp.m_gs_interlace[m_interlace].name.c_str(),
theApp.m_gs_aspectratio[m_aspectratio].name.c_str(), theApp.m_gs_aspectratio[m_aspectratio].name.c_str(),
(int)m_perfmon.Get(GSPerfMon::Quad), (int)m_perfmon.Get(GSPerfMon::Quad),
(int)(m_perfmon.Get(GSPerfMon::Prim) - m_perfmon.Get(GSPerfMon::PrimNotRendered)), (int)m_perfmon.Get(GSPerfMon::Prim),
(int)m_perfmon.Get(GSPerfMon::Draw), (int)m_perfmon.Get(GSPerfMon::Draw),
m_perfmon.CPU(), m_perfmon.CPU(),
m_perfmon.Get(GSPerfMon::Swizzle) / 1024, m_perfmon.Get(GSPerfMon::Swizzle) / 1024,
@ -515,7 +515,7 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
return; return;
case VK_F7: case VK_F7:
m_shader = (m_shader + 3 + step) % 3; m_shader = (m_shader + 3 + step) % 3;
printf("GSdx: Set shader %d (%s).\n", (int)m_shader); printf("GSdx: Set shader %d.\n", (int)m_shader);
return; return;
case VK_DELETE: case VK_DELETE:
m_aa1 = !m_aa1; m_aa1 = !m_aa1;

View File

@ -331,7 +331,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
uint8 afix = context->ALPHA.FIX; uint8 afix = context->ALPHA.FIX;
dev->SetupOM(om_dssel, om_bsel, afix); dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupIA(m_vertex.buff, m_vertex.tail, m_index.buff, m_index.tail, m_topology); dev->SetupIA(m_vertex.buff, m_vertex.next, m_index.buff, m_index.tail, m_topology);
dev->SetupVS(vs_sel, &vs_cb); dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel); dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel); dev->SetupPS(ps_sel, &ps_cb, ps_ssel);

View File

@ -39,10 +39,13 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
} }
template<uint32 prim, uint32 tme, uint32 fst> template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX11::ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index) void GSRendererDX11::ConvertVertex(size_t dst_index, size_t src_index)
{ {
GSVector4i v0(m_v.m[0]); GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index);
GSVector4i v1(m_v.m[1]); GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index;
GSVector4i v0 = ((GSVector4i*)s)[0];
GSVector4i v1 = ((GSVector4i*)s)[1];
if(tme && fst) if(tme && fst)
{ {
@ -51,17 +54,8 @@ void GSRendererDX11::ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index)
v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
} }
GSVector4i* RESTRICT dst = (GSVector4i*)&vertex[index]; ((GSVector4i*)d)[0] = v0;
((GSVector4i*)d)[1] = v1;
dst[0] = v0;
dst[1] = v1;
}
void GSRendererDX11::Draw()
{
// TODO: remove invisible prims here
__super::Draw();
} }
void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)

View File

@ -29,8 +29,7 @@ class GSRendererDX11 : public GSRendererDX
{ {
protected: protected:
template<uint32 prim, uint32 tme, uint32 fst> template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index); void ConvertVertex(size_t dst_index, size_t src_index);
void Draw();
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;} int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;}

View File

@ -58,17 +58,20 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
} }
template<uint32 prim, uint32 tme, uint32 fst> template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX9::ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index) void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
{ {
GSVector4 p = GSVector4(((GSVector4i)m_v.XYZ).upl16()); GSVertex* s = (GSVertex*)((GSVertexHW9*)m_vertex.buff + src_index);
GSVertexHW9* d = (GSVertexHW9*)m_vertex.buff + dst_index;
GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());
if(tme && !fst) if(tme && !fst)
{ {
p = p.xyxy(GSVector4((float)m_v.XYZ.Z, m_v.RGBAQ.Q)); p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
} }
else else
{ {
p = p.xyxy(GSVector4::load((float)m_v.XYZ.Z)); p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
} }
GSVector4 t = GSVector4::zero(); GSVector4 t = GSVector4::zero();
@ -77,27 +80,18 @@ void GSRendererDX9::ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index)
{ {
if(fst) if(fst)
{ {
t = GSVector4(GSVector4i::load(m_v.UV.u32[0]).upl16()); t = GSVector4(GSVector4i::load(s->UV.u32[0]).upl16());
} }
else else
{ {
t = GSVector4::loadl(&m_v.ST); t = GSVector4::loadl(&s->ST);
} }
} }
t = t.xyxy(GSVector4::cast(GSVector4i(m_v.RGBAQ.u32[0], m_v.FOG.u32[1]))); t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG.u32[1])));
GSVertexHW9* RESTRICT dst = (GSVertexHW9*)&vertex[index]; d->p = p;
d->t = t;
dst->p = p;
dst->t = t;
}
void GSRendererDX9::Draw()
{
// TODO: remove invisible prims here
__super::Draw();
} }
void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
@ -155,9 +149,9 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// assume vertices are tightly packed and sequentially indexed (it should be the case) // assume vertices are tightly packed and sequentially indexed (it should be the case)
if(m_vertex.tail >= 2) if(m_vertex.next >= 2)
{ {
size_t count = m_vertex.tail; size_t count = m_vertex.next;
int i = (int)count * 2 - 4; int i = (int)count * 2 - 4;
GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2; GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2;
@ -195,7 +189,7 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
index[5] = i + 3; index[5] = i + 3;
} }
m_vertex.head = m_vertex.tail = count * 2; m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
m_index.tail = count * 3; m_index.tail = count * 3;
} }

View File

@ -35,8 +35,8 @@ protected:
} m_fba; } m_fba;
template<uint32 prim, uint32 tme, uint32 fst> template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index); void ConvertVertex(size_t dst_index, size_t src_index);
void Draw();
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void UpdateFBA(GSTexture* rt); void UpdateFBA(GSTexture* rt);

View File

@ -411,16 +411,16 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
if(lines == 0) if(lines == 0)
{ {
if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.tail == 448 * 2 || m_vertex.tail == 512 * 2)) if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2))
{ {
lines = m_vertex.tail / 2; lines = m_vertex.next / 2;
} }
} }
else else
{ {
if(m_vt->m_primclass == GS_POINT_CLASS) if(m_vt->m_primclass == GS_POINT_CLASS)
{ {
if(m_vertex.tail >= 16 * 512) if(m_vertex.next >= 16 * 512)
{ {
// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454 // incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454
@ -431,7 +431,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
const uint8* RESTRICT v = m_vertex.buff; const uint8* RESTRICT v = m_vertex.buff;
for(int i = (int)m_vertex.tail; i >= 0; i--, v += m_vertex.stride) for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride)
{ {
int x = (GetPosX(v) - ox) >> 4; int x = (GetPosX(v) - ox) >> 4;
int y = (GetPosY(v) - oy) >> 4; int y = (GetPosY(v) - oy) >> 4;
@ -448,7 +448,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
} }
else if(m_vt->m_primclass == GS_LINE_CLASS) else if(m_vt->m_primclass == GS_LINE_CLASS)
{ {
if(m_vertex.tail == lines * 2) if(m_vertex.next == lines * 2)
{ {
// normally, this step would copy the video onto screen with 512 texture mapped horizontal lines, // normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
// but we use the stored video data to create a new texture, and replace the lines with two triangles // but we use the stored video data to create a new texture, and replace the lines with two triangles
@ -459,8 +459,8 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4); t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);
memcpy(&m_vertex.buff[m_vertex.stride * 2], &m_vertex.buff[m_vertex.stride * (m_vertex.tail - 2)], m_vertex.stride); memcpy(&m_vertex.buff[m_vertex.stride * 2], &m_vertex.buff[m_vertex.stride * (m_vertex.next - 2)], m_vertex.stride);
memcpy(&m_vertex.buff[m_vertex.stride * 3], &m_vertex.buff[m_vertex.stride * (m_vertex.tail - 1)], m_vertex.stride); memcpy(&m_vertex.buff[m_vertex.stride * 3], &m_vertex.buff[m_vertex.stride * (m_vertex.next - 1)], m_vertex.stride);
m_index.buff[0] = 0; m_index.buff[0] = 0;
m_index.buff[1] = 1; m_index.buff[1] = 1;
@ -469,7 +469,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
m_index.buff[4] = 2; m_index.buff[4] = 2;
m_index.buff[5] = 3; m_index.buff[5] = 3;
m_vertex.head = m_vertex.tail = 4; m_vertex.head = m_vertex.tail = m_vertex.next = 4;
m_index.tail = 6; m_index.tail = 6;
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS); m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
@ -506,7 +506,7 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S
uint8* RESTRICT v = m_vertex.buff; uint8* RESTRICT v = m_vertex.buff;
for(int i = (int)m_vertex.tail; i >= 0; i--, v += m_vertex.stride) for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride)
{ {
uint32 c = GetColor(v); uint32 c = GetColor(v);
@ -815,7 +815,7 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
if(FBP >= 0x03f40 && (FBP & 0x1f) == 0) if(FBP >= 0x03f40 && (FBP & 0x1f) == 0)
{ {
if(m_vertex.tail == 16) if(m_vertex.next == 16)
{ {
uint8* RESTRICT v = m_vertex.buff; uint8* RESTRICT v = m_vertex.buff;
@ -835,7 +835,7 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
return false; return false;
} }
else if(m_vertex.tail == 256) else if(m_vertex.next == 256)
{ {
uint8* RESTRICT v = m_vertex.buff; uint8* RESTRICT v = m_vertex.buff;

View File

@ -33,7 +33,7 @@ class GSRendererNull : public GSRenderer
protected: protected:
template<uint32 prim, uint32 tme, uint32 fst> template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index) void ConvertVertex(size_t dst_index, size_t src_index)
{ {
} }
@ -48,7 +48,7 @@ protected:
public: public:
GSRendererNull() GSRendererNull()
: GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertexNull)) : GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertex))
{ {
InitConvertVertex(GSRendererNull); InitConvertVertex(GSRendererNull);
} }

View File

@ -29,7 +29,15 @@ GSRendererSW::GSRendererSW(int threads)
, m_fzb(NULL) , m_fzb(NULL)
{ {
InitConvertVertex(GSRendererSW); InitConvertVertex(GSRendererSW);
InitConvertIndex();
m_ci[GS_POINTLIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_POINTLIST>;
m_ci[GS_LINELIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_LINELIST>;
m_ci[GS_LINESTRIP] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_LINESTRIP>;
m_ci[GS_TRIANGLELIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLELIST>;
m_ci[GS_TRIANGLESTRIP] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLESTRIP>;
m_ci[GS_TRIANGLEFAN] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLEFAN>;
m_ci[GS_SPRITE] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_SPRITE>;
m_ci[GS_INVALID] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_INVALID>;
m_tc = new GSTextureCacheSW(this); m_tc = new GSTextureCacheSW(this);
@ -152,51 +160,56 @@ GSTexture* GSRendererSW::GetOutput(int i)
} }
template<uint32 prim, uint32 tme, uint32 fst> template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererSW::ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index) void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
{ {
GSVertexSW* RESTRICT v = &vertex[index]; GSVertex* s = (GSVertex*)((GSVertexSW*)m_vertex.buff + src_index);
GSVertexSW* d = (GSVertexSW*)m_vertex.buff + dst_index;
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - m_context->XYOFFSET; uint32 z = s->XYZ.Z;
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
v->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - (GSVector4i)m_context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
GSVector4 p, t, c;
p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
if(tme) if(tme)
{ {
GSVector4 t;
if(fst) if(fst)
{ {
t = GSVector4(GSVector4i::load(m_v.UV.u32[0]).upl16() << (16 - 4)); t = GSVector4(GSVector4i::load(s->UV.u32[0]).upl16() << (16 - 4));
} }
else else
{ {
t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH); t = GSVector4(s->ST.S, s->ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);
t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q)); t = t.xyxy(GSVector4::load(s->RGBAQ.Q));
} }
v->t = t;
} }
v->c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7); c = GSVector4::rgba32(s->RGBAQ.u32[0], 7);
d->p = p;
d->c = c;
d->t = t;
if(prim == GS_SPRITE) if(prim == GS_SPRITE)
{ {
v->t.u32[3] = m_v.XYZ.Z; d->t.u32[3] = z;
} }
} }
template<uint32 prim> template<uint32 prim>
size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count) size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count)
{ {
// memcpy(dst, src, sizeof(uint32) * count); return; //
memcpy(dst, src, sizeof(uint32) * count); return count;
// TODO: IsQuad // TODO: IsQuad
const GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;
GSVector4 scissor = m_context->scissor.ex; GSVector4 scissor = m_context->scissor.ex;
const GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;
const uint32* src_end = src + count; const uint32* src_end = src + count;
uint32* dst_base = dst; uint32* dst_base = dst;
@ -222,13 +235,12 @@ size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT s
pmin = v[src[0]].p.min(v[src[1]].p).min(v[src[2]].p); pmin = v[src[0]].p.min(v[src[1]].p).min(v[src[2]].p);
pmax = v[src[0]].p.max(v[src[1]].p).max(v[src[2]].p); pmax = v[src[0]].p.max(v[src[1]].p).max(v[src[2]].p);
break; break;
default:
__assume(0);
} }
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); GSVector4 test = GSVector4::zero(); // (pmax < scissor) | (pmin > scissor.zwxy());
/*
GSVector4 tmp; GSVector4 tmp;
switch(prim) switch(prim)
{ {
case GS_TRIANGLELIST: case GS_TRIANGLELIST:
@ -239,7 +251,7 @@ size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT s
test |= tmp == tmp.yxwz(); test |= tmp == tmp.yxwz();
break; break;
} }
*/
switch(prim) switch(prim)
{ {
case GS_TRIANGLELIST: case GS_TRIANGLELIST:
@ -249,7 +261,7 @@ size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT s
test |= pmin.ceil() == pmax.ceil(); test |= pmin.ceil() == pmax.ceil();
break; break;
} }
bool pass = test.xyxy().allfalse(); bool pass = test.xyxy().allfalse();
switch(prim) switch(prim)
@ -270,8 +282,6 @@ size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT s
if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst += 3;} if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst += 3;}
src += 3; src += 3;
break; break;
default:
__assume(0);
} }
} }
@ -292,19 +302,19 @@ void GSRendererSW::Draw()
shared_ptr<GSRasterizerData> data(new GSRasterizerData2(this)); shared_ptr<GSRasterizerData> data(new GSRasterizerData2(this));
data->primclass = GSUtil::GetPrimClass(PRIM->PRIM); data->primclass = GSUtil::GetPrimClass(PRIM->PRIM);
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.tail + sizeof(uint32) * m_index.tail, 32); data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
data->vertex = (GSVertexSW*)data->buff; data->vertex = (GSVertexSW*)data->buff;
data->vertex_count = m_vertex.tail; data->vertex_count = m_vertex.next;
data->index = (uint32*)(data->buff + sizeof(GSVertexSW) * m_vertex.tail); data->index = (uint32*)(data->buff + sizeof(GSVertexSW) * m_vertex.next);
data->index_count = (this->*m_cif)(data->index, m_index.buff, m_index.tail); data->index_count = (this->*m_cif)(data->index, m_index.buff, m_index.tail);
m_perfmon.Put(GSPerfMon::PrimNotRendered, (m_index.tail - data->index_count) / GSUtil::GetVertexCount(PRIM->PRIM)); m_index.tail = data->index_count;
if(data->index_count == 0) return; if(data->index_count == 0) return;
// TODO: merge these // TODO: merge these
memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.tail); memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
m_vt->Update(data->vertex, data->index, data->index_count, data->primclass); m_vt->Update(data->vertex, data->index, data->index_count, data->primclass);

View File

@ -72,21 +72,8 @@ protected:
ConvertIndexPtr m_ci[8], m_cif; ConvertIndexPtr m_ci[8], m_cif;
#define InitConvertIndex2(P) \
m_ci[P] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<P>; \
#define InitConvertIndex() \
InitConvertIndex2(GS_POINTLIST) \
InitConvertIndex2(GS_LINELIST) \
InitConvertIndex2(GS_LINESTRIP) \
InitConvertIndex2(GS_TRIANGLELIST) \
InitConvertIndex2(GS_TRIANGLESTRIP) \
InitConvertIndex2(GS_TRIANGLEFAN) \
InitConvertIndex2(GS_SPRITE) \
InitConvertIndex2(GS_INVALID) \
template<uint32 prim, uint32 tme, uint32 fst> template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index); void ConvertVertex(size_t dst_index, size_t src_index);
template<uint32 prim> template<uint32 prim>
size_t ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count); size_t ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count);

View File

@ -41,19 +41,21 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
memset(&m_vertex, 0, sizeof(m_vertex)); memset(&m_vertex, 0, sizeof(m_vertex));
memset(&m_index, 0, sizeof(m_index)); memset(&m_index, 0, sizeof(m_index));
ASSERT(vertex_stride >= sizeof(GSVertex));
m_vertex.stride = vertex_stride; m_vertex.stride = vertex_stride;
m_vertex.tmp = (uint8*)_aligned_malloc(vertex_stride * 2, 32); m_vertex.tmp = (uint8*)_aligned_malloc(vertex_stride * 2, 32);
GrowVertexBuffer(); GrowVertexBuffer();
m_dk[GS_POINTLIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_POINTLIST>; m_vk[GS_POINTLIST] = (VertexKickPtr)&GSState::VertexKick<GS_POINTLIST>;
m_dk[GS_LINELIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_LINELIST>; m_vk[GS_LINELIST] = (VertexKickPtr)&GSState::VertexKick<GS_LINELIST>;
m_dk[GS_LINESTRIP] = (DrawingKickPtr)&GSState::DrawingKick<GS_LINESTRIP>; m_vk[GS_LINESTRIP] = (VertexKickPtr)&GSState::VertexKick<GS_LINESTRIP>;
m_dk[GS_TRIANGLELIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLELIST>; m_vk[GS_TRIANGLELIST] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLELIST>;
m_dk[GS_TRIANGLESTRIP] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLESTRIP>; m_vk[GS_TRIANGLESTRIP] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLESTRIP>;
m_dk[GS_TRIANGLEFAN] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLEFAN>; m_vk[GS_TRIANGLEFAN] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLEFAN>;
m_dk[GS_SPRITE] = (DrawingKickPtr)&GSState::DrawingKick<GS_SPRITE>; m_vk[GS_SPRITE] = (VertexKickPtr)&GSState::VertexKick<GS_SPRITE>;
m_dk[GS_INVALID] = (DrawingKickPtr)&GSState::DrawingKick<GS_INVALID>; m_vk[GS_INVALID] = (VertexKickPtr)&GSState::VertexKick<GS_INVALID>;
memset(m_cv, 0, sizeof(m_cv)); memset(m_cv, 0, sizeof(m_cv));
@ -222,6 +224,7 @@ void GSState::Reset()
m_vertex.head = 0; m_vertex.head = 0;
m_vertex.tail = 0; m_vertex.tail = 0;
m_vertex.next = 0;
m_index.tail = 0; m_index.tail = 0;
} }
@ -500,7 +503,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRIC
m_v.XYZ.Z = r->XYZF2.Z; m_v.XYZ.Z = r->XYZF2.Z;
m_v.FOG.F = r->XYZF2.F; m_v.FOG.F = r->XYZF2.F;
VertexKick(r->XYZF2.Skip()); (this->*m_vkf)(r->XYZF2.Skip());
} }
__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r) __forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
@ -509,7 +512,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT
m_v.XYZ.Y = r->XYZ2.Y; m_v.XYZ.Y = r->XYZ2.Y;
m_v.XYZ.Z = r->XYZ2.Z; m_v.XYZ.Z = r->XYZ2.Z;
VertexKick(r->XYZ2.Skip()); (this->*m_vkf)(r->XYZ2.Skip());
} }
__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r) __forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
@ -556,7 +559,9 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
UpdateVertexKick(); UpdateVertexKick();
m_vertex.head = m_vertex.tail = m_index.tail > 0 ? m_index.buff[m_index.tail - 1] + 1 : 0; // remove unused vertices from the end of the vertex buffer ASSERT(m_index.tail == 0 || m_index.buff[m_index.tail - 1] + 1 == m_vertex.next);
m_vertex.head = m_vertex.tail = m_vertex.next; // remove unused vertices from the end of the vertex buffer
} }
void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
@ -604,14 +609,14 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff; m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000; m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
VertexKick(0); (this->*m_vkf)(0);
} }
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
{ {
m_v.XYZ = (GSVector4i)r->XYZ; m_v.XYZ = (GSVector4i)r->XYZ;
VertexKick(0); (this->*m_vkf)(0);
} }
void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0) void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0)
@ -740,14 +745,14 @@ void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r)
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff; m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000; m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
VertexKick(1); (this->*m_vkf)(1);
} }
void GSState::GIFRegHandlerXYZ3(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerXYZ3(const GIFReg* RESTRICT r)
{ {
m_v.XYZ = (GSVector4i)r->XYZ; m_v.XYZ = (GSVector4i)r->XYZ;
VertexKick(1); (this->*m_vkf)(1);
} }
void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r)
@ -1215,14 +1220,15 @@ void GSState::FlushPrim()
switch(PRIM->PRIM) switch(PRIM->PRIM)
{ {
case GS_LINESTRIP: case GS_LINESTRIP:
memcpy(&buff[0], &m_vertex.buff[stride * head], stride); if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
break; break;
case GS_TRIANGLESTRIP: case GS_TRIANGLESTRIP:
memcpy(&buff[0], &m_vertex.buff[stride * head], stride * 2); if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (head + 1)], stride);
break; break;
case GS_TRIANGLEFAN: case GS_TRIANGLEFAN:
memcpy(&buff[0], &m_vertex.buff[stride * head], stride); if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
memcpy(&buff[stride], &m_vertex.buff[stride * (tail - 1)], stride); if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (tail - 1)], stride);
break; break;
case GS_POINTLIST: case GS_POINTLIST:
case GS_LINELIST: case GS_LINELIST:
@ -1244,38 +1250,37 @@ void GSState::FlushPrim()
m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM)); m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
} }
m_vertex.tail = 0;
switch(PRIM->PRIM) switch(PRIM->PRIM)
{ {
case GS_LINESTRIP: case GS_LINESTRIP:
memcpy(&m_vertex.buff[0], &buff[0], stride); if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
m_vertex.tail = 1;
break; break;
case GS_TRIANGLESTRIP: case GS_TRIANGLESTRIP:
memcpy(&m_vertex.buff[0], &buff[0], stride * 2);
m_vertex.tail = 2;
break;
case GS_TRIANGLEFAN: case GS_TRIANGLEFAN:
memcpy(&m_vertex.buff[0], &buff[0], stride * 2); if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
m_vertex.tail = 2; if(tail > head + 1) {memcpy(&m_vertex.buff[stride * 1], &buff[stride * 1], stride); m_vertex.tail++;}
break; break;
case GS_POINTLIST: case GS_POINTLIST:
case GS_LINELIST: case GS_LINELIST:
case GS_TRIANGLELIST: case GS_TRIANGLELIST:
case GS_SPRITE: case GS_SPRITE:
case GS_INVALID: case GS_INVALID:
m_vertex.tail = 0;
break; break;
default: default:
__assume(0); __assume(0);
} }
m_vertex.head = 0;
m_index.tail = 0; m_index.tail = 0;
} }
else else
{ {
m_vertex.head = m_vertex.tail = 0; m_vertex.tail = 0;
} }
m_vertex.head = 0;
m_vertex.next = 0;
} }
// //
@ -1989,9 +1994,8 @@ void GSState::SetGameCRC(uint32 crc, int options)
void GSState::UpdateVertexKick() void GSState::UpdateVertexKick()
{ {
m_dkf = m_dk[PRIM->PRIM]; m_vkf = m_vk[PRIM->PRIM];
m_cvf = m_cv[PRIM->PRIM][PRIM->TME][PRIM->FST]; m_cvf = m_cv[PRIM->PRIM][PRIM->TME][PRIM->FST];
m_vertex.n = GSUtil::GetVertexCount(PRIM->PRIM);
} }
void GSState::GrowVertexBuffer() void GSState::GrowVertexBuffer()
@ -2020,23 +2024,95 @@ void GSState::GrowVertexBuffer()
m_index.buff = index; m_index.buff = index;
} }
void GSState::VertexKick(uint32 skip) static uint32 s_tmp[4];
{ static size_t s_tmp_i = 0;
(this->*m_cvf)(m_vertex.buff, m_vertex.tail); static GSVector4i s_tmp_zw_sign = GSVector4i::x80000000().sll<8>();
if(++m_vertex.tail - m_vertex.head >= m_vertex.n)
{
(this->*m_dkf)(skip);
}
}
template<uint32 prim> template<uint32 prim>
void GSState::DrawingKick(uint32 skip) void GSState::VertexKick(uint32 skip)
{ {
s_tmp[s_tmp_i++ & 3] = m_v.XYZ.u32[0];
size_t head = m_vertex.head; size_t head = m_vertex.head;
size_t tail = m_vertex.tail; size_t tail = m_vertex.tail;
size_t next = m_vertex.next;
*(GSVertex*)&m_vertex.buff[m_vertex.stride * tail] = m_v;
if(skip) size_t n = 0;
switch(prim)
{
case GS_POINTLIST: n = 1; break;
case GS_LINELIST: n = 2; break;
case GS_LINESTRIP: n = 2; break;
case GS_TRIANGLELIST: n = 3; break;
case GS_TRIANGLESTRIP: n = 3; break;
case GS_TRIANGLEFAN: n = 3; break;
case GS_SPRITE: n = 2; break;
case GS_INVALID: n = 1; break;
}
m_vertex.tail = ++tail;
size_t m = tail - head;
if(m < n)
{
return;
}
if(skip == 0)
{
int p0 = (int)s_tmp[(s_tmp_i + 1) & 3];
int p1 = (int)s_tmp[(s_tmp_i + 2) & 3];
int p2 = (int)s_tmp[(s_tmp_i + 3) & 3];
int p3 = (int)s_tmp[(s_tmp_i - m) & 3];
GSVector4i p(p0, p1, p2, p3);
GSVector4i v0, v1, v2, v3;
v1 = p.upl16();
v3 = p.uph16();
v0 = v1.xyxy();
v1 = v1.zwzw();
v2 = v3.xyxy();
v3 = v3.zwzw();
GSVector4i s = m_context->scissor.dx10;
GSVector4i sm = s_tmp_zw_sign;
GSVector4 cross;
switch(prim)
{
case GS_POINTLIST:
skip = ((v2 - s) ^ sm).mask() & 0x8888;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
skip = (((v1 - s) ^ sm) & ((v2 - s) ^ sm)).mask() & 0x8888;
skip |= p1 == p2;
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
skip = (((v0 - s) ^ sm) & ((v1 - s) ^ sm) & ((v2 - s) ^ sm)).mask() & 0x8888;
cross = (GSVector4(v1) - GSVector4(v0)) * (GSVector4(v2) - GSVector4(v0)).yxyx();
skip |= (cross == cross.yxyx()).mask();
break;
case GS_TRIANGLEFAN:
if(m > 4) break; // s_tmp only knows about the last 4 vertices, head could be far behind
skip = (((v1 - s) ^ sm) & ((v2 - s) ^ sm) & ((v3 - s) ^ sm)).mask() & 0x8888;
cross = (GSVector4(v1) - GSVector4(v3)) * (GSVector4(v2) - GSVector4(v3)).yxyx();
skip |= (cross == cross.yxyx()).mask();
break;
}
}
if(skip != 0)
{ {
switch(prim) switch(prim)
{ {
@ -2049,7 +2125,7 @@ void GSState::DrawingKick(uint32 skip)
break; break;
case GS_LINESTRIP: case GS_LINESTRIP:
case GS_TRIANGLESTRIP: case GS_TRIANGLESTRIP:
m_vertex.head = head + 1; m_vertex.head = head + 1;
break; break;
case GS_TRIANGLEFAN: case GS_TRIANGLEFAN:
break; break;
@ -2067,50 +2143,78 @@ void GSState::DrawingKick(uint32 skip)
uint32* RESTRICT buff = &m_index.buff[m_index.tail]; uint32* RESTRICT buff = &m_index.buff[m_index.tail];
size_t src_index = head;
switch(prim) switch(prim)
{ {
case GS_POINTLIST: case GS_POINTLIST:
buff[0] = head + 0; buff[0] = head + 0;
m_vertex.head = head + 1; m_vertex.head = head + 1;
m_vertex.next = head + 1;
m_index.tail += 1; m_index.tail += 1;
(this->*m_cvf)(head, head);
break; break;
case GS_LINELIST: case GS_LINELIST:
buff[0] = head + 0; buff[0] = head + 0;
buff[1] = head + 1; buff[1] = head + 1;
m_vertex.head = head + 2; m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2; m_index.tail += 2;
(this->*m_cvf)(head + 0, head + 0);
(this->*m_cvf)(head + 1, head + 1);
break; break;
case GS_LINESTRIP: case GS_LINESTRIP:
if(next < head) {head = next; m_vertex.tail = next + 2;}
buff[0] = head + 0; buff[0] = head + 0;
buff[1] = head + 1; buff[1] = head + 1;
m_vertex.head = head + 1; m_vertex.head = head + 1;
m_vertex.next = head + 2;
m_index.tail += 2; m_index.tail += 2;
if(head + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
if(head + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
break; break;
case GS_TRIANGLELIST: case GS_TRIANGLELIST:
buff[0] = head + 0; buff[0] = head + 0;
buff[1] = head + 1; buff[1] = head + 1;
buff[2] = head + 2; buff[2] = head + 2;
m_vertex.head = head + 3; m_vertex.head = head + 3;
m_vertex.next = head + 3;
m_index.tail += 3; m_index.tail += 3;
(this->*m_cvf)(head + 0, head + 0);
(this->*m_cvf)(head + 1, head + 1);
(this->*m_cvf)(head + 2, head + 2);
break; break;
case GS_TRIANGLESTRIP: case GS_TRIANGLESTRIP:
if(next < head) {head = next; m_vertex.tail = next + 3;}
buff[0] = head + 0; buff[0] = head + 0;
buff[1] = head + 1; buff[1] = head + 1;
buff[2] = head + 2; buff[2] = head + 2;
m_vertex.head = head + 1; m_vertex.head = head + 1;
m_vertex.next = head + 3;
m_index.tail += 3; m_index.tail += 3;
if(src_index + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
if(src_index + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
if(src_index + 2 >= next) (this->*m_cvf)(head + 2, src_index + 2);
break; break;
case GS_TRIANGLEFAN: case GS_TRIANGLEFAN:
// TODO: remove gaps
buff[0] = head + 0; buff[0] = head + 0;
buff[1] = tail - 2; buff[1] = tail - 2;
buff[2] = tail - 1; buff[2] = tail - 1;
m_index.tail += 3; m_vertex.next = tail;
m_index.tail += 3;
if(head >= next) (this->*m_cvf)(head, head);
if(tail - 2 >= next) (this->*m_cvf)(tail - 2, tail - 2);
if(tail - 1 >= next) (this->*m_cvf)(tail - 1, tail - 1);
break; break;
case GS_SPRITE: case GS_SPRITE:
buff[0] = head + 0; buff[0] = head + 0;
buff[1] = head + 1; buff[1] = head + 1;
m_vertex.head = head + 2; m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2; m_index.tail += 2;
(this->*m_cvf)(head + 0, head + 0);
(this->*m_cvf)(head + 1, head + 1);
break; break;
case GS_INVALID: case GS_INVALID:
m_vertex.tail = head; m_vertex.tail = head;

View File

@ -131,13 +131,13 @@ protected:
GSVertex m_v; GSVertex m_v;
float m_q; float m_q;
struct {uint8* buff; size_t head, tail, maxcount, stride, n; uint8* tmp;} m_vertex; struct {uint8* buff; size_t head, tail, next, maxcount, stride, n; uint8* tmp;} m_vertex; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
struct {uint32* buff; size_t tail;} m_index; struct {uint32* buff; size_t tail;} m_index;
typedef void (GSState::*DrawingKickPtr)(uint32 skip); typedef void (GSState::*VertexKickPtr)(uint32 skip);
typedef void (GSState::*ConvertVertexPtr)(void* RESTRICT vertex, size_t index); typedef void (GSState::*ConvertVertexPtr)(size_t dst_index, size_t src_index);
DrawingKickPtr m_dk[8], m_dkf; VertexKickPtr m_vk[8], m_vkf;
ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST] ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
#define InitConvertVertex2(T, P) \ #define InitConvertVertex2(T, P) \
@ -160,10 +160,8 @@ protected:
void GrowVertexBuffer(); void GrowVertexBuffer();
void VertexKick(uint32 skip);
template<uint32 prim> template<uint32 prim>
void DrawingKick(uint32 skip); void VertexKick(uint32 skip);
// following functions need m_vt to be initialized // following functions need m_vt to be initialized

View File

@ -63,9 +63,4 @@ struct GSVertexPT2
GSVector2 t[2]; GSVector2 t[2];
}; };
struct GSVertexNull
{
GSVector4 p;
};
#pragma pack(pop) #pragma pack(pop)

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
//#define ENABLE_VTUNE //
#define ENABLE_VTUNE
#define ENABLE_JIT_RASTERIZER #define ENABLE_JIT_RASTERIZER