GSdx: fixing the broken things...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5047 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2012-01-06 00:17:52 +00:00
parent a8e8e6e80f
commit 49f3aee099
16 changed files with 243 additions and 160 deletions

View File

@ -884,6 +884,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
/*
vector<uint8> buff;
bool exit = false;
int round = 0;

View File

@ -35,7 +35,7 @@ public:
enum counter_t
{
Frame, Prim, PrimNotRendered, Draw, Swizzle, Unswizzle, Fillrate, Quad,
Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad,
CounterLast,
};

View File

@ -333,7 +333,7 @@ void GSRenderer::VSync(int field)
theApp.m_gs_interlace[m_interlace].name.c_str(),
theApp.m_gs_aspectratio[m_aspectratio].name.c_str(),
(int)m_perfmon.Get(GSPerfMon::Quad),
(int)(m_perfmon.Get(GSPerfMon::Prim) - m_perfmon.Get(GSPerfMon::PrimNotRendered)),
(int)m_perfmon.Get(GSPerfMon::Prim),
(int)m_perfmon.Get(GSPerfMon::Draw),
m_perfmon.CPU(),
m_perfmon.Get(GSPerfMon::Swizzle) / 1024,
@ -515,7 +515,7 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
return;
case VK_F7:
m_shader = (m_shader + 3 + step) % 3;
printf("GSdx: Set shader %d (%s).\n", (int)m_shader);
printf("GSdx: Set shader %d.\n", (int)m_shader);
return;
case VK_DELETE:
m_aa1 = !m_aa1;

View File

@ -331,7 +331,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
uint8 afix = context->ALPHA.FIX;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupIA(m_vertex.buff, m_vertex.tail, m_index.buff, m_index.tail, m_topology);
dev->SetupIA(m_vertex.buff, m_vertex.next, m_index.buff, m_index.tail, m_topology);
dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);

View File

@ -39,10 +39,13 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX11::ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index)
void GSRendererDX11::ConvertVertex(size_t dst_index, size_t src_index)
{
GSVector4i v0(m_v.m[0]);
GSVector4i v1(m_v.m[1]);
GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index);
GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index;
GSVector4i v0 = ((GSVector4i*)s)[0];
GSVector4i v1 = ((GSVector4i*)s)[1];
if(tme && fst)
{
@ -51,17 +54,8 @@ void GSRendererDX11::ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index)
v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
}
GSVector4i* RESTRICT dst = (GSVector4i*)&vertex[index];
dst[0] = v0;
dst[1] = v1;
}
void GSRendererDX11::Draw()
{
// TODO: remove invisible prims here
__super::Draw();
((GSVector4i*)d)[0] = v0;
((GSVector4i*)d)[1] = v1;
}
void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)

View File

@ -29,8 +29,7 @@ class GSRendererDX11 : public GSRendererDX
{
protected:
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index);
void Draw();
void ConvertVertex(size_t dst_index, size_t src_index);
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;}

View File

@ -58,17 +58,20 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX9::ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index)
void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
{
GSVector4 p = GSVector4(((GSVector4i)m_v.XYZ).upl16());
GSVertex* s = (GSVertex*)((GSVertexHW9*)m_vertex.buff + src_index);
GSVertexHW9* d = (GSVertexHW9*)m_vertex.buff + dst_index;
GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());
if(tme && !fst)
{
p = p.xyxy(GSVector4((float)m_v.XYZ.Z, m_v.RGBAQ.Q));
p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
}
else
{
p = p.xyxy(GSVector4::load((float)m_v.XYZ.Z));
p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
}
GSVector4 t = GSVector4::zero();
@ -77,27 +80,18 @@ void GSRendererDX9::ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index)
{
if(fst)
{
t = GSVector4(GSVector4i::load(m_v.UV.u32[0]).upl16());
t = GSVector4(GSVector4i::load(s->UV.u32[0]).upl16());
}
else
{
t = GSVector4::loadl(&m_v.ST);
t = GSVector4::loadl(&s->ST);
}
}
t = t.xyxy(GSVector4::cast(GSVector4i(m_v.RGBAQ.u32[0], m_v.FOG.u32[1])));
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG.u32[1])));
GSVertexHW9* RESTRICT dst = (GSVertexHW9*)&vertex[index];
dst->p = p;
dst->t = t;
}
void GSRendererDX9::Draw()
{
// TODO: remove invisible prims here
__super::Draw();
d->p = p;
d->t = t;
}
void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
@ -155,9 +149,9 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// assume vertices are tightly packed and sequentially indexed (it should be the case)
if(m_vertex.tail >= 2)
if(m_vertex.next >= 2)
{
size_t count = m_vertex.tail;
size_t count = m_vertex.next;
int i = (int)count * 2 - 4;
GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2;
@ -195,7 +189,7 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
index[5] = i + 3;
}
m_vertex.head = m_vertex.tail = count * 2;
m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
m_index.tail = count * 3;
}

View File

@ -35,8 +35,8 @@ protected:
} m_fba;
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index);
void Draw();
void ConvertVertex(size_t dst_index, size_t src_index);
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void UpdateFBA(GSTexture* rt);

View File

@ -411,16 +411,16 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
if(lines == 0)
{
if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.tail == 448 * 2 || m_vertex.tail == 512 * 2))
if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2))
{
lines = m_vertex.tail / 2;
lines = m_vertex.next / 2;
}
}
else
{
if(m_vt->m_primclass == GS_POINT_CLASS)
{
if(m_vertex.tail >= 16 * 512)
if(m_vertex.next >= 16 * 512)
{
// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454
@ -431,7 +431,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
const uint8* RESTRICT v = m_vertex.buff;
for(int i = (int)m_vertex.tail; i >= 0; i--, v += m_vertex.stride)
for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride)
{
int x = (GetPosX(v) - ox) >> 4;
int y = (GetPosY(v) - oy) >> 4;
@ -448,7 +448,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
}
else if(m_vt->m_primclass == GS_LINE_CLASS)
{
if(m_vertex.tail == lines * 2)
if(m_vertex.next == lines * 2)
{
// normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
// but we use the stored video data to create a new texture, and replace the lines with two triangles
@ -459,8 +459,8 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);
memcpy(&m_vertex.buff[m_vertex.stride * 2], &m_vertex.buff[m_vertex.stride * (m_vertex.tail - 2)], m_vertex.stride);
memcpy(&m_vertex.buff[m_vertex.stride * 3], &m_vertex.buff[m_vertex.stride * (m_vertex.tail - 1)], m_vertex.stride);
memcpy(&m_vertex.buff[m_vertex.stride * 2], &m_vertex.buff[m_vertex.stride * (m_vertex.next - 2)], m_vertex.stride);
memcpy(&m_vertex.buff[m_vertex.stride * 3], &m_vertex.buff[m_vertex.stride * (m_vertex.next - 1)], m_vertex.stride);
m_index.buff[0] = 0;
m_index.buff[1] = 1;
@ -469,7 +469,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
m_index.buff[4] = 2;
m_index.buff[5] = 3;
m_vertex.head = m_vertex.tail = 4;
m_vertex.head = m_vertex.tail = m_vertex.next = 4;
m_index.tail = 6;
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
@ -506,7 +506,7 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S
uint8* RESTRICT v = m_vertex.buff;
for(int i = (int)m_vertex.tail; i >= 0; i--, v += m_vertex.stride)
for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride)
{
uint32 c = GetColor(v);
@ -815,7 +815,7 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
if(FBP >= 0x03f40 && (FBP & 0x1f) == 0)
{
if(m_vertex.tail == 16)
if(m_vertex.next == 16)
{
uint8* RESTRICT v = m_vertex.buff;
@ -835,7 +835,7 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
return false;
}
else if(m_vertex.tail == 256)
else if(m_vertex.next == 256)
{
uint8* RESTRICT v = m_vertex.buff;

View File

@ -33,7 +33,7 @@ class GSRendererNull : public GSRenderer
protected:
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index)
void ConvertVertex(size_t dst_index, size_t src_index)
{
}
@ -48,7 +48,7 @@ protected:
public:
GSRendererNull()
: GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertexNull))
: GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertex))
{
InitConvertVertex(GSRendererNull);
}

View File

@ -29,7 +29,15 @@ GSRendererSW::GSRendererSW(int threads)
, m_fzb(NULL)
{
InitConvertVertex(GSRendererSW);
InitConvertIndex();
m_ci[GS_POINTLIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_POINTLIST>;
m_ci[GS_LINELIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_LINELIST>;
m_ci[GS_LINESTRIP] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_LINESTRIP>;
m_ci[GS_TRIANGLELIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLELIST>;
m_ci[GS_TRIANGLESTRIP] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLESTRIP>;
m_ci[GS_TRIANGLEFAN] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLEFAN>;
m_ci[GS_SPRITE] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_SPRITE>;
m_ci[GS_INVALID] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_INVALID>;
m_tc = new GSTextureCacheSW(this);
@ -152,51 +160,56 @@ GSTexture* GSRendererSW::GetOutput(int i)
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererSW::ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index)
void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
{
GSVertexSW* RESTRICT v = &vertex[index];
GSVertex* s = (GSVertex*)((GSVertexSW*)m_vertex.buff + src_index);
GSVertexSW* d = (GSVertexSW*)m_vertex.buff + dst_index;
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - m_context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
uint32 z = s->XYZ.Z;
v->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - (GSVector4i)m_context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
GSVector4 p, t, c;
p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
if(tme)
{
GSVector4 t;
if(fst)
{
t = GSVector4(GSVector4i::load(m_v.UV.u32[0]).upl16() << (16 - 4));
t = GSVector4(GSVector4i::load(s->UV.u32[0]).upl16() << (16 - 4));
}
else
{
t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);
t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
t = GSVector4(s->ST.S, s->ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);
t = t.xyxy(GSVector4::load(s->RGBAQ.Q));
}
v->t = t;
}
v->c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7);
c = GSVector4::rgba32(s->RGBAQ.u32[0], 7);
d->p = p;
d->c = c;
d->t = t;
if(prim == GS_SPRITE)
{
v->t.u32[3] = m_v.XYZ.Z;
d->t.u32[3] = z;
}
}
template<uint32 prim>
size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count)
{
// memcpy(dst, src, sizeof(uint32) * count); return;
//
memcpy(dst, src, sizeof(uint32) * count); return count;
// TODO: IsQuad
const GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;
GSVector4 scissor = m_context->scissor.ex;
const GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;
const uint32* src_end = src + count;
uint32* dst_base = dst;
@ -222,13 +235,12 @@ size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT s
pmin = v[src[0]].p.min(v[src[1]].p).min(v[src[2]].p);
pmax = v[src[0]].p.max(v[src[1]].p).max(v[src[2]].p);
break;
default:
__assume(0);
}
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
GSVector4 test = GSVector4::zero(); // (pmax < scissor) | (pmin > scissor.zwxy());
/*
GSVector4 tmp;
switch(prim)
{
case GS_TRIANGLELIST:
@ -239,7 +251,7 @@ size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT s
test |= tmp == tmp.yxwz();
break;
}
*/
switch(prim)
{
case GS_TRIANGLELIST:
@ -249,7 +261,7 @@ size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT s
test |= pmin.ceil() == pmax.ceil();
break;
}
bool pass = test.xyxy().allfalse();
switch(prim)
@ -270,8 +282,6 @@ size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT s
if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst += 3;}
src += 3;
break;
default:
__assume(0);
}
}
@ -292,19 +302,19 @@ void GSRendererSW::Draw()
shared_ptr<GSRasterizerData> data(new GSRasterizerData2(this));
data->primclass = GSUtil::GetPrimClass(PRIM->PRIM);
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.tail + sizeof(uint32) * m_index.tail, 32);
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
data->vertex = (GSVertexSW*)data->buff;
data->vertex_count = m_vertex.tail;
data->index = (uint32*)(data->buff + sizeof(GSVertexSW) * m_vertex.tail);
data->vertex_count = m_vertex.next;
data->index = (uint32*)(data->buff + sizeof(GSVertexSW) * m_vertex.next);
data->index_count = (this->*m_cif)(data->index, m_index.buff, m_index.tail);
m_perfmon.Put(GSPerfMon::PrimNotRendered, (m_index.tail - data->index_count) / GSUtil::GetVertexCount(PRIM->PRIM));
m_index.tail = data->index_count;
if(data->index_count == 0) return;
// TODO: merge these
memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.tail);
memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
m_vt->Update(data->vertex, data->index, data->index_count, data->primclass);

View File

@ -72,21 +72,8 @@ protected:
ConvertIndexPtr m_ci[8], m_cif;
#define InitConvertIndex2(P) \
m_ci[P] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<P>; \
#define InitConvertIndex() \
InitConvertIndex2(GS_POINTLIST) \
InitConvertIndex2(GS_LINELIST) \
InitConvertIndex2(GS_LINESTRIP) \
InitConvertIndex2(GS_TRIANGLELIST) \
InitConvertIndex2(GS_TRIANGLESTRIP) \
InitConvertIndex2(GS_TRIANGLEFAN) \
InitConvertIndex2(GS_SPRITE) \
InitConvertIndex2(GS_INVALID) \
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index);
void ConvertVertex(size_t dst_index, size_t src_index);
template<uint32 prim>
size_t ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count);

View File

@ -41,19 +41,21 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
memset(&m_vertex, 0, sizeof(m_vertex));
memset(&m_index, 0, sizeof(m_index));
ASSERT(vertex_stride >= sizeof(GSVertex));
m_vertex.stride = vertex_stride;
m_vertex.tmp = (uint8*)_aligned_malloc(vertex_stride * 2, 32);
GrowVertexBuffer();
m_dk[GS_POINTLIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_POINTLIST>;
m_dk[GS_LINELIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_LINELIST>;
m_dk[GS_LINESTRIP] = (DrawingKickPtr)&GSState::DrawingKick<GS_LINESTRIP>;
m_dk[GS_TRIANGLELIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLELIST>;
m_dk[GS_TRIANGLESTRIP] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLESTRIP>;
m_dk[GS_TRIANGLEFAN] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLEFAN>;
m_dk[GS_SPRITE] = (DrawingKickPtr)&GSState::DrawingKick<GS_SPRITE>;
m_dk[GS_INVALID] = (DrawingKickPtr)&GSState::DrawingKick<GS_INVALID>;
m_vk[GS_POINTLIST] = (VertexKickPtr)&GSState::VertexKick<GS_POINTLIST>;
m_vk[GS_LINELIST] = (VertexKickPtr)&GSState::VertexKick<GS_LINELIST>;
m_vk[GS_LINESTRIP] = (VertexKickPtr)&GSState::VertexKick<GS_LINESTRIP>;
m_vk[GS_TRIANGLELIST] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLELIST>;
m_vk[GS_TRIANGLESTRIP] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLESTRIP>;
m_vk[GS_TRIANGLEFAN] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLEFAN>;
m_vk[GS_SPRITE] = (VertexKickPtr)&GSState::VertexKick<GS_SPRITE>;
m_vk[GS_INVALID] = (VertexKickPtr)&GSState::VertexKick<GS_INVALID>;
memset(m_cv, 0, sizeof(m_cv));
@ -222,6 +224,7 @@ void GSState::Reset()
m_vertex.head = 0;
m_vertex.tail = 0;
m_vertex.next = 0;
m_index.tail = 0;
}
@ -500,7 +503,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRIC
m_v.XYZ.Z = r->XYZF2.Z;
m_v.FOG.F = r->XYZF2.F;
VertexKick(r->XYZF2.Skip());
(this->*m_vkf)(r->XYZF2.Skip());
}
__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
@ -509,7 +512,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT
m_v.XYZ.Y = r->XYZ2.Y;
m_v.XYZ.Z = r->XYZ2.Z;
VertexKick(r->XYZ2.Skip());
(this->*m_vkf)(r->XYZ2.Skip());
}
__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
@ -556,7 +559,9 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
UpdateVertexKick();
m_vertex.head = m_vertex.tail = m_index.tail > 0 ? m_index.buff[m_index.tail - 1] + 1 : 0; // remove unused vertices from the end of the vertex buffer
ASSERT(m_index.tail == 0 || m_index.buff[m_index.tail - 1] + 1 == m_vertex.next);
m_vertex.head = m_vertex.tail = m_vertex.next; // remove unused vertices from the end of the vertex buffer
}
void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
@ -604,14 +609,14 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
VertexKick(0);
(this->*m_vkf)(0);
}
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
{
m_v.XYZ = (GSVector4i)r->XYZ;
VertexKick(0);
(this->*m_vkf)(0);
}
void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0)
@ -740,14 +745,14 @@ void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r)
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
VertexKick(1);
(this->*m_vkf)(1);
}
void GSState::GIFRegHandlerXYZ3(const GIFReg* RESTRICT r)
{
m_v.XYZ = (GSVector4i)r->XYZ;
VertexKick(1);
(this->*m_vkf)(1);
}
void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r)
@ -1215,14 +1220,15 @@ void GSState::FlushPrim()
switch(PRIM->PRIM)
{
case GS_LINESTRIP:
memcpy(&buff[0], &m_vertex.buff[stride * head], stride);
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
break;
case GS_TRIANGLESTRIP:
memcpy(&buff[0], &m_vertex.buff[stride * head], stride * 2);
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (head + 1)], stride);
break;
case GS_TRIANGLEFAN:
memcpy(&buff[0], &m_vertex.buff[stride * head], stride);
memcpy(&buff[stride], &m_vertex.buff[stride * (tail - 1)], stride);
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (tail - 1)], stride);
break;
case GS_POINTLIST:
case GS_LINELIST:
@ -1244,38 +1250,37 @@ void GSState::FlushPrim()
m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
}
m_vertex.tail = 0;
switch(PRIM->PRIM)
{
case GS_LINESTRIP:
memcpy(&m_vertex.buff[0], &buff[0], stride);
m_vertex.tail = 1;
if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
break;
case GS_TRIANGLESTRIP:
memcpy(&m_vertex.buff[0], &buff[0], stride * 2);
m_vertex.tail = 2;
break;
case GS_TRIANGLEFAN:
memcpy(&m_vertex.buff[0], &buff[0], stride * 2);
m_vertex.tail = 2;
if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
if(tail > head + 1) {memcpy(&m_vertex.buff[stride * 1], &buff[stride * 1], stride); m_vertex.tail++;}
break;
case GS_POINTLIST:
case GS_LINELIST:
case GS_TRIANGLELIST:
case GS_SPRITE:
case GS_INVALID:
m_vertex.tail = 0;
break;
default:
__assume(0);
}
m_vertex.head = 0;
m_index.tail = 0;
}
else
{
m_vertex.head = m_vertex.tail = 0;
m_vertex.tail = 0;
}
m_vertex.head = 0;
m_vertex.next = 0;
}
//
@ -1989,9 +1994,8 @@ void GSState::SetGameCRC(uint32 crc, int options)
void GSState::UpdateVertexKick()
{
m_dkf = m_dk[PRIM->PRIM];
m_vkf = m_vk[PRIM->PRIM];
m_cvf = m_cv[PRIM->PRIM][PRIM->TME][PRIM->FST];
m_vertex.n = GSUtil::GetVertexCount(PRIM->PRIM);
}
void GSState::GrowVertexBuffer()
@ -2020,23 +2024,95 @@ void GSState::GrowVertexBuffer()
m_index.buff = index;
}
void GSState::VertexKick(uint32 skip)
{
(this->*m_cvf)(m_vertex.buff, m_vertex.tail);
if(++m_vertex.tail - m_vertex.head >= m_vertex.n)
{
(this->*m_dkf)(skip);
}
}
static uint32 s_tmp[4];
static size_t s_tmp_i = 0;
static GSVector4i s_tmp_zw_sign = GSVector4i::x80000000().sll<8>();
template<uint32 prim>
void GSState::DrawingKick(uint32 skip)
void GSState::VertexKick(uint32 skip)
{
s_tmp[s_tmp_i++ & 3] = m_v.XYZ.u32[0];
size_t head = m_vertex.head;
size_t tail = m_vertex.tail;
size_t next = m_vertex.next;
*(GSVertex*)&m_vertex.buff[m_vertex.stride * tail] = m_v;
if(skip)
size_t n = 0;
switch(prim)
{
case GS_POINTLIST: n = 1; break;
case GS_LINELIST: n = 2; break;
case GS_LINESTRIP: n = 2; break;
case GS_TRIANGLELIST: n = 3; break;
case GS_TRIANGLESTRIP: n = 3; break;
case GS_TRIANGLEFAN: n = 3; break;
case GS_SPRITE: n = 2; break;
case GS_INVALID: n = 1; break;
}
m_vertex.tail = ++tail;
size_t m = tail - head;
if(m < n)
{
return;
}
if(skip == 0)
{
int p0 = (int)s_tmp[(s_tmp_i + 1) & 3];
int p1 = (int)s_tmp[(s_tmp_i + 2) & 3];
int p2 = (int)s_tmp[(s_tmp_i + 3) & 3];
int p3 = (int)s_tmp[(s_tmp_i - m) & 3];
GSVector4i p(p0, p1, p2, p3);
GSVector4i v0, v1, v2, v3;
v1 = p.upl16();
v3 = p.uph16();
v0 = v1.xyxy();
v1 = v1.zwzw();
v2 = v3.xyxy();
v3 = v3.zwzw();
GSVector4i s = m_context->scissor.dx10;
GSVector4i sm = s_tmp_zw_sign;
GSVector4 cross;
switch(prim)
{
case GS_POINTLIST:
skip = ((v2 - s) ^ sm).mask() & 0x8888;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
skip = (((v1 - s) ^ sm) & ((v2 - s) ^ sm)).mask() & 0x8888;
skip |= p1 == p2;
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
skip = (((v0 - s) ^ sm) & ((v1 - s) ^ sm) & ((v2 - s) ^ sm)).mask() & 0x8888;
cross = (GSVector4(v1) - GSVector4(v0)) * (GSVector4(v2) - GSVector4(v0)).yxyx();
skip |= (cross == cross.yxyx()).mask();
break;
case GS_TRIANGLEFAN:
if(m > 4) break; // s_tmp only knows about the last 4 vertices, head could be far behind
skip = (((v1 - s) ^ sm) & ((v2 - s) ^ sm) & ((v3 - s) ^ sm)).mask() & 0x8888;
cross = (GSVector4(v1) - GSVector4(v3)) * (GSVector4(v2) - GSVector4(v3)).yxyx();
skip |= (cross == cross.yxyx()).mask();
break;
}
}
if(skip != 0)
{
switch(prim)
{
@ -2049,7 +2125,7 @@ void GSState::DrawingKick(uint32 skip)
break;
case GS_LINESTRIP:
case GS_TRIANGLESTRIP:
m_vertex.head = head + 1;
m_vertex.head = head + 1;
break;
case GS_TRIANGLEFAN:
break;
@ -2067,50 +2143,78 @@ void GSState::DrawingKick(uint32 skip)
uint32* RESTRICT buff = &m_index.buff[m_index.tail];
size_t src_index = head;
switch(prim)
{
case GS_POINTLIST:
buff[0] = head + 0;
m_vertex.head = head + 1;
m_vertex.next = head + 1;
m_index.tail += 1;
(this->*m_cvf)(head, head);
break;
case GS_LINELIST:
buff[0] = head + 0;
buff[1] = head + 1;
m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2;
(this->*m_cvf)(head + 0, head + 0);
(this->*m_cvf)(head + 1, head + 1);
break;
case GS_LINESTRIP:
if(next < head) {head = next; m_vertex.tail = next + 2;}
buff[0] = head + 0;
buff[1] = head + 1;
m_vertex.head = head + 1;
m_vertex.next = head + 2;
m_index.tail += 2;
if(head + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
if(head + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
break;
case GS_TRIANGLELIST:
buff[0] = head + 0;
buff[1] = head + 1;
buff[2] = head + 2;
m_vertex.head = head + 3;
m_vertex.next = head + 3;
m_index.tail += 3;
(this->*m_cvf)(head + 0, head + 0);
(this->*m_cvf)(head + 1, head + 1);
(this->*m_cvf)(head + 2, head + 2);
break;
case GS_TRIANGLESTRIP:
case GS_TRIANGLESTRIP:
if(next < head) {head = next; m_vertex.tail = next + 3;}
buff[0] = head + 0;
buff[1] = head + 1;
buff[2] = head + 2;
m_vertex.head = head + 1;
m_vertex.next = head + 3;
m_index.tail += 3;
if(src_index + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
if(src_index + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
if(src_index + 2 >= next) (this->*m_cvf)(head + 2, src_index + 2);
break;
case GS_TRIANGLEFAN:
// TODO: remove gaps
buff[0] = head + 0;
buff[1] = tail - 2;
buff[2] = tail - 1;
m_index.tail += 3;
m_vertex.next = tail;
m_index.tail += 3;
if(head >= next) (this->*m_cvf)(head, head);
if(tail - 2 >= next) (this->*m_cvf)(tail - 2, tail - 2);
if(tail - 1 >= next) (this->*m_cvf)(tail - 1, tail - 1);
break;
case GS_SPRITE:
case GS_SPRITE:
buff[0] = head + 0;
buff[1] = head + 1;
m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2;
(this->*m_cvf)(head + 0, head + 0);
(this->*m_cvf)(head + 1, head + 1);
break;
case GS_INVALID:
m_vertex.tail = head;

View File

@ -131,13 +131,13 @@ protected:
GSVertex m_v;
float m_q;
struct {uint8* buff; size_t head, tail, maxcount, stride, n; uint8* tmp;} m_vertex;
struct {uint8* buff; size_t head, tail, next, maxcount, stride, n; uint8* tmp;} m_vertex; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
struct {uint32* buff; size_t tail;} m_index;
typedef void (GSState::*DrawingKickPtr)(uint32 skip);
typedef void (GSState::*ConvertVertexPtr)(void* RESTRICT vertex, size_t index);
typedef void (GSState::*VertexKickPtr)(uint32 skip);
typedef void (GSState::*ConvertVertexPtr)(size_t dst_index, size_t src_index);
DrawingKickPtr m_dk[8], m_dkf;
VertexKickPtr m_vk[8], m_vkf;
ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
#define InitConvertVertex2(T, P) \
@ -160,10 +160,8 @@ protected:
void GrowVertexBuffer();
void VertexKick(uint32 skip);
template<uint32 prim>
void DrawingKick(uint32 skip);
void VertexKick(uint32 skip);
// following functions need m_vt to be initialized

View File

@ -63,9 +63,4 @@ struct GSVertexPT2
GSVector2 t[2];
};
struct GSVertexNull
{
GSVector4 p;
};
#pragma pack(pop)

View File

@ -1,6 +1,7 @@
#pragma once
//#define ENABLE_VTUNE
//
#define ENABLE_VTUNE
#define ENABLE_JIT_RASTERIZER