GSdx: it's hard to keep track of the leftover vertices properly; a bit of SPS was still possible. PSX sprites were fixed too.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5065 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2012-01-09 08:41:33 +00:00
parent 5b5a9787d9
commit 481f1fdda2
15 changed files with 220 additions and 100 deletions

View File

@ -83,7 +83,7 @@ void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
#ifndef ENABLE_JIT_RASTERIZER #ifndef ENABLE_JIT_RASTERIZER
void GPUDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) void GPUDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan)
{ {
GPUScanlineSelector sel = m_global.sel; GPUScanlineSelector sel = m_global.sel;
@ -93,7 +93,7 @@ void GPUDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dsca
{ {
if(sel.sprite) if(sel.sprite)
{ {
GSVector4i t = (GSVector4i(vertex.t) >> 8) - GSVector4i::x00000001(); GSVector4i t = (GSVector4i(vertex[index[1]].t) >> 8) - GSVector4i::x00000001();
t = t.ps32(t); t = t.ps32(t);
t = t.upl16(t); t = t.upl16(t);

View File

@ -65,7 +65,7 @@ public:
#ifndef ENABLE_JIT_RASTERIZER #ifndef ENABLE_JIT_RASTERIZER
void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan); void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan); void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan); void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
void DrawRect(const GSVector4i& r, const GSVertexSW& v); void DrawRect(const GSVector4i& r, const GSVertexSW& v);

View File

@ -27,6 +27,11 @@
using namespace Xbyak; using namespace Xbyak;
static const int _args = 0;
static const int _vertex = _args + 4;
static const int _index = _args + 8;
static const int _dscan = _args + 12;
GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize) GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)
, m_local(*(GPUScanlineLocalData*)param) , m_local(*(GPUScanlineLocalData*)param)
@ -50,7 +55,12 @@ void GPUSetupPrimCodeGenerator::Generate()
{ {
// t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001(); // t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
cvttps2dq(xmm1, ptr[ecx + sizeof(GSVertexSW) * 1 + offsetof(GSVertexSW, t)]); mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
cvttps2dq(xmm1, ptr[ecx + offsetof(GSVertexSW, t)]);
psrld(xmm1, 8); psrld(xmm1, 8);
psrld(xmm0, 31); psrld(xmm0, 31);
psubd(xmm1, xmm0); psubd(xmm1, xmm0);
@ -86,6 +96,8 @@ void GPUSetupPrimCodeGenerator::Generate()
if(m_sel.tme || m_sel.iip && m_sel.tfx != 3) if(m_sel.tme || m_sel.iip && m_sel.tfx != 3)
{ {
mov(edx, dword[esp + _dscan]);
for(int i = 0; i < 3; i++) for(int i = 0; i < 3; i++)
{ {
movaps(Xmm(5 + i), ptr[&m_shift[i]]); movaps(Xmm(5 + i), ptr[&m_shift[i]]);

View File

@ -104,7 +104,7 @@ void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
// FIXME: something's not right with the sky in burnout 3 // FIXME: something's not right with the sky in burnout 3
void GSDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan)
{ {
GSScanlineSelector sel = m_global.sel; GSScanlineSelector sel = m_global.sel;
@ -147,12 +147,12 @@ void GSDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan
{ {
if(has_f) if(has_f)
{ {
m_local.p.f = GSVector4i(vertex.p).zzzzh().zzzz(); m_local.p.f = GSVector4i(vertex[index[1]].p).zzzzh().zzzz();
} }
if(has_z) if(has_z)
{ {
m_local.p.z = vertex.t.u32[3]; // uint32 z is bypassed in t.w m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w
} }
} }
} }
@ -236,7 +236,17 @@ void GSDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan
} }
else else
{ {
GSVector4i c = GSVector4i(vertex.c); int last = 0;
switch(sel.prim)
{
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_TRIANGLE_CLASS: last = 2; break;
case GS_SPRITE_CLASS: last = 1; break;
}
GSVector4i c = GSVector4i(vertex[index[last]].c);
c = c.upl16(c.zwxy()); c = c.upl16(c.zwxy());

View File

@ -65,7 +65,7 @@ public:
#ifndef ENABLE_JIT_RASTERIZER #ifndef ENABLE_JIT_RASTERIZER
void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan); void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan); void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan); void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);

View File

@ -41,12 +41,11 @@ public:
GIFRegFRAME FRAME; GIFRegFRAME FRAME;
GIFRegZBUF ZBUF; GIFRegZBUF ZBUF;
__aligned(struct, 32) struct
{ {
GSVector4i dx10;
GSVector4 dx9;
GSVector4 in; GSVector4 in;
GSVector4 ex; GSVector4 ofex;
uint32 ofxy;
} scissor; } scissor;
struct struct
@ -83,25 +82,22 @@ public:
void UpdateScissor() void UpdateScissor()
{ {
scissor.dx10 = GSVector4i( scissor.ofex = GSVector4(
(int)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX), (int)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX),
(int)((SCISSOR.SCAY0 << 4) + XYOFFSET.OFY), (int)((SCISSOR.SCAY0 << 4) + XYOFFSET.OFY),
(int)((SCISSOR.SCAX1 << 4) + XYOFFSET.OFX), (int)((SCISSOR.SCAX1 << 4) + XYOFFSET.OFX),
(int)((SCISSOR.SCAY1 << 4) + XYOFFSET.OFY)); (int)((SCISSOR.SCAY1 << 4) + XYOFFSET.OFY));
scissor.dx9 = GSVector4(scissor.dx10);
scissor.in = GSVector4( scissor.in = GSVector4(
(int)SCISSOR.SCAX0, (int)SCISSOR.SCAX0,
(int)SCISSOR.SCAY0, (int)SCISSOR.SCAY0,
(int)SCISSOR.SCAX1 + 1, (int)SCISSOR.SCAX1 + 1,
(int)SCISSOR.SCAY1 + 1); (int)SCISSOR.SCAY1 + 1);
scissor.ex = GSVector4( uint16 ofx = (uint16)XYOFFSET.OFX - 15;
(int)SCISSOR.SCAX0, uint16 ofy = (uint16)XYOFFSET.OFY - 15;
(int)SCISSOR.SCAY0,
(int)SCISSOR.SCAX1, scissor.ofxy = ((ofy << 16) | ofx); // ceil(xy) => (xy - offset + 15) >> 4 => (xy - [offset - 15]) >> 4
(int)SCISSOR.SCAY1);
} }
bool DepthRead() const bool DepthRead() const

View File

@ -210,21 +210,47 @@ void GSRasterizer::Draw(GSRasterizerData* data)
template<bool scissor_test> template<bool scissor_test>
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count) void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count)
{ {
for(int i = 0, count = index != NULL ? index_count : vertex_count; i < count; i++) if(index != NULL)
{ {
const GSVertexSW& v = vertex[index != NULL ? index[i] : i]; for(int i = 0; i < index_count; i++, index++)
GSVector4i p(v.p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{ {
if(IsOneOfMyScanlines(p.y)) const GSVertexSW& v = vertex[*index];
GSVector4i p(v.p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{ {
m_pixels++; if(IsOneOfMyScanlines(p.y))
{
m_pixels++;
m_ds->SetupPrim(v, v); m_ds->SetupPrim(vertex, index, GSVertexSW::zero());
m_ds->DrawScanline(1, p.x, p.y, v); m_ds->DrawScanline(1, p.x, p.y, v);
}
}
}
}
else
{
uint32 tmp_index[1] = {0};
for(int i = 0; i < vertex_count; i++, vertex++)
{
const GSVertexSW& v = vertex[0];
GSVector4i p(v.p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
if(IsOneOfMyScanlines(p.y))
{
m_pixels++;
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero());
m_ds->DrawScanline(1, p.x, p.y, v);
}
} }
} }
} }
@ -246,7 +272,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
DrawEdge(v0, v1, dv, i, 0); DrawEdge(v0, v1, dv, i, 0);
DrawEdge(v0, v1, dv, i, 1); DrawEdge(v0, v1, dv, i, 1);
Flush(v1, GSVertexSW::zero(), true); Flush(vertex, index, GSVertexSW::zero(), true);
return; return;
} }
@ -289,7 +315,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
scan += dscan * (l - scan.p).xxxx(); scan += dscan * (l - scan.p).xxxx();
m_ds->SetupPrim(v1, dscan); m_ds->SetupPrim(vertex, index, dscan);
m_ds->DrawScanline(pixels, left, p.y, scan); m_ds->DrawScanline(pixels, left, p.y, scan);
} }
@ -329,7 +355,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
m_edge.count = e - m_edge.buff; m_edge.count = e - m_edge.buff;
Flush(v1, GSVertexSW::zero()); Flush(vertex, index, GSVertexSW::zero());
} }
} }
@ -472,7 +498,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
} }
} }
Flush(vertex[index[2]], dscan); Flush(vertex, index, dscan);
if(m_ds->HasEdge()) if(m_ds->HasEdge())
{ {
@ -487,7 +513,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
DrawEdge(v0, v2, dv[1], orientation & 2, side & 2); DrawEdge(v0, v2, dv[1], orientation & 2, side & 2);
DrawEdge(v1, v2, dv[2], orientation & 4, side & 4); DrawEdge(v1, v2, dv[2], orientation & 4, side & 4);
Flush(vertex[index[2]], GSVertexSW::zero(), true); Flush(vertex, index, GSVertexSW::zero(), true);
} }
} }
@ -609,7 +635,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index, boo
if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy(); if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx(); if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
m_ds->SetupPrim(v1, dscan); m_ds->SetupPrim(vertex, index, dscan);
while(1) while(1)
{ {
@ -826,7 +852,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
e->p.i16[2] = (int16)top; e->p.i16[2] = (int16)top;
} }
void GSRasterizer::Flush(const GSVertexSW& vertex, const GSVertexSW& dscan, bool edge) void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge)
{ {
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline) // TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
@ -834,7 +860,7 @@ void GSRasterizer::Flush(const GSVertexSW& vertex, const GSVertexSW& dscan, bool
if(count > 0) if(count > 0)
{ {
m_ds->SetupPrim(vertex, dscan); m_ds->SetupPrim(vertex, index, dscan);
const GSVertexSW* RESTRICT e = m_edge.buff; const GSVertexSW* RESTRICT e = m_edge.buff;
const GSVertexSW* RESTRICT ee = e + count; const GSVertexSW* RESTRICT ee = e + count;

View File

@ -67,7 +67,7 @@ public:
class IDrawScanline : public GSAlignedClass<32> class IDrawScanline : public GSAlignedClass<32>
{ {
public: public:
typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW& vertex, const GSVertexSW& dscan); typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan); typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
@ -86,14 +86,14 @@ public:
#ifdef ENABLE_JIT_RASTERIZER #ifdef ENABLE_JIT_RASTERIZER
__forceinline void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) {m_sp(vertex, dscan);} __forceinline void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) {m_sp(vertex, index, dscan);}
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);} __forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);} __forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);} __forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
#else #else
virtual void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) = 0; virtual void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) = 0;
virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0; virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0;
virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0; virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0;
virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0; virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0;
@ -140,7 +140,7 @@ protected:
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side); void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan); __forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSVertexSW& vertex, const GSVertexSW& dscan, bool edge = false); __forceinline void Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge = false);
public: public:
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon); GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);

View File

@ -206,8 +206,6 @@ void GSRendererSW::Draw()
if(!GetScanlineGlobalData(sd)) return; if(!GetScanlineGlobalData(sd)) return;
//
data->primclass = m_vt->m_primclass; data->primclass = m_vt->m_primclass;
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32); data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
data->vertex = (GSVertexSW*)data->buff; data->vertex = (GSVertexSW*)data->buff;
@ -218,6 +216,11 @@ void GSRendererSW::Draw()
memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next); memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
memcpy(data->index, m_index.buff, sizeof(uint32) * m_index.tail); memcpy(data->index, m_index.buff, sizeof(uint32) * m_index.tail);
for(size_t i = 0; i < m_index.tail; i++)
{
ASSERT(((GSVertexSW*)m_vertex.buff + m_index.buff[i])->_pad.u32[0] == 0x12345678);
}
// //
const GSDrawingContext* context = m_context; const GSDrawingContext* context = m_context;

View File

@ -27,10 +27,17 @@
using namespace Xbyak; using namespace Xbyak;
static const int _args = 0;
static const int _vertex = _args + 4;
static const int _index = _args + 8;
static const int _dscan = _args + 12;
void GSSetupPrimCodeGenerator::Generate() void GSSetupPrimCodeGenerator::Generate()
{ {
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
{ {
mov(edx, dword[esp + _dscan]);
for(int i = 0; i < 5; i++) for(int i = 0; i < 5; i++)
{ {
vmovaps(Xmm(3 + i), ptr[&m_shift[i]]); vmovaps(Xmm(3 + i), ptr[&m_shift[i]]);
@ -107,7 +114,12 @@ void GSSetupPrimCodeGenerator::Depth()
} }
else else
{ {
// GSVector4 p = vertex.p; // GSVector4 p = vertex[index[1]].p;
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
@ -312,7 +324,25 @@ void GSSetupPrimCodeGenerator::Color()
} }
else else
{ {
// GSVector4i c = GSVector4i(vertex.c); // GSVector4i c = GSVector4i(vertex[index[last]].c);
int last = 0;
switch(m_sel.prim)
{
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_TRIANGLE_CLASS: last = 2; break;
case GS_SPRITE_CLASS: last = 1; break;
}
if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
{
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * last]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
}
vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]); vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);

View File

@ -27,10 +27,17 @@
using namespace Xbyak; using namespace Xbyak;
static const int _args = 0;
static const int _vertex = _args + 4;
static const int _index = _args + 8;
static const int _dscan = _args + 12;
void GSSetupPrimCodeGenerator::Generate() void GSSetupPrimCodeGenerator::Generate()
{ {
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
{ {
mov(edx, dword[esp + _dscan]);
for(int i = 0; i < 5; i++) for(int i = 0; i < 5; i++)
{ {
movaps(Xmm(3 + i), ptr[&m_shift[i]]); movaps(Xmm(3 + i), ptr[&m_shift[i]]);
@ -112,7 +119,12 @@ void GSSetupPrimCodeGenerator::Depth()
} }
else else
{ {
// GSVector4 p = vertex.p; // GSVector4 p = vertex[index[1]].p;
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
@ -327,7 +339,25 @@ void GSSetupPrimCodeGenerator::Color()
} }
else else
{ {
// GSVector4i c = GSVector4i(vertex.c); // GSVector4i c = GSVector4i(vertex[index[last]].c);
int last = 0;
switch(m_sel.prim)
{
case GS_POINT_CLASS: last = 0; break;
case GS_LINE_CLASS: last = 1; break;
case GS_TRIANGLE_CLASS: last = 2; break;
case GS_SPRITE_CLASS: last = 1; break;
}
if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
{
mov(ecx, ptr[esp + _index]);
mov(ecx, ptr[ecx + sizeof(uint32) * last]);
shl(ecx, 6); // * sizeof(GSVertexSW)
add(ecx, ptr[esp + _vertex]);
}
cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]); cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);

View File

@ -499,13 +499,6 @@ void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r)
template<uint32 prim, uint32 adc> template<uint32 prim, uint32 adc>
void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r) void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
{ {
if(adc)
{
// not sure what the difference is between this and XYZF2 with ADC bit set
//printf("XYZF3 X %d Y %d Z %d F %d ADC %d\n", r->XYZF2.X, r->XYZF2.Y, r->XYZF2.Z, r->XYZF2.F, r->XYZF2.ADC);
}
/* /*
m_v.XYZ.X = r->XYZF2.X; m_v.XYZ.X = r->XYZF2.X;
m_v.XYZ.Y = r->XYZF2.Y; m_v.XYZ.Y = r->XYZF2.Y;
@ -525,12 +518,6 @@ void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
template<uint32 prim, uint32 adc> template<uint32 prim, uint32 adc>
void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r) void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
{ {
if(adc)
{
// not sure what the difference is between this and XYZ2 with ADC bit set
//printf("XYZ3 X %d Y %d Z %d ADC %d\n", r->XYZ2.X, r->XYZ2.Y, r->XYZ2.Z, r->XYZ2.ADC);
}
/* /*
m_v.XYZ.X = r->XYZ2.X; m_v.XYZ.X = r->XYZ2.X;
m_v.XYZ.Y = r->XYZ2.Y; m_v.XYZ.Y = r->XYZ2.Y;
@ -585,12 +572,17 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
m_env.PRIM = (GSVector4i)prim; m_env.PRIM = (GSVector4i)prim;
m_env.PRMODE._PRIM = prim.PRIM; m_env.PRMODE._PRIM = prim.PRIM;
m_context = &m_env.CTXT[PRIM->CTXT]; UpdateContext();
UpdateVertexKick(); UpdateVertexKick();
ASSERT(m_index.tail == 0 || m_index.buff[m_index.tail - 1] + 1 == m_vertex.next); ASSERT(m_index.tail == 0 || m_index.buff[m_index.tail - 1] + 1 == m_vertex.next);
if(m_index.tail == 0)
{
m_vertex.next = 0;
}
m_vertex.head = m_vertex.tail = m_vertex.next; // remove unused vertices from the end of the vertex buffer m_vertex.head = m_vertex.tail = m_vertex.next; // remove unused vertices from the end of the vertex buffer
} }
@ -821,6 +813,8 @@ template<int i> void GSState::GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r)
m_env.CTXT[i].XYOFFSET = o; m_env.CTXT[i].XYOFFSET = o;
m_env.CTXT[i].UpdateScissor(); m_env.CTXT[i].UpdateScissor();
UpdateScissor();
} }
void GSState::GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r)
@ -836,7 +830,7 @@ void GSState::GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r)
// if(PRIM->PRIM == 7) printf("Invalid PRMODECONT/PRIM\n"); // if(PRIM->PRIM == 7) printf("Invalid PRMODECONT/PRIM\n");
m_context = &m_env.CTXT[PRIM->CTXT]; UpdateContext();
UpdateVertexKick(); UpdateVertexKick();
} }
@ -852,7 +846,7 @@ void GSState::GIFRegHandlerPRMODE(const GIFReg* RESTRICT r)
m_env.PRMODE = (GSVector4i)r->PRMODE; m_env.PRMODE = (GSVector4i)r->PRMODE;
m_env.PRMODE._PRIM = _PRIM; m_env.PRMODE._PRIM = _PRIM;
m_context = &m_env.CTXT[PRIM->CTXT]; UpdateContext();
UpdateVertexKick(); UpdateVertexKick();
} }
@ -932,6 +926,8 @@ template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r)
m_env.CTXT[i].SCISSOR = (GSVector4i)r->SCISSOR; m_env.CTXT[i].SCISSOR = (GSVector4i)r->SCISSOR;
m_env.CTXT[i].UpdateScissor(); m_env.CTXT[i].UpdateScissor();
UpdateScissor();
} }
template<int i> void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r) template<int i> void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r)
@ -1268,14 +1264,20 @@ void GSState::FlushPrim()
size_t stride = m_vertex.stride; size_t stride = m_vertex.stride;
size_t head = m_vertex.head; size_t head = m_vertex.head;
size_t tail = m_vertex.tail; size_t tail = m_vertex.tail;
size_t next = m_vertex.next;
if(tail > head) if(tail > head)
{ {
switch(PRIM->PRIM) switch(PRIM->PRIM)
{ {
case GS_POINTLIST:
break;
case GS_LINELIST:
case GS_LINESTRIP: case GS_LINESTRIP:
case GS_SPRITE:
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride); if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
break; break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP: case GS_TRIANGLESTRIP:
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride); if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (head + 1)], stride); if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (head + 1)], stride);
@ -1284,10 +1286,6 @@ void GSState::FlushPrim()
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride); if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (tail - 1)], stride); if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (tail - 1)], stride);
break; break;
case GS_POINTLIST:
case GS_LINELIST:
case GS_TRIANGLELIST:
case GS_SPRITE:
case GS_INVALID: case GS_INVALID:
break; break;
default: default:
@ -1307,40 +1305,37 @@ void GSState::FlushPrim()
m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM)); m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
} }
m_index.tail = 0;
m_vertex.head = 0; m_vertex.head = 0;
m_vertex.tail = 0; m_vertex.tail = 0;
m_vertex.next = 0;
if(tail > head) if(tail > head)
{ {
switch(PRIM->PRIM) switch(PRIM->PRIM)
{ {
case GS_POINTLIST:
break;
case GS_LINELIST:
case GS_LINESTRIP: case GS_LINESTRIP:
case GS_SPRITE:
if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;} if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
break; break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP: case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN: case GS_TRIANGLEFAN:
if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;} if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
if(tail > head + 1) {memcpy(&m_vertex.buff[stride * 1], &buff[stride * 1], stride); m_vertex.tail++;} if(tail > head + 1) {memcpy(&m_vertex.buff[stride * 1], &buff[stride * 1], stride); m_vertex.tail++;}
break; break;
case GS_POINTLIST:
case GS_LINELIST:
case GS_TRIANGLELIST:
case GS_SPRITE:
case GS_INVALID: case GS_INVALID:
break; break;
default: default:
__assume(0); __assume(0);
} }
}
m_vertex.next = m_vertex.tail; m_vertex.next = next > head ? next - head : 0;
m_index.tail = 0; }
}
else
{
m_vertex.head = 0;
m_vertex.tail = 0;
m_vertex.next = 0;
} }
} }
@ -2064,7 +2059,7 @@ int GSState::Defrost(const GSFreezeData* fd)
PRIM = !m_env.PRMODECONT.AC ? (GIFRegPRIM*)&m_env.PRMODE : &m_env.PRIM; PRIM = !m_env.PRMODECONT.AC ? (GIFRegPRIM*)&m_env.PRMODE : &m_env.PRIM;
m_context = &m_env.CTXT[PRIM->CTXT]; UpdateContext();
UpdateVertexKick(); UpdateVertexKick();
@ -2080,6 +2075,8 @@ int GSState::Defrost(const GSFreezeData* fd)
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF); m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF);
} }
UpdateScissor();
m_perfmon.SetFrame(5000); m_perfmon.SetFrame(5000);
return 0; return 0;
@ -2094,6 +2091,19 @@ void GSState::SetGameCRC(uint32 crc, int options)
// //
// Re-selects the active drawing context from the current PRIM register and
// then refreshes the scissor values cached on GSState from that context.
void GSState::UpdateContext()
{
// PRIM->CTXT picks which entry of m_env.CTXT is the active context
m_context = &m_env.CTXT[PRIM->CTXT];
// m_scissor / m_ofxy are copied out of m_context, so they must be re-copied after the switch
UpdateScissor();
}
// Copies the active context's precomputed scissor data into the GSState
// members m_scissor and m_ofxy (both are read by VertexKick).
void GSState::UpdateScissor()
{
// ofex: scissor corners ((SCAX/SCAY << 4) + XYOFFSET.OFX/OFY) stored as a GSVector4
m_scissor = m_context->scissor.ofex;
// ofxy: packed ((OFY - 15) << 16) | (OFX - 15), each truncated to 16 bits
m_ofxy = m_context->scissor.ofxy;
}
void GSState::UpdateVertexKick() void GSState::UpdateVertexKick()
{ {
uint32 prim = PRIM->PRIM; uint32 prim = PRIM->PRIM;
@ -2157,7 +2167,7 @@ __forceinline void GSState::VertexKick(uint32 skip)
tailptr[0] = v0; tailptr[0] = v0;
tailptr[1] = v1; tailptr[1] = v1;
m_vertex.xy[xy_tail & 3] = GSVector4(v1.upl32(v1.add16(GSVector4i::x000f()).srl16(4)).upl16()); m_vertex.xy[xy_tail & 3] = GSVector4(v1.upl32(v1.sub16(GSVector4i::load(m_ofxy)).sra16(4)).upl16()); // zw not sign extended, only useful for eq tests
#ifdef _DEBUG #ifdef _DEBUG
memset(&tailptr[2], 0, m_vertex.stride - sizeof(GSVertex)); memset(&tailptr[2], 0, m_vertex.stride - sizeof(GSVertex));
@ -2221,9 +2231,7 @@ __forceinline void GSState::VertexKick(uint32 skip)
break; break;
} }
GSVector4 scissor = m_context->scissor.dx9; GSVector4 test = pmax < m_scissor | pmin > m_scissor.zwxy();
GSVector4 test = pmax < scissor | pmin > scissor.zwxy();
switch(prim) switch(prim)
{ {

View File

@ -131,6 +131,8 @@ protected:
GSVertex m_v; GSVertex m_v;
float m_q; float m_q;
GSVector4 m_scissor;
uint32 m_ofxy;
struct struct
{ {
@ -168,6 +170,9 @@ protected:
InitConvertVertex2(T, GS_SPRITE) \ InitConvertVertex2(T, GS_SPRITE) \
InitConvertVertex2(T, GS_INVALID) \ InitConvertVertex2(T, GS_INVALID) \
void UpdateContext();
void UpdateScissor();
virtual void UpdateVertexKick(); virtual void UpdateVertexKick();
void GrowVertexBuffer(); void GrowVertexBuffer();

View File

@ -26,12 +26,12 @@
using namespace Xbyak; using namespace Xbyak;
static const int _args = 0; static const int _args = 4;
static const int _count = _args + 8; // rcx static const int _count = _args + 4; // rcx
static const int _vertex = _args + 12; // rdx static const int _vertex = _args + 8; // rdx
static const int _index = _args + 16; // r8 static const int _index = _args + 12; // r8
static const int _min = _args + 20; // r9 static const int _min = _args + 16; // r9
static const int _max = _args + 24; // _args + 4 static const int _max = _args + 20; // _args + 4
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)

View File

@ -26,12 +26,12 @@
using namespace Xbyak; using namespace Xbyak;
static const int _args = 0; static const int _args = 4;
static const int _count = _args + 8; // rcx static const int _count = _args + 4; // rcx
static const int _vertex = _args + 12; // rdx static const int _vertex = _args + 8; // rdx
static const int _index = _args + 16; // r8 static const int _index = _args + 12; // r8
static const int _min = _args + 20; // r9 static const int _min = _args + 16; // r9
static const int _max = _args + 24; // _args + 4 static const int _max = _args + 20; // _args + 4
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)