diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index eca864ac19..dab27400bd 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -250,10 +250,12 @@ void GSRendererSW::Draw() GSVertexSW* RESTRICT d = sd->vertex; GSVector4i o = (GSVector4i)m_context->XYOFFSET; - GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH); + GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0); for(size_t i = 0; i < m_vertex.next; i++, s++, d++) { + // TODO: load xyzuvf in one piece + uint32 z = s->XYZ.Z; GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - o; @@ -263,6 +265,8 @@ void GSRendererSW::Draw() p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; + GSVector4 stcq = GSVector4::load(&s->m[0]); // s t rgba q + if(PRIM->TME) { if(PRIM->FST) @@ -271,12 +275,11 @@ void GSRendererSW::Draw() } else { - t = GSVector4(s->ST.S, s->ST.T) * tsize; - t = t.xyxy(GSVector4::load(s->RGBAQ.Q)); + t = stcq.xyww() * tsize; } } - c = GSVector4::rgba32(s->RGBAQ.u32[0], 7); + c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7); d->p = p; d->c = c; @@ -284,7 +287,7 @@ void GSRendererSW::Draw() if(sd->primclass == GS_SPRITE_CLASS) { - d->t.u32[3] = z; + d->t.u32[3] = z; // TODO: store this to the 4th unused GSVector4? } } } diff --git a/plugins/GSdx/GSVector.cpp b/plugins/GSdx/GSVector.cpp index 7a824cb9ad..27b6a5a0e9 100644 --- a/plugins/GSdx/GSVector.cpp +++ b/plugins/GSdx/GSVector.cpp @@ -22,8 +22,9 @@ #include "stdafx.h" #include "GSVector.h" -const GSVector4i GSVector4i::m_xff[16] = +const GSVector4i GSVector4i::m_xff[17] = { + GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000), @@ -42,8 +43,9 @@ const GSVector4i GSVector4i::m_xff[16] = GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), }; -const GSVector4i GSVector4i::m_x0f[16] = +const GSVector4i GSVector4i::m_x0f[17] = { + GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index f066a11016..5646dfbd6f 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -79,8 +79,8 @@ class GSVector4; __aligned(class, 16) GSVector4i { - static const GSVector4i m_xff[16]; - static const GSVector4i m_x0f[16]; + static const GSVector4i m_xff[17]; + static const GSVector4i m_x0f[17]; public: union diff --git a/plugins/GSdx/GSVertexTrace.cpp b/plugins/GSdx/GSVertexTrace.cpp index bfb0857c48..63f4c1a3bd 100644 --- a/plugins/GSdx/GSVertexTrace.cpp +++ b/plugins/GSdx/GSVertexTrace.cpp @@ -140,6 +140,8 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun GSVector4i cmin = GSVector4i::xffffffff(); GSVector4i cmax = GSVector4i::zero(); + // TODO: SSE41 has integer min/max, use that for xy/z/uv/f + const GSVertex* RESTRICT v = (GSVertex*)vertex; for(int i = 0; i < count; i += n)