gsdx ogl: improve ST/Q precision

When a float overflow is detected, the geometry shader is disabled and the ST/Q division is done on the CPU instead.

It will help with the following issues:
Issue 551
Issue 1684
This commit is contained in:
Gregory Hainaut 2017-03-11 00:24:38 +01:00
parent 6d6ed1a205
commit 7d3c850813
4 changed files with 31 additions and 4 deletions

View File

@ -92,6 +92,19 @@ void GSRendererOGL::Lines2Sprites()
v0.XYZ.Z = v1.XYZ.Z;
v0.FOG = v1.FOG;
if (PRIM->TME && !PRIM->FST) {
GSVector4 st0 = GSVector4::loadl(&v0.ST.u64);
GSVector4 st1 = GSVector4::loadl(&v1.ST.u64);
GSVector4 Q = GSVector4(v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q);
GSVector4 st = st0.upld(st1) / Q;
GSVector4::storel(&v0.ST.u64, st);
GSVector4::storeh(&v1.ST.u64, st);
v0.RGBAQ.Q = 1.0f;
v1.RGBAQ.Q = 1.0f;
}
q[0] = v0;
q[3] = v1;
@ -176,7 +189,7 @@ void GSRendererOGL::SetupIA(const float& sx, const float& sy)
// the extra validation cost of the extra stage.
//
// Note: keep the geometry shader enabled in the replayer to ease debugging.
if (GLLoader::found_geometry_shader && (m_vertex.next > 32 || GLLoader::in_replayer)) { // <=> 16 sprites (based on Shadow Hearts)
if (GLLoader::found_geometry_shader && !m_vt.m_accurate_stq && (m_vertex.next > 32 || GLLoader::in_replayer)) { // <=> 16 sprites (based on Shadow Hearts)
m_gs_sel.sprite = 1;
t = GL_LINES;

View File

@ -2900,6 +2900,16 @@ public:
return GSVector4(_mm_unpackhi_ps(m, a));
}
__forceinline GSVector4 upld(const GSVector4& a) const
{
	// Unpack-low on 64-bit lanes: treat both registers as two doubles so that
	// each pair of floats moves as a single unit.
	// Result: low 64 bits  = low 64 bits of *this,
	//         high 64 bits = low 64 bits of a.
	const auto self_as_pd = _mm_castps_pd(m);
	const auto other_as_pd = _mm_castps_pd(a.m);
	const auto merged_low = _mm_unpacklo_pd(self_as_pd, other_as_pd);

	return GSVector4(_mm_castpd_ps(merged_low));
}
__forceinline GSVector4 uphd(const GSVector4& a) const
{
	// Unpack-high on 64-bit lanes: treat both registers as two doubles so that
	// each pair of floats moves as a single unit.
	// Result: low 64 bits  = high 64 bits of *this,
	//         high 64 bits = high 64 bits of a.
	const auto self_as_pd = _mm_castps_pd(m);
	const auto other_as_pd = _mm_castps_pd(a.m);
	const auto merged_high = _mm_unpackhi_pd(self_as_pd, other_as_pd);

	return GSVector4(_mm_castpd_ps(merged_high));
}
__forceinline GSVector4 l2h(const GSVector4& a) const
{
return GSVector4(_mm_movelh_ps(m, a));
@ -3105,6 +3115,11 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
_mm_store_sd((double*)p, _mm_castps_pd(v.m));
}
__forceinline static void storeh(void* p, const GSVector4& v)
{
	// Write only the upper 64 bits of v (its two highest float lanes) to p.
	// The register is reinterpreted as two doubles so the pair is stored at once.
	double* const dst = (double*)p;
	const auto bits_as_pd = _mm_castps_pd(v.m);

	_mm_storeh_pd(dst, bits_as_pd);
}
template<bool aligned> __forceinline static void store(void* p, const GSVector4& v)
{
if(aligned) _mm_store_ps((float*)p, v.m);

View File

@ -32,7 +32,7 @@ void GSVertexTrace::InitVectors()
}
GSVertexTrace::GSVertexTrace(const GSState* state)
: m_state(state), m_accurate_stq(false), m_primclass(GS_INVALID_CLASS)
: m_accurate_stq(false), m_state(state), m_primclass(GS_INVALID_CLASS)
{
m_force_filter = static_cast<BiFiltering>(theApp.GetConfigI("filter"));
memset(&m_alpha, 0, sizeof(m_alpha));

View File

@ -36,14 +36,13 @@ class alignas(32) GSVertexTrace : public GSAlignedClass<32>
public:
struct Vertex {GSVector4i c; GSVector4 p, t;};
struct VertexAlpha {int min, max; bool valid;};
bool m_accurate_stq;
protected:
const GSState* m_state;
static GSVector4 s_minmax;
bool m_accurate_stq;
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
FindMinMaxPtr m_fmm[2][2][2][2][2][4];