GS: Use flat/nointerpolation for IIP=0 instead of a geometry shader

This commit is contained in:
Connor McLaughlin 2021-12-23 21:35:05 +10:00 committed by refractionpcsx2
parent bd43400ad8
commit bce54a64ff
13 changed files with 138 additions and 134 deletions

View File

@ -5,6 +5,7 @@
#define FMT_16 2
#ifndef VS_TME
#define VS_IIP 0
#define VS_TME 1
#define VS_FST 1
#endif
@ -12,10 +13,10 @@
#ifndef GS_IIP
#define GS_IIP 0
#define GS_PRIM 3
#define GS_EXPAND 0
#endif
#ifndef PS_FST
#define PS_IIP 0
#define PS_FST 0
#define PS_WMS 0
#define PS_WMT 0
@ -25,6 +26,7 @@
#define PS_TCC 1
#define PS_ATST 1
#define PS_FOG 0
#define PS_IIP 0
#define PS_CLR1 0
#define PS_FBA 0
#define PS_FBMASK 0
@ -72,7 +74,12 @@ struct VS_OUTPUT
float4 p : SV_Position;
float4 t : TEXCOORD0;
float4 ti : TEXCOORD2;
#if VS_IIP != 0 || GS_IIP != 0 || PS_IIP != 0
float4 c : COLOR0;
#else
nointerpolation float4 c : COLOR0;
#endif
};
struct PS_INPUT
@ -80,7 +87,11 @@ struct PS_INPUT
float4 p : SV_Position;
float4 t : TEXCOORD0;
float4 ti : TEXCOORD2;
#if VS_IIP != 0 || GS_IIP != 0 || PS_IIP != 0
float4 c : COLOR0;
#else
nointerpolation float4 c : COLOR0;
#endif
};
struct PS_OUTPUT
@ -854,15 +865,7 @@ VS_OUTPUT vs_main(VS_INPUT input)
// Geometry Shader
//////////////////////////////////////////////////////////////////////
#if GS_PRIM == 0 && GS_EXPAND == 0
[maxvertexcount(1)]
void gs_main(point VS_OUTPUT input[1], inout PointStream<VS_OUTPUT> stream)
{
stream.Append(input[0]);
}
#elif GS_PRIM == 0 && GS_EXPAND == 1
#if GS_PRIM == 0
[maxvertexcount(6)]
void gs_main(point VS_OUTPUT input[1], inout TriangleStream<VS_OUTPUT> stream)
@ -899,20 +902,7 @@ void gs_main(point VS_OUTPUT input[1], inout TriangleStream<VS_OUTPUT> stream)
stream.Append(Point);
}
#elif GS_PRIM == 1 && GS_EXPAND == 0
[maxvertexcount(2)]
void gs_main(line VS_OUTPUT input[2], inout LineStream<VS_OUTPUT> stream)
{
#if GS_IIP == 0
input[0].c = input[1].c;
#endif
stream.Append(input[0]);
stream.Append(input[1]);
}
#elif GS_PRIM == 1 && GS_EXPAND == 1
#elif GS_PRIM == 1
[maxvertexcount(6)]
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<VS_OUTPUT> stream)
@ -960,21 +950,6 @@ void gs_main(line VS_OUTPUT input[2], inout TriangleStream<VS_OUTPUT> stream)
stream.RestartStrip();
}
#elif GS_PRIM == 2
[maxvertexcount(3)]
void gs_main(triangle VS_OUTPUT input[3], inout TriangleStream<VS_OUTPUT> stream)
{
#if GS_IIP == 0
input[0].c = input[2].c;
input[1].c = input[2].c;
#endif
stream.Append(input[0]);
stream.Append(input[1]);
stream.Append(input[2]);
}
#elif GS_PRIM == 3
[maxvertexcount(4)]

View File

@ -30,8 +30,12 @@ in SHADER
{
vec4 t_float;
vec4 t_int;
vec4 c;
flat vec4 fc;
#if PS_IIP != 0
vec4 c;
#else
flat vec4 c;
#endif
} PSin;
// Same buffer but 2 colors for dual source blending
@ -587,11 +591,7 @@ vec4 ps_color()
vec4 T = sample_color(st);
#endif
#if PS_IIP == 1
vec4 C = tfx(T, PSin.c);
#else
vec4 C = tfx(T, PSin.fc);
#endif
atst(C);

View File

@ -16,8 +16,11 @@ out SHADER
{
vec4 t_float;
vec4 t_int;
vec4 c;
flat vec4 fc;
#if VS_IIP != 0
vec4 c;
#else
flat vec4 c;
#endif
} VSout;
const float exp_min32 = exp2(-32.0f);
@ -63,7 +66,6 @@ void vs_main()
texture_coord();
VSout.c = i_c;
VSout.fc = i_c;
VSout.t_float.z = i_f.x; // pack for with texture
}
@ -78,8 +80,11 @@ in SHADER
{
vec4 t_float;
vec4 t_int;
vec4 c;
flat vec4 fc;
#if GS_IIP != 0
vec4 c;
#else
flat vec4 c;
#endif
} GSin[];
#if !defined(BROKEN_DRIVER) && defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts
@ -89,8 +94,11 @@ out SHADER
{
vec4 t_float;
vec4 t_int;
vec4 c;
flat vec4 fc;
#if GS_IIP != 0
vec4 c;
#else
flat vec4 c;
#endif
} GSout;
struct vertex
@ -104,12 +112,11 @@ void out_vertex(in vec4 position, in vertex v)
{
GSout.t_float = v.t_float;
GSout.t_int = v.t_int;
GSout.c = v.c;
// Flat output
#if GS_POINT == 1
GSout.fc = GSin[0].fc;
GSout.c = GSin[0].c;
#else
GSout.fc = GSin[1].fc;
GSout.c = GSin[1].c;
#endif
gl_Position = position;
gl_PrimitiveID = gl_PrimitiveIDIn;

View File

@ -225,6 +225,33 @@ void GSState::Reset()
m_scanmask_used = false;
}
// Populates the per-primitive vertex-kick handler tables
// (m_fpGIFPackedRegHandlerXYZ, m_fpGIFRegHandlerXYZ,
// m_fpGIFPackedRegHandlerSTQRGBAXYZF2 and m_fpGIFPackedRegHandlerSTQRGBAXYZ2)
// with handler instantiations specialised on the two compile-time flags:
//   auto_flush - forwarded to the handlers' auto_flush template argument.
//   index_swap - forwarded to the handlers' index_swap template argument.
template<bool auto_flush, bool index_swap>
void GSState::SetPrimHandlers()
{
// Registers every handler variant for primitive type P.
// For the [P][0..3] tables the second index selects, in order:
// XYZF2 with adc=0, XYZF2 with adc=1, XYZ2 with adc=0, XYZ2 with adc=1.
// The macro parameters intentionally reuse the names of the template
// parameters, so an invocation may pass either a literal or the enclosing
// template argument.
#define SetHandlerXYZ(P, auto_flush, index_swap) \
m_fpGIFPackedRegHandlerXYZ[P][0] = &GSState::GIFPackedRegHandlerXYZF2<P, 0, auto_flush, index_swap>; \
m_fpGIFPackedRegHandlerXYZ[P][1] = &GSState::GIFPackedRegHandlerXYZF2<P, 1, auto_flush, index_swap>; \
m_fpGIFPackedRegHandlerXYZ[P][2] = &GSState::GIFPackedRegHandlerXYZ2<P, 0, auto_flush, index_swap>; \
m_fpGIFPackedRegHandlerXYZ[P][3] = &GSState::GIFPackedRegHandlerXYZ2<P, 1, auto_flush, index_swap>; \
m_fpGIFRegHandlerXYZ[P][0] = &GSState::GIFRegHandlerXYZF2<P, 0, auto_flush, index_swap>; \
m_fpGIFRegHandlerXYZ[P][1] = &GSState::GIFRegHandlerXYZF2<P, 1, auto_flush, index_swap>; \
m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2<P, 0, auto_flush, index_swap>; \
m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2<P, 1, auto_flush, index_swap>; \
m_fpGIFPackedRegHandlerSTQRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZF2<P, auto_flush, index_swap>; \
m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2<P, auto_flush, index_swap>;
// Points, sprites and the invalid primitive always pass index_swap=false;
// only line and triangle primitives forward the index_swap template argument.
// NOTE(review): GS_POINTLIST passes a literal `true` for auto_flush, so point
// handlers are instantiated with auto_flush enabled even when this function is
// instantiated with auto_flush=false - confirm this is intentional.
SetHandlerXYZ(GS_POINTLIST, true, false);
SetHandlerXYZ(GS_LINELIST, auto_flush, index_swap);
SetHandlerXYZ(GS_LINESTRIP, auto_flush, index_swap);
SetHandlerXYZ(GS_TRIANGLELIST, auto_flush, index_swap);
SetHandlerXYZ(GS_TRIANGLESTRIP, auto_flush, index_swap);
SetHandlerXYZ(GS_TRIANGLEFAN, auto_flush, index_swap);
SetHandlerXYZ(GS_SPRITE, auto_flush, false);
SetHandlerXYZ(GS_INVALID, auto_flush, false);
// Keep the helper macro local to this function.
#undef SetHandlerXYZ
}
void GSState::ResetHandlers()
{
std::fill(std::begin(m_fpGIFPackedRegHandlers), std::end(m_fpGIFPackedRegHandlers), &GSState::GIFPackedRegHandlerNull);
@ -241,40 +268,14 @@ void GSState::ResetHandlers()
m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D;
m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP;
#define SetHandlerXYZ(P, auto_flush) \
m_fpGIFPackedRegHandlerXYZ[P][0] = &GSState::GIFPackedRegHandlerXYZF2<P, 0, auto_flush>; \
m_fpGIFPackedRegHandlerXYZ[P][1] = &GSState::GIFPackedRegHandlerXYZF2<P, 1, auto_flush>; \
m_fpGIFPackedRegHandlerXYZ[P][2] = &GSState::GIFPackedRegHandlerXYZ2<P, 0, auto_flush>; \
m_fpGIFPackedRegHandlerXYZ[P][3] = &GSState::GIFPackedRegHandlerXYZ2<P, 1, auto_flush>; \
m_fpGIFRegHandlerXYZ[P][0] = &GSState::GIFRegHandlerXYZF2<P, 0, auto_flush>; \
m_fpGIFRegHandlerXYZ[P][1] = &GSState::GIFRegHandlerXYZF2<P, 1, auto_flush>; \
m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2<P, 0, auto_flush>; \
m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2<P, 1, auto_flush>; \
m_fpGIFPackedRegHandlerSTQRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZF2<P, auto_flush>; \
m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2<P, auto_flush>;
// swap first/last indices when the provoking vertex is the first (D3D/Vulkan)
const GSRendererType renderer = theApp.GetCurrentRendererType();
const bool is_hardware_renderer = (renderer == GSRendererType::DX1011_HW || renderer == GSRendererType::OGL_HW);
const bool index_swap = is_hardware_renderer && m_dev && !m_dev->Features().provoking_vertex_last;
if (m_userhacks_auto_flush)
{
SetHandlerXYZ(GS_POINTLIST, true);
SetHandlerXYZ(GS_LINELIST, true);
SetHandlerXYZ(GS_LINESTRIP, true);
SetHandlerXYZ(GS_TRIANGLELIST, true);
SetHandlerXYZ(GS_TRIANGLESTRIP, true);
SetHandlerXYZ(GS_TRIANGLEFAN, true);
SetHandlerXYZ(GS_SPRITE, true);
SetHandlerXYZ(GS_INVALID, true);
}
index_swap ? SetPrimHandlers<true, true>() : SetPrimHandlers<true, false>();
else
{
SetHandlerXYZ(GS_POINTLIST, false);
SetHandlerXYZ(GS_LINELIST, false);
SetHandlerXYZ(GS_LINESTRIP, false);
SetHandlerXYZ(GS_TRIANGLELIST, false);
SetHandlerXYZ(GS_TRIANGLESTRIP, false);
SetHandlerXYZ(GS_TRIANGLEFAN, false);
SetHandlerXYZ(GS_SPRITE, false);
SetHandlerXYZ(GS_INVALID, false);
}
index_swap ? SetPrimHandlers<false, true>() : SetPrimHandlers<false, false>();
std::fill(std::begin(m_fpGIFRegHandlers), std::end(m_fpGIFRegHandlers), &GSState::GIFRegHandlerNull);
@ -611,7 +612,7 @@ void GSState::GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r)
m_isPackedUV_HackFlag = true;
}
template <u32 prim, u32 adc, bool auto_flush>
template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
{
GSVector4i xy = GSVector4i::loadl(&r->U64[0]);
@ -622,10 +623,10 @@ void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
m_v.m[1] = xy.upl32(zf);
VertexKick<prim, auto_flush>(adc ? 1 : r->XYZF2.Skip());
VertexKick<prim, auto_flush, index_swap>(adc ? 1 : r->XYZF2.Skip());
}
template <u32 prim, u32 adc, bool auto_flush>
template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
{
const GSVector4i xy = GSVector4i::loadl(&r->U64[0]);
@ -634,7 +635,7 @@ void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV));
VertexKick<prim, auto_flush>(adc ? 1 : r->XYZ2.Skip());
VertexKick<prim, auto_flush, index_swap>(adc ? 1 : r->XYZ2.Skip());
}
void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
@ -651,7 +652,7 @@ void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r)
{
}
template <u32 prim, bool auto_flush>
template <u32 prim, bool auto_flush, bool index_swap>
void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size)
{
ASSERT(size > 0 && size % 3 == 0);
@ -675,7 +676,7 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u3
m_v.m[1] = xy.upl32(zf); // TODO: only store the last one
VertexKick<prim, auto_flush>(r[2].XYZF2.Skip());
VertexKick<prim, auto_flush, index_swap>(r[2].XYZF2.Skip());
r += 3;
}
@ -683,7 +684,7 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u3
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
}
template <u32 prim, bool auto_flush>
template <u32 prim, bool auto_flush, bool index_swap>
void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size)
{
ASSERT(size > 0 && size % 3 == 0);
@ -706,7 +707,7 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // TODO: only store the last one
VertexKick<prim, auto_flush>(r[2].XYZ2.Skip());
VertexKick<prim, auto_flush, index_swap>(r[2].XYZ2.Skip());
r += 3;
}
@ -753,7 +754,7 @@ __forceinline void GSState::ApplyPRIM(u32 prim)
UpdateVertexKick();
ASSERT(m_index.tail == 0 || m_index.buff[m_index.tail - 1] + 1 == m_vertex.next);
ASSERT(m_index.tail == 0 || !g_gs_device->Features().provoking_vertex_last || m_index.buff[m_index.tail - 1] + 1 == m_vertex.next);
if (m_index.tail == 0)
m_vertex.next = 0;
@ -804,7 +805,7 @@ void GSState::GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r)
m_isPackedUV_HackFlag = false;
}
template <u32 prim, u32 adc, bool auto_flush>
template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
{
GSVector4i xyzf = GSVector4i::loadl(&r->XYZF);
@ -813,15 +814,15 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
m_v.m[1] = xyz.upl64(uvf);
VertexKick<prim, auto_flush>(adc);
VertexKick<prim, auto_flush, index_swap>(adc);
}
template <u32 prim, u32 adc, bool auto_flush>
template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
{
m_v.m[1] = GSVector4i::load(&r->XYZ, &m_v.UV);
VertexKick<prim, auto_flush>(adc);
VertexKick<prim, auto_flush, index_swap>(adc);
}
template <int i>
@ -2316,7 +2317,7 @@ void GSState::GrowVertexBuffer()
m_index.buff = index;
}
template <u32 prim, bool auto_flush>
template <u32 prim, bool auto_flush, bool index_swap>
__forceinline void GSState::VertexKick(u32 skip)
{
ASSERT(m_vertex.tail < m_vertex.maxcount + 3);
@ -2486,8 +2487,8 @@ __forceinline void GSState::VertexKick(u32 skip)
m_index.tail += 1;
break;
case GS_LINELIST:
buff[0] = head + 0;
buff[1] = head + 1;
buff[0] = head + (index_swap ? 1 : 0);
buff[1] = head + (index_swap ? 0 : 1);
m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2;
@ -2500,16 +2501,16 @@ __forceinline void GSState::VertexKick(u32 skip)
head = next;
m_vertex.tail = next + 2;
}
buff[0] = head + 0;
buff[1] = head + 1;
buff[0] = head + (index_swap ? 1 : 0);
buff[1] = head + (index_swap ? 0 : 1);
m_vertex.head = head + 1;
m_vertex.next = head + 2;
m_index.tail += 2;
break;
case GS_TRIANGLELIST:
buff[0] = head + 0;
buff[0] = head + (index_swap ? 2 : 0);
buff[1] = head + 1;
buff[2] = head + 2;
buff[2] = head + (index_swap ? 0 : 2);
m_vertex.head = head + 3;
m_vertex.next = head + 3;
m_index.tail += 3;
@ -2523,18 +2524,18 @@ __forceinline void GSState::VertexKick(u32 skip)
head = next;
m_vertex.tail = next + 3;
}
buff[0] = head + 0;
buff[0] = head + (index_swap ? 2 : 0);
buff[1] = head + 1;
buff[2] = head + 2;
buff[2] = head + (index_swap ? 0 : 2);
m_vertex.head = head + 1;
m_vertex.next = head + 3;
m_index.tail += 3;
break;
case GS_TRIANGLEFAN:
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
buff[0] = head + 0;
buff[0] = index_swap ? (tail - 1) : (head + 0);
buff[1] = tail - 2;
buff[2] = tail - 1;
buff[2] = index_swap ? (head + 0) : (tail - 1);
m_vertex.next = tail;
m_index.tail += 3;
break;

View File

@ -56,8 +56,8 @@ class GSState : public GSAlignedClass<32>
void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r);
template<u32 prim, u32 adc, bool auto_flush> void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
template<u32 prim, u32 adc, bool auto_flush> void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
template<u32 prim, u32 adc, bool auto_flush, bool index_swap> void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
template<u32 prim, u32 adc, bool auto_flush, bool index_swap> void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r);
@ -73,8 +73,8 @@ class GSState : public GSAlignedClass<32>
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZF2[8];
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZ2[8];
template<u32 prim, bool auto_flush> void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size);
template<u32 prim, bool auto_flush> void GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size);
template<u32 prim, bool auto_flush, bool index_swap> void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size);
template<u32 prim, bool auto_flush, bool index_swap> void GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size);
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, u32 size);
template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
@ -86,8 +86,8 @@ class GSState : public GSAlignedClass<32>
void GIFRegHandlerST(const GIFReg* RESTRICT r);
void GIFRegHandlerUV(const GIFReg* RESTRICT r);
void GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r);
template<u32 prim, u32 adc, bool auto_flush> void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
template<u32 prim, u32 adc, bool auto_flush> void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
template<u32 prim, u32 adc, bool auto_flush, bool index_swap> void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
template<u32 prim, u32 adc, bool auto_flush, bool index_swap> void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX0(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r);
void GIFRegHandlerFOG(const GIFReg* RESTRICT r);
@ -120,6 +120,9 @@ class GSState : public GSAlignedClass<32>
void GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r);
void GIFRegHandlerHWREG(const GIFReg* RESTRICT r);
template<bool auto_flush, bool index_swap>
void SetPrimHandlers();
int m_version;
int m_sssize;
@ -185,7 +188,7 @@ protected:
void GrowVertexBuffer();
template <u32 prim, bool auto_flush>
template <u32 prim, bool auto_flush, bool index_swap>
void VertexKick(u32 skip);
// following functions need m_vt to be initialized

View File

@ -164,7 +164,8 @@ struct alignas(16) GSHWDrawConfig
{
u8 fst : 1;
u8 tme : 1;
u8 _free : 6;
u8 iip : 1;
u8 _free : 1;
};
u8 key;
};
@ -494,6 +495,7 @@ public:
bool geometry_shader : 1; ///< Supports geometry shader
bool image_load_store : 1; ///< Supports atomic min and max on images (for use with prim tracking destination alpha algorithm)
bool texture_barrier : 1; ///< Supports sampling rt and hopefully texture barrier
bool provoking_vertex_last: 1; ///< Supports using the last vertex in a primitive as the value for flat shading.
FeatureSupport()
{
memset(this, 0, sizeof(*this));

View File

@ -67,6 +67,9 @@ bool GSRenderer::CreateDevice(GSDevice* dev, const WindowInfo& wi)
m_dev = dev;
m_dev->SetVSync(m_vsync);
// reset handlers to pick up index swap if needed
ResetHandlers();
return true;
}

View File

@ -39,6 +39,7 @@ GSDevice11::GSDevice11()
m_features.geometry_shader = true;
m_features.image_load_store = false;
m_features.texture_barrier = false;
m_features.provoking_vertex_last = false;
}
bool GSDevice11::SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode)
@ -1471,7 +1472,6 @@ static void preprocessSel(GSDevice11::PSSelector& sel)
ASSERT(sel.date == 0); // In-shader destination alpha not supported and shouldn't be sent
ASSERT(sel.write_rg == 0); // Not supported, shouldn't be sent
ASSERT(sel.tex_is_fb == 0); // Not supported, shouldn't be sent
sel.iip = 0; // Handled in GS, not PS in DX11
sel.automatic_lod = 0; // Not currently supported in DX11
sel.manual_lod = 0; // Not currently supported in DX11
}

View File

@ -88,6 +88,7 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer*
sm.AddMacro("VS_TME", sel.tme);
sm.AddMacro("VS_FST", sel.fst);
sm.AddMacro("VS_IIP", sel.iip);
D3D11_INPUT_ELEMENT_DESC layout[] =
{
@ -120,9 +121,8 @@ void GSDevice11::SetupGS(GSSelector sel)
{
wil::com_ptr_nothrow<ID3D11GeometryShader> gs;
const bool flat_shading_needs_gs = sel.topology == GSHWDrawConfig::GSTopology::Line || sel.topology == GSHWDrawConfig::GSTopology::Triangle;
// Geometry shader is disabled if sprite conversion is done on the cpu (sel.cpu_sprite).
if (sel.expand || (sel.iip == 0 && flat_shading_needs_gs))
if (sel.expand)
{
const auto i = std::as_const(m_gs).find(sel.key);
@ -165,6 +165,7 @@ void GSDevice11::SetupPS(PSSelector sel, const GSHWDrawConfig::PSConstantBuffer*
sm.AddMacro("PS_TCC", sel.tcc);
sm.AddMacro("PS_ATST", sel.atst);
sm.AddMacro("PS_FOG", sel.fog);
sm.AddMacro("PS_IIP", sel.iip);
sm.AddMacro("PS_CLR1", sel.clr1);
sm.AddMacro("PS_FBA", sel.fba);
sm.AddMacro("PS_FBMASK", sel.fbmask);

View File

@ -384,13 +384,20 @@ void GSRendererHW::Lines2Sprites()
if (m_vertex.next >= 2)
{
size_t count = m_vertex.next;
const size_t count = m_vertex.next;
int i = (int)count * 2 - 4;
GSVertex* s = &m_vertex.buff[count - 2];
GSVertex* q = &m_vertex.buff[count * 2 - 4];
u32* RESTRICT index = &m_index.buff[count * 3 - 6];
// Per-quad index offsets for the two triangles (0,1,2) and (1,2,3).
// Each array has 8 slots but only 6 initialisers; the last two are zero padding.
// NOTE(review): the "normal" and "swapped" tables are currently identical, so
// the index_swap selection below yields the same offsets either way - confirm
// whether the swapped table was meant to use a different vertex order.
alignas(16) static constexpr std::array<int, 8> tri_normal_indices = {{0, 1, 2, 1, 2, 3}};
alignas(16) static constexpr std::array<int, 8> tri_swapped_indices = {{0, 1, 2, 1, 2, 3}};
// Use the swapped table when the device does not report provoking_vertex_last.
const bool index_swap = !m_dev->Features().provoking_vertex_last;
const int* tri_indices = index_swap ? tri_swapped_indices.data() : tri_normal_indices.data();
// indices_low is loaded from offset 0 of the table, indices_high from offset 4
// (presumably elements 0-3 and 4-5 respectively - verify against the
// GSVector4i load/loadl helpers).
const GSVector4i indices_low(GSVector4i::load<true>(tri_indices));
const GSVector4i indices_high(GSVector4i::loadl(tri_indices + 4));
for (; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
{
GSVertex v0 = s[0];
@ -434,12 +441,9 @@ void GSRendererHW::Lines2Sprites()
q[1] = v0;
q[2] = v1;
index[0] = i + 0;
index[1] = i + 1;
index[2] = i + 2;
index[3] = i + 1;
index[4] = i + 2;
index[5] = i + 3;
const GSVector4i i_splat(i);
GSVector4i::store<false>(index, i_splat + indices_low);
GSVector4i::storel(index + 4, i_splat + indices_high);
}
m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;

View File

@ -1367,8 +1367,9 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// END of FIXME
// GS_SPRITE_CLASS are already flat (either by CPU or the GS)
m_conf.ps.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 1 : PRIM->IIP;
m_conf.ps.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 0 : PRIM->IIP;
m_conf.gs.iip = m_conf.ps.iip;
m_conf.vs.iip = m_conf.ps.iip;
if (DATE_GL45)
{

View File

@ -254,6 +254,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
m_features.geometry_shader = GLLoader::found_geometry_shader;
m_features.image_load_store = GLLoader::found_GL_ARB_shader_image_load_store && GLLoader::found_GL_ARB_clear_texture;
m_features.texture_barrier = true;
m_features.provoking_vertex_last = true;
{
auto shader = Host::ReadResourceFileToString("shaders/opengl/common_header.glsl");
@ -989,7 +990,8 @@ std::string GSDeviceOGL::GetVSSource(VSSelector sel)
Console.WriteLn("Compiling new vertex shader with selector 0x%" PRIX64, sel.key);
#endif
std::string macro = format("#define VS_INT_FST %d\n", sel.int_fst);
std::string macro = format("#define VS_INT_FST %d\n", sel.int_fst)
+ format("#define VS_IIP %d\n", sel.iip);
std::string src = GenGlslHeader("vs_main", GL_VERTEX_SHADER, macro);
src += m_shader_common_header;
@ -1004,7 +1006,8 @@ std::string GSDeviceOGL::GetGSSource(GSSelector sel)
#endif
std::string macro = format("#define GS_POINT %d\n", sel.point)
+ format("#define GS_LINE %d\n", sel.line);
+ format("#define GS_LINE %d\n", sel.line)
+ format("#define GS_IIP %d\n", sel.iip);
std::string src = GenGlslHeader("gs_main", GL_GEOMETRY_SHADER, macro);
src += m_shader_common_header;
@ -1780,6 +1783,7 @@ static GSDeviceOGL::VSSelector convertSel(const GSHWDrawConfig::VSSelector sel)
{
GSDeviceOGL::VSSelector out;
out.int_fst = !sel.fst;
out.iip = sel.iip;
return out;
}
@ -1876,6 +1880,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
psel.gs.key = 0;
if (config.gs.expand)
{
psel.gs.iip = config.gs.iip;
switch (config.gs.topology)
{
case GSHWDrawConfig::GSTopology::Point: psel.gs.point = 1; break;

View File

@ -132,7 +132,8 @@ public:
struct
{
u32 int_fst : 1;
u32 _free : 31;
u32 iip : 1;
u32 _free : 30;
};
u32 key;
@ -159,8 +160,9 @@ public:
u32 sprite : 1;
u32 point : 1;
u32 line : 1;
u32 iip : 1;
u32 _free : 29;
u32 _free : 28;
};
u32 key;