GS/HW: VS expand instead of GS for DX/GL/Vulkan

This commit is contained in:
Stenzek 2023-04-07 17:55:55 +10:00 committed by refractionpcsx2
parent 6877abb2ec
commit 7f7dd60587
51 changed files with 1151 additions and 1301 deletions

View File

@ -1,5 +1,3 @@
#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency
#define FMT_32 0
#define FMT_24 1
#define FMT_16 2
@ -113,6 +111,8 @@ struct PS_INPUT
#endif
};
#ifdef PIXEL_SHADER
struct PS_OUTPUT
{
#if !PS_NO_COLOR
@ -136,21 +136,6 @@ Texture2D<float4> RtTexture : register(t2);
Texture2D<float> PrimMinTexture : register(t3);
SamplerState TextureSampler : register(s0);
// Constant buffer 0: per-draw vertex transform parameters.
// DX12 binds it explicitly to register b0; otherwise the register is left to the compiler.
#ifdef DX12
cbuffer cb0 : register(b0)
#else
cbuffer cb0
#endif
{
// Scale/offset pair for vertex positions.
float2 VertexScale;
float2 VertexOffset;
// Scale/offset pair for texture coordinates.
float2 TextureScale;
float2 TextureOffset;
// Width/height added to a point's position when it is expanded into a quad.
float2 PointSize;
// Upper bound used when clamping vertex depth.
uint MaxDepth;
// Padding slot; not read by the shader code visible here.
uint pad_cb0;
};
#ifdef DX12
cbuffer cb1 : register(b1)
#else
@ -1062,10 +1047,29 @@ PS_OUTPUT ps_main(PS_INPUT input)
return output;
}
#endif // PIXEL_SHADER
//////////////////////////////////////////////////////////////////////
// Vertex Shader
//////////////////////////////////////////////////////////////////////
#ifdef VERTEX_SHADER
// Constant buffer 0: per-draw vertex transform parameters, visible to the vertex shader only.
// DX12 binds it explicitly to register b0; otherwise the register is left to the compiler.
#ifdef DX12
cbuffer cb0 : register(b0)
#else
cbuffer cb0
#endif
{
// Scale/offset pair for vertex positions.
float2 VertexScale;
float2 VertexOffset;
// Scale/offset pair for texture coordinates.
float2 TextureScale;
float2 TextureOffset;
// Point expansion adds this to the corner positions; line expansion scales the line normal by it.
float2 PointSize;
// Upper bound used when clamping vertex depth.
uint MaxDepth;
// Added to the vertex index in load_vertex() on the non-DX12 path.
uint BaseVertex; // Only used in DX11.
};
VS_OUTPUT vs_main(VS_INPUT input)
{
// Clamp to max depth, gs doesn't wrap
@ -1118,156 +1122,101 @@ VS_OUTPUT vs_main(VS_INPUT input)
return output;
}
//////////////////////////////////////////////////////////////////////
// Geometry Shader
//////////////////////////////////////////////////////////////////////
#if VS_EXPAND != 0
#if GS_FORWARD_PRIMID
#define PRIMID_IN , uint primid : SV_PrimitiveID
#define VS2PS(x) vs2ps_impl(x, primid)
PS_INPUT vs2ps_impl(VS_OUTPUT vs, uint primid)
struct VS_RAW_INPUT
{
PS_INPUT o;
o.p = vs.p;
o.t = vs.t;
o.ti = vs.ti;
o.c = vs.c;
o.primid = primid;
return o;
}
float2 ST;
uint RGBA;
float Q;
uint XY;
uint Z;
uint UV;
uint FOG;
};
StructuredBuffer<VS_RAW_INPUT> vertices : register(t0);
VS_INPUT load_vertex(uint index)
{
#ifdef DX12
VS_RAW_INPUT raw = vertices.Load(index);
#else
#define PRIMID_IN
#define VS2PS(x) vs2ps_impl(x)
PS_INPUT vs2ps_impl(VS_OUTPUT vs)
{
PS_INPUT o;
o.p = vs.p;
o.t = vs.t;
o.ti = vs.ti;
o.c = vs.c;
return o;
}
VS_RAW_INPUT raw = vertices.Load(BaseVertex + index);
#endif
#if GS_PRIM == 0
[maxvertexcount(6)]
void gs_main(point VS_OUTPUT input[1], inout TriangleStream<PS_INPUT> stream PRIMID_IN)
{
// Transform a point to a NxN sprite
PS_INPUT Point = VS2PS(input[0]);
// Get new position
float4 lt_p = input[0].p;
float4 rb_p = input[0].p + float4(PointSize.x, PointSize.y, 0.0f, 0.0f);
float4 lb_p = rb_p;
float4 rt_p = rb_p;
lb_p.x = lt_p.x;
rt_p.y = lt_p.y;
// Triangle 1
Point.p = lt_p;
stream.Append(Point);
Point.p = lb_p;
stream.Append(Point);
Point.p = rt_p;
stream.Append(Point);
// Triangle 2
Point.p = lb_p;
stream.Append(Point);
Point.p = rt_p;
stream.Append(Point);
Point.p = rb_p;
stream.Append(Point);
VS_INPUT vert;
vert.st = raw.ST;
vert.c = uint4(raw.RGBA & 0xFFu, (raw.RGBA >> 8) & 0xFFu, (raw.RGBA >> 16) & 0xFFu, raw.RGBA >> 24);
vert.q = raw.Q;
vert.p = uint2(raw.XY & 0xFFFFu, raw.XY >> 16);
vert.z = raw.Z;
vert.uv = uint2(raw.UV & 0xFFFFu, raw.UV >> 16);
vert.f = float4(float(raw.FOG & 0xFFu), float((raw.FOG >> 8) & 0xFFu), float((raw.FOG >> 16) & 0xFFu), float(raw.FOG >> 24)) / 255.0f;
return vert;
}
#elif GS_PRIM == 1
[maxvertexcount(6)]
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<PS_INPUT> stream PRIMID_IN)
VS_OUTPUT vs_main_expand(uint vid : SV_VertexID)
{
// Transform a line to a thick line-sprite
PS_INPUT left = VS2PS(input[0]);
PS_INPUT right = VS2PS(input[1]);
float2 lt_p = input[0].p.xy;
float2 rt_p = input[1].p.xy;
#if VS_EXPAND == 1 // Point
// Potentially there is faster math
float2 line_vector = normalize(rt_p.xy - lt_p.xy);
VS_OUTPUT vtx = vs_main(load_vertex(vid >> 2));
vtx.p.x += ((vid & 1u) != 0u) ? PointSize.x : 0.0f;
vtx.p.y += ((vid & 2u) != 0u) ? PointSize.y : 0.0f;
return vtx;
#elif VS_EXPAND == 2 // Line
uint vid_base = vid >> 2;
bool is_bottom = vid & 2;
bool is_right = vid & 1;
// All lines will be a pair of vertices next to each other
// Since DirectX uses provoking vertex first, the bottom point will be the lower of the two
uint vid_other = is_bottom ? vid_base + 1 : vid_base - 1;
VS_OUTPUT vtx = vs_main(load_vertex(vid_base));
VS_OUTPUT other = vs_main(load_vertex(vid_other));
float2 line_vector = normalize(vtx.p.xy - other.p.xy);
float2 line_normal = float2(line_vector.y, -line_vector.x);
float2 line_width = (line_normal * PointSize) / 2;
// line_normal is inverted for bottom point
float2 offset = (is_bottom ^ is_right) ? line_width : -line_width;
vtx.p.xy += offset;
lt_p -= line_width;
rt_p -= line_width;
float2 lb_p = input[0].p.xy + line_width;
float2 rb_p = input[1].p.xy + line_width;
// Lines will be run as (0 1 2) (1 2 3)
// This means that both triangles will have a point based off the top line point as their first point
// So we don't have to do anything for !IIP
#if GS_IIP == 0
left.c = right.c;
#endif
return vtx;
// Triangle 1
left.p.xy = lt_p;
stream.Append(left);
#elif VS_EXPAND == 3 // Sprite
left.p.xy = lb_p;
stream.Append(left);
// Sprite points are always in pairs
uint vid_base = vid >> 1;
uint vid_lt = vid_base & ~1u;
uint vid_rb = vid_base | 1u;
right.p.xy = rt_p;
stream.Append(right);
stream.RestartStrip();
VS_OUTPUT lt = vs_main(load_vertex(vid_lt));
VS_OUTPUT rb = vs_main(load_vertex(vid_rb));
VS_OUTPUT vtx = rb;
// Triangle 2
left.p.xy = lb_p;
stream.Append(left);
bool is_right = ((vid & 1u) != 0u);
vtx.p.x = is_right ? lt.p.x : vtx.p.x;
vtx.t.x = is_right ? lt.t.x : vtx.t.x;
vtx.ti.xz = is_right ? lt.ti.xz : vtx.ti.xz;
right.p.xy = rt_p;
stream.Append(right);
bool is_bottom = ((vid & 2u) != 0u);
vtx.p.y = is_bottom ? lt.p.y : vtx.p.y;
vtx.t.y = is_bottom ? lt.t.y : vtx.t.y;
vtx.ti.yw = is_bottom ? lt.ti.yw : vtx.ti.yw;
right.p.xy = rb_p;
stream.Append(right);
stream.RestartStrip();
}
#elif GS_PRIM == 3
[maxvertexcount(4)]
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<PS_INPUT> stream PRIMID_IN)
{
PS_INPUT lt = VS2PS(input[0]);
PS_INPUT rb = VS2PS(input[1]);
// flat depth
lt.p.z = rb.p.z;
// flat fog and texture perspective
lt.t.zw = rb.t.zw;
// flat color
lt.c = rb.c;
// Swap texture and position coordinate
PS_INPUT lb = rb;
lb.p.x = lt.p.x;
lb.t.x = lt.t.x;
lb.ti.x = lt.ti.x;
lb.ti.z = lt.ti.z;
PS_INPUT rt = rb;
rt.p.y = lt.p.y;
rt.t.y = lt.t.y;
rt.ti.y = lt.ti.y;
rt.ti.w = lt.ti.w;
stream.Append(lt);
stream.Append(lb);
stream.Append(rt);
stream.Append(rb);
}
return vtx;
#endif
#endif
}
#endif // VS_EXPAND
#endif // VERTEX_SHADER

View File

@ -14,13 +14,6 @@ layout(std140, binding = 1) uniform cb20
};
#ifdef VERTEX_SHADER
layout(location = 0) in vec2 i_st;
layout(location = 2) in vec4 i_c;
layout(location = 3) in float i_q;
layout(location = 4) in uvec2 i_p;
layout(location = 5) in uint i_z;
layout(location = 6) in uvec2 i_uv;
layout(location = 7) in vec4 i_f;
out SHADER
{
@ -35,6 +28,16 @@ out SHADER
const float exp_min32 = exp2(-32.0f);
#if VS_EXPAND == 0
layout(location = 0) in vec2 i_st;
layout(location = 2) in vec4 i_c;
layout(location = 3) in float i_q;
layout(location = 4) in uvec2 i_p;
layout(location = 5) in uint i_z;
layout(location = 6) in uvec2 i_uv;
layout(location = 7) in vec4 i_f;
void texture_coord()
{
vec2 uv = vec2(i_uv) - TextureOffset;
@ -91,163 +94,145 @@ void vs_main()
#endif
}
#endif
#else // VS_EXPAND
#ifdef GEOMETRY_SHADER
in SHADER
struct RawVertex
{
vec4 t_float;
vec4 t_int;
#if GS_IIP != 0
vec4 c;
#else
flat vec4 c;
#endif
} GSin[];
vec2 ST;
uint RGBA;
float Q;
uint XY;
uint Z;
uint UV;
uint FOG;
};
out SHADER
{
vec4 t_float;
vec4 t_int;
#if GS_IIP != 0
vec4 c;
#else
flat vec4 c;
#endif
} GSout;
layout(std140, binding = 2) readonly buffer VertexBuffer {
RawVertex vertex_buffer[];
};
struct vertex
struct ProcessedVertex
{
vec4 p;
vec4 t_float;
vec4 t_int;
vec4 c;
};
void out_vertex(in vec4 position, in vertex v)
ProcessedVertex load_vertex(uint index)
{
GSout.t_float = v.t_float;
GSout.t_int = v.t_int;
// Flat output
#if GS_PRIM == 0
GSout.c = GSin[0].c;
#if defined(GL_ARB_shader_draw_parameters) && GL_ARB_shader_draw_parameters
RawVertex rvtx = vertex_buffer[index + gl_BaseVertexARB];
#else
GSout.c = GSin[1].c;
RawVertex rvtx = vertex_buffer[index];
#endif
gl_Position = position;
gl_PrimitiveID = gl_PrimitiveIDIn;
EmitVertex();
vec2 i_st = rvtx.ST;
vec4 i_c = vec4(uvec4(rvtx.RGBA & 0xFFu, (rvtx.RGBA >> 8) & 0xFFu, (rvtx.RGBA >> 16) & 0xFFu, rvtx.RGBA >> 24));
float i_q = rvtx.Q;
uvec2 i_p = uvec2(rvtx.XY & 0xFFFFu, rvtx.XY >> 16);
uint i_z = rvtx.Z;
uvec2 i_uv = uvec2(rvtx.UV & 0xFFFFu, rvtx.UV >> 16);
vec4 i_f = unpackUnorm4x8(rvtx.FOG);
ProcessedVertex vtx;
uint z = min(i_z, MaxDepth);
vtx.p.xy = vec2(i_p) - vec2(0.05f, 0.05f);
vtx.p.xy = vtx.p.xy * VertexScale - VertexOffset;
vtx.p.w = 1.0f;
#if HAS_CLIP_CONTROL
vtx.p.z = float(z) * exp_min32;
#else
vtx.p.z = min(float(z) * exp2(-23.0f), 2.0f) - 1.0f;
#endif
vec2 uv = vec2(i_uv) - TextureOffset;
vec2 st = i_st - TextureOffset;
vtx.t_float.xy = st;
vtx.t_float.w = i_q;
vtx.t_int.xy = uv * TextureScale;
#if VS_FST
vtx.t_int.zw = uv;
#else
vtx.t_int.zw = st / TextureScale;
#endif
vtx.c = i_c;
vtx.t_float.z = i_f.x;
return vtx;
}
#if GS_PRIM == 0
layout(points) in;
void main()
{
ProcessedVertex vtx;
#if defined(GL_ARB_shader_draw_parameters) && GL_ARB_shader_draw_parameters
uint vid = uint(gl_VertexID - gl_BaseVertexARB);
#else
layout(lines) in;
uint vid = uint(gl_VertexID);
#endif
layout(triangle_strip, max_vertices = 4) out;
#if GS_PRIM == 0
#if VS_EXPAND == 1 // Point
void gs_main()
{
// Transform a point to a NxN sprite
vertex point = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c);
vtx = load_vertex(vid >> 2);
// Get new position
vec4 lt_p = gl_in[0].gl_Position;
vec4 rb_p = gl_in[0].gl_Position + vec4(PointSize.x, PointSize.y, 0.0f, 0.0f);
vec4 lb_p = rb_p;
vec4 rt_p = rb_p;
lb_p.x = lt_p.x;
rt_p.y = lt_p.y;
vtx.p.x += ((vid & 1u) != 0u) ? PointSize.x : 0.0f;
vtx.p.y += ((vid & 2u) != 0u) ? PointSize.y : 0.0f;
out_vertex(lt_p, point);
#elif VS_EXPAND == 2 // Line
out_vertex(lb_p, point);
uint vid_base = vid >> 2;
bool is_bottom = (vid & 2u) != 0u;
bool is_right = (vid & 1u) != 0u;
uint vid_other = is_bottom ? vid_base - 1 : vid_base + 1;
vtx = load_vertex(vid_base);
ProcessedVertex other = load_vertex(vid_other);
out_vertex(rt_p, point);
out_vertex(rb_p, point);
EndPrimitive();
}
#elif GS_PRIM == 1
void gs_main()
{
// Transform a line to a thick line-sprite
vertex left = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c);
vertex right = vertex(GSin[1].t_float, GSin[1].t_int, GSin[1].c);
vec4 lt_p = gl_in[0].gl_Position;
vec4 rt_p = gl_in[1].gl_Position;
// Potentially there is faster math
vec2 line_vector = normalize(rt_p.xy - lt_p.xy);
vec2 line_vector = normalize(vtx.p.xy - other.p.xy);
vec2 line_normal = vec2(line_vector.y, -line_vector.x);
vec2 line_width = (line_normal * PointSize) / 2.0f;
vec2 line_width = (line_normal * PointSize) / 2;
// line_normal is inverted for bottom point
vec2 offset = ((uint(is_bottom) ^ uint(is_right)) != 0u) ? line_width : -line_width;
vtx.p.xy += offset;
lt_p.xy -= line_width;
rt_p.xy -= line_width;
vec4 lb_p = gl_in[0].gl_Position + vec4(line_width, 0.0f, 0.0f);
vec4 rb_p = gl_in[1].gl_Position + vec4(line_width, 0.0f, 0.0f);
// Lines will be run as (0 1 2) (1 2 3)
// This means that both triangles will have a point based off the top line point as their first point
// So we don't have to do anything for !IIP
out_vertex(lt_p, left);
#elif VS_EXPAND == 3 // Sprite
out_vertex(lb_p, left);
// Sprite points are always in pairs
uint vid_base = vid >> 1;
uint vid_lt = vid_base & ~1u;
uint vid_rb = vid_base | 1u;
out_vertex(rt_p, right);
ProcessedVertex lt = load_vertex(vid_lt);
ProcessedVertex rb = load_vertex(vid_rb);
vtx = rb;
out_vertex(rb_p, right);
bool is_right = ((vid & 1u) != 0u);
vtx.p.x = is_right ? lt.p.x : vtx.p.x;
vtx.t_float.x = is_right ? lt.t_float.x : vtx.t_float.x;
vtx.t_int.xz = is_right ? lt.t_int.xz : vtx.t_int.xz;
EndPrimitive();
}
#else // GS_PRIM == 3
void gs_main()
{
// left top => GSin[0];
// right bottom => GSin[1];
vertex rb = vertex(GSin[1].t_float, GSin[1].t_int, GSin[1].c);
vertex lt = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c);
vec4 rb_p = gl_in[1].gl_Position;
vec4 lb_p = rb_p;
vec4 rt_p = rb_p;
vec4 lt_p = gl_in[0].gl_Position;
// flat depth
lt_p.z = rb_p.z;
// flat fog and texture perspective
lt.t_float.zw = rb.t_float.zw;
// flat color
lt.c = rb.c;
// Swap texture and position coordinate
vertex lb = rb;
lb.t_float.x = lt.t_float.x;
lb.t_int.x = lt.t_int.x;
lb.t_int.z = lt.t_int.z;
lb_p.x = lt_p.x;
vertex rt = rb;
rt_p.y = lt_p.y;
rt.t_float.y = lt.t_float.y;
rt.t_int.y = lt.t_int.y;
rt.t_int.w = lt.t_int.w;
out_vertex(lt_p, lt);
out_vertex(lb_p, lb);
out_vertex(rt_p, rt);
out_vertex(rb_p, rb);
EndPrimitive();
}
bool is_bottom = ((vid & 2u) != 0u);
vtx.p.y = is_bottom ? lt.p.y : vtx.p.y;
vtx.t_float.y = is_bottom ? lt.t_float.y : vtx.t_float.y;
vtx.t_int.yw = is_bottom ? lt.t_int.yw : vtx.t_int.yw;
#endif
#endif
gl_Position = vtx.p;
VSout.t_float = vtx.t_float;
VSout.t_int = vtx.t_int;
VSout.c = vtx.c;
}
#endif // VS_EXPAND
#endif // VERTEX_SHADER

View File

@ -2,7 +2,7 @@
// Vertex Shader
//////////////////////////////////////////////////////////////////////
#if defined(VERTEX_SHADER) || defined(GEOMETRY_SHADER)
#if defined(VERTEX_SHADER)
layout(std140, set = 0, binding = 0) uniform cb0
{
@ -15,18 +15,6 @@ layout(std140, set = 0, binding = 0) uniform cb0
uint pad_cb0;
};
#endif
#ifdef VERTEX_SHADER
layout(location = 0) in vec2 a_st;
layout(location = 1) in uvec4 a_c;
layout(location = 2) in float a_q;
layout(location = 3) in uvec2 a_p;
layout(location = 4) in uint a_z;
layout(location = 5) in uvec2 a_uv;
layout(location = 6) in vec4 a_f;
layout(location = 0) out VSOutput
{
vec4 t;
@ -39,17 +27,27 @@ layout(location = 0) out VSOutput
#endif
} vsOut;
#if VS_EXPAND == 0
layout(location = 0) in vec2 a_st;
layout(location = 1) in uvec4 a_c;
layout(location = 2) in float a_q;
layout(location = 3) in uvec2 a_p;
layout(location = 4) in uint a_z;
layout(location = 5) in uvec2 a_uv;
layout(location = 6) in vec4 a_f;
void main()
{
// Clamp to max depth, gs doesn't wrap
float z = min(a_z, MaxDepth);
uint z = min(a_z, MaxDepth);
// pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
// example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty
// input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel
// example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133
gl_Position = vec4(a_p, z, 1.0f) - vec4(0.05f, 0.05f, 0, 0);
gl_Position = vec4(a_p, float(z), 1.0f) - vec4(0.05f, 0.05f, 0, 0);
gl_Position.xy = gl_Position.xy * vec2(VertexScale.x, -VertexScale.y) - vec2(VertexOffset.x, -VertexOffset.y);
gl_Position.z *= exp2(-32.0f); // integer->float depth
gl_Position.y = -gl_Position.y;
@ -81,214 +79,149 @@ void main()
gl_PointSize = PointSize.x;
#endif
vsOut.c = a_c;
vsOut.c = vec4(a_c);
vsOut.t.z = a_f.r;
}
#endif
#else // VS_EXPAND
#ifdef GEOMETRY_SHADER
layout(location = 0) in VSOutput
// One vertex exactly as stored in the vertex storage buffer; load_vertex() unpacks the
// packed fields before running the regular vertex transform.
struct RawVertex
{
// Texture coordinate pair, passed through as a_st.
vec2 ST;
// Four 8-bit colour channels packed into one word, first channel in the lowest byte.
uint RGBA;
// Passed through as a_q and written to t.w when VS_TME is set.
float Q;
// Two 16-bit position values: X in the low half, Y in the high half.
uint XY;
// Integer depth; clamped against MaxDepth before conversion to float.
uint Z;
// Two 16-bit texel coordinates: U in the low half, V in the high half.
uint UV;
// Decoded with unpackUnorm4x8(); only the first component (lowest byte) is consumed.
uint FOG;
};
layout(std140, set = 0, binding = 2) readonly buffer VertexBuffer {
RawVertex vertex_buffer[];
};
struct ProcessedVertex
{
vec4 p;
vec4 t;
vec4 ti;
#if GS_IIP != 0
vec4 c;
vec4 c;
};
ProcessedVertex load_vertex(uint index)
{
RawVertex rvtx = vertex_buffer[gl_BaseVertexARB + index];
vec2 a_st = rvtx.ST;
uvec4 a_c = uvec4(rvtx.RGBA & 0xFFu, (rvtx.RGBA >> 8) & 0xFFu, (rvtx.RGBA >> 16) & 0xFFu, rvtx.RGBA >> 24);
float a_q = rvtx.Q;
uvec2 a_p = uvec2(rvtx.XY & 0xFFFFu, rvtx.XY >> 16);
uint a_z = rvtx.Z;
uvec2 a_uv = uvec2(rvtx.UV & 0xFFFFu, rvtx.UV >> 16);
vec4 a_f = unpackUnorm4x8(rvtx.FOG);
ProcessedVertex vtx;
uint z = min(a_z, MaxDepth);
vtx.p = vec4(a_p, float(z), 1.0f) - vec4(0.05f, 0.05f, 0, 0);
vtx.p.xy = vtx.p.xy * vec2(VertexScale.x, -VertexScale.y) - vec2(VertexOffset.x, -VertexOffset.y);
vtx.p.z *= exp2(-32.0f); // integer->float depth
vtx.p.y = -vtx.p.y;
#if VS_TME
vec2 uv = a_uv - TextureOffset;
vec2 st = a_st - TextureOffset;
vtx.ti.xy = uv * TextureScale;
#if VS_FST
vtx.ti.zw = uv;
#else
vtx.ti.zw = st / TextureScale;
#endif
vtx.t.xy = st;
vtx.t.w = a_q;
#else
flat vec4 c;
vtx.t = vec4(0.0f, 0.0f, 0.0f, 1.0f);
vtx.ti = vec4(0.0f);
#endif
} gsIn[];
layout(location = 0) out GSOutput
{
vec4 t;
vec4 ti;
#if GS_IIP != 0
vec4 c;
#else
flat vec4 c;
#endif
} gsOut;
vtx.c = a_c;
vtx.t.z = a_f.r;
void WriteVertex(vec4 pos, vec4 t, vec4 ti, vec4 c)
{
#if GS_FORWARD_PRIMID
gl_PrimitiveID = gl_PrimitiveIDIn;
#endif
gl_Position = pos;
gsOut.t = t;
gsOut.ti = ti;
gsOut.c = c;
EmitVertex();
return vtx;
}
//////////////////////////////////////////////////////////////////////
// Geometry Shader
//////////////////////////////////////////////////////////////////////
#if GS_PRIM == 0 && GS_POINT == 0
layout(points) in;
layout(points, max_vertices = 1) out;
void main()
{
WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
EndPrimitive();
}
#elif GS_PRIM == 0 && GS_POINT == 1
layout(points) in;
layout(triangle_strip, max_vertices = 4) out;
void main()
{
// Transform a point to a NxN sprite
ProcessedVertex vtx;
uint vid = uint(gl_VertexIndex - gl_BaseVertexARB);
// Get new position
vec4 lt_p = gl_in[0].gl_Position;
vec4 rb_p = gl_in[0].gl_Position + vec4(PointSize.x, PointSize.y, 0.0f, 0.0f);
vec4 lb_p = rb_p;
vec4 rt_p = rb_p;
lb_p.x = lt_p.x;
rt_p.y = lt_p.y;
#if VS_EXPAND == 1 // Point
WriteVertex(lt_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
WriteVertex(lb_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
WriteVertex(rt_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
WriteVertex(rb_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
vtx = load_vertex(vid >> 2);
EndPrimitive();
}
vtx.p.x += ((vid & 1u) != 0u) ? PointSize.x : 0.0f;
vtx.p.y += ((vid & 2u) != 0u) ? PointSize.y : 0.0f;
#elif GS_PRIM == 1 && GS_LINE == 0
#elif VS_EXPAND == 2 // Line
layout(lines) in;
layout(line_strip, max_vertices = 2) out;
uint vid_base = vid >> 2;
void main()
{
#if GS_IIP == 0
WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[1].c);
WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[1].c);
bool is_bottom = (vid & 2u) != 0u;
bool is_right = (vid & 1u) != 0u;
#ifdef VS_PROVOKING_VERTEX_LAST
uint vid_other = is_bottom ? vid_base - 1 : vid_base + 1;
#else
WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[1].c);
uint vid_other = is_bottom ? vid_base + 1 : vid_base - 1;
#endif
EndPrimitive();
}
#elif GS_PRIM == 1 && GS_LINE == 1
vtx = load_vertex(vid_base);
ProcessedVertex other = load_vertex(vid_other);
layout(lines) in;
layout(triangle_strip, max_vertices = 4) out;
void main()
{
// Transform a line to a thick line-sprite
vec4 left_t = gsIn[0].t;
vec4 left_ti = gsIn[0].ti;
vec4 left_c = gsIn[0].c;
vec4 right_t = gsIn[1].t;
vec4 right_ti = gsIn[1].ti;
vec4 right_c = gsIn[1].c;
vec4 lt_p = gl_in[0].gl_Position;
vec4 rt_p = gl_in[1].gl_Position;
// Potentially there is faster math
vec2 line_vector = normalize(rt_p.xy - lt_p.xy);
vec2 line_vector = normalize(vtx.p.xy - other.p.xy);
vec2 line_normal = vec2(line_vector.y, -line_vector.x);
vec2 line_width = (line_normal * PointSize) / 2.0;
vec2 line_width = (line_normal * PointSize) / 2;
// line_normal is inverted for bottom point
vec2 offset = ((uint(is_bottom) ^ uint(is_right)) != 0u) ? line_width : -line_width;
vtx.p.xy += offset;
lt_p.xy -= line_width;
rt_p.xy -= line_width;
vec4 lb_p = gl_in[0].gl_Position + vec4(line_width, 0.0, 0.0);
vec4 rb_p = gl_in[1].gl_Position + vec4(line_width, 0.0, 0.0);
// Lines will be run as (0 1 2) (1 2 3)
// This means that both triangles will have a point based off the top line point as their first point
// So we don't have to do anything for !IIP
#if GS_IIP == 0
left_c = right_c;
#endif
#elif VS_EXPAND == 3 // Sprite
WriteVertex(lt_p, left_t, left_ti, left_c);
WriteVertex(lb_p, left_t, left_ti, left_c);
WriteVertex(rt_p, right_t, right_ti, right_c);
WriteVertex(rb_p, right_t, right_ti, right_c);
EndPrimitive();
}
// Sprite points are always in pairs
uint vid_base = vid >> 1;
uint vid_lt = vid_base & ~1u;
uint vid_rb = vid_base | 1u;
#elif GS_PRIM == 2
ProcessedVertex lt = load_vertex(vid_lt);
ProcessedVertex rb = load_vertex(vid_rb);
vtx = rb;
layout(triangles) in;
layout(triangle_strip, max_vertices = 3) out;
bool is_right = ((vid & 1u) != 0u);
vtx.p.x = is_right ? lt.p.x : vtx.p.x;
vtx.t.x = is_right ? lt.t.x : vtx.t.x;
vtx.ti.xz = is_right ? lt.ti.xz : vtx.ti.xz;
void main()
{
#if GS_IIP == 0
WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[2].c);
WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[2].c);
WriteVertex(gl_in[2].gl_Position, gsIn[2].t, gsIn[2].ti, gsIn[2].c);
#else
WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[0].c);
WriteVertex(gl_in[2].gl_Position, gsIn[2].t, gsIn[2].ti, gsIn[0].c);
#endif
EndPrimitive();
}
#elif GS_PRIM == 3
layout(lines) in;
layout(triangle_strip, max_vertices = 4) out;
void main()
{
vec4 lt_p = gl_in[0].gl_Position;
vec4 lt_t = gsIn[0].t;
vec4 lt_ti = gsIn[0].ti;
vec4 lt_c = gsIn[0].c;
vec4 rb_p = gl_in[1].gl_Position;
vec4 rb_t = gsIn[1].t;
vec4 rb_ti = gsIn[1].ti;
vec4 rb_c = gsIn[1].c;
// flat depth
lt_p.z = rb_p.z;
// flat fog and texture perspective
lt_t.zw = rb_t.zw;
// flat color
lt_c = rb_c;
// Swap texture and position coordinate
vec4 lb_p = rb_p;
vec4 lb_t = rb_t;
vec4 lb_ti = rb_ti;
vec4 lb_c = rb_c;
lb_p.x = lt_p.x;
lb_t.x = lt_t.x;
lb_ti.x = lt_ti.x;
lb_ti.z = lt_ti.z;
vec4 rt_p = rb_p;
vec4 rt_t = rb_t;
vec4 rt_ti = rb_ti;
vec4 rt_c = rb_c;
rt_p.y = lt_p.y;
rt_t.y = lt_t.y;
rt_ti.y = lt_ti.y;
rt_ti.w = lt_ti.w;
WriteVertex(lt_p, lt_t, lt_ti, lt_c);
WriteVertex(lb_p, lb_t, lb_ti, lb_c);
WriteVertex(rt_p, rt_t, rt_ti, rt_c);
WriteVertex(rb_p, rb_t, rb_ti, rb_c);
EndPrimitive();
}
bool is_bottom = ((vid & 2u) != 0u);
vtx.p.y = is_bottom ? lt.p.y : vtx.p.y;
vtx.t.y = is_bottom ? lt.t.y : vtx.t.y;
vtx.ti.yw = is_bottom ? lt.ti.yw : vtx.ti.yw;
#endif
#endif
gl_Position = vtx.p;
vsOut.t = vtx.t;
vsOut.ti = vtx.ti;
vsOut.c = vtx.c;
}
#endif // VS_EXPAND
#endif // VERTEX_SHADER
#ifdef FRAGMENT_SHADER

View File

@ -337,16 +337,6 @@ bool D3D11::ShaderCache::GetVertexShaderAndInputLayout(ID3D11Device* device,
return true;
}
// Looks up (or compiles) the geometry-stage bytecode for shader_code through GetShaderBlob and
// wraps it in an ID3D11GeometryShader. A null blob is treated as failure and yields an empty pointer.
wil::com_ptr_nothrow<ID3D11GeometryShader> D3D11::ShaderCache::GetGeometryShader(ID3D11Device* device,
	const std::string_view& shader_code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */)
{
	wil::com_ptr_nothrow<ID3DBlob> bytecode = GetShaderBlob(ShaderCompiler::Type::Geometry, shader_code, macros, entry_point);
	if (bytecode)
		return D3D11::ShaderCompiler::CreateGeometryShader(device, bytecode.get());

	return {};
}
wil::com_ptr_nothrow<ID3D11PixelShader> D3D11::ShaderCache::GetPixelShader(ID3D11Device* device,
const std::string_view& shader_code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */)
{

View File

@ -51,9 +51,6 @@ namespace D3D11
const D3D11_INPUT_ELEMENT_DESC* layout, size_t layout_size,
const std::string_view& shader_code, const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
wil::com_ptr_nothrow<ID3D11GeometryShader> GetGeometryShader(ID3D11Device* device, const std::string_view& shader_code,
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
wil::com_ptr_nothrow<ID3D11PixelShader> GetPixelShader(ID3D11Device* device, const std::string_view& shader_code,
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");

View File

@ -31,21 +31,21 @@ wil::com_ptr_nothrow<ID3DBlob> D3D11::ShaderCompiler::CompileShader(Type type, D
{
case D3D_FEATURE_LEVEL_10_0:
{
static constexpr std::array<const char*, 4> targets = {{"vs_4_0", "gs_4_0", "ps_4_0", "cs_4_0"}};
static constexpr std::array<const char*, 4> targets = {{"vs_4_0", "ps_4_0", "cs_4_0"}};
target = targets[static_cast<int>(type)];
}
break;
case D3D_FEATURE_LEVEL_10_1:
{
static constexpr std::array<const char*, 4> targets = {{"vs_4_1", "gs_4_1", "ps_4_1", "cs_4_1"}};
static constexpr std::array<const char*, 4> targets = {{"vs_4_1", "ps_4_1", "cs_4_1"}};
target = targets[static_cast<int>(type)];
}
break;
case D3D_FEATURE_LEVEL_11_0:
{
static constexpr std::array<const char*, 4> targets = {{"vs_5_0", "gs_5_0", "ps_5_0", "cs_5_0"}};
static constexpr std::array<const char*, 4> targets = {{"vs_5_0", "ps_5_0", "cs_5_0"}};
target = targets[static_cast<int>(type)];
}
break;
@ -53,7 +53,7 @@ wil::com_ptr_nothrow<ID3DBlob> D3D11::ShaderCompiler::CompileShader(Type type, D
case D3D_FEATURE_LEVEL_11_1:
default:
{
static constexpr std::array<const char*, 4> targets = {{"vs_5_1", "gs_5_1", "ps_5_1", "cs_5_1"}};
static constexpr std::array<const char*, 4> targets = {{"vs_5_1", "ps_5_1", "cs_5_1"}};
target = targets[static_cast<int>(type)];
}
break;
@ -108,16 +108,6 @@ wil::com_ptr_nothrow<ID3D11VertexShader> D3D11::ShaderCompiler::CompileAndCreate
return CreateVertexShader(device, blob.get());
}
// Compiles `code` as a geometry shader for the device's feature level and creates the shader object.
// Returns an empty pointer when CompileShader yields no blob.
wil::com_ptr_nothrow<ID3D11GeometryShader> D3D11::ShaderCompiler::CompileAndCreateGeometryShader(ID3D11Device* device, bool debug,
	const std::string_view& code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */)
{
	wil::com_ptr_nothrow<ID3DBlob> compiled = CompileShader(Type::Geometry, device->GetFeatureLevel(), debug, code, macros, entry_point);
	if (compiled)
		return CreateGeometryShader(device, compiled.get());

	return {};
}
wil::com_ptr_nothrow<ID3D11PixelShader> D3D11::ShaderCompiler::CompileAndCreatePixelShader(ID3D11Device* device, bool debug,
const std::string_view& code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */)
{
@ -157,25 +147,6 @@ wil::com_ptr_nothrow<ID3D11VertexShader> D3D11::ShaderCompiler::CreateVertexShad
const_cast<ID3DBlob*>(blob)->GetBufferSize());
}
// Creates a geometry shader object from raw bytecode.
// On failure the HRESULT is logged to the console and an empty pointer is returned.
wil::com_ptr_nothrow<ID3D11GeometryShader> D3D11::ShaderCompiler::CreateGeometryShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length)
{
	wil::com_ptr_nothrow<ID3D11GeometryShader> gs;
	const HRESULT result = device->CreateGeometryShader(bytecode, bytecode_length, nullptr, gs.put());
	if (SUCCEEDED(result))
		return gs;

	Console.Error("Failed to create geometry shader: 0x%08X", result);
	return {};
}
// Blob convenience overload: forwards the blob's buffer pointer and size to the bytecode overload.
wil::com_ptr_nothrow<ID3D11GeometryShader> D3D11::ShaderCompiler::CreateGeometryShader(ID3D11Device* device, const ID3DBlob* blob)
{
	// The accessors are called through a non-const pointer, so the constness is cast away once here.
	ID3DBlob* const mutable_blob = const_cast<ID3DBlob*>(blob);
	return CreateGeometryShader(device, mutable_blob->GetBufferPointer(), mutable_blob->GetBufferSize());
}
wil::com_ptr_nothrow<ID3D11PixelShader> D3D11::ShaderCompiler::CreatePixelShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length)
{
wil::com_ptr_nothrow<ID3D11PixelShader> shader;

View File

@ -27,7 +27,6 @@ namespace D3D11::ShaderCompiler
// Shader stage selector.
// CompileShader casts this value to int and uses it to index its per-feature-level
// target profile tables, so the enumerator order must stay in step with those tables.
enum class Type
{
Vertex,
Geometry,
Pixel,
Compute
};
@ -37,8 +36,6 @@ namespace D3D11::ShaderCompiler
wil::com_ptr_nothrow<ID3D11VertexShader> CompileAndCreateVertexShader(ID3D11Device* device, bool debug, const std::string_view& code,
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
wil::com_ptr_nothrow<ID3D11GeometryShader> CompileAndCreateGeometryShader(ID3D11Device* device, bool debug, const std::string_view& code,
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
wil::com_ptr_nothrow<ID3D11PixelShader> CompileAndCreatePixelShader(ID3D11Device* device, bool debug, const std::string_view& code,
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
wil::com_ptr_nothrow<ID3D11ComputeShader> CompileAndCreateComputeShader(ID3D11Device* device, bool debug, const std::string_view& code,
@ -46,8 +43,6 @@ namespace D3D11::ShaderCompiler
wil::com_ptr_nothrow<ID3D11VertexShader> CreateVertexShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
wil::com_ptr_nothrow<ID3D11VertexShader> CreateVertexShader(ID3D11Device* device, const ID3DBlob* blob);
wil::com_ptr_nothrow<ID3D11GeometryShader> CreateGeometryShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
wil::com_ptr_nothrow<ID3D11GeometryShader> CreateGeometryShader(ID3D11Device* device, const ID3DBlob* blob);
wil::com_ptr_nothrow<ID3D11PixelShader> CreatePixelShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
wil::com_ptr_nothrow<ID3D11PixelShader> CreatePixelShader(ID3D11Device* device, const ID3DBlob* blob);
wil::com_ptr_nothrow<ID3D11ComputeShader> CreateComputeShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);

View File

@ -665,3 +665,57 @@ void Context::SetEnableGPUTiming(bool enabled)
{
m_gpu_timing_enabled = enabled;
}
// Creates a buffer of `size` bytes in a default (GPU) heap and fills it through a temporary upload buffer.
//
// fill_callback receives the mapped upload memory and is expected to write the buffer contents into it.
// The copy and the COPY_DEST -> INDEX_BUFFER transition are recorded on the init command list.
// On success *gpu_buffer / *gpu_allocation receive the new resource and true is returned; any failed
// allocation or map asserts and returns false.
bool Context::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer,
	D3D12MA::Allocation** gpu_allocation, const std::function<void(void*)>& fill_callback)
{
	// The staging buffer and the destination buffer share one description: a plain buffer of `size` bytes.
	D3D12_RESOURCE_DESC buffer_desc = {};
	buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
	buffer_desc.Alignment = 0;
	buffer_desc.Width = size;
	buffer_desc.Height = 1;
	buffer_desc.DepthOrArraySize = 1;
	buffer_desc.MipLevels = 1;
	buffer_desc.Format = DXGI_FORMAT_UNKNOWN;
	buffer_desc.SampleDesc.Count = 1;
	buffer_desc.SampleDesc.Quality = 0;
	buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
	buffer_desc.Flags = D3D12_RESOURCE_FLAG_NONE;

	// CPU-visible staging buffer in an upload heap.
	D3D12MA::ALLOCATION_DESC staging_alloc_desc = {};
	staging_alloc_desc.Flags = D3D12MA::ALLOCATION_FLAG_NONE;
	staging_alloc_desc.HeapType = D3D12_HEAP_TYPE_UPLOAD;

	ComPtr<ID3D12Resource> staging_buffer;
	ComPtr<D3D12MA::Allocation> staging_allocation;
	HRESULT hr = m_allocator->CreateResource(&staging_alloc_desc, &buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ,
		nullptr, staging_allocation.put(), IID_PPV_ARGS(staging_buffer.put()));
	pxAssertMsg(SUCCEEDED(hr), "Allocate CPU buffer");
	if (FAILED(hr))
		return false;

	// Nothing is read back from the staging memory; the whole buffer is reported as written on unmap.
	const D3D12_RANGE nothing_read = {0, 0};
	const D3D12_RANGE everything_written = {0, size};
	void* staging_ptr = nullptr;
	hr = staging_buffer->Map(0, &nothing_read, &staging_ptr);
	pxAssertMsg(SUCCEEDED(hr), "Map CPU buffer");
	if (FAILED(hr))
		return false;

	fill_callback(staging_ptr);
	staging_buffer->Unmap(0, &everything_written);

	// Destination buffer: committed allocation in a default heap, created ready to be copied into.
	D3D12MA::ALLOCATION_DESC device_alloc_desc = {};
	device_alloc_desc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED;
	device_alloc_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT;

	hr = m_allocator->CreateResource(&device_alloc_desc, &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST,
		nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer));
	pxAssertMsg(SUCCEEDED(hr), "Allocate GPU buffer");
	if (FAILED(hr))
		return false;

	// Record the upload, then move the destination into the index-buffer state.
	GetInitCommandList()->CopyBufferRegion(*gpu_buffer, 0, staging_buffer.get(), 0, size);

	D3D12_RESOURCE_BARRIER to_index_buffer = {};
	to_index_buffer.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
	to_index_buffer.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
	to_index_buffer.Transition.pResource = *gpu_buffer;
	to_index_buffer.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
	to_index_buffer.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
	to_index_buffer.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER;
	GetInitCommandList()->ResourceBarrier(1, &to_index_buffer);

	// NOTE(review): presumably this keeps the staging objects alive until the recorded copy has
	// executed on the GPU - confirm against DeferResourceDestruction.
	DeferResourceDestruction(staging_allocation.get(), staging_buffer.get());
	return true;
}

View File

@ -152,6 +152,10 @@ namespace D3D12
float GetAndResetAccumulatedGPUTime();
void SetEnableGPUTiming(bool enabled);
// Allocates a temporary CPU staging buffer, fires the callback with it to populate, then copies to a GPU buffer.
bool AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer, D3D12MA::Allocation** gpu_allocation,
const std::function<void(void*)>& fill_callback);
private:
struct CommandListResources
{

View File

@ -521,9 +521,6 @@ ShaderCache::ComPtr<ID3DBlob> ShaderCache::CompileAndAddShaderBlob(const CacheIn
case EntryType::VertexShader:
blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Vertex, m_feature_level, m_debug, shader_code, macros, entry_point);
break;
case EntryType::GeometryShader:
blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Geometry, m_feature_level, m_debug, shader_code, macros, entry_point);
break;
case EntryType::PixelShader:
blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Pixel, m_feature_level, m_debug, shader_code, macros, entry_point);
break;

View File

@ -37,7 +37,6 @@ namespace D3D12
enum class EntryType
{
VertexShader,
GeometryShader,
PixelShader,
ComputeShader,
GraphicsPipeline,
@ -59,11 +58,6 @@ namespace D3D12
{
return GetShaderBlob(EntryType::VertexShader, shader_code, macros, entry_point);
}
__fi ComPtr<ID3DBlob> GetGeometryShader(std::string_view shader_code,
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main")
{
return GetShaderBlob(EntryType::GeometryShader, shader_code, macros, entry_point);
}
__fi ComPtr<ID3DBlob> GetPixelShader(std::string_view shader_code,
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main")
{

View File

@ -35,8 +35,6 @@ namespace GL
prog.m_program_id = 0;
m_vertex_shader_id = prog.m_vertex_shader_id;
prog.m_vertex_shader_id = 0;
m_geometry_shader_id = prog.m_geometry_shader_id;
prog.m_geometry_shader_id = 0;
m_fragment_shader_id = prog.m_fragment_shader_id;
prog.m_fragment_shader_id = 0;
m_uniform_locations = std::move(prog.m_uniform_locations);
@ -102,8 +100,7 @@ namespace GL
s_last_program_id = 0;
}
bool Program::Compile(const std::string_view vertex_shader, const std::string_view geometry_shader,
const std::string_view fragment_shader)
bool Program::Compile(const std::string_view vertex_shader, const std::string_view fragment_shader)
{
if (!vertex_shader.empty())
{
@ -112,13 +109,6 @@ namespace GL
return false;
}
if (!geometry_shader.empty())
{
m_geometry_shader_id = CompileShader(GL_GEOMETRY_SHADER, geometry_shader);
if (m_geometry_shader_id == 0)
return false;
}
if (!fragment_shader.empty())
{
m_fragment_shader_id = CompileShader(GL_FRAGMENT_SHADER, fragment_shader);
@ -129,8 +119,6 @@ namespace GL
m_program_id = glCreateProgram();
if (m_vertex_shader_id != 0)
glAttachShader(m_program_id, m_vertex_shader_id);
if (m_geometry_shader_id != 0)
glAttachShader(m_program_id, m_geometry_shader_id);
if (m_fragment_shader_id != 0)
glAttachShader(m_program_id, m_fragment_shader_id);
return true;
@ -240,9 +228,6 @@ namespace GL
if (m_vertex_shader_id != 0)
glDeleteShader(m_vertex_shader_id);
m_vertex_shader_id = 0;
if (m_geometry_shader_id != 0)
glDeleteShader(m_geometry_shader_id);
m_geometry_shader_id = 0;
if (m_fragment_shader_id != 0)
glDeleteShader(m_fragment_shader_id);
m_fragment_shader_id = 0;
@ -541,8 +526,6 @@ namespace GL
prog.m_program_id = 0;
m_vertex_shader_id = prog.m_vertex_shader_id;
prog.m_vertex_shader_id = 0;
m_geometry_shader_id = prog.m_geometry_shader_id;
prog.m_geometry_shader_id = 0;
m_fragment_shader_id = prog.m_fragment_shader_id;
prog.m_fragment_shader_id = 0;
m_uniform_locations = std::move(prog.m_uniform_locations);

View File

@ -34,8 +34,7 @@ namespace GL
bool IsValid() const { return m_program_id != 0; }
bool Compile(const std::string_view vertex_shader, const std::string_view geometry_shader,
const std::string_view fragment_shader);
bool Compile(const std::string_view vertex_shader, const std::string_view fragment_shader);
bool CompileCompute(const std::string_view glsl);
@ -99,7 +98,6 @@ namespace GL
GLuint m_program_id = 0;
GLuint m_vertex_shader_id = 0;
GLuint m_geometry_shader_id = 0;
GLuint m_fragment_shader_id = 0;
std::vector<GLint> m_uniform_locations;

View File

@ -28,9 +28,6 @@ namespace GL
u64 vertex_source_hash_low;
u64 vertex_source_hash_high;
u32 vertex_source_length;
u64 geometry_source_hash_low;
u64 geometry_source_hash_high;
u32 geometry_source_length;
u64 fragment_source_hash_low;
u64 fragment_source_hash_high;
u32 fragment_source_length;
@ -51,9 +48,7 @@ namespace GL
{
return (
vertex_source_hash_low == key.vertex_source_hash_low && vertex_source_hash_high == key.vertex_source_hash_high &&
vertex_source_length == key.vertex_source_length && geometry_source_hash_low == key.geometry_source_hash_low &&
geometry_source_hash_high == key.geometry_source_hash_high &&
geometry_source_length == key.geometry_source_length && fragment_source_hash_low == key.fragment_source_hash_low &&
vertex_source_length == key.vertex_source_length && fragment_source_hash_low == key.fragment_source_hash_low &&
fragment_source_hash_high == key.fragment_source_hash_high && fragment_source_length == key.fragment_source_length);
}
@ -61,9 +56,7 @@ namespace GL
{
return (
vertex_source_hash_low != key.vertex_source_hash_low || vertex_source_hash_high != key.vertex_source_hash_high ||
vertex_source_length != key.vertex_source_length || geometry_source_hash_low != key.geometry_source_hash_low ||
geometry_source_hash_high != key.geometry_source_hash_high ||
geometry_source_length != key.geometry_source_length || fragment_source_hash_low != key.fragment_source_hash_low ||
vertex_source_length != key.vertex_source_length || fragment_source_hash_low != key.fragment_source_hash_low ||
fragment_source_hash_high != key.fragment_source_hash_high || fragment_source_length != key.fragment_source_length);
}
@ -204,7 +197,6 @@ namespace GL
const CacheIndexKey key{
entry.vertex_source_hash_low, entry.vertex_source_hash_high, entry.vertex_source_length,
entry.geometry_source_hash_low, entry.geometry_source_hash_high, entry.geometry_source_length,
entry.fragment_source_hash_low, entry.fragment_source_hash_high, entry.fragment_source_length};
const CacheIndexData data{entry.file_offset, entry.blob_size, entry.blob_format};
m_index.emplace(key, data);
@ -242,7 +234,6 @@ namespace GL
}
ShaderCache::CacheIndexKey ShaderCache::GetCacheKey(const std::string_view& vertex_shader,
const std::string_view& geometry_shader,
const std::string_view& fragment_shader)
{
union ShaderHash
@ -256,7 +247,6 @@ namespace GL
};
ShaderHash vertex_hash = {};
ShaderHash geometry_hash = {};
ShaderHash fragment_hash = {};
MD5Digest digest;
@ -266,13 +256,6 @@ namespace GL
digest.Final(vertex_hash.bytes);
}
if (!geometry_shader.empty())
{
digest.Reset();
digest.Update(geometry_shader.data(), static_cast<u32>(geometry_shader.length()));
digest.Final(geometry_hash.bytes);
}
if (!fragment_shader.empty())
{
digest.Reset();
@ -281,7 +264,6 @@ namespace GL
}
return CacheIndexKey{vertex_hash.low, vertex_hash.high, static_cast<u32>(vertex_shader.length()),
geometry_hash.low, geometry_hash.high, static_cast<u32>(geometry_shader.length()),
fragment_hash.low, fragment_hash.high, static_cast<u32>(fragment_shader.length())};
}
@ -296,7 +278,6 @@ namespace GL
}
std::optional<Program> ShaderCache::GetProgram(const std::string_view vertex_shader,
const std::string_view geometry_shader,
const std::string_view fragment_shader, const PreLinkCallback& callback)
{
if (!m_program_binary_supported || !m_blob_file)
@ -305,7 +286,7 @@ namespace GL
Common::Timer timer;
#endif
std::optional<Program> res = CompileProgram(vertex_shader, geometry_shader, fragment_shader, callback, false);
std::optional<Program> res = CompileProgram(vertex_shader, fragment_shader, callback, false);
#ifdef PCSX2_DEVBUILD
Console.WriteLn("Time to compile shader without caching: %.2fms", timer.GetTimeMilliseconds());
@ -313,10 +294,10 @@ namespace GL
return res;
}
const auto key = GetCacheKey(vertex_shader, geometry_shader, fragment_shader);
const auto key = GetCacheKey(vertex_shader, fragment_shader);
auto iter = m_index.find(key);
if (iter == m_index.end())
return CompileAndAddProgram(key, vertex_shader, geometry_shader, fragment_shader, callback);
return CompileAndAddProgram(key, vertex_shader, fragment_shader, callback);
std::vector<u8> data(iter->second.blob_size);
if (std::fseek(m_blob_file, iter->second.file_offset, SEEK_SET) != 0 ||
@ -343,16 +324,15 @@ namespace GL
Console.Warning(
"Failed to create program from binary, this may be due to a driver or GPU Change. Recreating cache.");
if (!Recreate())
return CompileProgram(vertex_shader, geometry_shader, fragment_shader, callback, false);
return CompileProgram(vertex_shader, fragment_shader, callback, false);
else
return CompileAndAddProgram(key, vertex_shader, geometry_shader, fragment_shader, callback);
return CompileAndAddProgram(key, vertex_shader, fragment_shader, callback);
}
bool ShaderCache::GetProgram(Program* out_program, const std::string_view vertex_shader,
const std::string_view geometry_shader, const std::string_view fragment_shader,
const PreLinkCallback& callback /* = */)
const std::string_view fragment_shader, const PreLinkCallback& callback /* = */)
{
auto prog = GetProgram(vertex_shader, geometry_shader, fragment_shader, callback);
auto prog = GetProgram(vertex_shader, fragment_shader, callback);
if (!prog)
return false;
@ -374,9 +354,6 @@ namespace GL
entry.vertex_source_hash_low = key.vertex_source_hash_low;
entry.vertex_source_hash_high = key.vertex_source_hash_high;
entry.vertex_source_length = key.vertex_source_length;
entry.geometry_source_hash_low = key.geometry_source_hash_low;
entry.geometry_source_hash_high = key.geometry_source_hash_high;
entry.geometry_source_length = key.geometry_source_length;
entry.fragment_source_hash_low = key.fragment_source_hash_low;
entry.fragment_source_hash_high = key.fragment_source_hash_high;
entry.fragment_source_length = key.fragment_source_length;
@ -397,12 +374,10 @@ namespace GL
}
std::optional<Program> ShaderCache::CompileProgram(const std::string_view& vertex_shader,
const std::string_view& geometry_shader,
const std::string_view& fragment_shader,
const PreLinkCallback& callback, bool set_retrievable)
const std::string_view& fragment_shader, const PreLinkCallback& callback, bool set_retrievable)
{
Program prog;
if (!prog.Compile(vertex_shader, geometry_shader, fragment_shader))
if (!prog.Compile(vertex_shader, fragment_shader))
return std::nullopt;
if (callback)
@ -437,16 +412,14 @@ namespace GL
}
std::optional<Program> ShaderCache::CompileAndAddProgram(const CacheIndexKey& key,
const std::string_view& vertex_shader,
const std::string_view& geometry_shader,
const std::string_view& fragment_shader,
const std::string_view& vertex_shader, const std::string_view& fragment_shader,
const PreLinkCallback& callback)
{
#ifdef PCSX2_DEVBUILD
Common::Timer timer;
#endif
std::optional<Program> prog = CompileProgram(vertex_shader, geometry_shader, fragment_shader, callback, true);
std::optional<Program> prog = CompileProgram(vertex_shader, fragment_shader, callback, true);
if (!prog)
return std::nullopt;
@ -491,7 +464,7 @@ namespace GL
return res;
}
const auto key = GetCacheKey(glsl, std::string_view(), std::string_view());
const auto key = GetCacheKey(glsl, std::string_view());
auto iter = m_index.find(key);
if (iter == m_index.end())
return CompileAndAddComputeProgram(key, glsl, callback);

View File

@ -38,10 +38,8 @@ namespace GL
bool Open(bool is_gles, std::string_view base_path, u32 version);
void Close();
std::optional<Program> GetProgram(const std::string_view vertex_shader, const std::string_view geometry_shader,
const std::string_view fragment_shader, const PreLinkCallback& callback = {});
bool GetProgram(Program* out_program, const std::string_view vertex_shader, const std::string_view geometry_shader,
const std::string_view fragment_shader, const PreLinkCallback& callback = {});
std::optional<Program> GetProgram(const std::string_view vertex_shader, const std::string_view fragment_shader, const PreLinkCallback& callback = {});
bool GetProgram(Program* out_program, const std::string_view vertex_shader, const std::string_view fragment_shader, const PreLinkCallback& callback = {});
std::optional<Program> GetComputeProgram(const std::string_view glsl, const PreLinkCallback& callback = {});
bool GetComputeProgram(Program* out_program, const std::string_view glsl, const PreLinkCallback& callback = {});
@ -54,9 +52,6 @@ namespace GL
u64 vertex_source_hash_low;
u64 vertex_source_hash_high;
u32 vertex_source_length;
u64 geometry_source_hash_low;
u64 geometry_source_hash_high;
u32 geometry_source_length;
u64 fragment_source_hash_low;
u64 fragment_source_hash_high;
u32 fragment_source_length;
@ -72,7 +67,6 @@ namespace GL
std::size_t h = 0;
HashCombine(h,
e.vertex_source_hash_low, e.vertex_source_hash_high, e.vertex_source_length,
e.geometry_source_hash_low, e.geometry_source_hash_high, e.geometry_source_length,
e.fragment_source_hash_low, e.fragment_source_hash_high, e.fragment_source_length);
return h;
}
@ -87,8 +81,7 @@ namespace GL
using CacheIndex = std::unordered_map<CacheIndexKey, CacheIndexData, CacheIndexEntryHasher>;
static CacheIndexKey GetCacheKey(const std::string_view& vertex_shader, const std::string_view& geometry_shader,
const std::string_view& fragment_shader);
static CacheIndexKey GetCacheKey(const std::string_view& vertex_shader, const std::string_view& fragment_shader);
std::string GetIndexFileName() const;
std::string GetBlobFileName() const;
@ -99,11 +92,10 @@ namespace GL
bool WriteToBlobFile(const CacheIndexKey& key, const std::vector<u8>& prog_data, u32 prog_format);
std::optional<Program> CompileProgram(const std::string_view& vertex_shader, const std::string_view& geometry_shader,
std::optional<Program> CompileProgram(const std::string_view& vertex_shader,
const std::string_view& fragment_shader, const PreLinkCallback& callback,
bool set_retrievable);
std::optional<Program> CompileAndAddProgram(const CacheIndexKey& key, const std::string_view& vertex_shader,
const std::string_view& geometry_shader,
const std::string_view& fragment_shader, const PreLinkCallback& callback);
std::optional<Program> CompileComputeProgram(const std::string_view& glsl, const PreLinkCallback& callback, bool set_retrievable);

View File

@ -698,11 +698,6 @@ namespace Vulkan
{
pxAssert(m_num_writes < MAX_WRITES && (m_num_image_infos + num_views) < MAX_IMAGE_INFOS);
#if 1
// NOTE: This is deliberately split up - updating multiple descriptors in one write is broken on Adreno.
for (u32 i = 0; i < num_views; i++)
AddCombinedImageSamplerDescriptorWrite(set, binding + i, views[i], samplers[i], layout);
#else
VkWriteDescriptorSet& dw = m_writes[m_num_writes++];
dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
dw.dstSet = set;
@ -718,7 +713,6 @@ namespace Vulkan
ii.sampler = samplers[i];
ii.imageLayout = layout;
}
#endif
}
void DescriptorSetUpdateBuilder::AddBufferDescriptorWrite(

View File

@ -474,6 +474,8 @@ namespace Vulkan
SupportsExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false);
m_optional_extensions.vk_khr_fragment_shader_barycentric =
SupportsExtension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME, false);
m_optional_extensions.vk_khr_shader_draw_parameters =
SupportsExtension(VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, false);
return true;
}
@ -956,11 +958,9 @@ namespace Vulkan
bool Context::CreateGlobalDescriptorPool()
{
// TODO: A better way to choose the number of descriptors.
VkDescriptorPoolSize pool_sizes[] = {
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1024},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1024},
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1},
static constexpr const VkDescriptorPoolSize pool_sizes[] = {
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 2},
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2},
};
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr,
@ -2077,4 +2077,52 @@ void main()
return static_cast<u64>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#endif
}
/// Creates a GPU-only buffer of `size` bytes whose initial contents are produced by `fill_callback`.
///
/// A temporary, persistently-mapped CPU-only staging buffer is created, `fill_callback` is invoked with
/// its mapped pointer to populate it, and a copy into the GPU buffer is recorded on the current init
/// command buffer. The staging buffer is handed to DeferBufferDestruction() afterwards.
///
/// @param size           Size in bytes of both the staging and the GPU buffer.
/// @param gpu_buffer     Receives the created GPU buffer handle.
/// @param gpu_allocation Receives the VMA allocation backing `gpu_buffer`.
/// @param gpu_usage      Usage flags for the GPU buffer. VK_BUFFER_USAGE_TRANSFER_DST_BIT is always added,
///                       since the buffer is populated through a transfer.
/// @param fill_callback  Called once with a pointer to `size` writable bytes of staging memory.
/// @return true on success; false if either buffer could not be created (nothing is leaked in that case).
bool Context::AllocatePreinitializedGPUBuffer(u32 size, VkBuffer* gpu_buffer, VmaAllocation* gpu_allocation,
	VkBufferUsageFlags gpu_usage, const std::function<void(void*)>& fill_callback)
{
	// Try to place the buffer in GPU local memory.
	// Use the staging buffer to copy into it.
	const VkBufferCreateInfo cpu_bci = {
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		nullptr,
		0, size,
		VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE};
	const VmaAllocationCreateInfo cpu_aci = {
		VMA_ALLOCATION_CREATE_MAPPED_BIT, VMA_MEMORY_USAGE_CPU_ONLY, 0, 0};
	VkBuffer cpu_buffer;
	VmaAllocation cpu_allocation;
	VmaAllocationInfo cpu_ai;
	VkResult res = vmaCreateBuffer(m_allocator, &cpu_bci, &cpu_aci, &cpu_buffer,
		&cpu_allocation, &cpu_ai);
	if (res != VK_SUCCESS)
	{
		LOG_VULKAN_ERROR(res, "vmaCreateBuffer() for CPU expand buffer failed: ");
		return false;
	}

	// Honour the caller-requested usage instead of hard-coding an index buffer; the transfer
	// destination bit is required for the staging copy below.
	const VkBufferCreateInfo gpu_bci = {
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		nullptr,
		0, size,
		gpu_usage | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_SHARING_MODE_EXCLUSIVE};
	const VmaAllocationCreateInfo gpu_aci = {
		0, VMA_MEMORY_USAGE_GPU_ONLY, 0, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT};
	// The allocation info for the GPU buffer is not needed, so none is requested.
	res = vmaCreateBuffer(m_allocator, &gpu_bci, &gpu_aci, gpu_buffer, gpu_allocation, nullptr);
	if (res != VK_SUCCESS)
	{
		LOG_VULKAN_ERROR(res, "vmaCreateBuffer() for expand buffer failed: ");
		// The staging buffer was never referenced by a command buffer, so it can be destroyed immediately.
		vmaDestroyBuffer(m_allocator, cpu_buffer, cpu_allocation);
		return false;
	}

	const VkBufferCopy buf_copy = {0u, 0u, size};
	fill_callback(cpu_ai.pMappedData);
	// Flush so the CPU writes are visible to the device even if the memory is not host-coherent.
	vmaFlushAllocation(m_allocator, cpu_allocation, 0, size);
	vkCmdCopyBuffer(GetCurrentInitCommandBuffer(), cpu_buffer, *gpu_buffer, 1, &buf_copy);
	// The copy is only recorded at this point, so the staging buffer must outlive the command buffer.
	DeferBufferDestruction(cpu_buffer, cpu_allocation);
	return true;
}
} // namespace Vulkan

View File

@ -56,6 +56,7 @@ namespace Vulkan
bool vk_khr_driver_properties : 1;
bool vk_arm_rasterization_order_attachment_access : 1;
bool vk_khr_fragment_shader_barycentric : 1;
bool vk_khr_shader_draw_parameters : 1;
};
~Context();
@ -238,6 +239,10 @@ namespace Vulkan
void CountRenderPass() { m_command_buffer_render_passes++; }
void NotifyOfReadback();
// Allocates a temporary CPU staging buffer, fires the callback with it to populate, then copies to a GPU buffer.
bool AllocatePreinitializedGPUBuffer(u32 size, VkBuffer* gpu_buffer, VmaAllocation* gpu_allocation,
VkBufferUsageFlags gpu_usage, const std::function<void(void*)>& fill_callback);
private:
Context(VkInstance instance, VkPhysicalDevice physical_device);

View File

@ -493,11 +493,6 @@ namespace Vulkan
return GetShaderModule(ShaderCompiler::Type::Vertex, std::move(shader_code));
}
VkShaderModule ShaderCache::GetGeometryShader(std::string_view shader_code)
{
return GetShaderModule(ShaderCompiler::Type::Geometry, std::move(shader_code));
}
VkShaderModule ShaderCache::GetFragmentShader(std::string_view shader_code)
{
return GetShaderModule(ShaderCompiler::Type::Fragment, std::move(shader_code));

View File

@ -47,7 +47,6 @@ namespace Vulkan
VkShaderModule GetShaderModule(ShaderCompiler::Type type, std::string_view shader_code);
VkShaderModule GetVertexShader(std::string_view shader_code);
VkShaderModule GetGeometryShader(std::string_view shader_code);
VkShaderModule GetFragmentShader(std::string_view shader_code);
VkShaderModule GetComputeShader(std::string_view shader_code);

View File

@ -154,11 +154,6 @@ namespace Vulkan::ShaderCompiler
return CompileShaderToSPV(EShLangVertex, "vs", source_code, debug);
}
std::optional<SPIRVCodeVector> CompileGeometryShader(std::string_view source_code, bool debug)
{
return CompileShaderToSPV(EShLangGeometry, "gs", source_code, debug);
}
std::optional<SPIRVCodeVector> CompileFragmentShader(std::string_view source_code, bool debug)
{
return CompileShaderToSPV(EShLangFragment, "ps", source_code, debug);
@ -176,9 +171,6 @@ namespace Vulkan::ShaderCompiler
case Type::Vertex:
return CompileShaderToSPV(EShLangVertex, "vs", source_code, debug);
case Type::Geometry:
return CompileShaderToSPV(EShLangGeometry, "gs", source_code, debug);
case Type::Fragment:
return CompileShaderToSPV(EShLangFragment, "ps", source_code, debug);

View File

@ -26,7 +26,6 @@ namespace Vulkan::ShaderCompiler
enum class Type
{
Vertex,
Geometry,
Fragment,
Compute
};
@ -40,9 +39,6 @@ namespace Vulkan::ShaderCompiler
// Compile a vertex shader to SPIR-V.
std::optional<SPIRVCodeVector> CompileVertexShader(std::string_view source_code, bool debug);
// Compile a geometry shader to SPIR-V.
std::optional<SPIRVCodeVector> CompileGeometryShader(std::string_view source_code, bool debug);
// Compile a fragment shader to SPIR-V.
std::optional<SPIRVCodeVector> CompileFragmentShader(std::string_view source_code, bool debug);

View File

@ -245,7 +245,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.skipPresentingDuplicateFrames, "EmuCore/GS", "SkipDuplicateFrames", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.threadedPresentation, "EmuCore/GS", "DisableThreadedPresentation", false);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideTextureBarriers, "EmuCore/GS", "OverrideTextureBarriers", -1, -1);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideGeometryShader, "EmuCore/GS", "OverrideGeometryShaders", -1, -1);
SettingWidgetBinder::BindWidgetToIntSetting(
sif, m_ui.gsDumpCompression, "EmuCore/GS", "GSDumpCompression", static_cast<int>(GSDumpCompressionMethod::Zstandard));
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.disableFramebufferFetch, "EmuCore/GS", "DisableFramebufferFetch", false);
@ -693,10 +692,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
{
dialog->registerWidgetHelp(m_ui.overrideTextureBarriers, tr("Override Texture Barriers"), tr("Automatic (Default)"), tr(""));
dialog->registerWidgetHelp(m_ui.overrideGeometryShader, tr("Override Geometry Shader"), tr("Automatic (Default)"),
tr("Allows the GPU instead of just the CPU to transform lines into sprites. "
"This reduces CPU load and bandwidth requirement, but it is heavier on the GPU."));
dialog->registerWidgetHelp(m_ui.gsDumpCompression, tr("GS Dump Compression"), tr("Zstandard (zst)"),
tr("Change the compression algorithm used when creating a GS dump."));
@ -948,7 +943,6 @@ void GraphicsSettingsWidget::updateRendererDependentOptions()
m_ui.useBlitSwapChain->setEnabled(is_dx11);
m_ui.overrideTextureBarriers->setDisabled(is_sw_dx);
m_ui.overrideGeometryShader->setDisabled(is_sw_dx);
m_ui.disableFramebufferFetch->setDisabled(is_sw_dx);

View File

@ -2133,33 +2133,7 @@
</item>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_30">
<property name="text">
<string>Override Geometry Shader:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="overrideGeometryShader">
<item>
<property name="text">
<string>Automatic (Default)</string>
</property>
</item>
<item>
<property name="text">
<string>Force Disabled</string>
</property>
</item>
<item>
<property name="text">
<string>Force Enabled</string>
</property>
</item>
</widget>
</item>
<item row="3" column="0" colspan="2">
<item row="2" column="0" colspan="2">
<layout class="QGridLayout" name="gridLayout_7">
<item row="1" column="0">
<widget class="QCheckBox" name="useDebugDevice">

View File

@ -760,7 +760,6 @@ struct Pcsx2Config
GSTextureInRtMode UserHacks_TextureInsideRt{GSTextureInRtMode::Disabled};
TriFiltering TriFilter{TriFiltering::Automatic};
int OverrideTextureBarriers{-1};
int OverrideGeometryShaders{-1};
int CAS_Sharpness{50};
int ShadeBoost_Brightness{50};

View File

@ -3325,8 +3325,6 @@ void FullscreenUI::DrawGraphicsSettingsPage()
}
DrawIntListSetting(bsi, "Override Texture Barriers", "Forces texture barrier functionality to the specified value.", "EmuCore/GS",
"OverrideTextureBarriers", -1, s_generic_options, std::size(s_generic_options), -1);
DrawIntListSetting(bsi, "Override Geometry Shaders", "Forces geometry shader functionality to the specified value.", "EmuCore/GS",
"OverrideGeometryShaders", -1, s_generic_options, std::size(s_generic_options), -1);
DrawIntListSetting(bsi, "GS Dump Compression", "Sets the compression algorithm for GS dumps.", "EmuCore/GS", "GSDumpCompression",
static_cast<int>(GSDumpCompressionMethod::LZMA), s_gsdump_compression, std::size(s_gsdump_compression));
DrawToggleSetting(bsi, "Disable Framebuffer Fetch", "Prevents the usage of framebuffer fetch when supported by host GPU.", "EmuCore/GS",

View File

@ -3037,6 +3037,28 @@ static constexpr u32 NumIndicesForPrim(u32 prim)
}
}
/// Returns the queued-vertex threshold for the given primitive type, or 0 when the
/// primitive type has no such limit (lines, triangles and anything unrecognised).
static constexpr u32 MaxVerticesForPrim(u32 prim)
{
	constexpr u32 index_limit = std::numeric_limits<u16>::max();

	// Needed due to expansion in hardware renderers.
	if (prim == GS_POINTLIST || prim == GS_INVALID)
		return (index_limit / 4) - 4;

	if (prim == GS_SPRITE)
		return (index_limit / 2) - 2;

	// GS_LINELIST, GS_LINESTRIP, GS_TRIANGLELIST, GS_TRIANGLESTRIP, GS_TRIANGLEFAN and everything else.
	return 0;
}
template <u32 prim, bool auto_flush, bool index_swap>
__forceinline void GSState::VertexKick(u32 skip)
{
@ -3305,6 +3327,10 @@ __forceinline void GSState::VertexKick(u32 skip)
}
CLUTAutoFlush(prim);
constexpr u32 max_vertices = MaxVerticesForPrim(prim);
if (max_vertices != 0 && m_vertex.tail >= max_vertices)
Flush(VERTEXCOUNT);
}
/// Checks if region repeat is used (applying it does something to at least one of the values in min...max)

View File

@ -278,6 +278,7 @@ public:
AUTOFLUSH = 1 << 11,
VSYNC = 1 << 12,
GSREOPEN = 1 << 13,
VERTEXCOUNT = 1 << 14,
};
GSFlushReason m_state_flush_reason = UNKNOWN;

View File

@ -163,6 +163,23 @@ std::string GSDevice::GetFullscreenModeString(u32 width, u32 height, float refre
return StringUtil::StdStringFromFormat("%u x %u @ %f hz", width, height, refresh_rate);
}
void GSDevice::GenerateExpansionIndexBuffer(void* buffer)
{
static constexpr u32 MAX_INDEX = std::numeric_limits<u16>::max();
u32* idx_buffer = static_cast<u32*>(buffer);
for (u32 i = 0; i < MAX_INDEX; i++)
{
const u32 base = i * 4;
*(idx_buffer++) = base + 0;
*(idx_buffer++) = base + 1;
*(idx_buffer++) = base + 2;
*(idx_buffer++) = base + 1;
*(idx_buffer++) = base + 2;
*(idx_buffer++) = base + 3;
}
}
bool GSDevice::Create(const WindowInfo& wi, VsyncMode vsync)
{
m_window_info = wi;

View File

@ -235,13 +235,6 @@ struct alignas(16) GSHWDrawConfig
Line,
Triangle,
};
enum class GSTopology: u8
{
Point,
Line,
Triangle,
Sprite,
};
enum class VSExpand: u8
{
None,
@ -250,22 +243,6 @@ struct alignas(16) GSHWDrawConfig
Sprite,
};
#pragma pack(push, 1)
struct GSSelector
{
union
{
struct
{
GSTopology topology : 2;
bool expand : 1;
bool iip : 1;
bool forward_primid : 1;
};
u8 key;
};
GSSelector(): key(0) {}
GSSelector(u8 k): key(k) {}
};
struct VSSelector
{
union
@ -275,7 +252,7 @@ struct alignas(16) GSHWDrawConfig
u8 fst : 1;
u8 tme : 1;
u8 iip : 1;
u8 point_size : 1; ///< Set when points need to be expanded without geometry shader.
u8 point_size : 1; ///< Set when points need to be expanded without VS expanding.
VSExpand expand : 2;
u8 _free : 2;
};
@ -283,6 +260,9 @@ struct alignas(16) GSHWDrawConfig
};
VSSelector(): key(0) {}
VSSelector(u8 k): key(k) {}
/// Returns true if the fixed index buffer should be used, i.e. when expanding points or sprites.
__fi bool UseExpandIndexBuffer() const
{
	const VSExpand mode = expand;
	return (mode == VSExpand::Sprite || mode == VSExpand::Point);
}
};
#pragma pack(pop)
#pragma pack(push, 4)
@ -657,7 +637,6 @@ struct alignas(16) GSHWDrawConfig
Topology topology; ///< Draw topology
alignas(8) PSSelector ps;
GSSelector gs;
VSSelector vs;
BlendState blend;
@ -713,13 +692,12 @@ public:
struct FeatureSupport
{
bool broken_point_sampler : 1; ///< Issue with AMD cards, see tfx shader for details
bool geometry_shader : 1; ///< Supports geometry shader
bool vs_expand : 1; ///< Supports expanding points/lines/sprites in the vertex shader
bool primitive_id : 1; ///< Supports primitive ID for use with prim tracking destination alpha algorithm
bool texture_barrier : 1; ///< Supports sampling rt and hopefully texture barrier
bool provoking_vertex_last: 1; ///< Supports using the last vertex in a primitive as the value for flat shading.
bool point_expand : 1; ///< Supports point expansion in hardware without using geometry shaders.
bool line_expand : 1; ///< Supports line expansion in hardware without using geometry shaders.
bool point_expand : 1; ///< Supports point expansion in hardware.
bool line_expand : 1; ///< Supports line expansion in hardware.
bool prefer_new_textures : 1; ///< Allocate textures up to the pool size before reusing them, to avoid render pass restarts.
bool dxt_textures : 1; ///< Supports DXTn texture compression, i.e. S3TC and BC1-3.
bool bptc_textures : 1; ///< Supports BC6/7 texture compression.
@ -771,6 +749,7 @@ protected:
static constexpr float MAD_SENSITIVITY = 0.08f;
static constexpr u32 MAX_POOLED_TEXTURES = 300;
static constexpr u32 NUM_CAS_CONSTANTS = 12; // 8 plus src offset x/y, 16 byte alignment
static constexpr u32 EXPAND_BUFFER_SIZE = sizeof(u32) * std::numeric_limits<u16>::max() * 6;
WindowInfo m_window_info;
VsyncMode m_vsync_mode = VsyncMode::Off;
@ -824,6 +803,9 @@ public:
/// Converts a fullscreen mode to a string.
static std::string GetFullscreenModeString(u32 width, u32 height, float refresh_rate);
/// Generates a fixed index buffer for expanding points and sprites. Buffer is assumed to be at least EXPAND_BUFFER_SIZE in size.
static void GenerateExpansionIndexBuffer(void* buffer);
__fi unsigned int GetFrameNumber() const { return m_frame; }
__fi u64 GetPoolMemoryUsage() const { return m_pool_memory_usage; }

View File

@ -53,7 +53,6 @@ GSDevice11::GSDevice11()
m_state.topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
m_state.bf = -1;
m_features.geometry_shader = true;
m_features.primitive_id = true;
m_features.texture_barrier = false;
m_features.provoking_vertex_last = false;
@ -365,7 +364,46 @@ bool GSDevice11::Create(const WindowInfo& wi, VsyncMode vsync)
Console.Error("Failed to create index buffer.");
return false;
}
m_ctx->IASetIndexBuffer(m_ib.get(), DXGI_FORMAT_R32_UINT, 0);
IASetIndexBuffer(m_ib.get());
if (m_features.vs_expand)
{
bd.ByteWidth = VERTEX_BUFFER_SIZE;
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
bd.StructureByteStride = sizeof(GSVertex);
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_expand_vb.put())))
{
Console.Error("Failed to create expand vertex buffer.");
return false;
}
const CD3D11_SHADER_RESOURCE_VIEW_DESC vb_srv_desc(
D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 0, VERTEX_BUFFER_SIZE / sizeof(GSVertex));
if (FAILED(m_dev->CreateShaderResourceView(m_expand_vb.get(), &vb_srv_desc, m_expand_vb_srv.put())))
{
Console.Error("Failed to create expand vertex buffer SRV.");
return false;
}
m_ctx->VSSetShaderResources(0, 1, m_expand_vb_srv.addressof());
bd.ByteWidth = EXPAND_BUFFER_SIZE;
bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
bd.StructureByteStride = 0;
bd.MiscFlags = 0;
std::unique_ptr<u8[]> expand_data = std::make_unique<u8[]>(EXPAND_BUFFER_SIZE);
GenerateExpansionIndexBuffer(expand_data.get());
const D3D11_SUBRESOURCE_DATA srd = {expand_data.get()};
if (FAILED(m_dev->CreateBuffer(&bd, &srd, m_expand_ib.put())))
{
Console.Error("Failed to create expand index buffer.");
return false;
}
}
//
@ -466,6 +504,9 @@ void GSDevice11::Destroy()
m_vb.reset();
m_ib.reset();
m_expand_vb_srv.reset();
m_expand_vb.reset();
m_expand_ib.reset();
m_vs.clear();
m_vs_cb.reset();
@ -508,6 +549,9 @@ void GSDevice11::SetFeatures()
SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC3_UNORM);
m_features.bptc_textures = SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC7_UNORM);
const D3D_FEATURE_LEVEL feature_level = m_dev->GetFeatureLevel();
m_features.vs_expand = (feature_level >= D3D_FEATURE_LEVEL_11_0);
}
bool GSDevice11::HasSurface() const
@ -1234,11 +1278,6 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
VSSetShader(m_convert.vs.get(), nullptr);
// gs
GSSetShader(nullptr, nullptr);
// ps
PSSetShaderResources(sTex, nullptr);
@ -1307,11 +1346,6 @@ void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
VSSetShader(m_present.vs.get(), nullptr);
// gs
GSSetShader(nullptr, nullptr);
// ps
PSSetShaderResources(sTex, nullptr);
@ -1368,7 +1402,6 @@ void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_re
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
VSSetShader(m_convert.vs.get(), nullptr);
GSSetShader(nullptr, nullptr);
PSSetShader(m_convert.ps[static_cast<int>(shader)].get(), nullptr);
OMSetDepthStencilState(dTex->IsRenderTarget() ? m_convert.dss.get() : m_convert.dss_write.get(), 0);
@ -1437,6 +1470,7 @@ void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rect
IAUnmapVertexBuffer(sizeof(GSVertexPT1), vcount);
IAUnmapIndexBuffer(icount);
IASetIndexBuffer(m_ib.get());
PSSetShaderResource(0, rects[0].src);
PSSetSamplerState(rects[0].linear ? m_convert.ln.get() : m_convert.pt.get());
@ -1682,7 +1716,6 @@ void GSDevice11::RenderImGui()
IASetInputLayout(m_imgui.il.get());
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
VSSetShader(m_imgui.vs.get(), m_imgui.vs_cb.get());
GSSetShader(nullptr, nullptr);
PSSetShader(m_imgui.ps.get(), nullptr);
OMSetBlendState(m_imgui.bs.get(), 0.0f);
OMSetDepthStencilState(m_convert.dss.get(), 0);
@ -1761,7 +1794,7 @@ void GSDevice11::RenderImGui()
}
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &m_state.vb_stride, &vb_offset);
m_ctx->IASetIndexBuffer(m_ib.get(), DXGI_FORMAT_R32_UINT, 0);
m_ctx->IASetIndexBuffer(m_state.index_buffer, DXGI_FORMAT_R32_UINT, 0);
}
void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm)
@ -1786,10 +1819,6 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
VSSetShader(m_convert.vs.get(), nullptr);
// gs
GSSetShader(nullptr, nullptr);
// ps
PSSetShaderResources(rt, nullptr);
PSSetSamplerState(m_convert.pt.get());
@ -1852,6 +1881,37 @@ bool GSDevice11::IASetVertexBuffer(const void* vertex, u32 stride, u32 count)
return true;
}
bool GSDevice11::IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count)
{
const u32 size = stride * count;
if (size > VERTEX_BUFFER_SIZE)
return false;
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
m_vertex.start = (m_structured_vb_pos + (stride - 1)) / stride;
m_structured_vb_pos = (m_vertex.start * stride) + size;
if (m_structured_vb_pos > VERTEX_BUFFER_SIZE)
{
m_vertex.start = 0;
m_structured_vb_pos = size;
type = D3D11_MAP_WRITE_DISCARD;
}
D3D11_MAPPED_SUBRESOURCE m;
if (FAILED(m_ctx->Map(m_expand_vb.get(), 0, type, 0, &m)))
return false;
void* map = static_cast<u8*>(m.pData) + (m_vertex.start * stride);
GSVector4i::storent(map, vertex, count * stride);
m_ctx->Unmap(m_expand_vb.get(), 0);
m_vertex.count = count;
return true;
}
u32* GSDevice11::IAMapIndexBuffer(u32 count)
{
if (count > (INDEX_BUFFER_SIZE / sizeof(u32)))
@ -1890,9 +1950,19 @@ bool GSDevice11::IASetIndexBuffer(const void* index, u32 count)
std::memcpy(map, index, count * sizeof(u32));
IAUnmapIndexBuffer(count);
IASetIndexBuffer(m_ib.get());
return true;
}
/// Binds `buffer` as the input-assembler index buffer (32-bit indices, offset 0).
/// The call is skipped when m_state.index_buffer already equals `buffer`.
void GSDevice11::IASetIndexBuffer(ID3D11Buffer* buffer)
{
	// Cached binding already matches - nothing to send to the context.
	if (m_state.index_buffer == buffer)
		return;

	m_state.index_buffer = buffer;
	m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R32_UINT, 0);
}
void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
{
if (m_state.layout != layout)
@ -1930,23 +2000,6 @@ void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb)
}
}
/// Sets the geometry shader and the constant buffer bound to its slot 0.
/// Each is compared against the value cached in m_state and only sent to the
/// device context when it differs.
void GSDevice11::GSSetShader(ID3D11GeometryShader* gs, ID3D11Buffer* gs_cb)
{
	const bool shader_changed = (m_state.gs != gs);
	if (shader_changed)
	{
		m_ctx->GSSetShader(gs, nullptr, 0);
		m_state.gs = gs;
	}

	const bool cb_changed = (m_state.gs_cb != gs_cb);
	if (cb_changed)
	{
		// Update the cache first; the context call only reads the local pointer.
		m_state.gs_cb = gs_cb;
		m_ctx->GSSetConstantBuffers(0, 1, &gs_cb);
	}
}
void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
{
PSSetShaderResource(0, sr0);
@ -2172,12 +2225,40 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
}
if (!IASetVertexBuffer(config.verts, sizeof(*config.verts), config.nverts) ||
!IASetIndexBuffer(config.indices, config.nindices))
if (config.vs.expand != GSHWDrawConfig::VSExpand::None)
{
Console.Error("Failed to upload vertices/indices (%u/%u)", config.nverts, config.nindices);
return;
if (!IASetExpandVertexBuffer(config.verts, sizeof(*config.verts), config.nverts))
{
Console.Error("Failed to upload structured vertices (%u)", config.nverts);
return;
}
config.cb_vs.max_depth.y = m_vertex.start;
}
else
{
if (!IASetVertexBuffer(config.verts, sizeof(*config.verts), config.nverts))
{
Console.Error("Failed to upload vertices (%u)", config.nverts);
return;
}
}
if (config.vs.UseExpandIndexBuffer())
{
IASetIndexBuffer(m_expand_ib.get());
m_index.start = 0;
m_index.count = config.nindices;
}
else
{
if (!IASetIndexBuffer(config.indices, config.nindices))
{
Console.Error("Failed to upload indices (%u)", config.nindices);
return;
}
}
D3D11_PRIMITIVE_TOPOLOGY topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
switch (config.topology)
{
@ -2207,7 +2288,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
}
SetupVS(config.vs, &config.cb_vs);
SetupGS(config.gs);
SetupPS(config.ps, &config.cb_ps, config.sampler);
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
@ -2223,7 +2303,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
blend.blend_op = 3; // MIN
SetupOM(dss, blend, 0);
OMSetRenderTargets(primid_tex, config.ds, &config.scissor);
DrawIndexedPrimitive();
config.ps.date = 3;
@ -2234,7 +2313,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
SetupOM(config.depth, convertSel(config.colormask, config.blend), config.blend.constant);
OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor);
DrawIndexedPrimitive();
if (config.separate_alpha_pass)
@ -2243,7 +2321,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
SetHWDrawConfigForAlphaPass(&config.ps, &config.colormask, &sap_blend, &config.depth);
SetupOM(config.depth, convertSel(config.colormask, sap_blend), config.blend.constant);
SetupPS(config.ps, &config.cb_ps, config.sampler);
DrawIndexedPrimitive();
}
@ -2262,7 +2339,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
}
SetupOM(config.alpha_second_pass.depth, convertSel(config.alpha_second_pass.colormask, config.blend), config.blend.constant);
DrawIndexedPrimitive();
if (config.second_separate_alpha_pass)
@ -2271,7 +2347,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
SetHWDrawConfigForAlphaPass(&config.alpha_second_pass.ps, &config.alpha_second_pass.colormask, &sap_blend, &config.alpha_second_pass.depth);
SetupOM(config.alpha_second_pass.depth, convertSel(config.alpha_second_pass.colormask, sap_blend), config.blend.constant);
SetupPS(config.alpha_second_pass.ps, &config.cb_ps, config.sampler);
DrawIndexedPrimitive();
}
}

View File

@ -34,7 +34,6 @@ class GSDevice11 final : public GSDevice
{
public:
using VSSelector = GSHWDrawConfig::VSSelector;
using GSSelector = GSHWDrawConfig::GSSelector;
using PSSelector = GSHWDrawConfig::PSSelector;
using PSSamplerSelector = GSHWDrawConfig::SamplerSelector;
using OMDepthStencilSelector = GSHWDrawConfig::DepthStencilSelector;
@ -150,8 +149,12 @@ private:
wil::com_ptr_nothrow<ID3D11Buffer> m_vb;
wil::com_ptr_nothrow<ID3D11Buffer> m_ib;
wil::com_ptr_nothrow<ID3D11Buffer> m_expand_vb;
wil::com_ptr_nothrow<ID3D11Buffer> m_expand_ib;
wil::com_ptr_nothrow<ID3D11ShaderResourceView> m_expand_vb_srv;
u32 m_vb_pos = 0; // bytes
u32 m_ib_pos = 0; // indices/sizeof(u32)
u32 m_structured_vb_pos = 0; // bytes
int m_d3d_texsize = 0;
bool m_allow_tearing_supported = false;
@ -162,10 +165,9 @@ private:
{
ID3D11InputLayout* layout;
D3D11_PRIMITIVE_TOPOLOGY topology;
ID3D11Buffer* index_buffer;
ID3D11VertexShader* vs;
ID3D11Buffer* vs_cb;
ID3D11GeometryShader* gs;
ID3D11Buffer* gs_cb;
std::array<ID3D11ShaderResourceView*, MAX_TEXTURES> ps_sr_views;
ID3D11PixelShader* ps;
ID3D11Buffer* ps_cb;
@ -339,16 +341,17 @@ public:
void* IAMapVertexBuffer(u32 stride, u32 count);
void IAUnmapVertexBuffer(u32 stride, u32 count);
bool IASetVertexBuffer(const void* vertex, u32 stride, u32 count);
bool IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count);
u32* IAMapIndexBuffer(u32 count);
void IAUnmapIndexBuffer(u32 count);
bool IASetIndexBuffer(const void* index, u32 count);
void IASetIndexBuffer(ID3D11Buffer* buffer);
void IASetInputLayout(ID3D11InputLayout* layout);
void IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology);
void VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb);
void GSSetShader(ID3D11GeometryShader* gs, ID3D11Buffer* gs_cb = nullptr);
void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1);
void PSSetShaderResource(int i, GSTexture* sr);
@ -364,7 +367,6 @@ public:
bool CreateTextureFX();
void SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* cb);
void SetupGS(GSSelector sel);
void SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix);

View File

@ -53,8 +53,6 @@ bool GSDevice11::CreateTextureFX()
SetupVS(sel, &cb);
SetupGS(GSSelector(1));
//
return true;
@ -68,11 +66,13 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer*
{
ShaderMacro sm(m_shader_cache.GetFeatureLevel());
sm.AddMacro("VERTEX_SHADER", 1);
sm.AddMacro("VS_TME", sel.tme);
sm.AddMacro("VS_FST", sel.fst);
sm.AddMacro("VS_IIP", sel.iip);
sm.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));
D3D11_INPUT_ELEMENT_DESC layout[] =
static constexpr const D3D11_INPUT_ELEMENT_DESC layout[] =
{
{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0},
@ -84,8 +84,16 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer*
};
GSVertexShader11 vs;
m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(),
vs.vs.put(), vs.il.put(), layout, std::size(layout), m_tfx_source, sm.GetPtr(), "vs_main");
if (sel.expand == GSHWDrawConfig::VSExpand::None)
{
m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), vs.vs.put(), vs.il.put(), layout,
std::size(layout), m_tfx_source, sm.GetPtr(), "vs_main");
}
else
{
vs.vs = m_shader_cache.GetVertexShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "vs_main_expand");
}
i = m_vs.try_emplace(sel.key, std::move(vs)).first;
}
@ -99,37 +107,6 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer*
IASetInputLayout(i->second.il.get());
}
/// Selects the geometry shader for `sel` and binds it together with the vertex
/// constant buffer (m_vs_cb). Shaders are compiled on first use with the "gs_main"
/// entry point and cached in m_gs, keyed by sel.key.
void GSDevice11::SetupGS(GSSelector sel)
{
	// Stays null when sel.expand is clear, in which case a null shader is passed on.
	// The geometry shader is disabled if sprite conversion is done on the CPU (sel.cpu_sprite).
	wil::com_ptr_nothrow<ID3D11GeometryShader> shader;

	if (sel.expand)
	{
		const auto cached = std::as_const(m_gs).find(sel.key);
		if (cached == m_gs.end())
		{
			// Not compiled yet: build the variant described by the selector bits.
			ShaderMacro macros(m_shader_cache.GetFeatureLevel());
			macros.AddMacro("GS_IIP", sel.iip);
			macros.AddMacro("GS_PRIM", static_cast<int>(sel.topology));
			macros.AddMacro("GS_EXPAND", sel.expand);
			macros.AddMacro("GS_FORWARD_PRIMID", sel.forward_primid);

			shader = m_shader_cache.GetGeometryShader(m_dev.get(), m_tfx_source, macros.GetPtr(), "gs_main");
			m_gs[sel.key] = shader;
		}
		else
		{
			shader = cached->second;
		}
	}

	GSSetShader(shader.get(), m_vs_cb.get());
}
void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel)
{
auto i = std::as_const(m_ps).find(sel);
@ -138,6 +115,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
{
ShaderMacro sm(m_shader_cache.GetFeatureLevel());
sm.AddMacro("PIXEL_SHADER", 1);
sm.AddMacro("PS_FST", sel.fst);
sm.AddMacro("PS_WMS", sel.wms);
sm.AddMacro("PS_WMT", sel.wmt);

View File

@ -602,7 +602,6 @@ bool GSDevice12::CheckFeatures()
m_features.texture_barrier = false;
m_features.broken_point_sampler = isAMD;
m_features.geometry_shader = true;
m_features.primitive_id = true;
m_features.prefer_new_textures = true;
m_features.provoking_vertex_last = false;
@ -613,6 +612,7 @@ bool GSDevice12::CheckFeatures()
m_features.clip_control = true;
m_features.stencil_buffer = true;
m_features.test_and_sample_depth = false;
m_features.vs_expand = true;
m_features.dxt_textures = g_d3d12_context->SupportsTextureFormat(DXGI_FORMAT_BC1_UNORM) &&
g_d3d12_context->SupportsTextureFormat(DXGI_FORMAT_BC2_UNORM) &&
@ -1729,6 +1729,13 @@ bool GSDevice12::CreateBuffers()
return false;
}
if (!g_d3d12_context->AllocatePreinitializedGPUBuffer(EXPAND_BUFFER_SIZE, &m_expand_index_buffer,
&m_expand_index_buffer_allocation, &GSDevice::GenerateExpansionIndexBuffer))
{
Host::ReportErrorAsync("GS", "Failed to allocate expansion index buffer");
return false;
}
return true;
}
@ -1753,6 +1760,7 @@ bool GSDevice12::CreateRootSignatures()
rsb.SetInputAssemblerFlag();
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddSRVParameter(0, D3D12_SHADER_VISIBILITY_VERTEX);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL);
@ -2096,7 +2104,6 @@ void GSDevice12::DestroyResources()
g_d3d12_context->DeferObjectDestruction(it.second.get());
m_tfx_pipelines.clear();
m_tfx_pixel_shaders.clear();
m_tfx_geometry_shaders.clear();
m_tfx_vertex_shaders.clear();
m_interlace = {};
m_merge = {};
@ -2119,6 +2126,8 @@ void GSDevice12::DestroyResources()
g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetSamplerHeapManager(), &m_point_sampler_cpu);
g_d3d12_context->InvalidateSamplerGroups();
m_expand_index_buffer.reset();
m_expand_index_buffer_allocation.reset();
m_pixel_constant_buffer.Destroy(false);
m_vertex_constant_buffer.Destroy(false);
m_index_stream_buffer.Destroy(false);
@ -2139,32 +2148,18 @@ const ID3DBlob* GSDevice12::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
return it->second.get();
ShaderMacro sm(m_shader_cache.GetFeatureLevel());
sm.AddMacro("VERTEX_SHADER", 1);
sm.AddMacro("VS_TME", sel.tme);
sm.AddMacro("VS_FST", sel.fst);
sm.AddMacro("VS_IIP", sel.iip);
sm.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));
ComPtr<ID3DBlob> vs(m_shader_cache.GetVertexShader(m_tfx_source, sm.GetPtr(), "vs_main"));
const char* entry_point = (sel.expand != GSHWDrawConfig::VSExpand::None) ? "vs_main_expand" : "vs_main";
ComPtr<ID3DBlob> vs(m_shader_cache.GetVertexShader(m_tfx_source, sm.GetPtr(), entry_point));
it = m_tfx_vertex_shaders.emplace(sel.key, std::move(vs)).first;
return it->second.get();
}
/// Returns the compiled geometry shader blob for `sel`, compiling it with the
/// "gs_main" entry point on first request and caching the result (whatever the
/// shader cache returned) in m_tfx_geometry_shaders under sel.key.
const ID3DBlob* GSDevice12::GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel)
{
	const auto cached = m_tfx_geometry_shaders.find(sel.key);
	if (cached != m_tfx_geometry_shaders.end())
		return cached->second.get();

	// Translate the selector bits into preprocessor defines for the TFX source.
	ShaderMacro macros(m_shader_cache.GetFeatureLevel());
	macros.AddMacro("GS_IIP", sel.iip);
	macros.AddMacro("GS_PRIM", static_cast<int>(sel.topology));
	macros.AddMacro("GS_EXPAND", sel.expand);
	macros.AddMacro("GS_FORWARD_PRIMID", sel.forward_primid);

	ComPtr<ID3DBlob> blob(m_shader_cache.GetGeometryShader(m_tfx_source, macros.GetPtr(), "gs_main"));
	const auto inserted = m_tfx_geometry_shaders.emplace(sel.key, std::move(blob));
	return inserted.first->second.get();
}
const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sel)
{
auto it = m_tfx_pixel_shaders.find(sel);
@ -2172,6 +2167,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
return it->second.get();
ShaderMacro sm(m_shader_cache.GetFeatureLevel());
sm.AddMacro("PIXEL_SHADER", 1);
sm.AddMacro("PS_FST", sel.fst);
sm.AddMacro("PS_WMS", sel.wms);
sm.AddMacro("PS_WMT", sel.wmt);
@ -2246,9 +2242,8 @@ GSDevice12::ComPtr<ID3D12PipelineState> GSDevice12::CreateTFXPipeline(const Pipe
}
const ID3DBlob* vs = GetTFXVertexShader(p.vs);
const ID3DBlob* gs = p.gs.expand ? GetTFXGeometryShader(p.gs) : nullptr;
const ID3DBlob* ps = GetTFXPixelShader(pps);
if (!vs || (p.gs.expand && !gs) || !ps)
if (!vs || !ps)
return nullptr;
// Common state
@ -2271,18 +2266,19 @@ GSDevice12::ComPtr<ID3D12PipelineState> GSDevice12::CreateTFXPipeline(const Pipe
// Shaders
gpb.SetVertexShader(vs);
if (gs)
gpb.SetGeometryShader(gs);
gpb.SetPixelShader(ps);
// IA
gpb.AddVertexAttribute("TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0);
gpb.AddVertexAttribute("COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8);
gpb.AddVertexAttribute("TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12);
gpb.AddVertexAttribute("POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16);
gpb.AddVertexAttribute("POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20);
gpb.AddVertexAttribute("TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24);
gpb.AddVertexAttribute("COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28);
if (p.vs.expand == GSHWDrawConfig::VSExpand::None)
{
gpb.AddVertexAttribute("TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0);
gpb.AddVertexAttribute("COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8);
gpb.AddVertexAttribute("TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12);
gpb.AddVertexAttribute("POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16);
gpb.AddVertexAttribute("POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20);
gpb.AddVertexAttribute("TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24);
gpb.AddVertexAttribute("COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28);
}
// DepthStencil
if (p.ds)
@ -2337,7 +2333,7 @@ GSDevice12::ComPtr<ID3D12PipelineState> GSDevice12::CreateTFXPipeline(const Pipe
if (pipeline)
{
D3D12::SetObjectNameFormatted(
pipeline.get(), "TFX Pipeline %08X/%08X/%" PRIX64 "%08X", p.vs.key, p.gs.key, p.ps.key_hi, p.ps.key_lo);
pipeline.get(), "TFX Pipeline %08X/%" PRIX64 "%08X", p.vs.key, p.ps.key_hi, p.ps.key_lo);
}
return pipeline;
@ -2941,6 +2937,11 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
cmdlist->SetGraphicsRootConstantBufferView(TFX_ROOT_SIGNATURE_PARAM_VS_CBV, m_tfx_constant_buffers[0]);
if (flags & DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING)
cmdlist->SetGraphicsRootConstantBufferView(TFX_ROOT_SIGNATURE_PARAM_PS_CBV, m_tfx_constant_buffers[1]);
if (flags & DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING)
{
cmdlist->SetGraphicsRootShaderResourceView(TFX_ROOT_SIGNATURE_PARAM_VS_SRV,
m_vertex_stream_buffer.GetGPUPointer() + m_vertex.start * sizeof(GSVertex));
}
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE)
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES, m_tfx_textures_handle_gpu);
if (flags & DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE)
@ -3070,8 +3071,7 @@ GSTexture12* GSDevice12::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config, Pipe
// image is now filled with either -1 or INT_MAX, so now we can do the prepass
SetPrimitiveTopology(s_primitive_topology_mapping[static_cast<u8>(config.topology)]);
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
IASetIndexBuffer(config.indices, config.nindices);
UploadHWDrawVerticesAndIndices(config);
// cut down the configuration for the prepass, we don't need blending or any feedback loop
PipelineSelector init_pipe(m_pipeline_selector);
@ -3252,10 +3252,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
// VB/IB upload, if we did DATE setup and it's not HDR this has already been done
SetPrimitiveTopology(s_primitive_topology_mapping[static_cast<u8>(config.topology)]);
if (!date_image || hdr_rt)
{
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
IASetIndexBuffer(config.indices, config.nindices);
}
UploadHWDrawVerticesAndIndices(config);
// now we can do the actual draw
if (BindDrawPipeline(pipe))
@ -3333,7 +3330,6 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config)
{
m_pipeline_selector.vs.key = config.vs.key;
m_pipeline_selector.gs.key = config.gs.key;
m_pipeline_selector.ps.key_hi = config.ps.key_hi;
m_pipeline_selector.ps.key_lo = config.ps.key_lo;
m_pipeline_selector.dss.key = config.depth.key;
@ -3344,3 +3340,23 @@ void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config)
m_pipeline_selector.rt = config.rt != nullptr;
m_pipeline_selector.ds = config.ds != nullptr;
}
/// Uploads the vertices for a hardware draw and selects its index source.
/// Vertices are always uploaded with IASetVertexBuffer. When the vertex shader
/// expands primitives, the VS vertex-buffer binding is additionally marked dirty.
/// Indices come either from the pre-generated expansion index buffer or from
/// config.indices, depending on config.vs.UseExpandIndexBuffer().
void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
{
	IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);

	// For expanding shaders the SRV in the root signature is updated directly,
	// instead of passing a base vertex through a uniform.
	const bool vs_expands = (config.vs.expand != GSHWDrawConfig::VSExpand::None);
	if (vs_expands)
		m_dirty_flags |= DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING;

	if (!config.vs.UseExpandIndexBuffer())
	{
		// Regular path: upload the draw's own indices.
		IASetIndexBuffer(config.indices, config.nindices);
		return;
	}

	// Expansion path: read from the start of the shared expansion index buffer.
	m_index.start = 0;
	m_index.count = config.nindices;
	SetIndexBuffer(m_expand_index_buffer->GetGPUVirtualAddress(), EXPAND_BUFFER_SIZE, DXGI_FORMAT_R32_UINT);
}

View File

@ -52,7 +52,6 @@ public:
};
GSHWDrawConfig::VSSelector vs;
GSHWDrawConfig::GSSelector gs;
GSHWDrawConfig::DepthStencilSelector dss;
GSHWDrawConfig::ColorMaskSelector cms;
GSHWDrawConfig::BlendState bs;
@ -69,7 +68,7 @@ public:
std::size_t operator()(const PipelineSelector& e) const noexcept
{
std::size_t hash = 0;
HashCombine(hash, e.vs.key, e.gs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key);
HashCombine(hash, e.vs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key);
return hash;
}
};
@ -124,9 +123,10 @@ public:
TFX_ROOT_SIGNATURE_PARAM_VS_CBV = 0,
TFX_ROOT_SIGNATURE_PARAM_PS_CBV = 1,
TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 2,
TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 3,
TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 4,
TFX_ROOT_SIGNATURE_PARAM_VS_SRV = 2,
TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 3,
TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 4,
TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 5,
UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS = 0,
UTILITY_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 1,
@ -154,6 +154,8 @@ private:
D3D12::StreamBuffer m_index_stream_buffer;
D3D12::StreamBuffer m_vertex_constant_buffer;
D3D12::StreamBuffer m_pixel_constant_buffer;
ComPtr<ID3D12Resource> m_expand_index_buffer;
ComPtr<D3D12MA::Allocation> m_expand_index_buffer_allocation;
D3D12::DescriptorHandle m_point_sampler_cpu;
D3D12::DescriptorHandle m_linear_sampler_cpu;
@ -173,7 +175,6 @@ private:
ComPtr<ID3D12PipelineState> m_imgui_pipeline;
std::unordered_map<u32, ComPtr<ID3DBlob>> m_tfx_vertex_shaders;
std::unordered_map<u32, ComPtr<ID3DBlob>> m_tfx_geometry_shaders;
std::unordered_map<GSHWDrawConfig::PSSelector, ComPtr<ID3DBlob>, GSHWDrawConfig::PSSelectorHash> m_tfx_pixel_shaders;
std::unordered_map<PipelineSelector, ComPtr<ID3D12PipelineState>, PipelineSelectorHash> m_tfx_pipelines;
@ -209,7 +210,6 @@ private:
bool GetTextureGroupDescriptors(D3D12::DescriptorHandle* gpu_handle, const D3D12::DescriptorHandle* cpu_handles, u32 count);
const ID3DBlob* GetTFXVertexShader(GSHWDrawConfig::VSSelector sel);
const ID3DBlob* GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel);
const ID3DBlob* GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sel);
ComPtr<ID3D12PipelineState> CreateTFXPipeline(const PipelineSelector& p);
const ID3D12PipelineState* GetTFXPipeline(const PipelineSelector& p);
@ -317,6 +317,7 @@ public:
void RenderHW(GSHWDrawConfig& config) override;
void UpdateHWPipelineSelector(GSHWDrawConfig& config);
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
public:
/// Ends any render pass, executes the command buffer, and invalidates cached state.
@ -377,25 +378,27 @@ private:
DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 5),
DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 6),
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 7),
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 8),
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 9),
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 7),
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 8),
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 9),
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 10),
DIRTY_FLAG_VERTEX_BUFFER = (1 << 10),
DIRTY_FLAG_INDEX_BUFFER = (1 << 11),
DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 12),
DIRTY_FLAG_VIEWPORT = (1 << 13),
DIRTY_FLAG_SCISSOR = (1 << 14),
DIRTY_FLAG_RENDER_TARGET = (1 << 15),
DIRTY_FLAG_PIPELINE = (1 << 16),
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 17),
DIRTY_FLAG_STENCIL_REF = (1 << 18),
DIRTY_FLAG_VERTEX_BUFFER = (1 << 11),
DIRTY_FLAG_INDEX_BUFFER = (1 << 12),
DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 13),
DIRTY_FLAG_VIEWPORT = (1 << 14),
DIRTY_FLAG_SCISSOR = (1 << 15),
DIRTY_FLAG_RENDER_TARGET = (1 << 16),
DIRTY_FLAG_PIPELINE = (1 << 17),
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 18),
DIRTY_FLAG_STENCIL_REF = (1 << 19),
DIRTY_BASE_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 |
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE |
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 |
DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PRIMITIVE_TOPOLOGY |
DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET |
DIRTY_FLAG_PIPELINE | DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF,
DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_PIPELINE |
DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF,
DIRTY_TFX_STATE = DIRTY_BASE_STATE | DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS | DIRTY_FLAG_TFX_RT_TEXTURES,
DIRTY_UTILITY_STATE = DIRTY_BASE_STATE,

View File

@ -249,6 +249,7 @@ void GSRendererHW::Lines2Sprites()
}
// assume vertices are tightly packed and sequentially indexed (it should be the case)
const bool predivide_q = PRIM->TME && !PRIM->FST && m_vt.m_accurate_stq;
if (m_vertex.next >= 2)
{
@ -275,7 +276,7 @@ void GSRendererHW::Lines2Sprites()
v0.XYZ.Z = v1.XYZ.Z;
v0.FOG = v1.FOG;
if (PRIM->TME && !PRIM->FST)
if (predivide_q)
{
const GSVector4 st0 = GSVector4::loadl(&v0.ST.U64);
const GSVector4 st1 = GSVector4::loadl(&v1.ST.U64);
@ -319,65 +320,28 @@ void GSRendererHW::Lines2Sprites()
}
}
template <GSHWDrawConfig::VSExpand Expand>
void GSRendererHW::ExpandIndices()
void GSRendererHW::ExpandLineIndices()
{
u32 process_count = (m_index.tail + 3) / 4 * 4;
if (Expand == GSHWDrawConfig::VSExpand::Point)
{
// Make sure we have space for writing off the end slightly
while (process_count > m_vertex.maxcount)
GrowVertexBuffer();
}
u32 expansion_factor = Expand == GSHWDrawConfig::VSExpand::Point ? 6 : 3;
const u32 process_count = (m_index.tail + 3) / 4 * 4;
const u32 expansion_factor = 3;
m_index.tail *= expansion_factor;
GSVector4i* end = reinterpret_cast<GSVector4i*>(m_index.buff);
GSVector4i* read = reinterpret_cast<GSVector4i*>(m_index.buff + process_count);
GSVector4i* write = reinterpret_cast<GSVector4i*>(m_index.buff + process_count * expansion_factor);
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
while (read > end)
{
read -= 1;
write -= expansion_factor;
switch (Expand)
{
case GSHWDrawConfig::VSExpand::None:
break;
case GSHWDrawConfig::VSExpand::Point:
{
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
const GSVector4i in = read->sll32(2);
write[0] = in.xxxx() | low0;
write[1] = in.xxyy() | low1;
write[2] = in.yyyy() | low2;
write[3] = in.zzzz() | low0;
write[4] = in.zzww() | low1;
write[5] = in.wwww() | low2;
break;
}
case GSHWDrawConfig::VSExpand::Line:
{
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
const GSVector4i in = read->sll32(2);
write[0] = in.xxyx() | low0;
write[1] = in.yyzz() | low1;
write[2] = in.wzww() | low2;
break;
}
case GSHWDrawConfig::VSExpand::Sprite:
{
constexpr GSVector4i low = GSVector4i::cxpr(0, 1, 0, 1);
const GSVector4i in = read->sll32(1);
write[0] = in.xxyx() | low;
write[1] = in.yyzz() | low;
write[2] = in.wzww() | low;
break;
}
}
const GSVector4i in = read->sll32(2);
write[0] = in.xxyx() | low0;
write[1] = in.yyzz() | low1;
write[2] = in.wzww() | low2;
}
}
@ -2453,110 +2417,90 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy)
switch (m_vt.m_primclass)
{
case GS_POINT_CLASS:
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Point;
m_conf.topology = GSHWDrawConfig::Topology::Point;
m_conf.indices_per_prim = 1;
if (unscale_pt_ln)
{
if (features.point_expand)
m_conf.topology = GSHWDrawConfig::Topology::Point;
m_conf.indices_per_prim = 1;
if (unscale_pt_ln)
{
if (features.point_expand)
{
m_conf.vs.point_size = true;
m_conf.cb_vs.point_size = GSVector2(target_scale);
}
else if (features.vs_expand)
{
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Point;
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.verts = m_vertex.buff;
m_conf.nverts = m_vertex.next;
m_conf.nindices = m_index.tail * 6;
m_conf.indices_per_prim = 6;
return;
}
}
else
{
// Vulkan/GL still need to set point size.
m_conf.cb_vs.point_size = target_scale;
// M1 requires point size output on *all* points.
m_conf.vs.point_size = true;
m_conf.cb_vs.point_size = GSVector2(target_scale);
}
else if (features.geometry_shader)
{
m_conf.gs.expand = true;
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
}
else if (features.vs_expand)
{
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Point;
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.indices_per_prim = 6;
ExpandIndices<GSHWDrawConfig::VSExpand::Point>();
}
}
else
{
// Vulkan/GL still need to set point size.
m_conf.cb_vs.point_size = target_scale;
}
break;
case GS_LINE_CLASS:
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Line;
m_conf.topology = GSHWDrawConfig::Topology::Line;
m_conf.indices_per_prim = 2;
if (unscale_pt_ln)
{
if (features.line_expand)
m_conf.topology = GSHWDrawConfig::Topology::Line;
m_conf.indices_per_prim = 2;
if (unscale_pt_ln)
{
m_conf.line_expand = true;
}
else if (features.geometry_shader)
{
m_conf.gs.expand = true;
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
}
else if (features.vs_expand)
{
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Line;
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.indices_per_prim = 6;
ExpandIndices<GSHWDrawConfig::VSExpand::Line>();
if (features.line_expand)
{
m_conf.line_expand = true;
}
else if (features.vs_expand)
{
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Line;
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.indices_per_prim = 6;
ExpandLineIndices();
}
}
}
break;
case GS_SPRITE_CLASS:
// Heuristics: trade-off
// Lines: GPU conversion => ofc, more GPU. And also more CPU due to extra shader validation stage.
// Triangles: CPU conversion => ofc, more CPU ;) more bandwidth (72 bytes / sprite)
//
// Note: several OpenGL operations issue draw calls under the hood, such as texture uploads. So even if
// you do 10 consecutive draws with the geometry shader, you will still pay extra validation if new
// textures are uploaded. (game Shadow Hearts)
//
// Note2: Due to MultiThreaded driver, Nvidia suffers less of the previous issue. Still it isn't free
// Shadow Heart is 90 fps (gs) vs 113 fps (no gs)
//
// Note3: Some GPUs (Happens on GT 750m, not on Intel 5200) don't properly divide by large floats (e.g. FLT_MAX/FLT_MAX == 0)
// Lines2Sprites predivides by Q, avoiding this issue, so always use it if m_vt.m_accurate_stq
// If the draw call contains few primitives, the geometry shader gain will be rather small versus
// the extra validation cost of the extra stage.
//
// Note: keep Geometry Shader in the replayer to ease debug.
if (g_gs_device->Features().geometry_shader && !m_vt.m_accurate_stq && m_vertex.next > 32) // <=> 16 sprites (based on Shadow Hearts)
{
m_conf.gs.expand = true;
// Need to pre-divide ST by Q if Q is very large, to avoid precision issues on some GPUs.
// May as well just expand the whole thing out with the CPU path in such a case.
if (features.vs_expand && !m_vt.m_accurate_stq)
{
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Sprite;
m_conf.verts = m_vertex.buff;
m_conf.nverts = m_vertex.next;
m_conf.nindices = m_index.tail * 3;
m_conf.indices_per_prim = 6;
return;
}
else
{
Lines2Sprites();
m_conf.topology = GSHWDrawConfig::Topology::Line;
m_conf.indices_per_prim = 2;
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.indices_per_prim = 6;
}
}
else if (features.vs_expand && !m_vt.m_accurate_stq)
{
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Sprite;
m_conf.indices_per_prim = 6;
ExpandIndices<GSHWDrawConfig::VSExpand::Sprite>();
}
else
{
Lines2Sprites();
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.indices_per_prim = 6;
}
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Sprite;
break;
case GS_TRIANGLE_CLASS:
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Triangle;
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.indices_per_prim = 3;
{
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.indices_per_prim = 3;
}
break;
default:
@ -4443,7 +4387,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
// GS_SPRITE_CLASS are already flat (either by CPU or the GS)
m_conf.ps.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 0 : PRIM->IIP;
m_conf.gs.iip = m_conf.ps.iip;
m_conf.vs.iip = m_conf.ps.iip;
if (DATE_BARRIER)
@ -4463,7 +4406,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
else if (DATE_PRIMID)
{
m_conf.ps.date = 1 + m_cached_ctx.TEST.DATM;
m_conf.gs.forward_primid = 1;
}
else if (DATE)
{

View File

@ -168,7 +168,7 @@ public:
float GetUpscaleMultiplier() override;
void Lines2Sprites();
bool VerifyIndices();
template <GSHWDrawConfig::VSExpand Expand> void ExpandIndices();
void ExpandLineIndices();
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba);
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale);

View File

@ -2609,7 +2609,6 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx
config.vs.tme = true;
config.vs.iip = true;
config.vs.fst = true;
config.gs.key = 0;
config.ps.key_lo = 0;
config.ps.key_hi = 0;
config.ps.read_ba = read_ba;

View File

@ -281,7 +281,7 @@ public:
MRCOwned<id<MTLDepthStencilState>> m_dss_stencil_write;
MRCOwned<id<MTLDepthStencilState>> m_dss_hw[1 << 5];
MRCOwned<id<MTLBuffer>> m_texture_download_buf;
MRCOwned<id<MTLBuffer>> m_expand_index_buffer;
UploadBuffer m_texture_upload_buf;
BufferPair m_vertex_upload_buf;

View File

@ -748,6 +748,21 @@ void GSDeviceMTL::DetachSurfaceOnMainThread()
m_layer = nullptr;
}
// Creates a private-storage MTLBuffer of `length` bytes with initial contents.
// Metal won't let you use newBufferWithBytes for private buffers, so the data is
// written into a temporary shared-storage buffer and copied across with a blit.
//   dev     - device both buffers are allocated from.
//   cb      - command buffer the blit copy is encoded on; it is not committed here.
//   options - extra resource options, OR'd with MTLResourceStorageModePrivate.
//   length  - size in bytes of the staging buffer, the final buffer and the copy.
//   fill    - callback handed the staging buffer's contents pointer to write the data.
// Returns the private buffer.
static MRCOwned<id<MTLBuffer>> CreatePrivateBufferWithContent(
	id<MTLDevice> dev, id<MTLCommandBuffer> cb,
	MTLResourceOptions options, NSUInteger length,
	std::function<void(void*)> fill)
{
	const MTLResourceOptions private_options = options | MTLResourceStorageModePrivate;

	MRCOwned<id<MTLBuffer>> staging = MRCTransfer([dev newBufferWithLength:length options:MTLResourceStorageModeShared]);
	MRCOwned<id<MTLBuffer>> result = MRCTransfer([dev newBufferWithLength:length options:private_options]);

	// Let the caller populate the CPU-visible staging memory.
	void* const staging_ptr = [staging contents];
	fill(staging_ptr);

	// Encode the staging -> private copy.
	id<MTLBlitCommandEncoder> encoder = [cb blitCommandEncoder];
	[encoder copyFromBuffer:staging sourceOffset:0 toBuffer:result destinationOffset:0 size:length];
	[encoder endEncoding];

	return result;
}
bool GSDeviceMTL::Create(const WindowInfo& wi, VsyncMode vsync)
{ @autoreleasepool {
if (!GSDevice::Create(wi, vsync))
@ -808,7 +823,6 @@ bool GSDeviceMTL::Create(const WindowInfo& wi, VsyncMode vsync)
MTLPixelFormat layer_px_fmt = [m_layer pixelFormat];
m_features.broken_point_sampler = [[m_dev.dev name] containsString:@"AMD"];
m_features.geometry_shader = false;
m_features.vs_expand = true;
m_features.primitive_id = m_dev.features.primid;
m_features.texture_barrier = true;
@ -852,6 +866,9 @@ bool GSDeviceMTL::Create(const WindowInfo& wi, VsyncMode vsync)
m_cas_pipeline[sharpen_only] = MakeComputePipeline(LoadShader(shader), sharpen_only ? @"CAS Sharpen" : @"CAS Upscale");
}
m_expand_index_buffer = CreatePrivateBufferWithContent(m_dev.dev, initCommands, MTLResourceHazardTrackingModeUntracked, EXPAND_BUFFER_SIZE, GenerateExpansionIndexBuffer);
[m_expand_index_buffer setLabel:@"Point/Sprite Expand Indices"];
m_hw_vertex = MRCTransfer([MTLVertexDescriptor new]);
[[[m_hw_vertex layouts] objectAtIndexedSubscript:GSMTLBufferIndexHWVertices] setStride:sizeof(GSVertex)];
applyAttribute(m_hw_vertex, GSMTLAttributeIndexST, MTLVertexFormatFloat2, offsetof(GSVertex, ST), GSMTLBufferIndexHWVertices);
@ -1987,17 +2004,27 @@ void GSDeviceMTL::MREInitHWDraw(GSHWDrawConfig& config, const Map& verts)
void GSDeviceMTL::RenderHW(GSHWDrawConfig& config)
{ @autoreleasepool {
if (config.topology == GSHWDrawConfig::Topology::Point)
config.vs.point_size = 1; // M1 requires point size output on *all* points
if (config.tex && config.ds == config.tex)
EndRenderPass(); // Barrier
size_t vertsize = config.nverts * sizeof(*config.verts);
size_t idxsize = config.nindices * sizeof(*config.indices);
size_t idxsize = config.vs.UseExpandIndexBuffer() ? 0 : (config.nindices * sizeof(*config.indices));
Map allocation = Allocate(m_vertex_upload_buf, vertsize + idxsize);
memcpy(allocation.cpu_buffer, config.verts, vertsize);
memcpy(static_cast<u8*>(allocation.cpu_buffer) + vertsize, config.indices, idxsize);
id<MTLBuffer> index_buffer;
size_t index_buffer_offset;
if (!config.vs.UseExpandIndexBuffer())
{
memcpy(static_cast<u8*>(allocation.cpu_buffer) + vertsize, config.indices, idxsize);
index_buffer = allocation.gpu_buffer;
index_buffer_offset = allocation.gpu_offset + vertsize;
}
else
{
index_buffer = m_expand_index_buffer;
index_buffer_offset = 0;
}
FlushClears(config.tex);
FlushClears(config.pal);
@ -2028,7 +2055,7 @@ void GSDeviceMTL::RenderHW(GSHWDrawConfig& config)
ASSERT(config.require_full_barrier == false && config.drawlist == nullptr);
MRESetHWPipelineState(config.vs, config.ps, {}, {});
MREInitHWDraw(config, allocation);
SendHWDraw(config, m_current_render.encoder, allocation.gpu_buffer, allocation.gpu_offset + vertsize);
SendHWDraw(config, m_current_render.encoder, index_buffer, index_buffer_offset);
config.ps.date = 3;
break;
}
@ -2084,7 +2111,7 @@ void GSDeviceMTL::RenderHW(GSHWDrawConfig& config)
MRESetHWPipelineState(config.vs, config.ps, config.blend, config.colormask);
MRESetDSS(config.depth);
SendHWDraw(config, mtlenc, allocation.gpu_buffer, allocation.gpu_offset + vertsize);
SendHWDraw(config, mtlenc, index_buffer, index_buffer_offset);
if (config.alpha_second_pass.enable)
{
@ -2095,7 +2122,7 @@ void GSDeviceMTL::RenderHW(GSHWDrawConfig& config)
}
MRESetHWPipelineState(config.vs, config.alpha_second_pass.ps, config.blend, config.alpha_second_pass.colormask);
MRESetDSS(config.alpha_second_pass.depth);
SendHWDraw(config, mtlenc, allocation.gpu_buffer, allocation.gpu_offset + vertsize);
SendHWDraw(config, mtlenc, index_buffer, index_buffer_offset);
}
if (hdr_rt)
@ -2141,25 +2168,34 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
g_perfmon.Put(GSPerfMon::DrawCalls, config.drawlist->size());
g_perfmon.Put(GSPerfMon::Barriers, config.drawlist->size());
for (size_t count = 0, p = 0, n = 0; n < config.drawlist->size(); p += count, ++n)
const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
for (u32 n = 0, p = 0; n < draw_list_size; n++)
{
count = (*config.drawlist)[n] * config.indices_per_prim;
const u32 count = (*config.drawlist)[n] * indices_per_prim;
textureBarrier(enc);
[enc drawIndexedPrimitives:topology
indexCount:count
indexType:MTLIndexTypeUInt32
indexBuffer:buffer
indexBufferOffset:off + p * sizeof(*config.indices)];
p += count;
}
[enc popDebugGroup];
return;
}
else if (config.require_full_barrier)
{
const u32 ndraws = config.nindices / config.indices_per_prim;
const u32 indices_per_prim = config.indices_per_prim;
const u32 ndraws = config.nindices / indices_per_prim;
g_perfmon.Put(GSPerfMon::DrawCalls, ndraws);
g_perfmon.Put(GSPerfMon::Barriers, ndraws);
[enc pushDebugGroup:[NSString stringWithFormat:@"Full barrier split draw (%d prims)", ndraws]];
for (size_t p = 0; p < config.nindices; p += config.indices_per_prim)
for (u32 p = 0; p < config.nindices; p += indices_per_prim)
{
textureBarrier(enc);
[enc drawIndexedPrimitives:topology
@ -2168,30 +2204,24 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
indexBuffer:buffer
indexBufferOffset:off + p * sizeof(*config.indices)];
}
[enc popDebugGroup];
return;
}
else if (config.require_one_barrier)
{
// One barrier needed
textureBarrier(enc);
[enc drawIndexedPrimitives:topology
indexCount:config.nindices
indexType:MTLIndexTypeUInt32
indexBuffer:buffer
indexBufferOffset:off];
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
g_perfmon.Put(GSPerfMon::Barriers, 1);
}
else
{
// No barriers needed
[enc drawIndexedPrimitives:topology
indexCount:config.nindices
indexType:MTLIndexTypeUInt32
indexBuffer:buffer
indexBufferOffset:off];
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
}
[enc drawIndexedPrimitives:topology
indexCount:config.nindices
indexType:MTLIndexTypeUInt32
indexBuffer:buffer
indexBufferOffset:off];
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
}
// tbh I'm not a fan of the current debug groups

View File

@ -116,15 +116,6 @@ namespace GLLoader
bool buggy_pbo = false;
bool disable_download_pbo = false;
bool is_gles = false;
bool has_dual_source_blend = false;
bool has_clip_control = true;
bool found_framebuffer_fetch = false;
bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default
// DX11 GPU
bool found_GL_ARB_gpu_shader5 = false; // Require IvyBridge
bool found_GL_ARB_texture_barrier = false;
static bool check_gl_version()
{
const char* vendor = (const char*)glGetString(GL_VENDOR);
@ -135,13 +126,6 @@ namespace GLLoader
else if (strstr(vendor, "Intel"))
vendor_id_intel = true;
if (GSConfig.OverrideGeometryShaders != -1)
{
found_geometry_shader = GSConfig.OverrideGeometryShaders != 0 &&
(GLAD_GL_VERSION_3_2 || GL_ARB_geometry_shader4 || GSConfig.OverrideGeometryShaders == 1);
Console.Warning("Overriding geometry shaders detection to %s", found_geometry_shader ? "true" : "false");
}
GLint major_gl = 0;
GLint minor_gl = 0;
glGetIntegerv(GL_MAJOR_VERSION, &major_gl);
@ -157,32 +141,13 @@ namespace GLLoader
static bool check_gl_supported_extension()
{
if (GLAD_GL_VERSION_3_3 && !GLAD_GL_ARB_shading_language_420pack)
if (!GLAD_GL_ARB_shading_language_420pack)
{
Host::ReportFormattedErrorAsync("GS",
"GL_ARB_shading_language_420pack is not supported, this is required for the OpenGL renderer.");
return false;
}
// GLES doesn't have ARB_clip_control.
has_clip_control = GLAD_GL_ARB_clip_control;
if (!has_clip_control && !is_gles)
{
Host::AddOSDMessage("GL_ARB_clip_control is not supported, this will cause rendering issues.",
Host::OSD_ERROR_DURATION);
}
found_GL_ARB_gpu_shader5 = GLAD_GL_ARB_gpu_shader5;
found_GL_ARB_texture_barrier = GLAD_GL_ARB_texture_barrier;
has_dual_source_blend = GLAD_GL_VERSION_3_2 || GLAD_GL_ARB_blend_func_extended;
found_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
if (found_framebuffer_fetch && GSConfig.DisableFramebufferFetch)
{
Console.Warning("Framebuffer fetch was found but is disabled. This will reduce performance.");
found_framebuffer_fetch = false;
}
if (!GLAD_GL_ARB_viewport_array)
{
glScissorIndexed = ReplaceGL::ScissorIndexed;
@ -203,21 +168,6 @@ namespace GLLoader
Emulate_DSA::Init();
}
if (is_gles)
{
has_dual_source_blend = GLAD_GL_EXT_blend_func_extended || GLAD_GL_ARB_blend_func_extended;
if (!has_dual_source_blend && !found_framebuffer_fetch)
{
Host::AddOSDMessage("Both dual source blending and framebuffer fetch are missing, things will be broken.",
Host::OSD_ERROR_DURATION);
}
}
else
{
// Core in GL3.2, so everything supports it.
has_dual_source_blend = true;
}
// Don't use PBOs when we don't have ARB_buffer_storage, orphaning buffers probably ends up worse than just
// using the normal texture update routines and letting the driver take care of it.
buggy_pbo = !GLAD_GL_VERSION_4_4 && !GLAD_GL_ARB_buffer_storage && !GLAD_GL_EXT_buffer_storage;

View File

@ -31,13 +31,4 @@ namespace GLLoader
extern bool vendor_id_intel;
extern bool buggy_pbo;
extern bool disable_download_pbo;
// GL
extern bool is_gles;
extern bool has_clip_control;
extern bool has_dual_source_blend;
extern bool found_framebuffer_fetch;
extern bool found_geometry_shader;
extern bool found_GL_ARB_gpu_shader5;
extern bool found_GL_ARB_texture_barrier;
} // namespace GLLoader

View File

@ -18,6 +18,7 @@
namespace GLState
{
GLuint vao;
GLuint fbo;
GSVector2i viewport;
GSVector4i scissor;
@ -49,6 +50,7 @@ namespace GLState
void Clear()
{
vao = 0;
fbo = 0;
viewport = GSVector2i(1, 1);
scissor = GSVector4i(0, 0, 1, 1);

View File

@ -22,6 +22,7 @@ class GSTextureOGL;
namespace GLState
{
extern GLuint vao; // vertex array object
extern GLuint fbo; // frame buffer object
extern GSVector2i viewport;
extern GSVector4i scissor;

View File

@ -90,7 +90,12 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
if (!GSDevice::Create(wi, vsync))
return false;
m_gl_context = GL::Context::Create(wi);
// We need at least GL3.3.
static constexpr const GL::Context::Version version_list[] = {{GL::Context::Profile::Core, 4, 6},
{GL::Context::Profile::Core, 4, 5}, {GL::Context::Profile::Core, 4, 4}, {GL::Context::Profile::Core, 4, 3},
{GL::Context::Profile::Core, 4, 2}, {GL::Context::Profile::Core, 4, 1}, {GL::Context::Profile::Core, 4, 0},
{GL::Context::Profile::Core, 3, 3}};
m_gl_context = GL::Context::Create(wi, version_list);
if (!m_gl_context)
{
Console.Error("Failed to create any GL context");
@ -104,9 +109,6 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
return false;
}
// Check openGL requirement as soon as possible so we can switch to another
// renderer/device
GLLoader::is_gles = m_gl_context->IsGLES();
if (!GLLoader::check_gl_requirements())
return false;
@ -124,41 +126,57 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
// optional features based on context
m_features.broken_point_sampler = GLLoader::vendor_id_amd;
m_features.geometry_shader = GLLoader::found_geometry_shader;
m_features.primitive_id = true;
m_features.framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch;
if (m_features.framebuffer_fetch && GSConfig.DisableFramebufferFetch)
{
Host::AddOSDMessage("Framebuffer fetch was found but is disabled. This will reduce performance.", Host::OSD_ERROR_DURATION);
m_features.framebuffer_fetch = false;
}
if (GSConfig.OverrideTextureBarriers == 0)
m_features.texture_barrier = GLLoader::found_framebuffer_fetch; // Force Disabled
m_features.texture_barrier = m_features.framebuffer_fetch; // Force Disabled
else if (GSConfig.OverrideTextureBarriers == 1)
m_features.texture_barrier = true; // Force Enabled
else
m_features.texture_barrier = GLLoader::found_framebuffer_fetch || GLLoader::found_GL_ARB_texture_barrier;
m_features.texture_barrier = m_features.framebuffer_fetch || GLAD_GL_ARB_texture_barrier;
if (!m_features.texture_barrier)
{
Host::AddOSDMessage(
"GL_ARB_texture_barrier is not supported, blending will not be accurate.", Host::OSD_ERROR_DURATION);
}
m_features.provoking_vertex_last = true;
m_features.dxt_textures = GLAD_GL_EXT_texture_compression_s3tc;
m_features.bptc_textures = GLAD_GL_VERSION_4_2 || GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_EXT_texture_compression_bptc;
m_features.prefer_new_textures = GLLoader::is_gles;
m_features.framebuffer_fetch = GLLoader::found_framebuffer_fetch;
m_features.dual_source_blend = GLLoader::has_dual_source_blend && !GSConfig.DisableDualSourceBlend;
m_features.clip_control = GLLoader::has_clip_control;
m_features.prefer_new_textures = false;
m_features.dual_source_blend = !GSConfig.DisableDualSourceBlend;
m_features.clip_control = GLAD_GL_ARB_clip_control;
if (!m_features.clip_control)
Host::AddOSDMessage("GL_ARB_clip_control is not supported, this will cause rendering issues.", Host::OSD_ERROR_DURATION);
m_features.stencil_buffer = true;
m_features.test_and_sample_depth = m_features.texture_barrier && !GLLoader::is_gles;
m_features.test_and_sample_depth = m_features.texture_barrier;
if (GLAD_GL_ARB_shader_storage_buffer_object)
{
GLint max_vertex_ssbos = 0;
glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &max_vertex_ssbos);
DevCon.WriteLn("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: %d", max_vertex_ssbos);
m_features.vs_expand = (max_vertex_ssbos > 0);
}
if (!m_features.vs_expand)
Console.Warning("Vertex expansion is not supported. This will reduce performance.");
GLint point_range[2] = {};
glGetIntegerv(GL_ALIASED_POINT_SIZE_RANGE, point_range);
m_features.point_expand = (point_range[0] <= GSConfig.UpscaleMultiplier && point_range[1] >= GSConfig.UpscaleMultiplier);
m_features.line_expand = false;
if (GLLoader::is_gles)
{
GLint line_range[2] = {};
glGetIntegerv(GL_ALIASED_LINE_WIDTH_RANGE, line_range);
m_features.line_expand = (line_range[0] <= static_cast<GLint>(GSConfig.UpscaleMultiplier) && line_range[1] >= static_cast<GLint>(GSConfig.UpscaleMultiplier));
}
else
{
m_features.line_expand = false;
}
DevCon.WriteLn("Using %s for point expansion and %s for line expansion.",
m_features.point_expand ? "hardware" : "geometry shaders", m_features.line_expand ? "hardware" : "geometry shaders");
Console.WriteLn("Using %s for point expansion, %s for line expansion and %s for sprite expansion.",
m_features.point_expand ? "hardware" : (m_features.vs_expand ? "vertex expanding" : "UNSUPPORTED"),
m_features.line_expand ? "hardware" : (m_features.vs_expand ? "vertex expanding" : "UNSUPPORTED"),
m_features.vs_expand ? "vertex expanding" : "CPU");
// because of fbo bindings below...
GLState::Clear();
@ -168,20 +186,12 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
// ****************************************************************
if (GSConfig.UseDebugDevice)
{
if (!GLLoader::is_gles)
{
glDebugMessageCallback(DebugMessageCallback, NULL);
glDebugMessageCallback(DebugMessageCallback, NULL);
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
// Useless info message on Nvidia driver
GLuint ids[] = { 0x20004 };
glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, std::size(ids), ids, false);
}
else if (GLAD_GL_KHR_debug)
{
glDebugMessageCallbackKHR(DebugMessageCallback, NULL);
glDebugMessageControlKHR(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
}
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
// Useless info message on Nvidia driver
static constexpr const GLuint ids[] = { 0x20004 };
glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, std::size(ids), ids, false);
// Uncomment synchronous if you want callstacks which match where the error occurred.
glEnable(GL_DEBUG_OUTPUT);
@ -219,8 +229,8 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
{
GL_PUSH("GSDeviceOGL::Vertex Buffer");
glGenVertexArrays(1, &m_vertex_array_object);
glBindVertexArray(m_vertex_array_object);
glGenVertexArrays(1, &m_vao);
IASetVAO(m_vao);
m_vertex_stream_buffer = GL::StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE);
m_index_stream_buffer = GL::StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE);
@ -233,14 +243,13 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
return false;
}
m_vertex_stream_buffer->Bind();
m_index_stream_buffer->Bind();
// Force UBOs to be uploaded on first use.
std::memset(&m_vs_cb_cache, 0xFF, sizeof(m_vs_cb_cache));
std::memset(&m_ps_cb_cache, 0xFF, sizeof(m_ps_cb_cache));
// rebind because of VAO state
m_vertex_stream_buffer->Bind();
m_index_stream_buffer->Bind();
static_assert(sizeof(GSVertexPT1) == sizeof(GSVertex), "wrong GSVertex size");
for (u32 i = 0; i < 8; i++)
glEnableVertexAttribArray(i);
@ -253,6 +262,29 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
glVertexAttribIPointer(5, 1, GL_UNSIGNED_INT, sizeof(GSVertex), (const GLvoid*)(20));
glVertexAttribIPointer(6, 2, GL_UNSIGNED_SHORT, sizeof(GSVertex), (const GLvoid*)(24));
glVertexAttribPointer(7, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(GSVertex), (const GLvoid*)(28));
if (m_features.vs_expand)
{
glGenVertexArrays(1, &m_expand_vao);
glBindVertexArray(m_expand_vao);
IASetVAO(m_expand_vao);
// Still need the vertex buffer bound, because uploads happen to GL_ARRAY_BUFFER.
m_vertex_stream_buffer->Bind();
std::unique_ptr<u8[]> expand_data = std::make_unique<u8[]>(EXPAND_BUFFER_SIZE);
GenerateExpansionIndexBuffer(expand_data.get());
glGenBuffers(1, &m_expand_ibo);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_expand_ibo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, EXPAND_BUFFER_SIZE, expand_data.get(), GL_STATIC_DRAW);
// We can bind it once when using gl_BaseVertexARB.
if (GLAD_GL_ARB_shader_draw_parameters)
{
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_vertex_stream_buffer->GetGLBufferId(),
0, VERTEX_BUFFER_SIZE);
}
}
}
// ****************************************************************
@ -289,7 +321,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
{
const char* name = shaderName(static_cast<ShaderConvert>(i));
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, *convert_glsl));
if (!m_shader_cache.GetProgram(&m_convert.ps[i], m_convert.vs, {}, ps))
if (!m_shader_cache.GetProgram(&m_convert.ps[i], m_convert.vs, ps))
return false;
m_convert.ps[i].SetFormattedName("Convert pipe %s", name);
@ -343,7 +375,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
{
const char* name = shaderName(static_cast<PresentShader>(i));
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, *shader));
if (!m_shader_cache.GetProgram(&m_present[i], present_vs, {}, ps))
if (!m_shader_cache.GetProgram(&m_present[i], present_vs, ps))
return false;
m_present[i].SetFormattedName("Present pipe %s", name);
@ -376,7 +408,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
for (size_t i = 0; i < std::size(m_merge_obj.ps); i++)
{
const std::string ps(GetShaderSource(fmt::format("ps_main{}", i), GL_FRAGMENT_SHADER, *shader));
if (!m_shader_cache.GetProgram(&m_merge_obj.ps[i], m_convert.vs, {}, ps))
if (!m_shader_cache.GetProgram(&m_merge_obj.ps[i], m_convert.vs, ps))
return false;
m_merge_obj.ps[i].SetFormattedName("Merge pipe %zu", i);
m_merge_obj.ps[i].RegisterUniform("BGColor");
@ -399,7 +431,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
for (size_t i = 0; i < std::size(m_interlace.ps); i++)
{
const std::string ps(GetShaderSource(fmt::format("ps_main{}", i), GL_FRAGMENT_SHADER, *shader));
if (!m_shader_cache.GetProgram(&m_interlace.ps[i], m_convert.vs, {}, ps))
if (!m_shader_cache.GetProgram(&m_interlace.ps[i], m_convert.vs, ps))
return false;
m_interlace.ps[i].SetFormattedName("Merge pipe %zu", i);
m_interlace.ps[i].RegisterUniform("ZrH");
@ -421,12 +453,10 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
{
GL_PUSH("GSDeviceOGL::Rasterization");
if (!GLLoader::is_gles)
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glDisable(GL_CULL_FACE);
glEnable(GL_SCISSOR_TEST);
if (!GLLoader::is_gles)
glDisable(GL_MULTISAMPLE);
glDisable(GL_MULTISAMPLE);
glDisable(GL_DITHER); // Honestly I don't know!
}
@ -446,7 +476,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
const std::string ps(GetShaderSource(
fmt::format("ps_stencil_image_init_{}", i),
GL_FRAGMENT_SHADER, *convert_glsl));
m_shader_cache.GetProgram(&m_date.primid_ps[i], m_convert.vs, {}, ps);
m_shader_cache.GetProgram(&m_date.primid_ps[i], m_convert.vs, ps);
m_date.primid_ps[i].SetFormattedName("PrimID Destination Alpha Init %d", i);
}
}
@ -461,7 +491,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
// This extension allow FS depth to range from -1 to 1. So
// gl_position.z could range from [0, 1]
// Change depth convention
if (GLLoader::has_clip_control)
if (m_features.clip_control)
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
// ****************************************************************
@ -602,12 +632,16 @@ void GSDeviceOGL::DestroyResources()
m_vertex_uniform_stream_buffer.reset();
glBindVertexArray(0);
if (m_vertex_array_object != 0)
glDeleteVertexArrays(1, &m_vertex_array_object);
// Delete the expansion VAO here; m_expand_ibo is a buffer object and is released
// separately with glDeleteBuffers. Passing the buffer name to glDeleteVertexArrays
// would leak the expansion VAO and could delete an unrelated VAO with the same name.
if (m_expand_vao != 0)
	glDeleteVertexArrays(1, &m_expand_vao);
if (m_vao != 0)
glDeleteVertexArrays(1, &m_vao);
m_index_stream_buffer.reset();
m_vertex_stream_buffer.reset();
s_texture_upload_buffer.reset();
if (m_expand_ibo)
glDeleteBuffers(1, &m_expand_ibo);
if (m_fbo != 0)
glDeleteFramebuffers(1, &m_fbo);
@ -1092,56 +1126,28 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ
{
std::string header;
if (GLLoader::is_gles)
// Intel's GL driver doesn't like the readonly qualifier with 3.3 GLSL.
if (m_features.vs_expand)
{
if (GLAD_GL_ES_VERSION_3_2)
header = "#version 320 es\n";
else if (GLAD_GL_ES_VERSION_3_1)
header = "#version 310 es\n";
if (GLAD_GL_EXT_blend_func_extended)
header += "#extension GL_EXT_blend_func_extended : require\n";
if (GLAD_GL_ARB_blend_func_extended)
header += "#extension GL_ARB_blend_func_extended : require\n";
if (m_features.framebuffer_fetch)
{
if (GLAD_GL_EXT_shader_framebuffer_fetch)
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
else if (GLAD_GL_ARM_shader_framebuffer_fetch)
header += "#extension GL_ARM_shader_framebuffer_fetch : require\n";
}
header += "precision highp float;\n";
header += "precision highp int;\n";
header += "precision highp sampler2D;\n";
if (GLAD_GL_ES_VERSION_3_1)
header += "precision highp sampler2DMS;\n";
if (GLAD_GL_ES_VERSION_3_2)
header += "precision highp usamplerBuffer;\n";
if (!GLAD_GL_EXT_blend_func_extended && !GLAD_GL_ARB_blend_func_extended)
header += "#define DISABLE_DUAL_SOURCE\n";
header = "#version 430 core\n";
}
else
{
header = "#version 330 core\n";
// Need GL version 420
header += "#extension GL_ARB_shading_language_420pack: require\n";
if (m_features.framebuffer_fetch && GLAD_GL_EXT_shader_framebuffer_fetch)
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
if (GLLoader::found_GL_ARB_gpu_shader5)
header += "#extension GL_ARB_gpu_shader5 : enable\n";
}
if (GLAD_GL_ARB_shader_draw_parameters)
header += "#extension GL_ARB_shader_draw_parameters : require\n";
if (m_features.framebuffer_fetch && GLAD_GL_EXT_shader_framebuffer_fetch)
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
if (m_features.framebuffer_fetch)
header += "#define HAS_FRAMEBUFFER_FETCH 1\n";
else
header += "#define HAS_FRAMEBUFFER_FETCH 0\n";
if (GLLoader::has_clip_control)
if (m_features.clip_control)
header += "#define HAS_CLIP_CONTROL 1\n";
else
header += "#define HAS_CLIP_CONTROL 0\n";
@ -1178,26 +1184,14 @@ std::string GSDeviceOGL::GetVSSource(VSSelector sel)
std::string macro = fmt::format("#define VS_FST {}\n", static_cast<u32>(sel.fst))
+ fmt::format("#define VS_IIP {}\n", static_cast<u32>(sel.iip))
+ fmt::format("#define VS_POINT_SIZE {}\n", static_cast<u32>(sel.point_size));
+ fmt::format("#define VS_POINT_SIZE {}\n", static_cast<u32>(sel.point_size))
+ fmt::format("#define VS_EXPAND {}\n", static_cast<int>(sel.expand));
std::string src = GenGlslHeader("vs_main", GL_VERTEX_SHADER, macro);
src += m_shader_tfx_vgs;
return src;
}
// Assembles the GLSL source for the geometry shader selected by `sel`:
// the header from GenGlslHeader (entry "gs_main", with GS_PRIM / GS_EXPAND / GS_IIP
// defines taken from the selector) followed by the contents of m_shader_tfx_vgs.
std::string GSDeviceOGL::GetGSSource(GSSelector sel)
{
	DevCon.WriteLn("Compiling new geometry shader with selector 0x%" PRIX64, sel.key);

	// One #define per selector field.
	std::string defines = fmt::format("#define GS_PRIM {}\n", static_cast<u32>(sel.topology));
	defines += fmt::format("#define GS_EXPAND {}\n", static_cast<u32>(sel.expand));
	defines += fmt::format("#define GS_IIP {}\n", static_cast<u32>(sel.iip));

	std::string source = GenGlslHeader("gs_main", GL_GEOMETRY_SHADER, defines);
	source += m_shader_tfx_vgs;
	return source;
}
std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
{
DevCon.WriteLn("Compiling new pixel shader with selector 0x%" PRIX64 "%08X", sel.key_hi, sel.key_lo);
@ -1502,6 +1496,7 @@ void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect
{GSVector4(right , bottom, 0.0f, 0.0f) , GSVector2(sRect.z , sRect.w)} ,
};
IASetVAO(m_vao);
IASetVertexBuffer(vertices, 4);
IASetPrimitiveTopology(GL_TRIANGLE_STRIP);
DrawPrimitive();
@ -1510,6 +1505,7 @@ void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect
void GSDeviceOGL::DrawMultiStretchRects(
const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
{
IASetVAO(m_vao);
IASetPrimitiveTopology(GL_TRIANGLE_STRIP);
OMSetDepthStencilState(m_convert.dss);
OMSetBlendState(false);
@ -1672,7 +1668,7 @@ void GSDeviceOGL::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture
bool GSDeviceOGL::CompileFXAAProgram()
{
// Needs ARB_gpu_shader5 for gather.
if (!GLLoader::is_gles && !GLLoader::found_GL_ARB_gpu_shader5)
if (!GLAD_GL_ARB_gpu_shader5)
{
Console.Warning("FXAA is not supported with the current GPU");
return true;
@ -1687,7 +1683,7 @@ bool GSDeviceOGL::CompileFXAAProgram()
}
const std::string ps(GetShaderSource("ps_main", GL_FRAGMENT_SHADER, shader->c_str(), fxaa_macro));
std::optional<GL::Program> prog = m_shader_cache.GetProgram(m_convert.vs, {}, ps);
std::optional<GL::Program> prog = m_shader_cache.GetProgram(m_convert.vs, ps);
if (!prog.has_value())
{
Console.Error("Failed to compile FXAA fragment shader");
@ -1725,7 +1721,7 @@ bool GSDeviceOGL::CompileShadeBoostProgram()
}
const std::string ps(GetShaderSource("ps_main", GL_FRAGMENT_SHADER, *shader));
if (!m_shader_cache.GetProgram(&m_shadeboost.ps, m_convert.vs, {}, ps))
if (!m_shader_cache.GetProgram(&m_shadeboost.ps, m_convert.vs, ps))
return false;
m_shadeboost.ps.RegisterUniform("params");
m_shadeboost.ps.SetName("Shadeboost pipe");
@ -1770,6 +1766,7 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
// ia
IASetVAO(m_vao);
IASetVertexBuffer(vertices, 4);
IASetPrimitiveTopology(GL_TRIANGLE_STRIP);
@ -1787,6 +1784,15 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
}
}
// Binds `vao` as the current vertex array object.
// The last bound name is cached in GLState::vao, so the glBindVertexArray call
// is skipped when `vao` is already the cached value.
void GSDeviceOGL::IASetVAO(GLuint vao)
{
	if (GLState::vao != vao)
	{
		GLState::vao = vao;
		glBindVertexArray(vao);
	}
}
void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count)
{
const u32 size = static_cast<u32>(count) * sizeof(GSVertexPT1);
@ -1862,12 +1868,6 @@ bool GSDeviceOGL::CreateCASPrograms()
}
const char* header =
GLLoader::is_gles ?
"#version 320 es\n"
"precision highp float;\n"
"precision highp int;\n"
"precision highp sampler2D;\n"
"precision highp image2D;\n" :
"#version 420\n"
"#extension GL_ARB_compute_shader : require\n";
const char* sharpen_params[2] = {
@ -1921,7 +1921,7 @@ bool GSDeviceOGL::CreateImGuiProgram()
}
std::optional<GL::Program> prog = m_shader_cache.GetProgram(
GetShaderSource("vs_main", GL_VERTEX_SHADER, glsl.value()), {},
GetShaderSource("vs_main", GL_VERTEX_SHADER, glsl.value()),
GetShaderSource("ps_main", GL_FRAGMENT_SHADER, glsl.value()));
if (!prog.has_value())
{
@ -1937,6 +1937,7 @@ bool GSDeviceOGL::CreateImGuiProgram()
glGenVertexArrays(1, &m_imgui.vao);
glBindVertexArray(m_imgui.vao);
m_vertex_stream_buffer->Bind();
m_index_stream_buffer->Bind();
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
@ -1945,7 +1946,7 @@ bool GSDeviceOGL::CreateImGuiProgram()
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(ImDrawVert), (GLvoid*)IM_OFFSETOF(ImDrawVert, uv));
glVertexAttribPointer(2, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(ImDrawVert), (GLvoid*)IM_OFFSETOF(ImDrawVert, col));
glBindVertexArray(m_vertex_array_object);
glBindVertexArray(GLState::vao);
return true;
}
@ -1973,7 +1974,7 @@ void GSDeviceOGL::RenderImGui()
m_imgui.ps.Bind();
m_imgui.ps.UniformMatrix4fv(0, &ortho_projection[0][0]);
glBindVertexArray(m_imgui.vao);
IASetVAO(m_imgui.vao);
OMSetBlendState(true, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_FUNC_ADD);
OMSetDepthStencilState(m_convert.dss);
PSSetSamplerState(m_convert.ln);
@ -2041,7 +2042,7 @@ void GSDeviceOGL::RenderImGui()
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
}
glBindVertexArray(m_vertex_array_object);
IASetVAO(m_vao);
glScissor(GLState::scissor.x, GLState::scissor.y, GLState::scissor.width(), GLState::scissor.height());
}
@ -2224,10 +2225,9 @@ void GSDeviceOGL::SetupPipeline(const ProgramSelector& psel)
const std::string vs(GetVSSource(psel.vs));
const std::string ps(GetPSSource(psel.ps));
const std::string gs((psel.gs.key != 0) ? GetGSSource(psel.gs) : std::string());
GL::Program prog;
m_shader_cache.GetProgram(&prog, vs, gs, ps);
m_shader_cache.GetProgram(&prog, vs, ps);
it = m_programs.emplace(psel, std::move(prog)).first;
it->second.Bind();
}
@ -2330,7 +2330,26 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
}
IASetVertexBuffer(config.verts, config.nverts);
IASetIndexBuffer(config.indices, config.nindices);
if (config.vs.expand != GSHWDrawConfig::VSExpand::None && !GLAD_GL_ARB_shader_draw_parameters)
{
// Need to offset the buffer.
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_vertex_stream_buffer->GetGLBufferId(),
m_vertex.start * sizeof(GSVertex), config.nverts * sizeof(GSVertex));
m_vertex.start = 0;
}
if (config.vs.UseExpandIndexBuffer())
{
IASetVAO(m_expand_vao);
m_index.start = 0;
m_index.count = config.nindices;
}
else
{
IASetVAO(m_vao);
IASetIndexBuffer(config.indices, config.nindices);
}
GLenum topology = 0;
switch (config.topology)
{
@ -2363,15 +2382,12 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
psel.vs = config.vs;
psel.ps.key_hi = config.ps.key_hi;
psel.ps.key_lo = config.ps.key_lo;
psel.gs.key = 0;
psel.pad = 0;
if (config.gs.expand)
psel.gs.key = config.gs.key;
SetupPipeline(psel);
// additional non-pipeline config stuff
const bool point_size_enabled = config.vs.point_size && !GLLoader::is_gles;
const bool point_size_enabled = config.vs.point_size;
if (GLState::point_size != point_size_enabled)
{
if (point_size_enabled)
@ -2516,47 +2532,45 @@ void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, bool needs_barrier)
g_perfmon.Put(GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()));
for (size_t count = 0, p = 0, n = 0; n < config.drawlist->size(); p += count, ++n)
const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
for (u32 n = 0, p = 0; n < draw_list_size; n++)
{
count = (*config.drawlist)[n] * config.indices_per_prim;
const u32 count = (*config.drawlist)[n] * indices_per_prim;
glTextureBarrier();
DrawIndexedPrimitive(p, count);
p += count;
}
return;
}
const bool tex_is_ds = config.tex && config.tex == config.ds;
if ((needs_barrier && m_features.texture_barrier) || tex_is_ds)
if (needs_barrier && m_features.texture_barrier)
{
if (config.require_full_barrier)
{
GL_PUSH("Split the draw");
const u32 indices_per_prim = config.indices_per_prim;
GL_PERF("Split single draw in %d draw", config.nindices / config.indices_per_prim);
GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim);
g_perfmon.Put(GSPerfMon::Barriers, config.nindices / config.indices_per_prim);
for (size_t p = 0; p < config.nindices; p += config.indices_per_prim)
for (u32 p = 0; p < config.nindices; p += indices_per_prim)
{
glTextureBarrier();
DrawIndexedPrimitive(p, config.indices_per_prim);
DrawIndexedPrimitive(p, indices_per_prim);
}
return;
}
if (config.require_one_barrier || tex_is_ds)
if (config.require_one_barrier)
{
// The common renderer code doesn't put a barrier here because D3D/VK need to copy the DS, so we need to check it.
// One barrier needed for non-overlapping draw.
g_perfmon.Put(GSPerfMon::Barriers, 1);
glTextureBarrier();
DrawIndexedPrimitive();
return;
}
}
// No barriers needed
DrawIndexedPrimitive();
}

View File

@ -121,7 +121,6 @@ class GSDeviceOGL final : public GSDevice
{
public:
using VSSelector = GSHWDrawConfig::VSSelector;
using GSSelector = GSHWDrawConfig::GSSelector;
using PSSelector = GSHWDrawConfig::PSSelector;
using PSSamplerSelector = GSHWDrawConfig::SamplerSelector;
using OMDepthStencilSelector = GSHWDrawConfig::DepthStencilSelector;
@ -131,7 +130,6 @@ public:
{
PSSelector ps;
VSSelector vs;
GSSelector gs;
u16 pad;
__fi bool operator==(const ProgramSelector& p) const { return (std::memcmp(this, &p, sizeof(*this)) == 0); }
@ -144,7 +142,7 @@ public:
__fi std::size_t operator()(const ProgramSelector& p) const noexcept
{
std::size_t h = 0;
HashCombine(h, p.vs.key, p.gs.key, p.ps.key_hi, p.ps.key_lo);
HashCombine(h, p.vs.key, p.ps.key_hi, p.ps.key_lo);
return h;
}
};
@ -160,7 +158,9 @@ private:
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
std::unique_ptr<GL::StreamBuffer> m_index_stream_buffer;
GLuint m_vertex_array_object = 0;
GLuint m_expand_ibo = 0;
GLuint m_vao = 0;
GLuint m_expand_vao = 0;
GLenum m_draw_topology = 0;
std::unique_ptr<GL::StreamBuffer> m_vertex_uniform_stream_buffer;
@ -344,6 +344,7 @@ public:
void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm);
void IASetVAO(GLuint vao);
void IASetPrimitiveTopology(GLenum topology);
void IASetVertexBuffer(const void* vertices, size_t count);
void IASetIndexBuffer(const void* index, size_t count);
@ -367,7 +368,6 @@ public:
const std::string_view& macro_sel = std::string_view());
std::string GenGlslHeader(const std::string_view& entry, GLenum type, const std::string_view& macro);
std::string GetVSSource(VSSelector sel);
std::string GetGSSource(GSSelector sel);
std::string GetPSSource(const PSSelector& sel);
GLuint CreateSampler(PSSamplerSelector sel);
GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel);

View File

@ -94,7 +94,7 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
// Depth buffer
case Format::DepthStencil:
{
if (!GLLoader::found_framebuffer_fetch)
if (!g_gs_device->Features().framebuffer_fetch)
{
gl_fmt = GL_DEPTH32F_STENCIL8;
m_int_format = GL_DEPTH_STENCIL;

View File

@ -581,17 +581,17 @@ bool GSDeviceVK::CheckFeatures()
m_features.framebuffer_fetch = g_vulkan_context->GetOptionalExtensions().vk_arm_rasterization_order_attachment_access && !GSConfig.DisableFramebufferFetch;
m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0;
m_features.broken_point_sampler = isAMD;
m_features.geometry_shader = features.geometryShader && GSConfig.OverrideGeometryShaders != 0;
// Usually, geometry shader indicates primid support
// However on Metal (MoltenVK), geometry shader is never available, but primid sometimes is
// Officially, it's available on GPUs that support barycentric coordinates (Newer AMD and Apple)
// Unofficially, it seems to work on older Intel GPUs (but breaks other things on newer Intel GPUs, see GSMTLDeviceInfo.mm for details)
// We'll only enable it for the officially supported GPUs here (there is no longer an OverrideGeometryShaders setting to force-enable it).
m_features.primitive_id = features.geometryShader || GSConfig.OverrideGeometryShaders == 1 || g_vulkan_context->GetOptionalExtensions().vk_khr_fragment_shader_barycentric;
m_features.primitive_id = features.geometryShader || g_vulkan_context->GetOptionalExtensions().vk_khr_fragment_shader_barycentric;
m_features.prefer_new_textures = true;
m_features.provoking_vertex_last = g_vulkan_context->GetOptionalExtensions().vk_ext_provoking_vertex;
m_features.dual_source_blend = features.dualSrcBlend && !GSConfig.DisableDualSourceBlend;
m_features.clip_control = true;
m_features.vs_expand = g_vulkan_context->GetOptionalExtensions().vk_khr_shader_draw_parameters;
if (!m_features.dual_source_blend)
Console.Warning("Vulkan driver is missing dual-source blending. This will have an impact on performance.");
@ -624,9 +624,10 @@ bool GSDeviceVK::CheckFeatures()
(features.largePoints && limits.pointSizeRange[0] <= f_upscale && limits.pointSizeRange[1] >= f_upscale);
m_features.line_expand =
(features.wideLines && limits.lineWidthRange[0] <= f_upscale && limits.lineWidthRange[1] >= f_upscale);
DevCon.WriteLn("Using %s for point expansion and %s for line expansion.",
m_features.point_expand ? "hardware" : "geometry shaders",
m_features.line_expand ? "hardware" : "geometry shaders");
m_features.point_expand ? "hardware" : "vertex expanding",
m_features.line_expand ? "hardware" : "vertex expanding");
// Check texture format support before we try to create them.
for (u32 fmt = static_cast<u32>(GSTexture::Format::Color); fmt < static_cast<u32>(GSTexture::Format::PrimID); fmt++)
@ -1004,6 +1005,7 @@ void GSDeviceVK::DoMultiStretchRects(
m_index.count = icount;
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
m_index_stream_buffer.CommitMemory(icount * sizeof(u32));
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
// Even though we're batching, a cmdbuffer submit could've messed this up.
const GSVector4i rc(dTex->GetRect());
@ -1379,6 +1381,8 @@ void GSDeviceVK::IASetIndexBuffer(const void* index, size_t count)
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
m_index_stream_buffer.CommitMemory(size);
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
}
void GSDeviceVK::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, FeedbackLoopFlag feedback_loop)
@ -1493,10 +1497,14 @@ static void AddMacro(std::stringstream& ss, const char* name, int value)
static void AddShaderHeader(std::stringstream& ss)
{
const GSDevice::FeatureSupport features(g_gs_device->Features());
ss << "#version 460 core\n";
ss << "#extension GL_EXT_samplerless_texture_functions : require\n";
const GSDevice::FeatureSupport features(g_gs_device->Features());
if (features.vs_expand)
ss << "#extension GL_ARB_shader_draw_parameters : require\n";
if (!features.texture_barrier)
ss << "#define DISABLE_TEXTURE_BARRIER 1\n";
if (!features.dual_source_blend)
@ -1568,7 +1576,9 @@ bool GSDeviceVK::CreateNullTexture()
bool GSDeviceVK::CreateBuffers()
{
if (!m_vertex_stream_buffer.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_BUFFER_SIZE))
if (!m_vertex_stream_buffer.Create(
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | (m_features.vs_expand ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : 0),
VERTEX_BUFFER_SIZE))
{
Host::ReportErrorAsync("GS", "Failed to allocate vertex buffer");
return false;
@ -1593,7 +1603,14 @@ bool GSDeviceVK::CreateBuffers()
}
SetVertexBuffer(m_vertex_stream_buffer.GetBuffer(), 0);
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
if (!g_vulkan_context->AllocatePreinitializedGPUBuffer(EXPAND_BUFFER_SIZE, &m_expand_index_buffer,
&m_expand_index_buffer_allocation, VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
&GSDevice::GenerateExpansionIndexBuffer))
{
Host::ReportErrorAsync("GS", "Failed to allocate expansion index buffer");
return false;
}
return true;
}
@ -1625,6 +1642,8 @@ bool GSDeviceVK::CreatePipelineLayouts()
dslb.AddBinding(
0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT);
dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
if (m_features.vs_expand)
dslb.AddBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT);
if ((m_tfx_ubo_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
return false;
Vulkan::Util::SetObjectName(dev, m_tfx_ubo_ds_layout, "TFX UBO descriptor layout");
@ -2366,9 +2385,6 @@ void GSDeviceVK::RenderImGui()
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
}
// normal draws use 32-bit indices
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
}
bool GSDeviceVK::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array<u32, NUM_CAS_CONSTANTS>& constants)
@ -2449,8 +2465,6 @@ void GSDeviceVK::DestroyResources()
Vulkan::Util::SafeDestroyPipeline(it.second);
for (auto& it : m_tfx_fragment_shaders)
Vulkan::Util::SafeDestroyShaderModule(it.second);
for (auto& it : m_tfx_geometry_shaders)
Vulkan::Util::SafeDestroyShaderModule(it.second);
for (auto& it : m_tfx_vertex_shaders)
Vulkan::Util::SafeDestroyShaderModule(it.second);
for (VkPipeline& it : m_interlace)
@ -2506,6 +2520,12 @@ void GSDeviceVK::DestroyResources()
m_vertex_uniform_stream_buffer.Destroy(false);
m_index_stream_buffer.Destroy(false);
m_vertex_stream_buffer.Destroy(false);
if (m_expand_index_buffer != VK_NULL_HANDLE)
{
vmaDestroyBuffer(g_vulkan_context->GetAllocator(), m_expand_index_buffer, m_expand_index_buffer_allocation);
m_expand_index_buffer = VK_NULL_HANDLE;
m_expand_index_buffer_allocation = VK_NULL_HANDLE;
}
Vulkan::Util::SafeDestroyPipelineLayout(m_tfx_pipeline_layout);
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_tfx_rt_texture_ds_layout);
@ -2530,6 +2550,8 @@ VkShaderModule GSDeviceVK::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
AddMacro(ss, "VS_FST", sel.fst);
AddMacro(ss, "VS_IIP", sel.iip);
AddMacro(ss, "VS_POINT_SIZE", sel.point_size);
AddMacro(ss, "VS_EXPAND", static_cast<int>(sel.expand));
AddMacro(ss, "VS_PROVOKING_VERTEX_LAST", static_cast<int>(m_features.provoking_vertex_last));
ss << m_tfx_source;
VkShaderModule mod = g_vulkan_shader_cache->GetVertexShader(ss.str());
@ -2540,29 +2562,6 @@ VkShaderModule GSDeviceVK::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
return mod;
}
/// Returns the TFX geometry shader module for `sel`, compiling it on first use.
/// Modules are memoised in m_tfx_geometry_shaders, keyed by `sel.key`.
VkShaderModule GSDeviceVK::GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel)
{
	// Reuse the module if this selector key has been requested before.
	if (const auto cached = m_tfx_geometry_shaders.find(sel.key); cached != m_tfx_geometry_shaders.end())
		return cached->second;

	// Assemble the source: shared header, stage macro, selector-derived macros, then the TFX shader text.
	std::stringstream source;
	AddShaderHeader(source);
	AddShaderStageMacro(source, false, true, false);
	AddMacro(source, "GS_IIP", sel.iip);
	AddMacro(source, "GS_PRIM", static_cast<int>(sel.topology));
	AddMacro(source, "GS_EXPAND", sel.expand);
	AddMacro(source, "GS_FORWARD_PRIMID", sel.forward_primid);
	source << m_tfx_source;

	VkShaderModule shader = g_vulkan_shader_cache->GetGeometryShader(source.str());

	// Only a non-null module can be given a debug name.
	if (shader)
		Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), shader, "TFX Geometry %08X", sel.key);

	// The result is stored even when it is null, so the same key is not looked up in the shader cache again.
	m_tfx_geometry_shaders.emplace(sel.key, shader);
	return shader;
}
VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector& sel)
{
const auto it = m_tfx_fragment_shaders.find(sel);
@ -2651,9 +2650,8 @@ VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p)
}
VkShaderModule vs = GetTFXVertexShader(p.vs);
VkShaderModule gs = p.gs.expand ? GetTFXGeometryShader(p.gs) : VK_NULL_HANDLE;
VkShaderModule fs = GetTFXFragmentShader(pps);
if (vs == VK_NULL_HANDLE || (p.gs.expand && gs == VK_NULL_HANDLE) || fs == VK_NULL_HANDLE)
if (vs == VK_NULL_HANDLE || fs == VK_NULL_HANDLE)
return VK_NULL_HANDLE;
Vulkan::GraphicsPipelineBuilder gpb;
@ -2685,19 +2683,20 @@ VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p)
// Shaders
gpb.SetVertexShader(vs);
if (gs != VK_NULL_HANDLE)
gpb.SetGeometryShader(gs);
gpb.SetFragmentShader(fs);
// IA
gpb.AddVertexBuffer(0, sizeof(GSVertex));
gpb.AddVertexAttribute(0, 0, VK_FORMAT_R32G32_SFLOAT, 0); // ST
gpb.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UINT, 8); // RGBA
gpb.AddVertexAttribute(2, 0, VK_FORMAT_R32_SFLOAT, 12); // Q
gpb.AddVertexAttribute(3, 0, VK_FORMAT_R16G16_UINT, 16); // XY
gpb.AddVertexAttribute(4, 0, VK_FORMAT_R32_UINT, 20); // Z
gpb.AddVertexAttribute(5, 0, VK_FORMAT_R16G16_UINT, 24); // UV
gpb.AddVertexAttribute(6, 0, VK_FORMAT_R8G8B8A8_UNORM, 28); // FOG
if (p.vs.expand == GSHWDrawConfig::VSExpand::None)
{
gpb.AddVertexBuffer(0, sizeof(GSVertex));
gpb.AddVertexAttribute(0, 0, VK_FORMAT_R32G32_SFLOAT, 0); // ST
gpb.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UINT, 8); // RGBA
gpb.AddVertexAttribute(2, 0, VK_FORMAT_R32_SFLOAT, 12); // Q
gpb.AddVertexAttribute(3, 0, VK_FORMAT_R16G16_UINT, 16); // XY
gpb.AddVertexAttribute(4, 0, VK_FORMAT_R32_UINT, 20); // Z
gpb.AddVertexAttribute(5, 0, VK_FORMAT_R16G16_UINT, 24); // UV
gpb.AddVertexAttribute(6, 0, VK_FORMAT_R8G8B8A8_UNORM, 28); // FOG
}
// DepthStencil
static const VkCompareOp ztst[] = {
@ -2753,7 +2752,7 @@ VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p)
if (pipeline)
{
Vulkan::Util::SetObjectName(
g_vulkan_context->GetDevice(), pipeline, "TFX Pipeline %08X/%08X/%" PRIX64 "%08X", p.vs.key, p.gs.key, p.ps.key_hi, p.ps.key_lo);
g_vulkan_context->GetDevice(), pipeline, "TFX Pipeline %08X/%" PRIX64 "%08X", p.vs.key, p.ps.key_hi, p.ps.key_lo);
}
return pipeline;
@ -2822,6 +2821,11 @@ bool GSDeviceVK::CreatePersistentDescriptorSets()
m_vertex_uniform_stream_buffer.GetBuffer(), 0, sizeof(GSHWDrawConfig::VSConstantBuffer));
dsub.AddBufferDescriptorWrite(m_tfx_descriptor_sets[0], 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
m_fragment_uniform_stream_buffer.GetBuffer(), 0, sizeof(GSHWDrawConfig::PSConstantBuffer));
if (m_features.vs_expand)
{
dsub.AddBufferDescriptorWrite(m_tfx_descriptor_sets[0], 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
m_vertex_stream_buffer.GetBuffer(), 0, VERTEX_BUFFER_SIZE);
}
dsub.Update(dev);
Vulkan::Util::SetObjectName(dev, m_tfx_descriptor_sets[0], "Persistent TFX UBO set");
return true;
@ -3476,8 +3480,7 @@ GSTextureVK* GSDeviceVK::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config)
DrawPrimitive();
// image is now filled with either -1 or INT_MAX, so now we can do the prepass
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
IASetIndexBuffer(config.indices, config.nindices);
UploadHWDrawVerticesAndIndices(config);
// cut down the configuration for the prepass, we don't need blending or any feedback loop
PipelineSelector& pipe = m_pipeline_selector;
@ -3722,10 +3725,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
// VB/IB upload, if we did DATE setup and it's not HDR this has already been done
if (!date_image || hdr_rt)
{
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
IASetIndexBuffer(config.indices, config.nindices);
}
UploadHWDrawVerticesAndIndices(config);
// now we can do the actual draw
if (BindDrawPipeline(pipe))
@ -3818,7 +3818,6 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
void GSDeviceVK::UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe)
{
pipe.vs.key = config.vs.key;
pipe.gs.key = config.gs.key;
pipe.ps.key_hi = config.ps.key_hi;
pipe.ps.key_lo = config.ps.key_lo;
pipe.dss.key = config.depth.key;
@ -3840,6 +3839,22 @@ void GSDeviceVK::UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelect
pipe.vs.point_size |= (config.topology == GSHWDrawConfig::Topology::Point);
}
/// Uploads the vertices for a hardware draw and selects the index source.
/// If the vertex selector reports UseExpandIndexBuffer(), the pre-built
/// m_expand_index_buffer is bound and no indices are uploaded; otherwise the
/// draw's own indices go through IASetIndexBuffer().
void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
{
	// Vertices are uploaded for both paths.
	IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);

	if (!config.vs.UseExpandIndexBuffer())
	{
		// Regular path: upload the indices supplied with the draw.
		IASetIndexBuffer(config.indices, config.nindices);
		return;
	}

	// Expansion path: draw config.nindices indices from the start of the expansion index buffer.
	m_index.start = 0;
	m_index.count = config.nindices;
	SetIndexBuffer(m_expand_index_buffer, 0, VK_INDEX_TYPE_UINT32);
}
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier)
{
if (config.drawlist)
@ -3847,23 +3862,25 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
GL_PUSH("Split the draw (SPRITE)");
g_perfmon.Put(GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()) - static_cast<u32>(skip_first_barrier));
u32 count = 0;
const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
u32 p = 0;
u32 n = 0;
if (skip_first_barrier)
{
count = (*config.drawlist)[n] * config.indices_per_prim;
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
++n;
}
for (; n < static_cast<u32>(config.drawlist->size()); p += count, ++n)
for (; n < draw_list_size; n++)
{
count = (*config.drawlist)[n] * config.indices_per_prim;
const u32 count = (*config.drawlist)[n] * indices_per_prim;
ColorBufferBarrier(draw_rt);
DrawIndexedPrimitive(p, count);
p += count;
}
return;
@ -3873,21 +3890,22 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
{
if (config.require_full_barrier)
{
GL_PUSH("Split single draw in %d draw", config.nindices / config.indices_per_prim);
g_perfmon.Put(GSPerfMon::Barriers, (config.nindices / config.indices_per_prim) - static_cast<u32>(skip_first_barrier));
const u32 indices_per_prim = config.indices_per_prim;
GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim);
g_perfmon.Put(GSPerfMon::Barriers, (config.nindices / indices_per_prim) - static_cast<u32>(skip_first_barrier));
const u32 ipp = config.indices_per_prim;
u32 p = 0;
if (skip_first_barrier)
{
DrawIndexedPrimitive(p, ipp);
p += ipp;
DrawIndexedPrimitive(p, indices_per_prim);
p += indices_per_prim;
}
for (; p < config.nindices; p += ipp)
for (; p < config.nindices; p += indices_per_prim)
{
ColorBufferBarrier(draw_rt);
DrawIndexedPrimitive(p, ipp);
DrawIndexedPrimitive(p, indices_per_prim);
}
return;
@ -3897,11 +3915,8 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
{
g_perfmon.Put(GSPerfMon::Barriers, 1);
ColorBufferBarrier(draw_rt);
DrawIndexedPrimitive();
return;
}
}
// Don't need any barrier
DrawIndexedPrimitive();
}

View File

@ -58,7 +58,6 @@ public:
};
GSHWDrawConfig::VSSelector vs;
GSHWDrawConfig::GSSelector gs;
GSHWDrawConfig::DepthStencilSelector dss;
GSHWDrawConfig::ColorMaskSelector cms;
GSHWDrawConfig::BlendState bs;
@ -78,7 +77,7 @@ public:
std::size_t operator()(const PipelineSelector& e) const noexcept
{
std::size_t hash = 0;
HashCombine(hash, e.vs.key, e.gs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key);
HashCombine(hash, e.vs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key);
return hash;
}
};
@ -123,6 +122,8 @@ private:
Vulkan::StreamBuffer m_index_stream_buffer;
Vulkan::StreamBuffer m_vertex_uniform_stream_buffer;
Vulkan::StreamBuffer m_fragment_uniform_stream_buffer;
VkBuffer m_expand_index_buffer = VK_NULL_HANDLE;
VmaAllocation m_expand_index_buffer_allocation = VK_NULL_HANDLE;
VkSampler m_point_sampler = VK_NULL_HANDLE;
VkSampler m_linear_sampler = VK_NULL_HANDLE;
@ -142,7 +143,6 @@ private:
VkPipeline m_shadeboost_pipeline = {};
std::unordered_map<u32, VkShaderModule> m_tfx_vertex_shaders;
std::unordered_map<u32, VkShaderModule> m_tfx_geometry_shaders;
std::unordered_map<GSHWDrawConfig::PSSelector, VkShaderModule, GSHWDrawConfig::PSSelectorHash> m_tfx_fragment_shaders;
std::unordered_map<PipelineSelector, VkPipeline, PipelineSelectorHash> m_tfx_pipelines;
@ -183,7 +183,6 @@ private:
void ClearSamplerCache() final;
VkShaderModule GetTFXVertexShader(GSHWDrawConfig::VSSelector sel);
VkShaderModule GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel);
VkShaderModule GetTFXFragmentShader(const GSHWDrawConfig::PSSelector& sel);
VkPipeline CreateTFXPipeline(const PipelineSelector& p);
VkPipeline GetTFXPipeline(const PipelineSelector& p);
@ -307,6 +306,7 @@ public:
void RenderHW(GSHWDrawConfig& config) override;
void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe);
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier);
//////////////////////////////////////////////////////////////////////////

View File

@ -529,7 +529,6 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const
OpEqu(UserHacks_GPUTargetCLUTMode) &&
OpEqu(UserHacks_TextureInsideRt) &&
OpEqu(OverrideTextureBarriers) &&
OpEqu(OverrideGeometryShaders) &&
OpEqu(CAS_Sharpness) &&
OpEqu(ShadeBoost_Brightness) &&
@ -574,8 +573,7 @@ bool Pcsx2Config::GSOptions::RestartOptionsAreEqual(const GSOptions& right) cons
OpEqu(DisableDualSourceBlend) &&
OpEqu(DisableFramebufferFetch) &&
OpEqu(DisableThreadedPresentation) &&
OpEqu(OverrideTextureBarriers) &&
OpEqu(OverrideGeometryShaders);
OpEqu(OverrideTextureBarriers);
}
void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap)
@ -724,7 +722,6 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap)
GSSettingIntEnumEx(UserHacks_GPUTargetCLUTMode, "UserHacks_GPUTargetCLUTMode");
GSSettingIntEnumEx(TriFilter, "TriFilter");
GSSettingIntEx(OverrideTextureBarriers, "OverrideTextureBarriers");
GSSettingIntEx(OverrideGeometryShaders, "OverrideGeometryShaders");
GSSettingInt(ShadeBoost_Brightness);
GSSettingInt(ShadeBoost_Contrast);

View File

@ -15,4 +15,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 23;
static constexpr u32 SHADER_CACHE_VERSION = 24;