diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 982d426354..44ac5eaf08 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -1,5 +1,3 @@ -#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency - #define FMT_32 0 #define FMT_24 1 #define FMT_16 2 @@ -113,6 +111,8 @@ struct PS_INPUT #endif }; +#ifdef PIXEL_SHADER + struct PS_OUTPUT { #if !PS_NO_COLOR @@ -136,21 +136,6 @@ Texture2D RtTexture : register(t2); Texture2D PrimMinTexture : register(t3); SamplerState TextureSampler : register(s0); -#ifdef DX12 -cbuffer cb0 : register(b0) -#else -cbuffer cb0 -#endif -{ - float2 VertexScale; - float2 VertexOffset; - float2 TextureScale; - float2 TextureOffset; - float2 PointSize; - uint MaxDepth; - uint pad_cb0; -}; - #ifdef DX12 cbuffer cb1 : register(b1) #else @@ -1062,10 +1047,29 @@ PS_OUTPUT ps_main(PS_INPUT input) return output; } +#endif // PIXEL_SHADER + ////////////////////////////////////////////////////////////////////// // Vertex Shader ////////////////////////////////////////////////////////////////////// +#ifdef VERTEX_SHADER + +#ifdef DX12 +cbuffer cb0 : register(b0) +#else +cbuffer cb0 +#endif +{ + float2 VertexScale; + float2 VertexOffset; + float2 TextureScale; + float2 TextureOffset; + float2 PointSize; + uint MaxDepth; + uint BaseVertex; // Only used in DX11. 
+}; + VS_OUTPUT vs_main(VS_INPUT input) { // Clamp to max depth, gs doesn't wrap @@ -1118,156 +1122,101 @@ VS_OUTPUT vs_main(VS_INPUT input) return output; } -////////////////////////////////////////////////////////////////////// -// Geometry Shader -////////////////////////////////////////////////////////////////////// +#if VS_EXPAND != 0 -#if GS_FORWARD_PRIMID -#define PRIMID_IN , uint primid : SV_PrimitiveID -#define VS2PS(x) vs2ps_impl(x, primid) -PS_INPUT vs2ps_impl(VS_OUTPUT vs, uint primid) +struct VS_RAW_INPUT { - PS_INPUT o; - o.p = vs.p; - o.t = vs.t; - o.ti = vs.ti; - o.c = vs.c; - o.primid = primid; - return o; -} + float2 ST; + uint RGBA; + float Q; + uint XY; + uint Z; + uint UV; + uint FOG; +}; + +StructuredBuffer vertices : register(t0); + +VS_INPUT load_vertex(uint index) +{ +#ifdef DX12 + VS_RAW_INPUT raw = vertices.Load(index); #else -#define PRIMID_IN -#define VS2PS(x) vs2ps_impl(x) -PS_INPUT vs2ps_impl(VS_OUTPUT vs) -{ - PS_INPUT o; - o.p = vs.p; - o.t = vs.t; - o.ti = vs.ti; - o.c = vs.c; - return o; -} + VS_RAW_INPUT raw = vertices.Load(BaseVertex + index); #endif -#if GS_PRIM == 0 - -[maxvertexcount(6)] -void gs_main(point VS_OUTPUT input[1], inout TriangleStream stream PRIMID_IN) -{ - // Transform a point to a NxN sprite - PS_INPUT Point = VS2PS(input[0]); - - // Get new position - float4 lt_p = input[0].p; - float4 rb_p = input[0].p + float4(PointSize.x, PointSize.y, 0.0f, 0.0f); - float4 lb_p = rb_p; - float4 rt_p = rb_p; - lb_p.x = lt_p.x; - rt_p.y = lt_p.y; - - // Triangle 1 - Point.p = lt_p; - stream.Append(Point); - - Point.p = lb_p; - stream.Append(Point); - - Point.p = rt_p; - stream.Append(Point); - - // Triangle 2 - Point.p = lb_p; - stream.Append(Point); - - Point.p = rt_p; - stream.Append(Point); - - Point.p = rb_p; - stream.Append(Point); + VS_INPUT vert; + vert.st = raw.ST; + vert.c = uint4(raw.RGBA & 0xFFu, (raw.RGBA >> 8) & 0xFFu, (raw.RGBA >> 16) & 0xFFu, raw.RGBA >> 24); + vert.q = raw.Q; + vert.p = uint2(raw.XY & 0xFFFFu, 
raw.XY >> 16); + vert.z = raw.Z; + vert.uv = uint2(raw.UV & 0xFFFFu, raw.UV >> 16); + vert.f = float4(float(raw.FOG & 0xFFu), float((raw.FOG >> 8) & 0xFFu), float((raw.FOG >> 16) & 0xFFu), float(raw.FOG >> 24)) / 255.0f; + return vert; } -#elif GS_PRIM == 1 - -[maxvertexcount(6)] -void gs_main(line VS_OUTPUT input[2], inout TriangleStream stream PRIMID_IN) +VS_OUTPUT vs_main_expand(uint vid : SV_VertexID) { - // Transform a line to a thick line-sprite - PS_INPUT left = VS2PS(input[0]); - PS_INPUT right = VS2PS(input[1]); - float2 lt_p = input[0].p.xy; - float2 rt_p = input[1].p.xy; +#if VS_EXPAND == 1 // Point - // Potentially there is faster math - float2 line_vector = normalize(rt_p.xy - lt_p.xy); + VS_OUTPUT vtx = vs_main(load_vertex(vid >> 2)); + + vtx.p.x += ((vid & 1u) != 0u) ? PointSize.x : 0.0f; + vtx.p.y += ((vid & 2u) != 0u) ? PointSize.y : 0.0f; + + return vtx; + +#elif VS_EXPAND == 2 // Line + + uint vid_base = vid >> 2; + bool is_bottom = vid & 2; + bool is_right = vid & 1; + // All lines will be a pair of vertices next to each other + // Since DirectX uses provoking vertex first, the bottom point will be the lower of the two + uint vid_other = is_bottom ? vid_base + 1 : vid_base - 1; + VS_OUTPUT vtx = vs_main(load_vertex(vid_base)); + VS_OUTPUT other = vs_main(load_vertex(vid_other)); + + float2 line_vector = normalize(vtx.p.xy - other.p.xy); float2 line_normal = float2(line_vector.y, -line_vector.x); float2 line_width = (line_normal * PointSize) / 2; + // line_normal is inverted for bottom point + float2 offset = (is_bottom ^ is_right) ? 
line_width : -line_width; + vtx.p.xy += offset; - lt_p -= line_width; - rt_p -= line_width; - float2 lb_p = input[0].p.xy + line_width; - float2 rb_p = input[1].p.xy + line_width; + // Lines will be run as (0 1 2) (1 2 3) + // This means that both triangles will have a point based off the top line point as their first point + // So we don't have to do anything for !IIP - #if GS_IIP == 0 - left.c = right.c; - #endif + return vtx; - // Triangle 1 - left.p.xy = lt_p; - stream.Append(left); +#elif VS_EXPAND == 3 // Sprite - left.p.xy = lb_p; - stream.Append(left); + // Sprite points are always in pairs + uint vid_base = vid >> 1; + uint vid_lt = vid_base & ~1u; + uint vid_rb = vid_base | 1u; - right.p.xy = rt_p; - stream.Append(right); - stream.RestartStrip(); + VS_OUTPUT lt = vs_main(load_vertex(vid_lt)); + VS_OUTPUT rb = vs_main(load_vertex(vid_rb)); + VS_OUTPUT vtx = rb; - // Triangle 2 - left.p.xy = lb_p; - stream.Append(left); + bool is_right = ((vid & 1u) != 0u); + vtx.p.x = is_right ? lt.p.x : vtx.p.x; + vtx.t.x = is_right ? lt.t.x : vtx.t.x; + vtx.ti.xz = is_right ? lt.ti.xz : vtx.ti.xz; - right.p.xy = rt_p; - stream.Append(right); + bool is_bottom = ((vid & 2u) != 0u); + vtx.p.y = is_bottom ? lt.p.y : vtx.p.y; + vtx.t.y = is_bottom ? lt.t.y : vtx.t.y; + vtx.ti.yw = is_bottom ? 
lt.ti.yw : vtx.ti.yw; - right.p.xy = rb_p; - stream.Append(right); - stream.RestartStrip(); -} - -#elif GS_PRIM == 3 - -[maxvertexcount(4)] -void gs_main(line VS_OUTPUT input[2], inout TriangleStream stream PRIMID_IN) -{ - PS_INPUT lt = VS2PS(input[0]); - PS_INPUT rb = VS2PS(input[1]); - - // flat depth - lt.p.z = rb.p.z; - // flat fog and texture perspective - lt.t.zw = rb.t.zw; - - // flat color - lt.c = rb.c; - - // Swap texture and position coordinate - PS_INPUT lb = rb; - lb.p.x = lt.p.x; - lb.t.x = lt.t.x; - lb.ti.x = lt.ti.x; - lb.ti.z = lt.ti.z; - - PS_INPUT rt = rb; - rt.p.y = lt.p.y; - rt.t.y = lt.t.y; - rt.ti.y = lt.ti.y; - rt.ti.w = lt.ti.w; - - stream.Append(lt); - stream.Append(lb); - stream.Append(rt); - stream.Append(rb); -} + return vtx; #endif -#endif +} + +#endif // VS_EXPAND + +#endif // VERTEX_SHADER diff --git a/bin/resources/shaders/opengl/tfx_vgs.glsl b/bin/resources/shaders/opengl/tfx_vgs.glsl index 9e3f5e7823..9c6b0430a7 100644 --- a/bin/resources/shaders/opengl/tfx_vgs.glsl +++ b/bin/resources/shaders/opengl/tfx_vgs.glsl @@ -14,13 +14,6 @@ layout(std140, binding = 1) uniform cb20 }; #ifdef VERTEX_SHADER -layout(location = 0) in vec2 i_st; -layout(location = 2) in vec4 i_c; -layout(location = 3) in float i_q; -layout(location = 4) in uvec2 i_p; -layout(location = 5) in uint i_z; -layout(location = 6) in uvec2 i_uv; -layout(location = 7) in vec4 i_f; out SHADER { @@ -35,6 +28,16 @@ out SHADER const float exp_min32 = exp2(-32.0f); +#if VS_EXPAND == 0 + +layout(location = 0) in vec2 i_st; +layout(location = 2) in vec4 i_c; +layout(location = 3) in float i_q; +layout(location = 4) in uvec2 i_p; +layout(location = 5) in uint i_z; +layout(location = 6) in uvec2 i_uv; +layout(location = 7) in vec4 i_f; + void texture_coord() { vec2 uv = vec2(i_uv) - TextureOffset; @@ -91,163 +94,145 @@ void vs_main() #endif } -#endif +#else // VS_EXPAND -#ifdef GEOMETRY_SHADER - -in SHADER +struct RawVertex { - vec4 t_float; - vec4 t_int; - #if GS_IIP != 0 - vec4 
c; - #else - flat vec4 c; - #endif -} GSin[]; + vec2 ST; + uint RGBA; + float Q; + uint XY; + uint Z; + uint UV; + uint FOG; +}; -out SHADER -{ - vec4 t_float; - vec4 t_int; - #if GS_IIP != 0 - vec4 c; - #else - flat vec4 c; - #endif -} GSout; +layout(std140, binding = 2) readonly buffer VertexBuffer { + RawVertex vertex_buffer[]; +}; -struct vertex +struct ProcessedVertex { + vec4 p; vec4 t_float; vec4 t_int; vec4 c; }; -void out_vertex(in vec4 position, in vertex v) +ProcessedVertex load_vertex(uint index) { - GSout.t_float = v.t_float; - GSout.t_int = v.t_int; - // Flat output -#if GS_PRIM == 0 - GSout.c = GSin[0].c; +#if defined(GL_ARB_shader_draw_parameters) && GL_ARB_shader_draw_parameters + RawVertex rvtx = vertex_buffer[index + gl_BaseVertexARB]; #else - GSout.c = GSin[1].c; + RawVertex rvtx = vertex_buffer[index]; #endif - gl_Position = position; - gl_PrimitiveID = gl_PrimitiveIDIn; - EmitVertex(); + + vec2 i_st = rvtx.ST; + vec4 i_c = vec4(uvec4(rvtx.RGBA & 0xFFu, (rvtx.RGBA >> 8) & 0xFFu, (rvtx.RGBA >> 16) & 0xFFu, rvtx.RGBA >> 24)); + float i_q = rvtx.Q; + uvec2 i_p = uvec2(rvtx.XY & 0xFFFFu, rvtx.XY >> 16); + uint i_z = rvtx.Z; + uvec2 i_uv = uvec2(rvtx.UV & 0xFFFFu, rvtx.UV >> 16); + vec4 i_f = unpackUnorm4x8(rvtx.FOG); + + ProcessedVertex vtx; + + uint z = min(i_z, MaxDepth); + vtx.p.xy = vec2(i_p) - vec2(0.05f, 0.05f); + vtx.p.xy = vtx.p.xy * VertexScale - VertexOffset; + vtx.p.w = 1.0f; + +#if HAS_CLIP_CONTROL + vtx.p.z = float(z) * exp_min32; +#else + vtx.p.z = min(float(z) * exp2(-23.0f), 2.0f) - 1.0f; +#endif + + vec2 uv = vec2(i_uv) - TextureOffset; + vec2 st = i_st - TextureOffset; + + vtx.t_float.xy = st; + vtx.t_float.w = i_q; + + vtx.t_int.xy = uv * TextureScale; +#if VS_FST + vtx.t_int.zw = uv; +#else + vtx.t_int.zw = st / TextureScale; +#endif + + vtx.c = i_c; + vtx.t_float.z = i_f.x; + + return vtx; } -#if GS_PRIM == 0 -layout(points) in; +void main() +{ + ProcessedVertex vtx; + +#if defined(GL_ARB_shader_draw_parameters) && 
GL_ARB_shader_draw_parameters + uint vid = uint(gl_VertexID - gl_BaseVertexARB); #else -layout(lines) in; + uint vid = uint(gl_VertexID); #endif -layout(triangle_strip, max_vertices = 4) out; -#if GS_PRIM == 0 +#if VS_EXPAND == 1 // Point -void gs_main() -{ - // Transform a point to a NxN sprite - vertex point = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c); + vtx = load_vertex(vid >> 2); - // Get new position - vec4 lt_p = gl_in[0].gl_Position; - vec4 rb_p = gl_in[0].gl_Position + vec4(PointSize.x, PointSize.y, 0.0f, 0.0f); - vec4 lb_p = rb_p; - vec4 rt_p = rb_p; - lb_p.x = lt_p.x; - rt_p.y = lt_p.y; + vtx.p.x += ((vid & 1u) != 0u) ? PointSize.x : 0.0f; + vtx.p.y += ((vid & 2u) != 0u) ? PointSize.y : 0.0f; - out_vertex(lt_p, point); +#elif VS_EXPAND == 2 // Line - out_vertex(lb_p, point); + uint vid_base = vid >> 2; + bool is_bottom = (vid & 2u) != 0u; + bool is_right = (vid & 1u) != 0u; + uint vid_other = is_bottom ? vid_base - 1 : vid_base + 1; + vtx = load_vertex(vid_base); + ProcessedVertex other = load_vertex(vid_other); - out_vertex(rt_p, point); - - out_vertex(rb_p, point); - - EndPrimitive(); -} - -#elif GS_PRIM == 1 - -void gs_main() -{ - // Transform a line to a thick line-sprite - vertex left = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c); - vertex right = vertex(GSin[1].t_float, GSin[1].t_int, GSin[1].c); - vec4 lt_p = gl_in[0].gl_Position; - vec4 rt_p = gl_in[1].gl_Position; - - // Potentially there is faster math - vec2 line_vector = normalize(rt_p.xy - lt_p.xy); + vec2 line_vector = normalize(vtx.p.xy - other.p.xy); vec2 line_normal = vec2(line_vector.y, -line_vector.x); - vec2 line_width = (line_normal * PointSize) / 2.0f; + vec2 line_width = (line_normal * PointSize) / 2; + // line_normal is inverted for bottom point + vec2 offset = ((uint(is_bottom) ^ uint(is_right)) != 0u) ? 
line_width : -line_width; + vtx.p.xy += offset; - lt_p.xy -= line_width; - rt_p.xy -= line_width; - vec4 lb_p = gl_in[0].gl_Position + vec4(line_width, 0.0f, 0.0f); - vec4 rb_p = gl_in[1].gl_Position + vec4(line_width, 0.0f, 0.0f); + // Lines will be run as (0 1 2) (1 2 3) + // This means that both triangles will have a point based off the top line point as their first point + // So we don't have to do anything for !IIP - out_vertex(lt_p, left); +#elif VS_EXPAND == 3 // Sprite - out_vertex(lb_p, left); + // Sprite points are always in pairs + uint vid_base = vid >> 1; + uint vid_lt = vid_base & ~1u; + uint vid_rb = vid_base | 1u; - out_vertex(rt_p, right); + ProcessedVertex lt = load_vertex(vid_lt); + ProcessedVertex rb = load_vertex(vid_rb); + vtx = rb; - out_vertex(rb_p, right); + bool is_right = ((vid & 1u) != 0u); + vtx.p.x = is_right ? lt.p.x : vtx.p.x; + vtx.t_float.x = is_right ? lt.t_float.x : vtx.t_float.x; + vtx.t_int.xz = is_right ? lt.t_int.xz : vtx.t_int.xz; - EndPrimitive(); -} - -#else // GS_PRIM == 3 - -void gs_main() -{ - // left top => GSin[0]; - // right bottom => GSin[1]; - vertex rb = vertex(GSin[1].t_float, GSin[1].t_int, GSin[1].c); - vertex lt = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c); - - vec4 rb_p = gl_in[1].gl_Position; - vec4 lb_p = rb_p; - vec4 rt_p = rb_p; - vec4 lt_p = gl_in[0].gl_Position; - - // flat depth - lt_p.z = rb_p.z; - // flat fog and texture perspective - lt.t_float.zw = rb.t_float.zw; - // flat color - lt.c = rb.c; - - // Swap texture and position coordinate - vertex lb = rb; - lb.t_float.x = lt.t_float.x; - lb.t_int.x = lt.t_int.x; - lb.t_int.z = lt.t_int.z; - lb_p.x = lt_p.x; - - vertex rt = rb; - rt_p.y = lt_p.y; - rt.t_float.y = lt.t_float.y; - rt.t_int.y = lt.t_int.y; - rt.t_int.w = lt.t_int.w; - - out_vertex(lt_p, lt); - - out_vertex(lb_p, lb); - - out_vertex(rt_p, rt); - - out_vertex(rb_p, rb); - - EndPrimitive(); -} + bool is_bottom = ((vid & 2u) != 0u); + vtx.p.y = is_bottom ? 
lt.p.y : vtx.p.y; + vtx.t_float.y = is_bottom ? lt.t_float.y : vtx.t_float.y; + vtx.t_int.yw = is_bottom ? lt.t_int.yw : vtx.t_int.yw; #endif -#endif + gl_Position = vtx.p; + VSout.t_float = vtx.t_float; + VSout.t_int = vtx.t_int; + VSout.c = vtx.c; +} + +#endif // VS_EXPAND + +#endif // VERTEX_SHADER diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 825923bb6d..89bd81f146 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -2,7 +2,7 @@ // Vertex Shader ////////////////////////////////////////////////////////////////////// -#if defined(VERTEX_SHADER) || defined(GEOMETRY_SHADER) +#if defined(VERTEX_SHADER) layout(std140, set = 0, binding = 0) uniform cb0 { @@ -15,18 +15,6 @@ layout(std140, set = 0, binding = 0) uniform cb0 uint pad_cb0; }; -#endif - -#ifdef VERTEX_SHADER - -layout(location = 0) in vec2 a_st; -layout(location = 1) in uvec4 a_c; -layout(location = 2) in float a_q; -layout(location = 3) in uvec2 a_p; -layout(location = 4) in uint a_z; -layout(location = 5) in uvec2 a_uv; -layout(location = 6) in vec4 a_f; - layout(location = 0) out VSOutput { vec4 t; @@ -39,17 +27,27 @@ layout(location = 0) out VSOutput #endif } vsOut; +#if VS_EXPAND == 0 + +layout(location = 0) in vec2 a_st; +layout(location = 1) in uvec4 a_c; +layout(location = 2) in float a_q; +layout(location = 3) in uvec2 a_p; +layout(location = 4) in uint a_z; +layout(location = 5) in uvec2 a_uv; +layout(location = 6) in vec4 a_f; + void main() { // Clamp to max depth, gs doesn't wrap - float z = min(a_z, MaxDepth); + uint z = min(a_z, MaxDepth); // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go) // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel // example: 133.0625 (133 + 
1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133 - gl_Position = vec4(a_p, z, 1.0f) - vec4(0.05f, 0.05f, 0, 0); + gl_Position = vec4(a_p, float(z), 1.0f) - vec4(0.05f, 0.05f, 0, 0); gl_Position.xy = gl_Position.xy * vec2(VertexScale.x, -VertexScale.y) - vec2(VertexOffset.x, -VertexOffset.y); gl_Position.z *= exp2(-32.0f); // integer->float depth gl_Position.y = -gl_Position.y; @@ -81,214 +79,149 @@ void main() gl_PointSize = PointSize.x; #endif - vsOut.c = a_c; + vsOut.c = vec4(a_c); vsOut.t.z = a_f.r; } -#endif +#else // VS_EXPAND -#ifdef GEOMETRY_SHADER - -layout(location = 0) in VSOutput +struct RawVertex { + vec2 ST; + uint RGBA; + float Q; + uint XY; + uint Z; + uint UV; + uint FOG; +}; + +layout(std140, set = 0, binding = 2) readonly buffer VertexBuffer { + RawVertex vertex_buffer[]; +}; + +struct ProcessedVertex +{ + vec4 p; vec4 t; vec4 ti; - #if GS_IIP != 0 - vec4 c; + vec4 c; +}; + +ProcessedVertex load_vertex(uint index) +{ + RawVertex rvtx = vertex_buffer[gl_BaseVertexARB + index]; + + vec2 a_st = rvtx.ST; + uvec4 a_c = uvec4(rvtx.RGBA & 0xFFu, (rvtx.RGBA >> 8) & 0xFFu, (rvtx.RGBA >> 16) & 0xFFu, rvtx.RGBA >> 24); + float a_q = rvtx.Q; + uvec2 a_p = uvec2(rvtx.XY & 0xFFFFu, rvtx.XY >> 16); + uint a_z = rvtx.Z; + uvec2 a_uv = uvec2(rvtx.UV & 0xFFFFu, rvtx.UV >> 16); + vec4 a_f = unpackUnorm4x8(rvtx.FOG); + + ProcessedVertex vtx; + + uint z = min(a_z, MaxDepth); + vtx.p = vec4(a_p, float(z), 1.0f) - vec4(0.05f, 0.05f, 0, 0); + vtx.p.xy = vtx.p.xy * vec2(VertexScale.x, -VertexScale.y) - vec2(VertexOffset.x, -VertexOffset.y); + vtx.p.z *= exp2(-32.0f); // integer->float depth + vtx.p.y = -vtx.p.y; + + #if VS_TME + vec2 uv = a_uv - TextureOffset; + vec2 st = a_st - TextureOffset; + vtx.ti.xy = uv * TextureScale; + + #if VS_FST + vtx.ti.zw = uv; + #else + vtx.ti.zw = st / TextureScale; + #endif + + vtx.t.xy = st; + vtx.t.w = a_q; #else - flat vec4 c; + vtx.t = vec4(0.0f, 0.0f, 0.0f, 1.0f); + vtx.ti = vec4(0.0f); #endif -} gsIn[]; 
-layout(location = 0) out GSOutput -{ - vec4 t; - vec4 ti; - #if GS_IIP != 0 - vec4 c; - #else - flat vec4 c; - #endif -} gsOut; + vtx.c = a_c; + vtx.t.z = a_f.r; -void WriteVertex(vec4 pos, vec4 t, vec4 ti, vec4 c) -{ -#if GS_FORWARD_PRIMID - gl_PrimitiveID = gl_PrimitiveIDIn; -#endif - gl_Position = pos; - gsOut.t = t; - gsOut.ti = ti; - gsOut.c = c; - EmitVertex(); + return vtx; } -////////////////////////////////////////////////////////////////////// -// Geometry Shader -////////////////////////////////////////////////////////////////////// - -#if GS_PRIM == 0 && GS_POINT == 0 - -layout(points) in; -layout(points, max_vertices = 1) out; -void main() -{ - WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c); - EndPrimitive(); -} - -#elif GS_PRIM == 0 && GS_POINT == 1 - -layout(points) in; -layout(triangle_strip, max_vertices = 4) out; - void main() { - // Transform a point to a NxN sprite + ProcessedVertex vtx; + uint vid = uint(gl_VertexIndex - gl_BaseVertexARB); - // Get new position - vec4 lt_p = gl_in[0].gl_Position; - vec4 rb_p = gl_in[0].gl_Position + vec4(PointSize.x, PointSize.y, 0.0f, 0.0f); - vec4 lb_p = rb_p; - vec4 rt_p = rb_p; - lb_p.x = lt_p.x; - rt_p.y = lt_p.y; +#if VS_EXPAND == 1 // Point - WriteVertex(lt_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c); - WriteVertex(lb_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c); - WriteVertex(rt_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c); - WriteVertex(rb_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c); + vtx = load_vertex(vid >> 2); - EndPrimitive(); -} + vtx.p.x += ((vid & 1u) != 0u) ? PointSize.x : 0.0f; + vtx.p.y += ((vid & 2u) != 0u) ? 
PointSize.y : 0.0f; -#elif GS_PRIM == 1 && GS_LINE == 0 +#elif VS_EXPAND == 2 // Line -layout(lines) in; -layout(line_strip, max_vertices = 2) out; + uint vid_base = vid >> 2; -void main() -{ -#if GS_IIP == 0 - WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[1].c); - WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[1].c); + bool is_bottom = (vid & 2u) != 0u; + bool is_right = (vid & 1u) != 0u; +#ifdef VS_PROVOKING_VERTEX_LAST + uint vid_other = is_bottom ? vid_base - 1 : vid_base + 1; #else - WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c); - WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[1].c); + uint vid_other = is_bottom ? vid_base + 1 : vid_base - 1; #endif - EndPrimitive(); -} + + vtx = load_vertex(vid_base); + ProcessedVertex other = load_vertex(vid_other); -#elif GS_PRIM == 1 && GS_LINE == 1 - -layout(lines) in; -layout(triangle_strip, max_vertices = 4) out; - -void main() -{ - // Transform a line to a thick line-sprite - vec4 left_t = gsIn[0].t; - vec4 left_ti = gsIn[0].ti; - vec4 left_c = gsIn[0].c; - vec4 right_t = gsIn[1].t; - vec4 right_ti = gsIn[1].ti; - vec4 right_c = gsIn[1].c; - vec4 lt_p = gl_in[0].gl_Position; - vec4 rt_p = gl_in[1].gl_Position; - - // Potentially there is faster math - vec2 line_vector = normalize(rt_p.xy - lt_p.xy); + vec2 line_vector = normalize(vtx.p.xy - other.p.xy); vec2 line_normal = vec2(line_vector.y, -line_vector.x); - vec2 line_width = (line_normal * PointSize) / 2.0; + vec2 line_width = (line_normal * PointSize) / 2; + // line_normal is inverted for bottom point + vec2 offset = ((uint(is_bottom) ^ uint(is_right)) != 0u) ? 
line_width : -line_width; + vtx.p.xy += offset; - lt_p.xy -= line_width; - rt_p.xy -= line_width; - vec4 lb_p = gl_in[0].gl_Position + vec4(line_width, 0.0, 0.0); - vec4 rb_p = gl_in[1].gl_Position + vec4(line_width, 0.0, 0.0); + // Lines will be run as (0 1 2) (1 2 3) + // This means that both triangles will have a point based off the top line point as their first point + // So we don't have to do anything for !IIP - #if GS_IIP == 0 - left_c = right_c; - #endif +#elif VS_EXPAND == 3 // Sprite - WriteVertex(lt_p, left_t, left_ti, left_c); - WriteVertex(lb_p, left_t, left_ti, left_c); - WriteVertex(rt_p, right_t, right_ti, right_c); - WriteVertex(rb_p, right_t, right_ti, right_c); - EndPrimitive(); -} + // Sprite points are always in pairs + uint vid_base = vid >> 1; + uint vid_lt = vid_base & ~1u; + uint vid_rb = vid_base | 1u; -#elif GS_PRIM == 2 + ProcessedVertex lt = load_vertex(vid_lt); + ProcessedVertex rb = load_vertex(vid_rb); + vtx = rb; -layout(triangles) in; -layout(triangle_strip, max_vertices = 3) out; + bool is_right = ((vid & 1u) != 0u); + vtx.p.x = is_right ? lt.p.x : vtx.p.x; + vtx.t.x = is_right ? lt.t.x : vtx.t.x; + vtx.ti.xz = is_right ? 
lt.ti.xz : vtx.ti.xz; -void main() -{ - #if GS_IIP == 0 - WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[2].c); - WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[2].c); - WriteVertex(gl_in[2].gl_Position, gsIn[2].t, gsIn[2].ti, gsIn[2].c); - #else - WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c); - WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[0].c); - WriteVertex(gl_in[2].gl_Position, gsIn[2].t, gsIn[2].ti, gsIn[0].c); - #endif - - EndPrimitive(); -} - -#elif GS_PRIM == 3 - -layout(lines) in; -layout(triangle_strip, max_vertices = 4) out; - -void main() -{ - vec4 lt_p = gl_in[0].gl_Position; - vec4 lt_t = gsIn[0].t; - vec4 lt_ti = gsIn[0].ti; - vec4 lt_c = gsIn[0].c; - vec4 rb_p = gl_in[1].gl_Position; - vec4 rb_t = gsIn[1].t; - vec4 rb_ti = gsIn[1].ti; - vec4 rb_c = gsIn[1].c; - - // flat depth - lt_p.z = rb_p.z; - // flat fog and texture perspective - lt_t.zw = rb_t.zw; - - // flat color - lt_c = rb_c; - - // Swap texture and position coordinate - vec4 lb_p = rb_p; - vec4 lb_t = rb_t; - vec4 lb_ti = rb_ti; - vec4 lb_c = rb_c; - lb_p.x = lt_p.x; - lb_t.x = lt_t.x; - lb_ti.x = lt_ti.x; - lb_ti.z = lt_ti.z; - - vec4 rt_p = rb_p; - vec4 rt_t = rb_t; - vec4 rt_ti = rb_ti; - vec4 rt_c = rb_c; - rt_p.y = lt_p.y; - rt_t.y = lt_t.y; - rt_ti.y = lt_ti.y; - rt_ti.w = lt_ti.w; - - WriteVertex(lt_p, lt_t, lt_ti, lt_c); - WriteVertex(lb_p, lb_t, lb_ti, lb_c); - WriteVertex(rt_p, rt_t, rt_ti, rt_c); - WriteVertex(rb_p, rb_t, rb_ti, rb_c); - EndPrimitive(); -} + bool is_bottom = ((vid & 2u) != 0u); + vtx.p.y = is_bottom ? lt.p.y : vtx.p.y; + vtx.t.y = is_bottom ? lt.t.y : vtx.t.y; + vtx.ti.yw = is_bottom ? 
lt.ti.yw : vtx.ti.yw; #endif -#endif + + gl_Position = vtx.p; + vsOut.t = vtx.t; + vsOut.ti = vtx.ti; + vsOut.c = vtx.c; +} + +#endif // VS_EXPAND + +#endif // VERTEX_SHADER #ifdef FRAGMENT_SHADER diff --git a/common/D3D11/ShaderCache.cpp b/common/D3D11/ShaderCache.cpp index 9e0a0240e0..0f216a16e7 100644 --- a/common/D3D11/ShaderCache.cpp +++ b/common/D3D11/ShaderCache.cpp @@ -337,16 +337,6 @@ bool D3D11::ShaderCache::GetVertexShaderAndInputLayout(ID3D11Device* device, return true; } -wil::com_ptr_nothrow D3D11::ShaderCache::GetGeometryShader(ID3D11Device* device, - const std::string_view& shader_code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */) -{ - wil::com_ptr_nothrow blob = GetShaderBlob(ShaderCompiler::Type::Geometry, shader_code, macros, entry_point); - if (!blob) - return {}; - - return D3D11::ShaderCompiler::CreateGeometryShader(device, blob.get()); -} - wil::com_ptr_nothrow D3D11::ShaderCache::GetPixelShader(ID3D11Device* device, const std::string_view& shader_code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */) { diff --git a/common/D3D11/ShaderCache.h b/common/D3D11/ShaderCache.h index 4b5ca7f9f0..001aa14d94 100644 --- a/common/D3D11/ShaderCache.h +++ b/common/D3D11/ShaderCache.h @@ -51,9 +51,6 @@ namespace D3D11 const D3D11_INPUT_ELEMENT_DESC* layout, size_t layout_size, const std::string_view& shader_code, const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main"); - wil::com_ptr_nothrow GetGeometryShader(ID3D11Device* device, const std::string_view& shader_code, - const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main"); - wil::com_ptr_nothrow GetPixelShader(ID3D11Device* device, const std::string_view& shader_code, const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main"); diff --git a/common/D3D11/ShaderCompiler.cpp b/common/D3D11/ShaderCompiler.cpp index 37f787810b..39b78be5a0 100644 --- 
a/common/D3D11/ShaderCompiler.cpp +++ b/common/D3D11/ShaderCompiler.cpp @@ -31,21 +31,21 @@ wil::com_ptr_nothrow D3D11::ShaderCompiler::CompileShader(Type type, D { case D3D_FEATURE_LEVEL_10_0: { - static constexpr std::array targets = {{"vs_4_0", "gs_4_0", "ps_4_0", "cs_4_0"}}; + static constexpr std::array targets = {{"vs_4_0", "ps_4_0", "cs_4_0"}}; target = targets[static_cast(type)]; } break; case D3D_FEATURE_LEVEL_10_1: { - static constexpr std::array targets = {{"vs_4_1", "gs_4_1", "ps_4_1", "cs_4_1"}}; + static constexpr std::array targets = {{"vs_4_1", "ps_4_1", "cs_4_1"}}; target = targets[static_cast(type)]; } break; case D3D_FEATURE_LEVEL_11_0: { - static constexpr std::array targets = {{"vs_5_0", "gs_5_0", "ps_5_0", "cs_5_0"}}; + static constexpr std::array targets = {{"vs_5_0", "ps_5_0", "cs_5_0"}}; target = targets[static_cast(type)]; } break; @@ -53,7 +53,7 @@ wil::com_ptr_nothrow D3D11::ShaderCompiler::CompileShader(Type type, D case D3D_FEATURE_LEVEL_11_1: default: { - static constexpr std::array targets = {{"vs_5_1", "gs_5_1", "ps_5_1", "cs_5_1"}}; + static constexpr std::array targets = {{"vs_5_1", "ps_5_1", "cs_5_1"}}; target = targets[static_cast(type)]; } break; @@ -108,16 +108,6 @@ wil::com_ptr_nothrow D3D11::ShaderCompiler::CompileAndCreate return CreateVertexShader(device, blob.get()); } -wil::com_ptr_nothrow D3D11::ShaderCompiler::CompileAndCreateGeometryShader(ID3D11Device* device, bool debug, - const std::string_view& code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */) -{ - wil::com_ptr_nothrow blob = CompileShader(Type::Geometry, device->GetFeatureLevel(), debug, code, macros, entry_point); - if (!blob) - return {}; - - return CreateGeometryShader(device, blob.get()); -} - wil::com_ptr_nothrow D3D11::ShaderCompiler::CompileAndCreatePixelShader(ID3D11Device* device, bool debug, const std::string_view& code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */) { 
@@ -157,25 +147,6 @@ wil::com_ptr_nothrow D3D11::ShaderCompiler::CreateVertexShad const_cast(blob)->GetBufferSize()); } -wil::com_ptr_nothrow D3D11::ShaderCompiler::CreateGeometryShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length) -{ - wil::com_ptr_nothrow shader; - const HRESULT hr = device->CreateGeometryShader(bytecode, bytecode_length, nullptr, shader.put()); - if (FAILED(hr)) - { - Console.Error("Failed to create geometry shader: 0x%08X", hr); - return {}; - } - - return shader; -} - -wil::com_ptr_nothrow D3D11::ShaderCompiler::CreateGeometryShader(ID3D11Device* device, const ID3DBlob* blob) -{ - return CreateGeometryShader(device, const_cast(blob)->GetBufferPointer(), - const_cast(blob)->GetBufferSize()); -} - wil::com_ptr_nothrow D3D11::ShaderCompiler::CreatePixelShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length) { wil::com_ptr_nothrow shader; diff --git a/common/D3D11/ShaderCompiler.h b/common/D3D11/ShaderCompiler.h index 26c6442b92..fb2be0eb6a 100644 --- a/common/D3D11/ShaderCompiler.h +++ b/common/D3D11/ShaderCompiler.h @@ -27,7 +27,6 @@ namespace D3D11::ShaderCompiler enum class Type { Vertex, - Geometry, Pixel, Compute }; @@ -37,8 +36,6 @@ namespace D3D11::ShaderCompiler wil::com_ptr_nothrow CompileAndCreateVertexShader(ID3D11Device* device, bool debug, const std::string_view& code, const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main"); - wil::com_ptr_nothrow CompileAndCreateGeometryShader(ID3D11Device* device, bool debug, const std::string_view& code, - const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main"); wil::com_ptr_nothrow CompileAndCreatePixelShader(ID3D11Device* device, bool debug, const std::string_view& code, const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main"); wil::com_ptr_nothrow CompileAndCreateComputeShader(ID3D11Device* device, bool debug, const std::string_view& code, @@ -46,8 +43,6 @@ namespace D3D11::ShaderCompiler 
wil::com_ptr_nothrow CreateVertexShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
 	wil::com_ptr_nothrow CreateVertexShader(ID3D11Device* device, const ID3DBlob* blob);
-	wil::com_ptr_nothrow CreateGeometryShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
-	wil::com_ptr_nothrow CreateGeometryShader(ID3D11Device* device, const ID3DBlob* blob);
 	wil::com_ptr_nothrow CreatePixelShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
 	wil::com_ptr_nothrow CreatePixelShader(ID3D11Device* device, const ID3DBlob* blob);
 	wil::com_ptr_nothrow CreateComputeShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
diff --git a/common/D3D12/Context.cpp b/common/D3D12/Context.cpp
index aa3bbf9cab..3f6f93b070 100644
--- a/common/D3D12/Context.cpp
+++ b/common/D3D12/Context.cpp
@@ -665,3 +665,57 @@ void Context::SetEnableGPUTiming(bool enabled)
 {
 	m_gpu_timing_enabled = enabled;
 }
+
+bool Context::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer,
+	D3D12MA::Allocation** gpu_allocation, const std::function& fill_callback)
+{
+	// Builds the fixed index buffer in GPU-local (DEFAULT heap) memory; returns false if any step fails.
+	// fill_callback writes the contents into a mapped UPLOAD-heap staging buffer, which is then copied over.
+	const D3D12_RESOURCE_DESC rd = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1,
+		DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+		D3D12_RESOURCE_FLAG_NONE}; // one description, used for both the staging and the GPU buffer
+
+	const D3D12MA::ALLOCATION_DESC cpu_ad = {
+		D3D12MA::ALLOCATION_FLAG_NONE,
+		D3D12_HEAP_TYPE_UPLOAD};
+
+	ComPtr cpu_buffer;
+	ComPtr cpu_allocation;
+	HRESULT hr = m_allocator->CreateResource(&cpu_ad, &rd, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
+		cpu_allocation.put(), IID_PPV_ARGS(cpu_buffer.put()));
+	pxAssertMsg(SUCCEEDED(hr), "Allocate CPU buffer");
+	if (FAILED(hr))
+		return false;
+
+	static constexpr const D3D12_RANGE read_range = {}; // empty range: the CPU does not read the staging buffer
+	const D3D12_RANGE write_range = {0, size};
+	void* mapped;
+	hr = cpu_buffer->Map(0, &read_range, &mapped);
+	pxAssertMsg(SUCCEEDED(hr), "Map CPU buffer");
+	if (FAILED(hr))
+		return false;
+	fill_callback(mapped); // the caller writes the buffer contents through the mapped pointer
+	cpu_buffer->Unmap(0, &write_range); // reports [0, size) as written by the CPU
+
+	const D3D12MA::ALLOCATION_DESC gpu_ad = {
+		D3D12MA::ALLOCATION_FLAG_COMMITTED,
+		D3D12_HEAP_TYPE_DEFAULT};
+
+	hr = m_allocator->CreateResource(&gpu_ad, &rd, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
+		gpu_allocation, IID_PPV_ARGS(gpu_buffer)); // created directly in the copy-destination state
+	pxAssertMsg(SUCCEEDED(hr), "Allocate GPU buffer");
+	if (FAILED(hr))
+		return false;
+
+	GetInitCommandList()->CopyBufferRegion(*gpu_buffer, 0, cpu_buffer.get(), 0, size); // staging -> GPU, whole buffer
+
+	D3D12_RESOURCE_BARRIER rb = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE};
+	rb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+	rb.Transition.pResource = *gpu_buffer;
+	rb.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
+	rb.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER; // the result is always left in the index-buffer state
+	GetInitCommandList()->ResourceBarrier(1, &rb);
+
+	DeferResourceDestruction(cpu_allocation.get(), cpu_buffer.get()); // NOTE(review): presumably keeps the staging buffer alive until the copy has executed - confirm
+	return true;
+}
diff --git a/common/D3D12/Context.h b/common/D3D12/Context.h
index 869ae0605b..74c2769978 100644
--- a/common/D3D12/Context.h
+++ b/common/D3D12/Context.h
@@ -152,6 +152,10 @@ namespace D3D12
 		float
GetAndResetAccumulatedGPUTime(); void SetEnableGPUTiming(bool enabled); + // Allocates a temporary CPU staging buffer, fires the callback with it to populate, then copies to a GPU buffer. + bool AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer, D3D12MA::Allocation** gpu_allocation, + const std::function& fill_callback); + private: struct CommandListResources { diff --git a/common/D3D12/ShaderCache.cpp b/common/D3D12/ShaderCache.cpp index 5d6ab18210..926d826a1c 100644 --- a/common/D3D12/ShaderCache.cpp +++ b/common/D3D12/ShaderCache.cpp @@ -521,9 +521,6 @@ ShaderCache::ComPtr ShaderCache::CompileAndAddShaderBlob(const CacheIn case EntryType::VertexShader: blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Vertex, m_feature_level, m_debug, shader_code, macros, entry_point); break; - case EntryType::GeometryShader: - blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Geometry, m_feature_level, m_debug, shader_code, macros, entry_point); - break; case EntryType::PixelShader: blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Pixel, m_feature_level, m_debug, shader_code, macros, entry_point); break; diff --git a/common/D3D12/ShaderCache.h b/common/D3D12/ShaderCache.h index bab26a9988..dfca321829 100644 --- a/common/D3D12/ShaderCache.h +++ b/common/D3D12/ShaderCache.h @@ -37,7 +37,6 @@ namespace D3D12 enum class EntryType { VertexShader, - GeometryShader, PixelShader, ComputeShader, GraphicsPipeline, @@ -59,11 +58,6 @@ namespace D3D12 { return GetShaderBlob(EntryType::VertexShader, shader_code, macros, entry_point); } - __fi ComPtr GetGeometryShader(std::string_view shader_code, - const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main") - { - return GetShaderBlob(EntryType::GeometryShader, shader_code, macros, entry_point); - } __fi ComPtr GetPixelShader(std::string_view shader_code, const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main") { diff 
--git a/common/GL/Program.cpp b/common/GL/Program.cpp index f2c6401909..bfa0452f08 100644 --- a/common/GL/Program.cpp +++ b/common/GL/Program.cpp @@ -35,8 +35,6 @@ namespace GL prog.m_program_id = 0; m_vertex_shader_id = prog.m_vertex_shader_id; prog.m_vertex_shader_id = 0; - m_geometry_shader_id = prog.m_geometry_shader_id; - prog.m_geometry_shader_id = 0; m_fragment_shader_id = prog.m_fragment_shader_id; prog.m_fragment_shader_id = 0; m_uniform_locations = std::move(prog.m_uniform_locations); @@ -102,8 +100,7 @@ namespace GL s_last_program_id = 0; } - bool Program::Compile(const std::string_view vertex_shader, const std::string_view geometry_shader, - const std::string_view fragment_shader) + bool Program::Compile(const std::string_view vertex_shader, const std::string_view fragment_shader) { if (!vertex_shader.empty()) { @@ -112,13 +109,6 @@ namespace GL return false; } - if (!geometry_shader.empty()) - { - m_geometry_shader_id = CompileShader(GL_GEOMETRY_SHADER, geometry_shader); - if (m_geometry_shader_id == 0) - return false; - } - if (!fragment_shader.empty()) { m_fragment_shader_id = CompileShader(GL_FRAGMENT_SHADER, fragment_shader); @@ -129,8 +119,6 @@ namespace GL m_program_id = glCreateProgram(); if (m_vertex_shader_id != 0) glAttachShader(m_program_id, m_vertex_shader_id); - if (m_geometry_shader_id != 0) - glAttachShader(m_program_id, m_geometry_shader_id); if (m_fragment_shader_id != 0) glAttachShader(m_program_id, m_fragment_shader_id); return true; @@ -240,9 +228,6 @@ namespace GL if (m_vertex_shader_id != 0) glDeleteShader(m_vertex_shader_id); m_vertex_shader_id = 0; - if (m_geometry_shader_id != 0) - glDeleteShader(m_geometry_shader_id); - m_geometry_shader_id = 0; if (m_fragment_shader_id != 0) glDeleteShader(m_fragment_shader_id); m_fragment_shader_id = 0; @@ -541,8 +526,6 @@ namespace GL prog.m_program_id = 0; m_vertex_shader_id = prog.m_vertex_shader_id; prog.m_vertex_shader_id = 0; - m_geometry_shader_id = prog.m_geometry_shader_id; - 
prog.m_geometry_shader_id = 0; m_fragment_shader_id = prog.m_fragment_shader_id; prog.m_fragment_shader_id = 0; m_uniform_locations = std::move(prog.m_uniform_locations); diff --git a/common/GL/Program.h b/common/GL/Program.h index 86d28833da..f4f4718cdd 100644 --- a/common/GL/Program.h +++ b/common/GL/Program.h @@ -34,8 +34,7 @@ namespace GL bool IsValid() const { return m_program_id != 0; } - bool Compile(const std::string_view vertex_shader, const std::string_view geometry_shader, - const std::string_view fragment_shader); + bool Compile(const std::string_view vertex_shader, const std::string_view fragment_shader); bool CompileCompute(const std::string_view glsl); @@ -99,7 +98,6 @@ namespace GL GLuint m_program_id = 0; GLuint m_vertex_shader_id = 0; - GLuint m_geometry_shader_id = 0; GLuint m_fragment_shader_id = 0; std::vector m_uniform_locations; diff --git a/common/GL/ShaderCache.cpp b/common/GL/ShaderCache.cpp index f4047230fc..19072272e7 100644 --- a/common/GL/ShaderCache.cpp +++ b/common/GL/ShaderCache.cpp @@ -28,9 +28,6 @@ namespace GL u64 vertex_source_hash_low; u64 vertex_source_hash_high; u32 vertex_source_length; - u64 geometry_source_hash_low; - u64 geometry_source_hash_high; - u32 geometry_source_length; u64 fragment_source_hash_low; u64 fragment_source_hash_high; u32 fragment_source_length; @@ -51,9 +48,7 @@ namespace GL { return ( vertex_source_hash_low == key.vertex_source_hash_low && vertex_source_hash_high == key.vertex_source_hash_high && - vertex_source_length == key.vertex_source_length && geometry_source_hash_low == key.geometry_source_hash_low && - geometry_source_hash_high == key.geometry_source_hash_high && - geometry_source_length == key.geometry_source_length && fragment_source_hash_low == key.fragment_source_hash_low && + vertex_source_length == key.vertex_source_length && fragment_source_hash_low == key.fragment_source_hash_low && fragment_source_hash_high == key.fragment_source_hash_high && fragment_source_length == 
key.fragment_source_length); } @@ -61,9 +56,7 @@ namespace GL { return ( vertex_source_hash_low != key.vertex_source_hash_low || vertex_source_hash_high != key.vertex_source_hash_high || - vertex_source_length != key.vertex_source_length || geometry_source_hash_low != key.geometry_source_hash_low || - geometry_source_hash_high != key.geometry_source_hash_high || - geometry_source_length != key.geometry_source_length || fragment_source_hash_low != key.fragment_source_hash_low || + vertex_source_length != key.vertex_source_length || fragment_source_hash_low != key.fragment_source_hash_low || fragment_source_hash_high != key.fragment_source_hash_high || fragment_source_length != key.fragment_source_length); } @@ -204,7 +197,6 @@ namespace GL const CacheIndexKey key{ entry.vertex_source_hash_low, entry.vertex_source_hash_high, entry.vertex_source_length, - entry.geometry_source_hash_low, entry.geometry_source_hash_high, entry.geometry_source_length, entry.fragment_source_hash_low, entry.fragment_source_hash_high, entry.fragment_source_length}; const CacheIndexData data{entry.file_offset, entry.blob_size, entry.blob_format}; m_index.emplace(key, data); @@ -242,7 +234,6 @@ namespace GL } ShaderCache::CacheIndexKey ShaderCache::GetCacheKey(const std::string_view& vertex_shader, - const std::string_view& geometry_shader, const std::string_view& fragment_shader) { union ShaderHash @@ -256,7 +247,6 @@ namespace GL }; ShaderHash vertex_hash = {}; - ShaderHash geometry_hash = {}; ShaderHash fragment_hash = {}; MD5Digest digest; @@ -266,13 +256,6 @@ namespace GL digest.Final(vertex_hash.bytes); } - if (!geometry_shader.empty()) - { - digest.Reset(); - digest.Update(geometry_shader.data(), static_cast(geometry_shader.length())); - digest.Final(geometry_hash.bytes); - } - if (!fragment_shader.empty()) { digest.Reset(); @@ -281,7 +264,6 @@ namespace GL } return CacheIndexKey{vertex_hash.low, vertex_hash.high, static_cast(vertex_shader.length()), - geometry_hash.low, 
geometry_hash.high, static_cast(geometry_shader.length()), fragment_hash.low, fragment_hash.high, static_cast(fragment_shader.length())}; } @@ -296,7 +278,6 @@ namespace GL } std::optional ShaderCache::GetProgram(const std::string_view vertex_shader, - const std::string_view geometry_shader, const std::string_view fragment_shader, const PreLinkCallback& callback) { if (!m_program_binary_supported || !m_blob_file) @@ -305,7 +286,7 @@ namespace GL Common::Timer timer; #endif - std::optional res = CompileProgram(vertex_shader, geometry_shader, fragment_shader, callback, false); + std::optional res = CompileProgram(vertex_shader, fragment_shader, callback, false); #ifdef PCSX2_DEVBUILD Console.WriteLn("Time to compile shader without caching: %.2fms", timer.GetTimeMilliseconds()); @@ -313,10 +294,10 @@ namespace GL return res; } - const auto key = GetCacheKey(vertex_shader, geometry_shader, fragment_shader); + const auto key = GetCacheKey(vertex_shader, fragment_shader); auto iter = m_index.find(key); if (iter == m_index.end()) - return CompileAndAddProgram(key, vertex_shader, geometry_shader, fragment_shader, callback); + return CompileAndAddProgram(key, vertex_shader, fragment_shader, callback); std::vector data(iter->second.blob_size); if (std::fseek(m_blob_file, iter->second.file_offset, SEEK_SET) != 0 || @@ -343,16 +324,15 @@ namespace GL Console.Warning( "Failed to create program from binary, this may be due to a driver or GPU Change. 
Recreating cache."); if (!Recreate()) - return CompileProgram(vertex_shader, geometry_shader, fragment_shader, callback, false); + return CompileProgram(vertex_shader, fragment_shader, callback, false); else - return CompileAndAddProgram(key, vertex_shader, geometry_shader, fragment_shader, callback); + return CompileAndAddProgram(key, vertex_shader, fragment_shader, callback); } bool ShaderCache::GetProgram(Program* out_program, const std::string_view vertex_shader, - const std::string_view geometry_shader, const std::string_view fragment_shader, - const PreLinkCallback& callback /* = */) + const std::string_view fragment_shader, const PreLinkCallback& callback /* = */) { - auto prog = GetProgram(vertex_shader, geometry_shader, fragment_shader, callback); + auto prog = GetProgram(vertex_shader, fragment_shader, callback); if (!prog) return false; @@ -374,9 +354,6 @@ namespace GL entry.vertex_source_hash_low = key.vertex_source_hash_low; entry.vertex_source_hash_high = key.vertex_source_hash_high; entry.vertex_source_length = key.vertex_source_length; - entry.geometry_source_hash_low = key.geometry_source_hash_low; - entry.geometry_source_hash_high = key.geometry_source_hash_high; - entry.geometry_source_length = key.geometry_source_length; entry.fragment_source_hash_low = key.fragment_source_hash_low; entry.fragment_source_hash_high = key.fragment_source_hash_high; entry.fragment_source_length = key.fragment_source_length; @@ -397,12 +374,10 @@ namespace GL } std::optional ShaderCache::CompileProgram(const std::string_view& vertex_shader, - const std::string_view& geometry_shader, - const std::string_view& fragment_shader, - const PreLinkCallback& callback, bool set_retrievable) + const std::string_view& fragment_shader, const PreLinkCallback& callback, bool set_retrievable) { Program prog; - if (!prog.Compile(vertex_shader, geometry_shader, fragment_shader)) + if (!prog.Compile(vertex_shader, fragment_shader)) return std::nullopt; if (callback) @@ -437,16 +412,14 
@@ namespace GL } std::optional ShaderCache::CompileAndAddProgram(const CacheIndexKey& key, - const std::string_view& vertex_shader, - const std::string_view& geometry_shader, - const std::string_view& fragment_shader, + const std::string_view& vertex_shader, const std::string_view& fragment_shader, const PreLinkCallback& callback) { #ifdef PCSX2_DEVBUILD Common::Timer timer; #endif - std::optional prog = CompileProgram(vertex_shader, geometry_shader, fragment_shader, callback, true); + std::optional prog = CompileProgram(vertex_shader, fragment_shader, callback, true); if (!prog) return std::nullopt; @@ -491,7 +464,7 @@ namespace GL return res; } - const auto key = GetCacheKey(glsl, std::string_view(), std::string_view()); + const auto key = GetCacheKey(glsl, std::string_view()); auto iter = m_index.find(key); if (iter == m_index.end()) return CompileAndAddComputeProgram(key, glsl, callback); diff --git a/common/GL/ShaderCache.h b/common/GL/ShaderCache.h index 6cc65bd01b..22894b046c 100644 --- a/common/GL/ShaderCache.h +++ b/common/GL/ShaderCache.h @@ -38,10 +38,8 @@ namespace GL bool Open(bool is_gles, std::string_view base_path, u32 version); void Close(); - std::optional GetProgram(const std::string_view vertex_shader, const std::string_view geometry_shader, - const std::string_view fragment_shader, const PreLinkCallback& callback = {}); - bool GetProgram(Program* out_program, const std::string_view vertex_shader, const std::string_view geometry_shader, - const std::string_view fragment_shader, const PreLinkCallback& callback = {}); + std::optional GetProgram(const std::string_view vertex_shader, const std::string_view fragment_shader, const PreLinkCallback& callback = {}); + bool GetProgram(Program* out_program, const std::string_view vertex_shader, const std::string_view fragment_shader, const PreLinkCallback& callback = {}); std::optional GetComputeProgram(const std::string_view glsl, const PreLinkCallback& callback = {}); bool GetComputeProgram(Program* 
out_program, const std::string_view glsl, const PreLinkCallback& callback = {}); @@ -54,9 +52,6 @@ namespace GL u64 vertex_source_hash_low; u64 vertex_source_hash_high; u32 vertex_source_length; - u64 geometry_source_hash_low; - u64 geometry_source_hash_high; - u32 geometry_source_length; u64 fragment_source_hash_low; u64 fragment_source_hash_high; u32 fragment_source_length; @@ -72,7 +67,6 @@ namespace GL std::size_t h = 0; HashCombine(h, e.vertex_source_hash_low, e.vertex_source_hash_high, e.vertex_source_length, - e.geometry_source_hash_low, e.geometry_source_hash_high, e.geometry_source_length, e.fragment_source_hash_low, e.fragment_source_hash_high, e.fragment_source_length); return h; } @@ -87,8 +81,7 @@ namespace GL using CacheIndex = std::unordered_map; - static CacheIndexKey GetCacheKey(const std::string_view& vertex_shader, const std::string_view& geometry_shader, - const std::string_view& fragment_shader); + static CacheIndexKey GetCacheKey(const std::string_view& vertex_shader, const std::string_view& fragment_shader); std::string GetIndexFileName() const; std::string GetBlobFileName() const; @@ -99,11 +92,10 @@ namespace GL bool WriteToBlobFile(const CacheIndexKey& key, const std::vector& prog_data, u32 prog_format); - std::optional CompileProgram(const std::string_view& vertex_shader, const std::string_view& geometry_shader, + std::optional CompileProgram(const std::string_view& vertex_shader, const std::string_view& fragment_shader, const PreLinkCallback& callback, bool set_retrievable); std::optional CompileAndAddProgram(const CacheIndexKey& key, const std::string_view& vertex_shader, - const std::string_view& geometry_shader, const std::string_view& fragment_shader, const PreLinkCallback& callback); std::optional CompileComputeProgram(const std::string_view& glsl, const PreLinkCallback& callback, bool set_retrievable); diff --git a/common/Vulkan/Builders.cpp b/common/Vulkan/Builders.cpp index bf2e8f6882..30f1e9b166 100644 --- 
a/common/Vulkan/Builders.cpp +++ b/common/Vulkan/Builders.cpp @@ -698,11 +698,6 @@ namespace Vulkan { pxAssert(m_num_writes < MAX_WRITES && (m_num_image_infos + num_views) < MAX_IMAGE_INFOS); -#if 1 - // NOTE: This is deliberately split up - updating multiple descriptors in one write is broken on Adreno. - for (u32 i = 0; i < num_views; i++) - AddCombinedImageSamplerDescriptorWrite(set, binding + i, views[i], samplers[i], layout); -#else VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; dw.dstSet = set; @@ -718,7 +713,6 @@ namespace Vulkan ii.sampler = samplers[i]; ii.imageLayout = layout; } -#endif } void DescriptorSetUpdateBuilder::AddBufferDescriptorWrite( diff --git a/common/Vulkan/Context.cpp b/common/Vulkan/Context.cpp index 78a8081b71..3bc30779a0 100644 --- a/common/Vulkan/Context.cpp +++ b/common/Vulkan/Context.cpp @@ -474,6 +474,8 @@ namespace Vulkan SupportsExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false); m_optional_extensions.vk_khr_fragment_shader_barycentric = SupportsExtension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME, false); + m_optional_extensions.vk_khr_shader_draw_parameters = + SupportsExtension(VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, false); return true; } @@ -956,11 +958,9 @@ namespace Vulkan bool Context::CreateGlobalDescriptorPool() { - // TODO: A better way to choose the number of descriptors. 
-		VkDescriptorPoolSize pool_sizes[] = {
-			{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1024},
-			{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1024},
-			{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1},
+		static constexpr const VkDescriptorPoolSize pool_sizes[] = {
+			{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 2},
+			{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2},
 		};
 		VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr,
@@ -2077,4 +2077,52 @@ void main()
 		return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec;
 #endif
 	}
+
+	bool Context::AllocatePreinitializedGPUBuffer(u32 size, VkBuffer* gpu_buffer, VmaAllocation* gpu_allocation,
+		VkBufferUsageFlags gpu_usage, const std::function& fill_callback)
+	{
+		// Creates a device-local buffer of `size` bytes with usage `gpu_usage`, pre-filled by fill_callback (which is
+		// handed a mapped CPU staging buffer). The copy goes on the init command buffer. Returns false on failure.
+
+		const VkBufferCreateInfo cpu_bci = {
+			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+			nullptr,
+			0, size,
+			VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE};
+		const VmaAllocationCreateInfo cpu_aci = {
+			VMA_ALLOCATION_CREATE_MAPPED_BIT, VMA_MEMORY_USAGE_CPU_ONLY, 0, 0};
+		VkBuffer cpu_buffer;
+		VmaAllocation cpu_allocation;
+		VmaAllocationInfo cpu_ai;
+		VkResult res = vmaCreateBuffer(m_allocator, &cpu_bci, &cpu_aci, &cpu_buffer,
+			&cpu_allocation, &cpu_ai);
+		if (res != VK_SUCCESS)
+		{
+			LOG_VULKAN_ERROR(res, "vmaCreateBuffer() for CPU expand buffer failed: ");
+			return false;
+		}
+
+		const VkBufferCreateInfo gpu_bci = {
+			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+			nullptr,
+			0, size,
+			gpu_usage | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_SHARING_MODE_EXCLUSIVE}; // honour gpu_usage (previously ignored); the old bits are kept so existing callers are unaffected
+		const VmaAllocationCreateInfo gpu_aci = {
+			0, VMA_MEMORY_USAGE_GPU_ONLY, 0, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT};
+		VmaAllocationInfo ai;
+		res = vmaCreateBuffer(m_allocator, &gpu_bci, &gpu_aci, gpu_buffer, gpu_allocation, &ai);
+		if (res != VK_SUCCESS)
+		{
+			LOG_VULKAN_ERROR(res, "vmaCreateBuffer() for expand buffer failed: ");
+			vmaDestroyBuffer(m_allocator, cpu_buffer, cpu_allocation); // do not leak the staging buffer on failure
+			return false;
+		}
+
+		const VkBufferCopy buf_copy = {0u, 0u, size}; // source offset 0, destination offset 0, whole buffer
+		fill_callback(cpu_ai.pMappedData); // the caller writes the contents through the persistently mapped pointer
+		vmaFlushAllocation(m_allocator, cpu_allocation, 0, size); // flush the CPU writes before the copy reads them
+		vkCmdCopyBuffer(GetCurrentInitCommandBuffer(), cpu_buffer, *gpu_buffer, 1, &buf_copy);
+		DeferBufferDestruction(cpu_buffer, cpu_allocation); // NOTE(review): presumably frees the staging buffer once the GPU has finished the copy - confirm
+		return true;
+	}
 } // namespace Vulkan
diff --git a/common/Vulkan/Context.h b/common/Vulkan/Context.h
index 3abd4ca602..4a0e867218 100644
--- a/common/Vulkan/Context.h
+++ b/common/Vulkan/Context.h
@@ -56,6 +56,7 @@ namespace Vulkan
 			bool vk_khr_driver_properties : 1;
 			bool vk_arm_rasterization_order_attachment_access : 1;
 			bool vk_khr_fragment_shader_barycentric : 1;
+			bool vk_khr_shader_draw_parameters : 1;
 		};
 		~Context();
@@ -238,6 +239,10 @@ namespace Vulkan
 		void CountRenderPass() { m_command_buffer_render_passes++; }
 		void NotifyOfReadback();
+		// Allocates a temporary CPU staging buffer, fires the callback with it to populate, then copies to a GPU buffer.
+ bool AllocatePreinitializedGPUBuffer(u32 size, VkBuffer* gpu_buffer, VmaAllocation* gpu_allocation, + VkBufferUsageFlags gpu_usage, const std::function& fill_callback); + private: Context(VkInstance instance, VkPhysicalDevice physical_device); diff --git a/common/Vulkan/ShaderCache.cpp b/common/Vulkan/ShaderCache.cpp index b48a7a1be1..8650a6fb0e 100644 --- a/common/Vulkan/ShaderCache.cpp +++ b/common/Vulkan/ShaderCache.cpp @@ -493,11 +493,6 @@ namespace Vulkan return GetShaderModule(ShaderCompiler::Type::Vertex, std::move(shader_code)); } - VkShaderModule ShaderCache::GetGeometryShader(std::string_view shader_code) - { - return GetShaderModule(ShaderCompiler::Type::Geometry, std::move(shader_code)); - } - VkShaderModule ShaderCache::GetFragmentShader(std::string_view shader_code) { return GetShaderModule(ShaderCompiler::Type::Fragment, std::move(shader_code)); diff --git a/common/Vulkan/ShaderCache.h b/common/Vulkan/ShaderCache.h index 06a0c3969a..7e4a7cbe84 100644 --- a/common/Vulkan/ShaderCache.h +++ b/common/Vulkan/ShaderCache.h @@ -47,7 +47,6 @@ namespace Vulkan VkShaderModule GetShaderModule(ShaderCompiler::Type type, std::string_view shader_code); VkShaderModule GetVertexShader(std::string_view shader_code); - VkShaderModule GetGeometryShader(std::string_view shader_code); VkShaderModule GetFragmentShader(std::string_view shader_code); VkShaderModule GetComputeShader(std::string_view shader_code); diff --git a/common/Vulkan/ShaderCompiler.cpp b/common/Vulkan/ShaderCompiler.cpp index 985ed81295..a7b9bc962b 100644 --- a/common/Vulkan/ShaderCompiler.cpp +++ b/common/Vulkan/ShaderCompiler.cpp @@ -154,11 +154,6 @@ namespace Vulkan::ShaderCompiler return CompileShaderToSPV(EShLangVertex, "vs", source_code, debug); } - std::optional CompileGeometryShader(std::string_view source_code, bool debug) - { - return CompileShaderToSPV(EShLangGeometry, "gs", source_code, debug); - } - std::optional CompileFragmentShader(std::string_view source_code, bool debug) { return 
CompileShaderToSPV(EShLangFragment, "ps", source_code, debug); @@ -176,9 +171,6 @@ namespace Vulkan::ShaderCompiler case Type::Vertex: return CompileShaderToSPV(EShLangVertex, "vs", source_code, debug); - case Type::Geometry: - return CompileShaderToSPV(EShLangGeometry, "gs", source_code, debug); - case Type::Fragment: return CompileShaderToSPV(EShLangFragment, "ps", source_code, debug); diff --git a/common/Vulkan/ShaderCompiler.h b/common/Vulkan/ShaderCompiler.h index 3ddadd8a51..66f16c546b 100644 --- a/common/Vulkan/ShaderCompiler.h +++ b/common/Vulkan/ShaderCompiler.h @@ -26,7 +26,6 @@ namespace Vulkan::ShaderCompiler enum class Type { Vertex, - Geometry, Fragment, Compute }; @@ -40,9 +39,6 @@ namespace Vulkan::ShaderCompiler // Compile a vertex shader to SPIR-V. std::optional CompileVertexShader(std::string_view source_code, bool debug); - // Compile a geometry shader to SPIR-V. - std::optional CompileGeometryShader(std::string_view source_code, bool debug); - // Compile a fragment shader to SPIR-V. 
std::optional CompileFragmentShader(std::string_view source_code, bool debug); diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp index a35214442b..4e7f9ddbfe 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp @@ -245,7 +245,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget* SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.skipPresentingDuplicateFrames, "EmuCore/GS", "SkipDuplicateFrames", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.threadedPresentation, "EmuCore/GS", "DisableThreadedPresentation", false); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideTextureBarriers, "EmuCore/GS", "OverrideTextureBarriers", -1, -1); - SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideGeometryShader, "EmuCore/GS", "OverrideGeometryShaders", -1, -1); SettingWidgetBinder::BindWidgetToIntSetting( sif, m_ui.gsDumpCompression, "EmuCore/GS", "GSDumpCompression", static_cast(GSDumpCompressionMethod::Zstandard)); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.disableFramebufferFetch, "EmuCore/GS", "DisableFramebufferFetch", false); @@ -693,10 +692,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget* { dialog->registerWidgetHelp(m_ui.overrideTextureBarriers, tr("Override Texture Barriers"), tr("Automatic (Default)"), tr("")); - dialog->registerWidgetHelp(m_ui.overrideGeometryShader, tr("Override Geometry Shader"), tr("Automatic (Default)"), - tr("Allows the GPU instead of just the CPU to transform lines into sprites. 
" - "This reduces CPU load and bandwidth requirement, but it is heavier on the GPU.")); - dialog->registerWidgetHelp(m_ui.gsDumpCompression, tr("GS Dump Compression"), tr("Zstandard (zst)"), tr("Change the compression algorithm used when creating a GS dump.")); @@ -948,7 +943,6 @@ void GraphicsSettingsWidget::updateRendererDependentOptions() m_ui.useBlitSwapChain->setEnabled(is_dx11); m_ui.overrideTextureBarriers->setDisabled(is_sw_dx); - m_ui.overrideGeometryShader->setDisabled(is_sw_dx); m_ui.disableFramebufferFetch->setDisabled(is_sw_dx); diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui index dc1f65fc00..10e5afc5e8 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui @@ -2133,33 +2133,7 @@ - - - - Override Geometry Shader: - - - - - - - - Automatic (Default) - - - - - Force Disabled - - - - - Force Enabled - - - - - + diff --git a/pcsx2/Config.h b/pcsx2/Config.h index 86fc19cc99..f2ccda2c95 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -760,7 +760,6 @@ struct Pcsx2Config GSTextureInRtMode UserHacks_TextureInsideRt{GSTextureInRtMode::Disabled}; TriFiltering TriFilter{TriFiltering::Automatic}; int OverrideTextureBarriers{-1}; - int OverrideGeometryShaders{-1}; int CAS_Sharpness{50}; int ShadeBoost_Brightness{50}; diff --git a/pcsx2/Frontend/FullscreenUI.cpp b/pcsx2/Frontend/FullscreenUI.cpp index a07dae86ca..d5d084ebdb 100644 --- a/pcsx2/Frontend/FullscreenUI.cpp +++ b/pcsx2/Frontend/FullscreenUI.cpp @@ -3325,8 +3325,6 @@ void FullscreenUI::DrawGraphicsSettingsPage() } DrawIntListSetting(bsi, "Override Texture Barriers", "Forces texture barrier functionality to the specified value.", "EmuCore/GS", "OverrideTextureBarriers", -1, s_generic_options, std::size(s_generic_options), -1); - DrawIntListSetting(bsi, "Override Geometry Shaders", "Forces geometry shader functionality to the specified value.", "EmuCore/GS", - "OverrideGeometryShaders", -1, 
s_generic_options, std::size(s_generic_options), -1);
 	DrawIntListSetting(bsi, "GS Dump Compression", "Sets the compression algorithm for GS dumps.", "EmuCore/GS",
 		"GSDumpCompression", static_cast(GSDumpCompressionMethod::LZMA), s_gsdump_compression, std::size(s_gsdump_compression));
 	DrawToggleSetting(bsi, "Disable Framebuffer Fetch", "Prevents the usage of framebuffer fetch when supported by host GPU.", "EmuCore/GS",
diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp
index db884fddf8..04c8b9caa1 100644
--- a/pcsx2/GS/GSState.cpp
+++ b/pcsx2/GS/GSState.cpp
@@ -3037,6 +3037,28 @@ static constexpr u32 NumIndicesForPrim(u32 prim)
 	}
 }
+static constexpr u32 MaxVerticesForPrim(u32 prim) // Vertex count at which VertexKick forces a flush for `prim`; 0 disables the check.
+{
+	switch (prim)
+	{
+		case GS_POINTLIST:
+		case GS_INVALID:
+			// Needed due to expansion in hardware renderers.
+			return (std::numeric_limits::max() / 4) - 4; // NOTE(review): numeric_limits has no template argument here (looks stripped) - restore before use
+
+		case GS_SPRITE:
+			return (std::numeric_limits::max() / 2) - 2; // NOTE(review): presumably also an expansion limit, like points - confirm
+
+		case GS_LINELIST:
+		case GS_LINESTRIP:
+		case GS_TRIANGLELIST:
+		case GS_TRIANGLESTRIP:
+		case GS_TRIANGLEFAN:
+		default:
+			return 0; // no cap: VertexKick only flushes early when the value is non-zero
+	}
+}
+
 template
 __forceinline void GSState::VertexKick(u32 skip)
 {
@@ -3305,6 +3327,10 @@ __forceinline void GSState::VertexKick(u32 skip)
 	}
 	CLUTAutoFlush(prim);
+
+	constexpr u32 max_vertices = MaxVerticesForPrim(prim); // evaluated at compile time from the prim template argument
+	if (max_vertices != 0 && m_vertex.tail >= max_vertices)
+		Flush(VERTEXCOUNT); // the queued vertex count reached the cap for this primitive type
 }
 /// Checks if region repeat is used (applying it does something to at least one of the values in min...max)
diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h
index becdd9b925..7d853d43f8 100644
--- a/pcsx2/GS/GSState.h
+++ b/pcsx2/GS/GSState.h
@@ -278,6 +278,7 @@ public:
 		AUTOFLUSH = 1 << 11,
 		VSYNC = 1 << 12,
 		GSREOPEN = 1 << 13,
+		VERTEXCOUNT = 1 << 14, // flush raised by VertexKick when the per-primitive vertex cap is reached
 	};
 	GSFlushReason m_state_flush_reason = UNKNOWN;
diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp
index 79b25b75b8..3bca815801 100644
--- a/pcsx2/GS/Renderers/Common/GSDevice.cpp
+++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp
@@
-163,6 +163,23 @@ std::string GSDevice::GetFullscreenModeString(u32 width, u32 height, float refre return StringUtil::StdStringFromFormat("%u x %u @ %f hz", width, height, refresh_rate); } +void GSDevice::GenerateExpansionIndexBuffer(void* buffer) +{ + static constexpr u32 MAX_INDEX = std::numeric_limits::max(); + + u32* idx_buffer = static_cast(buffer); + for (u32 i = 0; i < MAX_INDEX; i++) + { + const u32 base = i * 4; + *(idx_buffer++) = base + 0; + *(idx_buffer++) = base + 1; + *(idx_buffer++) = base + 2; + *(idx_buffer++) = base + 1; + *(idx_buffer++) = base + 2; + *(idx_buffer++) = base + 3; + } +} + bool GSDevice::Create(const WindowInfo& wi, VsyncMode vsync) { m_window_info = wi; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 09a9f41db5..143f136751 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -235,13 +235,6 @@ struct alignas(16) GSHWDrawConfig Line, Triangle, }; - enum class GSTopology: u8 - { - Point, - Line, - Triangle, - Sprite, - }; enum class VSExpand: u8 { None, @@ -250,22 +243,6 @@ struct alignas(16) GSHWDrawConfig Sprite, }; #pragma pack(push, 1) - struct GSSelector - { - union - { - struct - { - GSTopology topology : 2; - bool expand : 1; - bool iip : 1; - bool forward_primid : 1; - }; - u8 key; - }; - GSSelector(): key(0) {} - GSSelector(u8 k): key(k) {} - }; struct VSSelector { union @@ -275,7 +252,7 @@ struct alignas(16) GSHWDrawConfig u8 fst : 1; u8 tme : 1; u8 iip : 1; - u8 point_size : 1; ///< Set when points need to be expanded without geometry shader. + u8 point_size : 1; ///< Set when points need to be expanded without VS expanding. VSExpand expand : 2; u8 _free : 2; }; @@ -283,6 +260,9 @@ struct alignas(16) GSHWDrawConfig }; VSSelector(): key(0) {} VSSelector(u8 k): key(k) {} + + /// Returns true if the fixed index buffer should be used. 
+ __fi bool UseExpandIndexBuffer() const { return (expand == VSExpand::Point || expand == VSExpand::Sprite); } }; #pragma pack(pop) #pragma pack(push, 4) @@ -657,7 +637,6 @@ struct alignas(16) GSHWDrawConfig Topology topology; ///< Draw topology alignas(8) PSSelector ps; - GSSelector gs; VSSelector vs; BlendState blend; @@ -713,13 +692,12 @@ public: struct FeatureSupport { bool broken_point_sampler : 1; ///< Issue with AMD cards, see tfx shader for details - bool geometry_shader : 1; ///< Supports geometry shader bool vs_expand : 1; ///< Supports expanding points/lines/sprites in the vertex shader bool primitive_id : 1; ///< Supports primitive ID for use with prim tracking destination alpha algorithm bool texture_barrier : 1; ///< Supports sampling rt and hopefully texture barrier bool provoking_vertex_last: 1; ///< Supports using the last vertex in a primitive as the value for flat shading. - bool point_expand : 1; ///< Supports point expansion in hardware without using geometry shaders. - bool line_expand : 1; ///< Supports line expansion in hardware without using geometry shaders. + bool point_expand : 1; ///< Supports point expansion in hardware. + bool line_expand : 1; ///< Supports line expansion in hardware. bool prefer_new_textures : 1; ///< Allocate textures up to the pool size before reusing them, to avoid render pass restarts. bool dxt_textures : 1; ///< Supports DXTn texture compression, i.e. S3TC and BC1-3. bool bptc_textures : 1; ///< Supports BC6/7 texture compression. @@ -771,6 +749,7 @@ protected: static constexpr float MAD_SENSITIVITY = 0.08f; static constexpr u32 MAX_POOLED_TEXTURES = 300; static constexpr u32 NUM_CAS_CONSTANTS = 12; // 8 plus src offset x/y, 16 byte alignment + static constexpr u32 EXPAND_BUFFER_SIZE = sizeof(u32) * std::numeric_limits::max() * 6; WindowInfo m_window_info; VsyncMode m_vsync_mode = VsyncMode::Off; @@ -824,6 +803,9 @@ public: /// Converts a fullscreen mode to a string. 
static std::string GetFullscreenModeString(u32 width, u32 height, float refresh_rate); + /// Generates a fixed index buffer for expanding points and sprites. Buffer is assumed to be at least EXPAND_BUFFER_SIZE in size. + static void GenerateExpansionIndexBuffer(void* buffer); + __fi unsigned int GetFrameNumber() const { return m_frame; } __fi u64 GetPoolMemoryUsage() const { return m_pool_memory_usage; } diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index c1b63e892a..902c14156b 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -53,7 +53,6 @@ GSDevice11::GSDevice11() m_state.topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; m_state.bf = -1; - m_features.geometry_shader = true; m_features.primitive_id = true; m_features.texture_barrier = false; m_features.provoking_vertex_last = false; @@ -365,7 +364,46 @@ bool GSDevice11::Create(const WindowInfo& wi, VsyncMode vsync) Console.Error("Failed to create index buffer."); return false; } - m_ctx->IASetIndexBuffer(m_ib.get(), DXGI_FORMAT_R32_UINT, 0); + IASetIndexBuffer(m_ib.get()); + + if (m_features.vs_expand) + { + bd.ByteWidth = VERTEX_BUFFER_SIZE; + bd.BindFlags = D3D11_BIND_SHADER_RESOURCE; + bd.StructureByteStride = sizeof(GSVertex); + bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + + if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_expand_vb.put()))) + { + Console.Error("Failed to create expand vertex buffer."); + return false; + } + + const CD3D11_SHADER_RESOURCE_VIEW_DESC vb_srv_desc( + D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 0, VERTEX_BUFFER_SIZE / sizeof(GSVertex)); + if (FAILED(m_dev->CreateShaderResourceView(m_expand_vb.get(), &vb_srv_desc, m_expand_vb_srv.put()))) + { + Console.Error("Failed to create expand vertex buffer SRV."); + return false; + } + + m_ctx->VSSetShaderResources(0, 1, m_expand_vb_srv.addressof()); + + bd.ByteWidth = EXPAND_BUFFER_SIZE; + bd.BindFlags = D3D11_BIND_INDEX_BUFFER; + 
bd.StructureByteStride = 0; + bd.MiscFlags = 0; + + std::unique_ptr expand_data = std::make_unique(EXPAND_BUFFER_SIZE); + GenerateExpansionIndexBuffer(expand_data.get()); + + const D3D11_SUBRESOURCE_DATA srd = {expand_data.get()}; + if (FAILED(m_dev->CreateBuffer(&bd, &srd, m_expand_ib.put()))) + { + Console.Error("Failed to create expand index buffer."); + return false; + } + } // @@ -466,6 +504,9 @@ void GSDevice11::Destroy() m_vb.reset(); m_ib.reset(); + m_expand_vb_srv.reset(); + m_expand_vb.reset(); + m_expand_ib.reset(); m_vs.clear(); m_vs_cb.reset(); @@ -508,6 +549,9 @@ void GSDevice11::SetFeatures() SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC3_UNORM); m_features.bptc_textures = SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC7_UNORM); + + const D3D_FEATURE_LEVEL feature_level = m_dev->GetFeatureLevel(); + m_features.vs_expand = (feature_level >= D3D_FEATURE_LEVEL_11_0); } bool GSDevice11::HasSurface() const @@ -1234,11 +1278,6 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* VSSetShader(m_convert.vs.get(), nullptr); - // gs - - GSSetShader(nullptr, nullptr); - - // ps PSSetShaderResources(sTex, nullptr); @@ -1307,11 +1346,6 @@ void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* VSSetShader(m_present.vs.get(), nullptr); - // gs - - GSSetShader(nullptr, nullptr); - - // ps PSSetShaderResources(sTex, nullptr); @@ -1368,7 +1402,6 @@ void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_re IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); VSSetShader(m_convert.vs.get(), nullptr); - GSSetShader(nullptr, nullptr); PSSetShader(m_convert.ps[static_cast(shader)].get(), nullptr); OMSetDepthStencilState(dTex->IsRenderTarget() ? 
m_convert.dss.get() : m_convert.dss_write.get(), 0); @@ -1437,6 +1470,7 @@ void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rect IAUnmapVertexBuffer(sizeof(GSVertexPT1), vcount); IAUnmapIndexBuffer(icount); + IASetIndexBuffer(m_ib.get()); PSSetShaderResource(0, rects[0].src); PSSetSamplerState(rects[0].linear ? m_convert.ln.get() : m_convert.pt.get()); @@ -1682,7 +1716,6 @@ void GSDevice11::RenderImGui() IASetInputLayout(m_imgui.il.get()); IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); VSSetShader(m_imgui.vs.get(), m_imgui.vs_cb.get()); - GSSetShader(nullptr, nullptr); PSSetShader(m_imgui.ps.get(), nullptr); OMSetBlendState(m_imgui.bs.get(), 0.0f); OMSetDepthStencilState(m_convert.dss.get(), 0); @@ -1761,7 +1794,7 @@ void GSDevice11::RenderImGui() } m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &m_state.vb_stride, &vb_offset); - m_ctx->IASetIndexBuffer(m_ib.get(), DXGI_FORMAT_R32_UINT, 0); + m_ctx->IASetIndexBuffer(m_state.index_buffer, DXGI_FORMAT_R32_UINT, 0); } void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm) @@ -1786,10 +1819,6 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert VSSetShader(m_convert.vs.get(), nullptr); - // gs - - GSSetShader(nullptr, nullptr); - // ps PSSetShaderResources(rt, nullptr); PSSetSamplerState(m_convert.pt.get()); @@ -1852,6 +1881,37 @@ bool GSDevice11::IASetVertexBuffer(const void* vertex, u32 stride, u32 count) return true; } +bool GSDevice11::IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count) +{ + const u32 size = stride * count; + if (size > VERTEX_BUFFER_SIZE) + return false; + + D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; + + m_vertex.start = (m_structured_vb_pos + (stride - 1)) / stride; + m_structured_vb_pos = (m_vertex.start * stride) + size; + if (m_structured_vb_pos > VERTEX_BUFFER_SIZE) + { + m_vertex.start = 0; + m_structured_vb_pos = size; + type = D3D11_MAP_WRITE_DISCARD; + 
} + + D3D11_MAPPED_SUBRESOURCE m; + if (FAILED(m_ctx->Map(m_expand_vb.get(), 0, type, 0, &m))) + return false; + + void* map = static_cast(m.pData) + (m_vertex.start * stride); + + GSVector4i::storent(map, vertex, count * stride); + + m_ctx->Unmap(m_expand_vb.get(), 0); + + m_vertex.count = count; + return true; +} + u32* GSDevice11::IAMapIndexBuffer(u32 count) { if (count > (INDEX_BUFFER_SIZE / sizeof(u32))) @@ -1890,9 +1950,19 @@ bool GSDevice11::IASetIndexBuffer(const void* index, u32 count) std::memcpy(map, index, count * sizeof(u32)); IAUnmapIndexBuffer(count); + IASetIndexBuffer(m_ib.get()); return true; } +void GSDevice11::IASetIndexBuffer(ID3D11Buffer* buffer) +{ + if (m_state.index_buffer != buffer) + { + m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R32_UINT, 0); + m_state.index_buffer = buffer; + } +} + void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout) { if (m_state.layout != layout) @@ -1930,23 +2000,6 @@ void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb) } } -void GSDevice11::GSSetShader(ID3D11GeometryShader* gs, ID3D11Buffer* gs_cb) -{ - if (m_state.gs != gs) - { - m_state.gs = gs; - - m_ctx->GSSetShader(gs, nullptr, 0); - } - - if (m_state.gs_cb != gs_cb) - { - m_state.gs_cb = gs_cb; - - m_ctx->GSSetConstantBuffers(0, 1, &gs_cb); - } -} - void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) { PSSetShaderResource(0, sr0); @@ -2172,12 +2225,40 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) g_perfmon.Put(GSPerfMon::TextureCopies, 1); } - if (!IASetVertexBuffer(config.verts, sizeof(*config.verts), config.nverts) || - !IASetIndexBuffer(config.indices, config.nindices)) + if (config.vs.expand != GSHWDrawConfig::VSExpand::None) { - Console.Error("Failed to upload vertices/indices (%u/%u)", config.nverts, config.nindices); - return; + if (!IASetExpandVertexBuffer(config.verts, sizeof(*config.verts), config.nverts)) + { + Console.Error("Failed to upload structured vertices (%u)", config.nverts); + return; 
+ } + + config.cb_vs.max_depth.y = m_vertex.start; } + else + { + if (!IASetVertexBuffer(config.verts, sizeof(*config.verts), config.nverts)) + { + Console.Error("Failed to upload vertices (%u)", config.nverts); + return; + } + } + + if (config.vs.UseExpandIndexBuffer()) + { + IASetIndexBuffer(m_expand_ib.get()); + m_index.start = 0; + m_index.count = config.nindices; + } + else + { + if (!IASetIndexBuffer(config.indices, config.nindices)) + { + Console.Error("Failed to upload indices (%u)", config.nindices); + return; + } + } + D3D11_PRIMITIVE_TOPOLOGY topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; switch (config.topology) { @@ -2207,7 +2288,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) } SetupVS(config.vs, &config.cb_vs); - SetupGS(config.gs); SetupPS(config.ps, &config.cb_ps, config.sampler); if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking) @@ -2223,7 +2303,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) blend.blend_op = 3; // MIN SetupOM(dss, blend, 0); OMSetRenderTargets(primid_tex, config.ds, &config.scissor); - DrawIndexedPrimitive(); config.ps.date = 3; @@ -2234,7 +2313,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) SetupOM(config.depth, convertSel(config.colormask, config.blend), config.blend.constant); OMSetRenderTargets(hdr_rt ? 
hdr_rt : config.rt, config.ds, &config.scissor); - DrawIndexedPrimitive(); if (config.separate_alpha_pass) @@ -2243,7 +2321,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) SetHWDrawConfigForAlphaPass(&config.ps, &config.colormask, &sap_blend, &config.depth); SetupOM(config.depth, convertSel(config.colormask, sap_blend), config.blend.constant); SetupPS(config.ps, &config.cb_ps, config.sampler); - DrawIndexedPrimitive(); } @@ -2262,7 +2339,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) } SetupOM(config.alpha_second_pass.depth, convertSel(config.alpha_second_pass.colormask, config.blend), config.blend.constant); - DrawIndexedPrimitive(); if (config.second_separate_alpha_pass) @@ -2271,7 +2347,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) SetHWDrawConfigForAlphaPass(&config.alpha_second_pass.ps, &config.alpha_second_pass.colormask, &sap_blend, &config.alpha_second_pass.depth); SetupOM(config.alpha_second_pass.depth, convertSel(config.alpha_second_pass.colormask, sap_blend), config.blend.constant); SetupPS(config.alpha_second_pass.ps, &config.cb_ps, config.sampler); - DrawIndexedPrimitive(); } } diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.h b/pcsx2/GS/Renderers/DX11/GSDevice11.h index b7a2032efd..b9fc38981d 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.h +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.h @@ -34,7 +34,6 @@ class GSDevice11 final : public GSDevice { public: using VSSelector = GSHWDrawConfig::VSSelector; - using GSSelector = GSHWDrawConfig::GSSelector; using PSSelector = GSHWDrawConfig::PSSelector; using PSSamplerSelector = GSHWDrawConfig::SamplerSelector; using OMDepthStencilSelector = GSHWDrawConfig::DepthStencilSelector; @@ -150,8 +149,12 @@ private: wil::com_ptr_nothrow m_vb; wil::com_ptr_nothrow m_ib; + wil::com_ptr_nothrow m_expand_vb; + wil::com_ptr_nothrow m_expand_ib; + wil::com_ptr_nothrow m_expand_vb_srv; u32 m_vb_pos = 0; // bytes u32 m_ib_pos = 0; // indices/sizeof(u32) + u32 m_structured_vb_pos = 0; // bytes int 
m_d3d_texsize = 0; bool m_allow_tearing_supported = false; @@ -162,10 +165,9 @@ private: { ID3D11InputLayout* layout; D3D11_PRIMITIVE_TOPOLOGY topology; + ID3D11Buffer* index_buffer; ID3D11VertexShader* vs; ID3D11Buffer* vs_cb; - ID3D11GeometryShader* gs; - ID3D11Buffer* gs_cb; std::array ps_sr_views; ID3D11PixelShader* ps; ID3D11Buffer* ps_cb; @@ -339,16 +341,17 @@ public: void* IAMapVertexBuffer(u32 stride, u32 count); void IAUnmapVertexBuffer(u32 stride, u32 count); bool IASetVertexBuffer(const void* vertex, u32 stride, u32 count); + bool IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count); u32* IAMapIndexBuffer(u32 count); void IAUnmapIndexBuffer(u32 count); bool IASetIndexBuffer(const void* index, u32 count); + void IASetIndexBuffer(ID3D11Buffer* buffer); void IASetInputLayout(ID3D11InputLayout* layout); void IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology); void VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb); - void GSSetShader(ID3D11GeometryShader* gs, ID3D11Buffer* gs_cb = nullptr); void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1); void PSSetShaderResource(int i, GSTexture* sr); @@ -364,7 +367,6 @@ public: bool CreateTextureFX(); void SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* cb); - void SetupGS(GSSelector sel); void SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel); void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix); diff --git a/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp b/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp index e302feaa65..4677fb316c 100644 --- a/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp @@ -53,8 +53,6 @@ bool GSDevice11::CreateTextureFX() SetupVS(sel, &cb); - SetupGS(GSSelector(1)); - // return true; @@ -68,11 +66,13 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* { ShaderMacro sm(m_shader_cache.GetFeatureLevel()); + 
sm.AddMacro("VERTEX_SHADER", 1); sm.AddMacro("VS_TME", sel.tme); sm.AddMacro("VS_FST", sel.fst); sm.AddMacro("VS_IIP", sel.iip); + sm.AddMacro("VS_EXPAND", static_cast(sel.expand)); - D3D11_INPUT_ELEMENT_DESC layout[] = + static constexpr const D3D11_INPUT_ELEMENT_DESC layout[] = { {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0}, @@ -84,8 +84,16 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* }; GSVertexShader11 vs; - m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), - vs.vs.put(), vs.il.put(), layout, std::size(layout), m_tfx_source, sm.GetPtr(), "vs_main"); + if (sel.expand == GSHWDrawConfig::VSExpand::None) + { + m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), vs.vs.put(), vs.il.put(), layout, + std::size(layout), m_tfx_source, sm.GetPtr(), "vs_main"); + } + else + { + vs.vs = m_shader_cache.GetVertexShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "vs_main_expand"); + } + i = m_vs.try_emplace(sel.key, std::move(vs)).first; } @@ -99,37 +107,6 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* IASetInputLayout(i->second.il.get()); } -void GSDevice11::SetupGS(GSSelector sel) -{ - wil::com_ptr_nothrow gs; - - // Geometry shader is disabled if sprite conversion is done on the cpu (sel.cpu_sprite). 
- if (sel.expand) - { - const auto i = std::as_const(m_gs).find(sel.key); - - if (i != m_gs.end()) - { - gs = i->second; - } - else - { - ShaderMacro sm(m_shader_cache.GetFeatureLevel()); - - sm.AddMacro("GS_IIP", sel.iip); - sm.AddMacro("GS_PRIM", static_cast(sel.topology)); - sm.AddMacro("GS_EXPAND", sel.expand); - sm.AddMacro("GS_FORWARD_PRIMID", sel.forward_primid); - - gs = m_shader_cache.GetGeometryShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "gs_main"); - - m_gs[sel.key] = gs; - } - } - - GSSetShader(gs.get(), m_vs_cb.get()); -} - void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel) { auto i = std::as_const(m_ps).find(sel); @@ -138,6 +115,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant { ShaderMacro sm(m_shader_cache.GetFeatureLevel()); + sm.AddMacro("PIXEL_SHADER", 1); sm.AddMacro("PS_FST", sel.fst); sm.AddMacro("PS_WMS", sel.wms); sm.AddMacro("PS_WMT", sel.wmt); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index eb14c7221b..c9b771e731 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -602,7 +602,6 @@ bool GSDevice12::CheckFeatures() m_features.texture_barrier = false; m_features.broken_point_sampler = isAMD; - m_features.geometry_shader = true; m_features.primitive_id = true; m_features.prefer_new_textures = true; m_features.provoking_vertex_last = false; @@ -613,6 +612,7 @@ bool GSDevice12::CheckFeatures() m_features.clip_control = true; m_features.stencil_buffer = true; m_features.test_and_sample_depth = false; + m_features.vs_expand = true; m_features.dxt_textures = g_d3d12_context->SupportsTextureFormat(DXGI_FORMAT_BC1_UNORM) && g_d3d12_context->SupportsTextureFormat(DXGI_FORMAT_BC2_UNORM) && @@ -1729,6 +1729,13 @@ bool GSDevice12::CreateBuffers() return false; } + if (!g_d3d12_context->AllocatePreinitializedGPUBuffer(EXPAND_BUFFER_SIZE, 
&m_expand_index_buffer, + &m_expand_index_buffer_allocation, &GSDevice::GenerateExpansionIndexBuffer)) + { + Host::ReportErrorAsync("GS", "Failed to allocate expansion index buffer"); + return false; + } + return true; } @@ -1753,6 +1760,7 @@ bool GSDevice12::CreateRootSignatures() rsb.SetInputAssemblerFlag(); rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddSRVParameter(0, D3D12_SHADER_VISIBILITY_VERTEX); rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL); rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL); @@ -2096,7 +2104,6 @@ void GSDevice12::DestroyResources() g_d3d12_context->DeferObjectDestruction(it.second.get()); m_tfx_pipelines.clear(); m_tfx_pixel_shaders.clear(); - m_tfx_geometry_shaders.clear(); m_tfx_vertex_shaders.clear(); m_interlace = {}; m_merge = {}; @@ -2119,6 +2126,8 @@ void GSDevice12::DestroyResources() g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetSamplerHeapManager(), &m_point_sampler_cpu); g_d3d12_context->InvalidateSamplerGroups(); + m_expand_index_buffer.reset(); + m_expand_index_buffer_allocation.reset(); m_pixel_constant_buffer.Destroy(false); m_vertex_constant_buffer.Destroy(false); m_index_stream_buffer.Destroy(false); @@ -2139,32 +2148,18 @@ const ID3DBlob* GSDevice12::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel) return it->second.get(); ShaderMacro sm(m_shader_cache.GetFeatureLevel()); + sm.AddMacro("VERTEX_SHADER", 1); sm.AddMacro("VS_TME", sel.tme); sm.AddMacro("VS_FST", sel.fst); sm.AddMacro("VS_IIP", sel.iip); + sm.AddMacro("VS_EXPAND", static_cast(sel.expand)); - ComPtr vs(m_shader_cache.GetVertexShader(m_tfx_source, sm.GetPtr(), "vs_main")); + const char* entry_point = (sel.expand != GSHWDrawConfig::VSExpand::None) ? 
"vs_main_expand" : "vs_main"; + ComPtr vs(m_shader_cache.GetVertexShader(m_tfx_source, sm.GetPtr(), entry_point)); it = m_tfx_vertex_shaders.emplace(sel.key, std::move(vs)).first; return it->second.get(); } -const ID3DBlob* GSDevice12::GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel) -{ - auto it = m_tfx_geometry_shaders.find(sel.key); - if (it != m_tfx_geometry_shaders.end()) - return it->second.get(); - - ShaderMacro sm(m_shader_cache.GetFeatureLevel()); - sm.AddMacro("GS_IIP", sel.iip); - sm.AddMacro("GS_PRIM", static_cast(sel.topology)); - sm.AddMacro("GS_EXPAND", sel.expand); - sm.AddMacro("GS_FORWARD_PRIMID", sel.forward_primid); - - ComPtr gs(m_shader_cache.GetGeometryShader(m_tfx_source, sm.GetPtr(), "gs_main")); - it = m_tfx_geometry_shaders.emplace(sel.key, std::move(gs)).first; - return it->second.get(); -} - const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sel) { auto it = m_tfx_pixel_shaders.find(sel); @@ -2172,6 +2167,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& return it->second.get(); ShaderMacro sm(m_shader_cache.GetFeatureLevel()); + sm.AddMacro("PIXEL_SHADER", 1); sm.AddMacro("PS_FST", sel.fst); sm.AddMacro("PS_WMS", sel.wms); sm.AddMacro("PS_WMT", sel.wmt); @@ -2246,9 +2242,8 @@ GSDevice12::ComPtr GSDevice12::CreateTFXPipeline(const Pipe } const ID3DBlob* vs = GetTFXVertexShader(p.vs); - const ID3DBlob* gs = p.gs.expand ? 
GetTFXGeometryShader(p.gs) : nullptr; const ID3DBlob* ps = GetTFXPixelShader(pps); - if (!vs || (p.gs.expand && !gs) || !ps) + if (!vs || !ps) return nullptr; // Common state @@ -2271,18 +2266,19 @@ GSDevice12::ComPtr GSDevice12::CreateTFXPipeline(const Pipe // Shaders gpb.SetVertexShader(vs); - if (gs) - gpb.SetGeometryShader(gs); gpb.SetPixelShader(ps); // IA - gpb.AddVertexAttribute("TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0); - gpb.AddVertexAttribute("COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8); - gpb.AddVertexAttribute("TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12); - gpb.AddVertexAttribute("POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16); - gpb.AddVertexAttribute("POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20); - gpb.AddVertexAttribute("TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24); - gpb.AddVertexAttribute("COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28); + if (p.vs.expand == GSHWDrawConfig::VSExpand::None) + { + gpb.AddVertexAttribute("TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0); + gpb.AddVertexAttribute("COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8); + gpb.AddVertexAttribute("TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12); + gpb.AddVertexAttribute("POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16); + gpb.AddVertexAttribute("POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20); + gpb.AddVertexAttribute("TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24); + gpb.AddVertexAttribute("COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28); + } // DepthStencil if (p.ds) @@ -2337,7 +2333,7 @@ GSDevice12::ComPtr GSDevice12::CreateTFXPipeline(const Pipe if (pipeline) { D3D12::SetObjectNameFormatted( - pipeline.get(), "TFX Pipeline %08X/%08X/%" PRIX64 "%08X", p.vs.key, p.gs.key, p.ps.key_hi, p.ps.key_lo); + pipeline.get(), "TFX Pipeline %08X/%" PRIX64 "%08X", p.vs.key, p.ps.key_hi, p.ps.key_lo); } return pipeline; @@ -2941,6 +2937,11 @@ bool GSDevice12::ApplyTFXState(bool already_execed) cmdlist->SetGraphicsRootConstantBufferView(TFX_ROOT_SIGNATURE_PARAM_VS_CBV, m_tfx_constant_buffers[0]); if (flags & 
DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING) cmdlist->SetGraphicsRootConstantBufferView(TFX_ROOT_SIGNATURE_PARAM_PS_CBV, m_tfx_constant_buffers[1]); + if (flags & DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING) + { + cmdlist->SetGraphicsRootShaderResourceView(TFX_ROOT_SIGNATURE_PARAM_VS_SRV, + m_vertex_stream_buffer.GetGPUPointer() + m_vertex.start * sizeof(GSVertex)); + } if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE) cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES, m_tfx_textures_handle_gpu); if (flags & DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE) @@ -3070,8 +3071,7 @@ GSTexture12* GSDevice12::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config, Pipe // image is now filled with either -1 or INT_MAX, so now we can do the prepass SetPrimitiveTopology(s_primitive_topology_mapping[static_cast(config.topology)]); - IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts); - IASetIndexBuffer(config.indices, config.nindices); + UploadHWDrawVerticesAndIndices(config); // cut down the configuration for the prepass, we don't need blending or any feedback loop PipelineSelector init_pipe(m_pipeline_selector); @@ -3252,10 +3252,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) // VB/IB upload, if we did DATE setup and it's not HDR this has already been done SetPrimitiveTopology(s_primitive_topology_mapping[static_cast(config.topology)]); if (!date_image || hdr_rt) - { - IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts); - IASetIndexBuffer(config.indices, config.nindices); - } + UploadHWDrawVerticesAndIndices(config); // now we can do the actual draw if (BindDrawPipeline(pipe)) @@ -3333,7 +3330,6 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config) { m_pipeline_selector.vs.key = config.vs.key; - m_pipeline_selector.gs.key = config.gs.key; m_pipeline_selector.ps.key_hi = config.ps.key_hi; m_pipeline_selector.ps.key_lo = config.ps.key_lo; m_pipeline_selector.dss.key = 
config.depth.key; @@ -3344,3 +3340,23 @@ void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config) m_pipeline_selector.rt = config.rt != nullptr; m_pipeline_selector.ds = config.ds != nullptr; } + +void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config) +{ + IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts); + + // Update SRV in root signature directly, rather than using a uniform for base vertex. + if (config.vs.expand != GSHWDrawConfig::VSExpand::None) + m_dirty_flags |= DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING; + + if (config.vs.UseExpandIndexBuffer()) + { + m_index.start = 0; + m_index.count = config.nindices; + SetIndexBuffer(m_expand_index_buffer->GetGPUVirtualAddress(), EXPAND_BUFFER_SIZE, DXGI_FORMAT_R32_UINT); + } + else + { + IASetIndexBuffer(config.indices, config.nindices); + } +} diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.h b/pcsx2/GS/Renderers/DX12/GSDevice12.h index a6fe9d90b4..0d5b403228 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.h +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.h @@ -52,7 +52,6 @@ public: }; GSHWDrawConfig::VSSelector vs; - GSHWDrawConfig::GSSelector gs; GSHWDrawConfig::DepthStencilSelector dss; GSHWDrawConfig::ColorMaskSelector cms; GSHWDrawConfig::BlendState bs; @@ -69,7 +68,7 @@ public: std::size_t operator()(const PipelineSelector& e) const noexcept { std::size_t hash = 0; - HashCombine(hash, e.vs.key, e.gs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key); + HashCombine(hash, e.vs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key); return hash; } }; @@ -124,9 +123,10 @@ public: TFX_ROOT_SIGNATURE_PARAM_VS_CBV = 0, TFX_ROOT_SIGNATURE_PARAM_PS_CBV = 1, - TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 2, - TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 3, - TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 4, + TFX_ROOT_SIGNATURE_PARAM_VS_SRV = 2, + TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 3, + TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 4, + 
TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 5, UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS = 0, UTILITY_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 1, @@ -154,6 +154,8 @@ private: D3D12::StreamBuffer m_index_stream_buffer; D3D12::StreamBuffer m_vertex_constant_buffer; D3D12::StreamBuffer m_pixel_constant_buffer; + ComPtr m_expand_index_buffer; + ComPtr m_expand_index_buffer_allocation; D3D12::DescriptorHandle m_point_sampler_cpu; D3D12::DescriptorHandle m_linear_sampler_cpu; @@ -173,7 +175,6 @@ private: ComPtr m_imgui_pipeline; std::unordered_map> m_tfx_vertex_shaders; - std::unordered_map> m_tfx_geometry_shaders; std::unordered_map, GSHWDrawConfig::PSSelectorHash> m_tfx_pixel_shaders; std::unordered_map, PipelineSelectorHash> m_tfx_pipelines; @@ -209,7 +210,6 @@ private: bool GetTextureGroupDescriptors(D3D12::DescriptorHandle* gpu_handle, const D3D12::DescriptorHandle* cpu_handles, u32 count); const ID3DBlob* GetTFXVertexShader(GSHWDrawConfig::VSSelector sel); - const ID3DBlob* GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel); const ID3DBlob* GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sel); ComPtr CreateTFXPipeline(const PipelineSelector& p); const ID3D12PipelineState* GetTFXPipeline(const PipelineSelector& p); @@ -317,6 +317,7 @@ public: void RenderHW(GSHWDrawConfig& config) override; void UpdateHWPipelineSelector(GSHWDrawConfig& config); + void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config); public: /// Ends any render pass, executes the command buffer, and invalidates cached state. 
@@ -377,25 +378,27 @@ private: DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 5), DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 6), - DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 7), - DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 8), - DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 9), + DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 7), + DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 8), + DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 9), + DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 10), - DIRTY_FLAG_VERTEX_BUFFER = (1 << 10), - DIRTY_FLAG_INDEX_BUFFER = (1 << 11), - DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 12), - DIRTY_FLAG_VIEWPORT = (1 << 13), - DIRTY_FLAG_SCISSOR = (1 << 14), - DIRTY_FLAG_RENDER_TARGET = (1 << 15), - DIRTY_FLAG_PIPELINE = (1 << 16), - DIRTY_FLAG_BLEND_CONSTANTS = (1 << 17), - DIRTY_FLAG_STENCIL_REF = (1 << 18), + DIRTY_FLAG_VERTEX_BUFFER = (1 << 11), + DIRTY_FLAG_INDEX_BUFFER = (1 << 12), + DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 13), + DIRTY_FLAG_VIEWPORT = (1 << 14), + DIRTY_FLAG_SCISSOR = (1 << 15), + DIRTY_FLAG_RENDER_TARGET = (1 << 16), + DIRTY_FLAG_PIPELINE = (1 << 17), + DIRTY_FLAG_BLEND_CONSTANTS = (1 << 18), + DIRTY_FLAG_STENCIL_REF = (1 << 19), DIRTY_BASE_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING | - DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | + DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | + DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PRIMITIVE_TOPOLOGY | - DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET | - DIRTY_FLAG_PIPELINE | DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF, + DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_PIPELINE | + DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF, DIRTY_TFX_STATE = DIRTY_BASE_STATE | 
DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS | DIRTY_FLAG_TFX_RT_TEXTURES, DIRTY_UTILITY_STATE = DIRTY_BASE_STATE, diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 742994af9b..6bf6372d96 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -249,6 +249,7 @@ void GSRendererHW::Lines2Sprites() } // assume vertices are tightly packed and sequentially indexed (it should be the case) + const bool predivide_q = PRIM->TME && !PRIM->FST && m_vt.m_accurate_stq; if (m_vertex.next >= 2) { @@ -275,7 +276,7 @@ void GSRendererHW::Lines2Sprites() v0.XYZ.Z = v1.XYZ.Z; v0.FOG = v1.FOG; - if (PRIM->TME && !PRIM->FST) + if (predivide_q) { const GSVector4 st0 = GSVector4::loadl(&v0.ST.U64); const GSVector4 st1 = GSVector4::loadl(&v1.ST.U64); @@ -319,65 +320,28 @@ void GSRendererHW::Lines2Sprites() } } -template -void GSRendererHW::ExpandIndices() +void GSRendererHW::ExpandLineIndices() { - u32 process_count = (m_index.tail + 3) / 4 * 4; - if (Expand == GSHWDrawConfig::VSExpand::Point) - { - // Make sure we have space for writing off the end slightly - while (process_count > m_vertex.maxcount) - GrowVertexBuffer(); - } - - u32 expansion_factor = Expand == GSHWDrawConfig::VSExpand::Point ? 
6 : 3; + const u32 process_count = (m_index.tail + 3) / 4 * 4; + const u32 expansion_factor = 3; m_index.tail *= expansion_factor; GSVector4i* end = reinterpret_cast(m_index.buff); GSVector4i* read = reinterpret_cast(m_index.buff + process_count); GSVector4i* write = reinterpret_cast(m_index.buff + process_count * expansion_factor); + + constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1); + constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1); + constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3); + while (read > end) { read -= 1; write -= expansion_factor; - switch (Expand) - { - case GSHWDrawConfig::VSExpand::None: - break; - case GSHWDrawConfig::VSExpand::Point: - { - constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1); - constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1); - constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3); - const GSVector4i in = read->sll32(2); - write[0] = in.xxxx() | low0; - write[1] = in.xxyy() | low1; - write[2] = in.yyyy() | low2; - write[3] = in.zzzz() | low0; - write[4] = in.zzww() | low1; - write[5] = in.wwww() | low2; - break; - } - case GSHWDrawConfig::VSExpand::Line: - { - constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1); - constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1); - constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3); - const GSVector4i in = read->sll32(2); - write[0] = in.xxyx() | low0; - write[1] = in.yyzz() | low1; - write[2] = in.wzww() | low2; - break; - } - case GSHWDrawConfig::VSExpand::Sprite: - { - constexpr GSVector4i low = GSVector4i::cxpr(0, 1, 0, 1); - const GSVector4i in = read->sll32(1); - write[0] = in.xxyx() | low; - write[1] = in.yyzz() | low; - write[2] = in.wzww() | low; - break; - } - } + + const GSVector4i in = read->sll32(2); + write[0] = in.xxyx() | low0; + write[1] = in.yyzz() | low1; + write[2] = in.wzww() | low2; } } @@ -2453,110 +2417,90 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy) switch (m_vt.m_primclass) { case 
GS_POINT_CLASS: - m_conf.gs.topology = GSHWDrawConfig::GSTopology::Point; - m_conf.topology = GSHWDrawConfig::Topology::Point; - m_conf.indices_per_prim = 1; - if (unscale_pt_ln) { - if (features.point_expand) + m_conf.topology = GSHWDrawConfig::Topology::Point; + m_conf.indices_per_prim = 1; + if (unscale_pt_ln) { + if (features.point_expand) + { + m_conf.vs.point_size = true; + m_conf.cb_vs.point_size = GSVector2(target_scale); + } + else if (features.vs_expand) + { + m_conf.vs.expand = GSHWDrawConfig::VSExpand::Point; + m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy); + m_conf.topology = GSHWDrawConfig::Topology::Triangle; + m_conf.verts = m_vertex.buff; + m_conf.nverts = m_vertex.next; + m_conf.nindices = m_index.tail * 6; + m_conf.indices_per_prim = 6; + return; + } + } + else + { + // Vulkan/GL still need to set point size. + m_conf.cb_vs.point_size = target_scale; + + // M1 requires point size output on *all* points. m_conf.vs.point_size = true; - m_conf.cb_vs.point_size = GSVector2(target_scale); } - else if (features.geometry_shader) - { - m_conf.gs.expand = true; - m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy); - } - else if (features.vs_expand) - { - m_conf.vs.expand = GSHWDrawConfig::VSExpand::Point; - m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy); - m_conf.topology = GSHWDrawConfig::Topology::Triangle; - m_conf.indices_per_prim = 6; - ExpandIndices(); - } - } - else - { - // Vulkan/GL still need to set point size. 
- m_conf.cb_vs.point_size = target_scale; } break; case GS_LINE_CLASS: - m_conf.gs.topology = GSHWDrawConfig::GSTopology::Line; - m_conf.topology = GSHWDrawConfig::Topology::Line; - m_conf.indices_per_prim = 2; - if (unscale_pt_ln) { - if (features.line_expand) + m_conf.topology = GSHWDrawConfig::Topology::Line; + m_conf.indices_per_prim = 2; + if (unscale_pt_ln) { - m_conf.line_expand = true; - } - else if (features.geometry_shader) - { - m_conf.gs.expand = true; - m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy); - } - else if (features.vs_expand) - { - m_conf.vs.expand = GSHWDrawConfig::VSExpand::Line; - m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy); - m_conf.topology = GSHWDrawConfig::Topology::Triangle; - m_conf.indices_per_prim = 6; - ExpandIndices(); + if (features.line_expand) + { + m_conf.line_expand = true; + } + else if (features.vs_expand) + { + m_conf.vs.expand = GSHWDrawConfig::VSExpand::Line; + m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy); + m_conf.topology = GSHWDrawConfig::Topology::Triangle; + m_conf.indices_per_prim = 6; + ExpandLineIndices(); + } } } break; case GS_SPRITE_CLASS: - // Heuristics: trade-off - // Lines: GPU conversion => ofc, more GPU. And also more CPU due to extra shader validation stage. - // Triangles: CPU conversion => ofc, more CPU ;) more bandwidth (72 bytes / sprite) - // - // Note: severals openGL operation does draw call under the wood like texture upload. So even if - // you do 10 consecutive draw with the geometry shader, you will still pay extra validation if new - // texture are uploaded. (game Shadow Hearts) - // - // Note2: Due to MultiThreaded driver, Nvidia suffers less of the previous issue. Still it isn't free - // Shadow Heart is 90 fps (gs) vs 113 fps (no gs) - // - // Note3: Some GPUs (Happens on GT 750m, not on Intel 5200) don't properly divide by large floats (e.g. 
FLT_MAX/FLT_MAX == 0) - // Lines2Sprites predivides by Q, avoiding this issue, so always use it if m_vt.m_accurate_stq - - // If the draw calls contains few primitives. Geometry Shader gain with be rather small versus - // the extra validation cost of the extra stage. - // - // Note: keep Geometry Shader in the replayer to ease debug. - if (g_gs_device->Features().geometry_shader && !m_vt.m_accurate_stq && m_vertex.next > 32) // <=> 16 sprites (based on Shadow Hearts) { - m_conf.gs.expand = true; + // Need to pre-divide ST by Q if Q is very large, to avoid precision issues on some GPUs. + // May as well just expand the whole thing out with the CPU path in such a case. + if (features.vs_expand && !m_vt.m_accurate_stq) + { + m_conf.topology = GSHWDrawConfig::Topology::Triangle; + m_conf.vs.expand = GSHWDrawConfig::VSExpand::Sprite; + m_conf.verts = m_vertex.buff; + m_conf.nverts = m_vertex.next; + m_conf.nindices = m_index.tail * 3; + m_conf.indices_per_prim = 6; + return; + } + else + { + Lines2Sprites(); - m_conf.topology = GSHWDrawConfig::Topology::Line; - m_conf.indices_per_prim = 2; + m_conf.topology = GSHWDrawConfig::Topology::Triangle; + m_conf.indices_per_prim = 6; + } } - else if (features.vs_expand && !m_vt.m_accurate_stq) - { - m_conf.topology = GSHWDrawConfig::Topology::Triangle; - m_conf.vs.expand = GSHWDrawConfig::VSExpand::Sprite; - m_conf.indices_per_prim = 6; - ExpandIndices(); - } - else - { - Lines2Sprites(); - - m_conf.topology = GSHWDrawConfig::Topology::Triangle; - m_conf.indices_per_prim = 6; - } - m_conf.gs.topology = GSHWDrawConfig::GSTopology::Sprite; break; case GS_TRIANGLE_CLASS: - m_conf.gs.topology = GSHWDrawConfig::GSTopology::Triangle; - m_conf.topology = GSHWDrawConfig::Topology::Triangle; - m_conf.indices_per_prim = 3; + { + m_conf.topology = GSHWDrawConfig::Topology::Triangle; + m_conf.indices_per_prim = 3; + } break; default: @@ -4443,7 +4387,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta // 
GS_SPRITE_CLASS are already flat (either by CPU or the GS) m_conf.ps.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 0 : PRIM->IIP; - m_conf.gs.iip = m_conf.ps.iip; m_conf.vs.iip = m_conf.ps.iip; if (DATE_BARRIER) @@ -4463,7 +4406,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta else if (DATE_PRIMID) { m_conf.ps.date = 1 + m_cached_ctx.TEST.DATM; - m_conf.gs.forward_primid = 1; } else if (DATE) { diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index b4e4ce0240..d3bfdaed84 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -168,7 +168,7 @@ public: float GetUpscaleMultiplier() override; void Lines2Sprites(); bool VerifyIndices(); - template void ExpandIndices(); + void ExpandLineIndices(); void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba); GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex); GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 2efe3b1516..1271189109 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -2609,7 +2609,6 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx config.vs.tme = true; config.vs.iip = true; config.vs.fst = true; - config.gs.key = 0; config.ps.key_lo = 0; config.ps.key_hi = 0; config.ps.read_ba = read_ba; diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h index 94f8e5646b..b3aebdccc3 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h @@ -281,7 +281,7 @@ public: MRCOwned> m_dss_stencil_write; MRCOwned> m_dss_hw[1 << 5]; - MRCOwned> m_texture_download_buf; + MRCOwned> m_expand_index_buffer; UploadBuffer m_texture_upload_buf; BufferPair m_vertex_upload_buf; diff --git 
a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index 586e39ebce..a2e9b8b975 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -748,6 +748,21 @@ void GSDeviceMTL::DetachSurfaceOnMainThread() m_layer = nullptr; } +// Metal is fun and won't let you use newBufferWithBytes for private buffers +static MRCOwned> CreatePrivateBufferWithContent( + id dev, id cb, + MTLResourceOptions options, NSUInteger length, + std::function fill) +{ + MRCOwned> tmp = MRCTransfer([dev newBufferWithLength:length options:MTLResourceStorageModeShared]); + MRCOwned> actual = MRCTransfer([dev newBufferWithLength:length options:options|MTLResourceStorageModePrivate]); + fill([tmp contents]); + id blit = [cb blitCommandEncoder]; + [blit copyFromBuffer:tmp sourceOffset:0 toBuffer:actual destinationOffset:0 size:length]; + [blit endEncoding]; + return actual; +} + bool GSDeviceMTL::Create(const WindowInfo& wi, VsyncMode vsync) { @autoreleasepool { if (!GSDevice::Create(wi, vsync)) @@ -808,7 +823,6 @@ bool GSDeviceMTL::Create(const WindowInfo& wi, VsyncMode vsync) MTLPixelFormat layer_px_fmt = [m_layer pixelFormat]; m_features.broken_point_sampler = [[m_dev.dev name] containsString:@"AMD"]; - m_features.geometry_shader = false; m_features.vs_expand = true; m_features.primitive_id = m_dev.features.primid; m_features.texture_barrier = true; @@ -852,6 +866,9 @@ bool GSDeviceMTL::Create(const WindowInfo& wi, VsyncMode vsync) m_cas_pipeline[sharpen_only] = MakeComputePipeline(LoadShader(shader), sharpen_only ? 
@"CAS Sharpen" : @"CAS Upscale"); } + m_expand_index_buffer = CreatePrivateBufferWithContent(m_dev.dev, initCommands, MTLResourceHazardTrackingModeUntracked, EXPAND_BUFFER_SIZE, GenerateExpansionIndexBuffer); + [m_expand_index_buffer setLabel:@"Point/Sprite Expand Indices"]; + m_hw_vertex = MRCTransfer([MTLVertexDescriptor new]); [[[m_hw_vertex layouts] objectAtIndexedSubscript:GSMTLBufferIndexHWVertices] setStride:sizeof(GSVertex)]; applyAttribute(m_hw_vertex, GSMTLAttributeIndexST, MTLVertexFormatFloat2, offsetof(GSVertex, ST), GSMTLBufferIndexHWVertices); @@ -1987,17 +2004,27 @@ void GSDeviceMTL::MREInitHWDraw(GSHWDrawConfig& config, const Map& verts) void GSDeviceMTL::RenderHW(GSHWDrawConfig& config) { @autoreleasepool { - if (config.topology == GSHWDrawConfig::Topology::Point) - config.vs.point_size = 1; // M1 requires point size output on *all* points - if (config.tex && config.ds == config.tex) EndRenderPass(); // Barrier size_t vertsize = config.nverts * sizeof(*config.verts); - size_t idxsize = config.nindices * sizeof(*config.indices); + size_t idxsize = config.vs.UseExpandIndexBuffer() ? 
0 : (config.nindices * sizeof(*config.indices)); Map allocation = Allocate(m_vertex_upload_buf, vertsize + idxsize); memcpy(allocation.cpu_buffer, config.verts, vertsize); - memcpy(static_cast(allocation.cpu_buffer) + vertsize, config.indices, idxsize); + + id index_buffer; + size_t index_buffer_offset; + if (!config.vs.UseExpandIndexBuffer()) + { + memcpy(static_cast(allocation.cpu_buffer) + vertsize, config.indices, idxsize); + index_buffer = allocation.gpu_buffer; + index_buffer_offset = allocation.gpu_offset + vertsize; + } + else + { + index_buffer = m_expand_index_buffer; + index_buffer_offset = 0; + } FlushClears(config.tex); FlushClears(config.pal); @@ -2028,7 +2055,7 @@ void GSDeviceMTL::RenderHW(GSHWDrawConfig& config) ASSERT(config.require_full_barrier == false && config.drawlist == nullptr); MRESetHWPipelineState(config.vs, config.ps, {}, {}); MREInitHWDraw(config, allocation); - SendHWDraw(config, m_current_render.encoder, allocation.gpu_buffer, allocation.gpu_offset + vertsize); + SendHWDraw(config, m_current_render.encoder, index_buffer, index_buffer_offset); config.ps.date = 3; break; } @@ -2084,7 +2111,7 @@ void GSDeviceMTL::RenderHW(GSHWDrawConfig& config) MRESetHWPipelineState(config.vs, config.ps, config.blend, config.colormask); MRESetDSS(config.depth); - SendHWDraw(config, mtlenc, allocation.gpu_buffer, allocation.gpu_offset + vertsize); + SendHWDraw(config, mtlenc, index_buffer, index_buffer_offset); if (config.alpha_second_pass.enable) { @@ -2095,7 +2122,7 @@ void GSDeviceMTL::RenderHW(GSHWDrawConfig& config) } MRESetHWPipelineState(config.vs, config.alpha_second_pass.ps, config.blend, config.alpha_second_pass.colormask); MRESetDSS(config.alpha_second_pass.depth); - SendHWDraw(config, mtlenc, allocation.gpu_buffer, allocation.gpu_offset + vertsize); + SendHWDraw(config, mtlenc, index_buffer, index_buffer_offset); } if (hdr_rt) @@ -2141,25 +2168,34 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id g_perfmon.Put(GSPerfMon::DrawCalls, 
config.drawlist->size()); g_perfmon.Put(GSPerfMon::Barriers, config.drawlist->size()); - for (size_t count = 0, p = 0, n = 0; n < config.drawlist->size(); p += count, ++n) + + const u32 indices_per_prim = config.indices_per_prim; + const u32 draw_list_size = static_cast(config.drawlist->size()); + + for (u32 n = 0, p = 0; n < draw_list_size; n++) { - count = (*config.drawlist)[n] * config.indices_per_prim; + const u32 count = (*config.drawlist)[n] * indices_per_prim; textureBarrier(enc); [enc drawIndexedPrimitives:topology indexCount:count indexType:MTLIndexTypeUInt32 indexBuffer:buffer indexBufferOffset:off + p * sizeof(*config.indices)]; + p += count; } + [enc popDebugGroup]; + return; } else if (config.require_full_barrier) { - const u32 ndraws = config.nindices / config.indices_per_prim; + const u32 indices_per_prim = config.indices_per_prim; + const u32 ndraws = config.nindices / indices_per_prim; g_perfmon.Put(GSPerfMon::DrawCalls, ndraws); g_perfmon.Put(GSPerfMon::Barriers, ndraws); [enc pushDebugGroup:[NSString stringWithFormat:@"Full barrier split draw (%d prims)", ndraws]]; - for (size_t p = 0; p < config.nindices; p += config.indices_per_prim) + + for (u32 p = 0; p < config.nindices; p += indices_per_prim) { textureBarrier(enc); [enc drawIndexedPrimitives:topology @@ -2168,30 +2204,24 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id indexBuffer:buffer indexBufferOffset:off + p * sizeof(*config.indices)]; } + [enc popDebugGroup]; + return; } else if (config.require_one_barrier) { // One barrier needed textureBarrier(enc); - [enc drawIndexedPrimitives:topology - indexCount:config.nindices - indexType:MTLIndexTypeUInt32 - indexBuffer:buffer - indexBufferOffset:off]; - g_perfmon.Put(GSPerfMon::DrawCalls, 1); g_perfmon.Put(GSPerfMon::Barriers, 1); } - else - { - // No barriers needed - [enc drawIndexedPrimitives:topology - indexCount:config.nindices - indexType:MTLIndexTypeUInt32 - indexBuffer:buffer - indexBufferOffset:off]; - 
g_perfmon.Put(GSPerfMon::DrawCalls, 1); - } + + [enc drawIndexedPrimitives:topology + indexCount:config.nindices + indexType:MTLIndexTypeUInt32 + indexBuffer:buffer + indexBufferOffset:off]; + + g_perfmon.Put(GSPerfMon::DrawCalls, 1); } // tbh I'm not a fan of the current debug groups diff --git a/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp b/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp index 30c6f7ef1b..7d9a68d7dc 100644 --- a/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GLLoader.cpp @@ -116,15 +116,6 @@ namespace GLLoader bool buggy_pbo = false; bool disable_download_pbo = false; - bool is_gles = false; - bool has_dual_source_blend = false; - bool has_clip_control = true; - bool found_framebuffer_fetch = false; - bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default - // DX11 GPU - bool found_GL_ARB_gpu_shader5 = false; // Require IvyBridge - bool found_GL_ARB_texture_barrier = false; - static bool check_gl_version() { const char* vendor = (const char*)glGetString(GL_VENDOR); @@ -135,13 +126,6 @@ namespace GLLoader else if (strstr(vendor, "Intel")) vendor_id_intel = true; - if (GSConfig.OverrideGeometryShaders != -1) - { - found_geometry_shader = GSConfig.OverrideGeometryShaders != 0 && - (GLAD_GL_VERSION_3_2 || GL_ARB_geometry_shader4 || GSConfig.OverrideGeometryShaders == 1); - Console.Warning("Overriding geometry shaders detection to %s", found_geometry_shader ? "true" : "false"); - } - GLint major_gl = 0; GLint minor_gl = 0; glGetIntegerv(GL_MAJOR_VERSION, &major_gl); @@ -157,32 +141,13 @@ namespace GLLoader static bool check_gl_supported_extension() { - if (GLAD_GL_VERSION_3_3 && !GLAD_GL_ARB_shading_language_420pack) + if (!GLAD_GL_ARB_shading_language_420pack) { Host::ReportFormattedErrorAsync("GS", "GL_ARB_shading_language_420pack is not supported, this is required for the OpenGL renderer."); return false; } - // GLES doesn't have ARB_clip_control. 
- has_clip_control = GLAD_GL_ARB_clip_control; - if (!has_clip_control && !is_gles) - { - Host::AddOSDMessage("GL_ARB_clip_control is not supported, this will cause rendering issues.", - Host::OSD_ERROR_DURATION); - } - - found_GL_ARB_gpu_shader5 = GLAD_GL_ARB_gpu_shader5; - found_GL_ARB_texture_barrier = GLAD_GL_ARB_texture_barrier; - - has_dual_source_blend = GLAD_GL_VERSION_3_2 || GLAD_GL_ARB_blend_func_extended; - found_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch; - if (found_framebuffer_fetch && GSConfig.DisableFramebufferFetch) - { - Console.Warning("Framebuffer fetch was found but is disabled. This will reduce performance."); - found_framebuffer_fetch = false; - } - if (!GLAD_GL_ARB_viewport_array) { glScissorIndexed = ReplaceGL::ScissorIndexed; @@ -203,21 +168,6 @@ namespace GLLoader Emulate_DSA::Init(); } - if (is_gles) - { - has_dual_source_blend = GLAD_GL_EXT_blend_func_extended || GLAD_GL_ARB_blend_func_extended; - if (!has_dual_source_blend && !found_framebuffer_fetch) - { - Host::AddOSDMessage("Both dual source blending and framebuffer fetch are missing, things will be broken.", - Host::OSD_ERROR_DURATION); - } - } - else - { - // Core in GL3.2, so everything supports it. - has_dual_source_blend = true; - } - // Don't use PBOs when we don't have ARB_buffer_storage, orphaning buffers probably ends up worse than just // using the normal texture update routines and letting the driver take care of it. 
buggy_pbo = !GLAD_GL_VERSION_4_4 && !GLAD_GL_ARB_buffer_storage && !GLAD_GL_EXT_buffer_storage; diff --git a/pcsx2/GS/Renderers/OpenGL/GLLoader.h b/pcsx2/GS/Renderers/OpenGL/GLLoader.h index 292230570b..0b28aff398 100644 --- a/pcsx2/GS/Renderers/OpenGL/GLLoader.h +++ b/pcsx2/GS/Renderers/OpenGL/GLLoader.h @@ -31,13 +31,4 @@ namespace GLLoader extern bool vendor_id_intel; extern bool buggy_pbo; extern bool disable_download_pbo; - - // GL - extern bool is_gles; - extern bool has_clip_control; - extern bool has_dual_source_blend; - extern bool found_framebuffer_fetch; - extern bool found_geometry_shader; - extern bool found_GL_ARB_gpu_shader5; - extern bool found_GL_ARB_texture_barrier; } // namespace GLLoader diff --git a/pcsx2/GS/Renderers/OpenGL/GLState.cpp b/pcsx2/GS/Renderers/OpenGL/GLState.cpp index 83bbf6cf6c..031a48502f 100644 --- a/pcsx2/GS/Renderers/OpenGL/GLState.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GLState.cpp @@ -18,6 +18,7 @@ namespace GLState { + GLuint vao; GLuint fbo; GSVector2i viewport; GSVector4i scissor; @@ -49,6 +50,7 @@ namespace GLState void Clear() { + vao = 0; fbo = 0; viewport = GSVector2i(1, 1); scissor = GSVector4i(0, 0, 1, 1); diff --git a/pcsx2/GS/Renderers/OpenGL/GLState.h b/pcsx2/GS/Renderers/OpenGL/GLState.h index 398a63507e..b7aeb7cc67 100644 --- a/pcsx2/GS/Renderers/OpenGL/GLState.h +++ b/pcsx2/GS/Renderers/OpenGL/GLState.h @@ -22,6 +22,7 @@ class GSTextureOGL; namespace GLState { + extern GLuint vao; // vertex array object extern GLuint fbo; // frame buffer object extern GSVector2i viewport; extern GSVector4i scissor; diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index 14eac01fb8..9570248276 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -90,7 +90,12 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) if (!GSDevice::Create(wi, vsync)) return false; - m_gl_context = GL::Context::Create(wi); + // We need at 
least GL3.3. + static constexpr const GL::Context::Version version_list[] = {{GL::Context::Profile::Core, 4, 6}, + {GL::Context::Profile::Core, 4, 5}, {GL::Context::Profile::Core, 4, 4}, {GL::Context::Profile::Core, 4, 3}, + {GL::Context::Profile::Core, 4, 2}, {GL::Context::Profile::Core, 4, 1}, {GL::Context::Profile::Core, 4, 0}, + {GL::Context::Profile::Core, 3, 3}}; + m_gl_context = GL::Context::Create(wi, version_list); if (!m_gl_context) { Console.Error("Failed to create any GL context"); @@ -104,9 +109,6 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) return false; } - // Check openGL requirement as soon as possible so we can switch to another - // renderer/device - GLLoader::is_gles = m_gl_context->IsGLES(); if (!GLLoader::check_gl_requirements()) return false; @@ -124,41 +126,57 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) // optional features based on context m_features.broken_point_sampler = GLLoader::vendor_id_amd; - m_features.geometry_shader = GLLoader::found_geometry_shader; m_features.primitive_id = true; + + m_features.framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch; + if (m_features.framebuffer_fetch && GSConfig.DisableFramebufferFetch) + { + Host::AddOSDMessage("Framebuffer fetch was found but is disabled. 
This will reduce performance.", Host::OSD_ERROR_DURATION); + m_features.framebuffer_fetch = false; + } + if (GSConfig.OverrideTextureBarriers == 0) - m_features.texture_barrier = GLLoader::found_framebuffer_fetch; // Force Disabled + m_features.texture_barrier = m_features.framebuffer_fetch; // Force Disabled else if (GSConfig.OverrideTextureBarriers == 1) m_features.texture_barrier = true; // Force Enabled else - m_features.texture_barrier = GLLoader::found_framebuffer_fetch || GLLoader::found_GL_ARB_texture_barrier; + m_features.texture_barrier = m_features.framebuffer_fetch || GLAD_GL_ARB_texture_barrier; + if (!m_features.texture_barrier) + { + Host::AddOSDMessage( + "GL_ARB_texture_barrier is not supported, blending will not be accurate.", Host::OSD_ERROR_DURATION); + } + m_features.provoking_vertex_last = true; m_features.dxt_textures = GLAD_GL_EXT_texture_compression_s3tc; m_features.bptc_textures = GLAD_GL_VERSION_4_2 || GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_EXT_texture_compression_bptc; - m_features.prefer_new_textures = GLLoader::is_gles; - m_features.framebuffer_fetch = GLLoader::found_framebuffer_fetch; - m_features.dual_source_blend = GLLoader::has_dual_source_blend && !GSConfig.DisableDualSourceBlend; - m_features.clip_control = GLLoader::has_clip_control; + m_features.prefer_new_textures = false; + m_features.dual_source_blend = !GSConfig.DisableDualSourceBlend; + m_features.clip_control = GLAD_GL_ARB_clip_control; + if (!m_features.clip_control) + Host::AddOSDMessage("GL_ARB_clip_control is not supported, this will cause rendering issues.", Host::OSD_ERROR_DURATION); m_features.stencil_buffer = true; - m_features.test_and_sample_depth = m_features.texture_barrier && !GLLoader::is_gles; + m_features.test_and_sample_depth = m_features.texture_barrier; + + if (GLAD_GL_ARB_shader_storage_buffer_object) + { + GLint max_vertex_ssbos = 0; + glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &max_vertex_ssbos); + 
DevCon.WriteLn("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: %d", max_vertex_ssbos); + m_features.vs_expand = (max_vertex_ssbos > 0); + } + if (!m_features.vs_expand) + Console.Warning("Vertex expansion is not supported. This will reduce performance."); GLint point_range[2] = {}; glGetIntegerv(GL_ALIASED_POINT_SIZE_RANGE, point_range); m_features.point_expand = (point_range[0] <= GSConfig.UpscaleMultiplier && point_range[1] >= GSConfig.UpscaleMultiplier); + m_features.line_expand = false; - if (GLLoader::is_gles) - { - GLint line_range[2] = {}; - glGetIntegerv(GL_ALIASED_LINE_WIDTH_RANGE, line_range); - m_features.line_expand = (line_range[0] <= static_cast(GSConfig.UpscaleMultiplier) && line_range[1] >= static_cast(GSConfig.UpscaleMultiplier)); - } - else - { - m_features.line_expand = false; - } - - DevCon.WriteLn("Using %s for point expansion and %s for line expansion.", - m_features.point_expand ? "hardware" : "geometry shaders", m_features.line_expand ? "hardware" : "geometry shaders"); + Console.WriteLn("Using %s for point expansion, %s for line expansion and %s for sprite expansion.", + m_features.point_expand ? "hardware" : (m_features.vs_expand ? "vertex expanding" : "UNSUPPORTED"), + m_features.line_expand ? "hardware" : (m_features.vs_expand ? "vertex expanding" : "UNSUPPORTED"), + m_features.vs_expand ? "vertex expanding" : "CPU"); // because of fbo bindings below... 
GLState::Clear(); @@ -168,20 +186,12 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) // **************************************************************** if (GSConfig.UseDebugDevice) { - if (!GLLoader::is_gles) - { - glDebugMessageCallback(DebugMessageCallback, NULL); + glDebugMessageCallback(DebugMessageCallback, NULL); - glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true); - // Useless info message on Nvidia driver - GLuint ids[] = { 0x20004 }; - glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, std::size(ids), ids, false); - } - else if (GLAD_GL_KHR_debug) - { - glDebugMessageCallbackKHR(DebugMessageCallback, NULL); - glDebugMessageControlKHR(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true); - } + glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true); + // Useless info message on Nvidia driver + static constexpr const GLuint ids[] = { 0x20004 }; + glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, std::size(ids), ids, false); // Uncomment synchronous if you want callstacks which match where the error occurred. glEnable(GL_DEBUG_OUTPUT); @@ -219,8 +229,8 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) { GL_PUSH("GSDeviceOGL::Vertex Buffer"); - glGenVertexArrays(1, &m_vertex_array_object); - glBindVertexArray(m_vertex_array_object); + glGenVertexArrays(1, &m_vao); + IASetVAO(m_vao); m_vertex_stream_buffer = GL::StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE); m_index_stream_buffer = GL::StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE); @@ -233,14 +243,13 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) return false; } + m_vertex_stream_buffer->Bind(); + m_index_stream_buffer->Bind(); + // Force UBOs to be uploaded on first use. 
std::memset(&m_vs_cb_cache, 0xFF, sizeof(m_vs_cb_cache)); std::memset(&m_ps_cb_cache, 0xFF, sizeof(m_ps_cb_cache)); - // rebind because of VAO state - m_vertex_stream_buffer->Bind(); - m_index_stream_buffer->Bind(); - static_assert(sizeof(GSVertexPT1) == sizeof(GSVertex), "wrong GSVertex size"); for (u32 i = 0; i < 8; i++) glEnableVertexAttribArray(i); @@ -253,6 +262,29 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) glVertexAttribIPointer(5, 1, GL_UNSIGNED_INT, sizeof(GSVertex), (const GLvoid*)(20)); glVertexAttribIPointer(6, 2, GL_UNSIGNED_SHORT, sizeof(GSVertex), (const GLvoid*)(24)); glVertexAttribPointer(7, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(GSVertex), (const GLvoid*)(28)); + + if (m_features.vs_expand) + { + glGenVertexArrays(1, &m_expand_vao); + glBindVertexArray(m_expand_vao); + IASetVAO(m_expand_vao); + + // Still need the vertex buffer bound, because uploads happen to GL_ARRAY_BUFFER. + m_vertex_stream_buffer->Bind(); + + std::unique_ptr expand_data = std::make_unique(EXPAND_BUFFER_SIZE); + GenerateExpansionIndexBuffer(expand_data.get()); + glGenBuffers(1, &m_expand_ibo); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_expand_ibo); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, EXPAND_BUFFER_SIZE, expand_data.get(), GL_STATIC_DRAW); + + // We can bind it once when using gl_BaseVertexARB. 
+ if (GLAD_GL_ARB_shader_draw_parameters) + { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_vertex_stream_buffer->GetGLBufferId(), + 0, VERTEX_BUFFER_SIZE); + } + } } // **************************************************************** @@ -289,7 +321,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) { const char* name = shaderName(static_cast(i)); const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, *convert_glsl)); - if (!m_shader_cache.GetProgram(&m_convert.ps[i], m_convert.vs, {}, ps)) + if (!m_shader_cache.GetProgram(&m_convert.ps[i], m_convert.vs, ps)) return false; m_convert.ps[i].SetFormattedName("Convert pipe %s", name); @@ -343,7 +375,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) { const char* name = shaderName(static_cast(i)); const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, *shader)); - if (!m_shader_cache.GetProgram(&m_present[i], present_vs, {}, ps)) + if (!m_shader_cache.GetProgram(&m_present[i], present_vs, ps)) return false; m_present[i].SetFormattedName("Present pipe %s", name); @@ -376,7 +408,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) for (size_t i = 0; i < std::size(m_merge_obj.ps); i++) { const std::string ps(GetShaderSource(fmt::format("ps_main{}", i), GL_FRAGMENT_SHADER, *shader)); - if (!m_shader_cache.GetProgram(&m_merge_obj.ps[i], m_convert.vs, {}, ps)) + if (!m_shader_cache.GetProgram(&m_merge_obj.ps[i], m_convert.vs, ps)) return false; m_merge_obj.ps[i].SetFormattedName("Merge pipe %zu", i); m_merge_obj.ps[i].RegisterUniform("BGColor"); @@ -399,7 +431,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) for (size_t i = 0; i < std::size(m_interlace.ps); i++) { const std::string ps(GetShaderSource(fmt::format("ps_main{}", i), GL_FRAGMENT_SHADER, *shader)); - if (!m_shader_cache.GetProgram(&m_interlace.ps[i], m_convert.vs, {}, ps)) + if (!m_shader_cache.GetProgram(&m_interlace.ps[i], m_convert.vs, ps)) return false; 
m_interlace.ps[i].SetFormattedName("Merge pipe %zu", i); m_interlace.ps[i].RegisterUniform("ZrH"); @@ -421,12 +453,10 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) { GL_PUSH("GSDeviceOGL::Rasterization"); - if (!GLLoader::is_gles) - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); glDisable(GL_CULL_FACE); glEnable(GL_SCISSOR_TEST); - if (!GLLoader::is_gles) - glDisable(GL_MULTISAMPLE); + glDisable(GL_MULTISAMPLE); glDisable(GL_DITHER); // Honestly I don't know! } @@ -446,7 +476,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) const std::string ps(GetShaderSource( fmt::format("ps_stencil_image_init_{}", i), GL_FRAGMENT_SHADER, *convert_glsl)); - m_shader_cache.GetProgram(&m_date.primid_ps[i], m_convert.vs, {}, ps); + m_shader_cache.GetProgram(&m_date.primid_ps[i], m_convert.vs, ps); m_date.primid_ps[i].SetFormattedName("PrimID Destination Alpha Init %d", i); } } @@ -461,7 +491,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync) // This extension allow FS depth to range from -1 to 1. 
So // gl_position.z could range from [0, 1] // Change depth convention - if (GLLoader::has_clip_control) + if (m_features.clip_control) glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); // **************************************************************** @@ -602,12 +632,16 @@ void GSDeviceOGL::DestroyResources() m_vertex_uniform_stream_buffer.reset(); glBindVertexArray(0); - if (m_vertex_array_object != 0) - glDeleteVertexArrays(1, &m_vertex_array_object); + if (m_expand_ibo != 0) + glDeleteVertexArrays(1, &m_expand_ibo); + if (m_vao != 0) + glDeleteVertexArrays(1, &m_vao); m_index_stream_buffer.reset(); m_vertex_stream_buffer.reset(); s_texture_upload_buffer.reset(); + if (m_expand_ibo) + glDeleteBuffers(1, &m_expand_ibo); if (m_fbo != 0) glDeleteFramebuffers(1, &m_fbo); @@ -1092,56 +1126,28 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ { std::string header; - if (GLLoader::is_gles) + // Intel's GL driver doesn't like the readonly qualifier with 3.3 GLSL. 
+ if (m_features.vs_expand) { - if (GLAD_GL_ES_VERSION_3_2) - header = "#version 320 es\n"; - else if (GLAD_GL_ES_VERSION_3_1) - header = "#version 310 es\n"; - - if (GLAD_GL_EXT_blend_func_extended) - header += "#extension GL_EXT_blend_func_extended : require\n"; - if (GLAD_GL_ARB_blend_func_extended) - header += "#extension GL_ARB_blend_func_extended : require\n"; - if (m_features.framebuffer_fetch) - { - if (GLAD_GL_EXT_shader_framebuffer_fetch) - header += "#extension GL_EXT_shader_framebuffer_fetch : require\n"; - else if (GLAD_GL_ARM_shader_framebuffer_fetch) - header += "#extension GL_ARM_shader_framebuffer_fetch : require\n"; - } - - header += "precision highp float;\n"; - header += "precision highp int;\n"; - header += "precision highp sampler2D;\n"; - if (GLAD_GL_ES_VERSION_3_1) - header += "precision highp sampler2DMS;\n"; - if (GLAD_GL_ES_VERSION_3_2) - header += "precision highp usamplerBuffer;\n"; - - if (!GLAD_GL_EXT_blend_func_extended && !GLAD_GL_ARB_blend_func_extended) - header += "#define DISABLE_DUAL_SOURCE\n"; + header = "#version 430 core\n"; } else { header = "#version 330 core\n"; - - // Need GL version 420 header += "#extension GL_ARB_shading_language_420pack: require\n"; - - if (m_features.framebuffer_fetch && GLAD_GL_EXT_shader_framebuffer_fetch) - header += "#extension GL_EXT_shader_framebuffer_fetch : require\n"; - - if (GLLoader::found_GL_ARB_gpu_shader5) - header += "#extension GL_ARB_gpu_shader5 : enable\n"; } + if (GLAD_GL_ARB_shader_draw_parameters) + header += "#extension GL_ARB_shader_draw_parameters : require\n"; + if (m_features.framebuffer_fetch && GLAD_GL_EXT_shader_framebuffer_fetch) + header += "#extension GL_EXT_shader_framebuffer_fetch : require\n"; + if (m_features.framebuffer_fetch) header += "#define HAS_FRAMEBUFFER_FETCH 1\n"; else header += "#define HAS_FRAMEBUFFER_FETCH 0\n"; - if (GLLoader::has_clip_control) + if (m_features.clip_control) header += "#define HAS_CLIP_CONTROL 1\n"; else header += "#define 
HAS_CLIP_CONTROL 0\n"; @@ -1178,26 +1184,14 @@ std::string GSDeviceOGL::GetVSSource(VSSelector sel) std::string macro = fmt::format("#define VS_FST {}\n", static_cast(sel.fst)) + fmt::format("#define VS_IIP {}\n", static_cast(sel.iip)) - + fmt::format("#define VS_POINT_SIZE {}\n", static_cast(sel.point_size)); + + fmt::format("#define VS_POINT_SIZE {}\n", static_cast(sel.point_size)) + + fmt::format("#define VS_EXPAND {}\n", static_cast(sel.expand)); std::string src = GenGlslHeader("vs_main", GL_VERTEX_SHADER, macro); src += m_shader_tfx_vgs; return src; } -std::string GSDeviceOGL::GetGSSource(GSSelector sel) -{ - DevCon.WriteLn("Compiling new geometry shader with selector 0x%" PRIX64, sel.key); - - std::string macro = fmt::format("#define GS_PRIM {}\n", static_cast(sel.topology)) - + fmt::format("#define GS_EXPAND {}\n", static_cast(sel.expand)) - + fmt::format("#define GS_IIP {}\n", static_cast(sel.iip)); - - std::string src = GenGlslHeader("gs_main", GL_GEOMETRY_SHADER, macro); - src += m_shader_tfx_vgs; - return src; -} - std::string GSDeviceOGL::GetPSSource(const PSSelector& sel) { DevCon.WriteLn("Compiling new pixel shader with selector 0x%" PRIX64 "%08X", sel.key_hi, sel.key_lo); @@ -1502,6 +1496,7 @@ void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect {GSVector4(right , bottom, 0.0f, 0.0f) , GSVector2(sRect.z , sRect.w)} , }; + IASetVAO(m_vao); IASetVertexBuffer(vertices, 4); IASetPrimitiveTopology(GL_TRIANGLE_STRIP); DrawPrimitive(); @@ -1510,6 +1505,7 @@ void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect void GSDeviceOGL::DrawMultiStretchRects( const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) { + IASetVAO(m_vao); IASetPrimitiveTopology(GL_TRIANGLE_STRIP); OMSetDepthStencilState(m_convert.dss); OMSetBlendState(false); @@ -1672,7 +1668,7 @@ void GSDeviceOGL::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture bool GSDeviceOGL::CompileFXAAProgram() { // 
Needs ARB_gpu_shader5 for gather. - if (!GLLoader::is_gles && !GLLoader::found_GL_ARB_gpu_shader5) + if (!GLAD_GL_ARB_gpu_shader5) { Console.Warning("FXAA is not supported with the current GPU"); return true; @@ -1687,7 +1683,7 @@ bool GSDeviceOGL::CompileFXAAProgram() } const std::string ps(GetShaderSource("ps_main", GL_FRAGMENT_SHADER, shader->c_str(), fxaa_macro)); - std::optional prog = m_shader_cache.GetProgram(m_convert.vs, {}, ps); + std::optional prog = m_shader_cache.GetProgram(m_convert.vs, ps); if (!prog.has_value()) { Console.Error("Failed to compile FXAA fragment shader"); @@ -1725,7 +1721,7 @@ bool GSDeviceOGL::CompileShadeBoostProgram() } const std::string ps(GetShaderSource("ps_main", GL_FRAGMENT_SHADER, *shader)); - if (!m_shader_cache.GetProgram(&m_shadeboost.ps, m_convert.vs, {}, ps)) + if (!m_shader_cache.GetProgram(&m_shadeboost.ps, m_convert.vs, ps)) return false; m_shadeboost.ps.RegisterUniform("params"); m_shadeboost.ps.SetName("Shadeboost pipe"); @@ -1770,6 +1766,7 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver // ia + IASetVAO(m_vao); IASetVertexBuffer(vertices, 4); IASetPrimitiveTopology(GL_TRIANGLE_STRIP); @@ -1787,6 +1784,15 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver } } +void GSDeviceOGL::IASetVAO(GLuint vao) +{ + if (GLState::vao == vao) + return; + + GLState::vao = vao; + glBindVertexArray(vao); +} + void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count) { const u32 size = static_cast(count) * sizeof(GSVertexPT1); @@ -1862,12 +1868,6 @@ bool GSDeviceOGL::CreateCASPrograms() } const char* header = - GLLoader::is_gles ? 
- "#version 320 es\n" - "precision highp float;\n" - "precision highp int;\n" - "precision highp sampler2D;\n" - "precision highp image2D;\n" : "#version 420\n" "#extension GL_ARB_compute_shader : require\n"; const char* sharpen_params[2] = { @@ -1921,7 +1921,7 @@ bool GSDeviceOGL::CreateImGuiProgram() } std::optional prog = m_shader_cache.GetProgram( - GetShaderSource("vs_main", GL_VERTEX_SHADER, glsl.value()), {}, + GetShaderSource("vs_main", GL_VERTEX_SHADER, glsl.value()), GetShaderSource("ps_main", GL_FRAGMENT_SHADER, glsl.value())); if (!prog.has_value()) { @@ -1937,6 +1937,7 @@ bool GSDeviceOGL::CreateImGuiProgram() glGenVertexArrays(1, &m_imgui.vao); glBindVertexArray(m_imgui.vao); m_vertex_stream_buffer->Bind(); + m_index_stream_buffer->Bind(); glEnableVertexAttribArray(0); glEnableVertexAttribArray(1); @@ -1945,7 +1946,7 @@ bool GSDeviceOGL::CreateImGuiProgram() glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(ImDrawVert), (GLvoid*)IM_OFFSETOF(ImDrawVert, uv)); glVertexAttribPointer(2, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(ImDrawVert), (GLvoid*)IM_OFFSETOF(ImDrawVert, col)); - glBindVertexArray(m_vertex_array_object); + glBindVertexArray(GLState::vao); return true; } @@ -1973,7 +1974,7 @@ void GSDeviceOGL::RenderImGui() m_imgui.ps.Bind(); m_imgui.ps.UniformMatrix4fv(0, &ortho_projection[0][0]); - glBindVertexArray(m_imgui.vao); + IASetVAO(m_imgui.vao); OMSetBlendState(true, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_FUNC_ADD); OMSetDepthStencilState(m_convert.dss); PSSetSamplerState(m_convert.ln); @@ -2041,7 +2042,7 @@ void GSDeviceOGL::RenderImGui() g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size); } - glBindVertexArray(m_vertex_array_object); + IASetVAO(m_vao); glScissor(GLState::scissor.x, GLState::scissor.y, GLState::scissor.width(), GLState::scissor.height()); } @@ -2224,10 +2225,9 @@ void GSDeviceOGL::SetupPipeline(const ProgramSelector& psel) const std::string vs(GetVSSource(psel.vs)); const std::string ps(GetPSSource(psel.ps)); - 
const std::string gs((psel.gs.key != 0) ? GetGSSource(psel.gs) : std::string()); GL::Program prog; - m_shader_cache.GetProgram(&prog, vs, gs, ps); + m_shader_cache.GetProgram(&prog, vs, ps); it = m_programs.emplace(psel, std::move(prog)).first; it->second.Bind(); } @@ -2330,7 +2330,26 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config) } IASetVertexBuffer(config.verts, config.nverts); - IASetIndexBuffer(config.indices, config.nindices); + if (config.vs.expand != GSHWDrawConfig::VSExpand::None && !GLAD_GL_ARB_shader_draw_parameters) + { + // Need to offset the buffer. + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_vertex_stream_buffer->GetGLBufferId(), + m_vertex.start * sizeof(GSVertex), config.nverts * sizeof(GSVertex)); + m_vertex.start = 0; + } + + if (config.vs.UseExpandIndexBuffer()) + { + IASetVAO(m_expand_vao); + m_index.start = 0; + m_index.count = config.nindices; + } + else + { + IASetVAO(m_vao); + IASetIndexBuffer(config.indices, config.nindices); + } + GLenum topology = 0; switch (config.topology) { @@ -2363,15 +2382,12 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config) psel.vs = config.vs; psel.ps.key_hi = config.ps.key_hi; psel.ps.key_lo = config.ps.key_lo; - psel.gs.key = 0; psel.pad = 0; - if (config.gs.expand) - psel.gs.key = config.gs.key; SetupPipeline(psel); // additional non-pipeline config stuff - const bool point_size_enabled = config.vs.point_size && !GLLoader::is_gles; + const bool point_size_enabled = config.vs.point_size; if (GLState::point_size != point_size_enabled) { if (point_size_enabled) @@ -2516,47 +2532,45 @@ void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, bool needs_barrier) g_perfmon.Put(GSPerfMon::Barriers, static_cast(config.drawlist->size())); - for (size_t count = 0, p = 0, n = 0; n < config.drawlist->size(); p += count, ++n) + const u32 indices_per_prim = config.indices_per_prim; + const u32 draw_list_size = static_cast(config.drawlist->size()); + + for (u32 n = 0, p = 0; n < draw_list_size; n++) { - 
count = (*config.drawlist)[n] * config.indices_per_prim; + const u32 count = (*config.drawlist)[n] * indices_per_prim; glTextureBarrier(); DrawIndexedPrimitive(p, count); + p += count; } return; } - const bool tex_is_ds = config.tex && config.tex == config.ds; - if ((needs_barrier && m_features.texture_barrier) || tex_is_ds) + if (needs_barrier && m_features.texture_barrier) { if (config.require_full_barrier) { - GL_PUSH("Split the draw"); + const u32 indices_per_prim = config.indices_per_prim; - GL_PERF("Split single draw in %d draw", config.nindices / config.indices_per_prim); + GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim); g_perfmon.Put(GSPerfMon::Barriers, config.nindices / config.indices_per_prim); - for (size_t p = 0; p < config.nindices; p += config.indices_per_prim) + for (u32 p = 0; p < config.nindices; p += indices_per_prim) { glTextureBarrier(); - DrawIndexedPrimitive(p, config.indices_per_prim); + DrawIndexedPrimitive(p, indices_per_prim); } return; } - if (config.require_one_barrier || tex_is_ds) + if (config.require_one_barrier) { - // The common renderer code doesn't put a barrier here because D3D/VK need to copy the DS, so we need to check it. - // One barrier needed for non-overlapping draw. 
g_perfmon.Put(GSPerfMon::Barriers, 1); glTextureBarrier(); - DrawIndexedPrimitive(); - return; } } - // No barriers needed DrawIndexedPrimitive(); } diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h index 18936bbd47..fe69b4205e 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h @@ -121,7 +121,6 @@ class GSDeviceOGL final : public GSDevice { public: using VSSelector = GSHWDrawConfig::VSSelector; - using GSSelector = GSHWDrawConfig::GSSelector; using PSSelector = GSHWDrawConfig::PSSelector; using PSSamplerSelector = GSHWDrawConfig::SamplerSelector; using OMDepthStencilSelector = GSHWDrawConfig::DepthStencilSelector; @@ -131,7 +130,6 @@ public: { PSSelector ps; VSSelector vs; - GSSelector gs; u16 pad; __fi bool operator==(const ProgramSelector& p) const { return (std::memcmp(this, &p, sizeof(*this)) == 0); } @@ -144,7 +142,7 @@ public: __fi std::size_t operator()(const ProgramSelector& p) const noexcept { std::size_t h = 0; - HashCombine(h, p.vs.key, p.gs.key, p.ps.key_hi, p.ps.key_lo); + HashCombine(h, p.vs.key, p.ps.key_hi, p.ps.key_lo); return h; } }; @@ -160,7 +158,9 @@ private: std::unique_ptr m_vertex_stream_buffer; std::unique_ptr m_index_stream_buffer; - GLuint m_vertex_array_object = 0; + GLuint m_expand_ibo = 0; + GLuint m_vao = 0; + GLuint m_expand_vao = 0; GLenum m_draw_topology = 0; std::unique_ptr m_vertex_uniform_stream_buffer; @@ -344,6 +344,7 @@ public: void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm); + void IASetVAO(GLuint vao); void IASetPrimitiveTopology(GLenum topology); void IASetVertexBuffer(const void* vertices, size_t count); void IASetIndexBuffer(const void* index, size_t count); @@ -367,7 +368,6 @@ public: const std::string_view& macro_sel = std::string_view()); std::string GenGlslHeader(const std::string_view& entry, GLenum type, const std::string_view& macro); std::string GetVSSource(VSSelector sel); - 
std::string GetGSSource(GSSelector sel); std::string GetPSSource(const PSSelector& sel); GLuint CreateSampler(PSSamplerSelector sel); GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel); diff --git a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp index 6b938336b5..0c17a70616 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp @@ -94,7 +94,7 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format // Depth buffer case Format::DepthStencil: { - if (!GLLoader::found_framebuffer_fetch) + if (!g_gs_device->Features().framebuffer_fetch) { gl_fmt = GL_DEPTH32F_STENCIL8; m_int_format = GL_DEPTH_STENCIL; diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 363c45ceb6..e4a5f445ef 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -581,17 +581,17 @@ bool GSDeviceVK::CheckFeatures() m_features.framebuffer_fetch = g_vulkan_context->GetOptionalExtensions().vk_arm_rasterization_order_attachment_access && !GSConfig.DisableFramebufferFetch; m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0; m_features.broken_point_sampler = isAMD; - m_features.geometry_shader = features.geometryShader && GSConfig.OverrideGeometryShaders != 0; // Usually, geometry shader indicates primid support // However on Metal (MoltenVK), geometry shader is never available, but primid sometimes is // Officially, it's available on GPUs that support barycentric coordinates (Newer AMD and Apple) // Unofficially, it seems to work on older Intel GPUs (but breaks other things on newer Intel GPUs, see GSMTLDeviceInfo.mm for details) // We'll only enable for the officially supported GPUs here. We'll leave in the option of force-enabling it with OverrideGeometryShaders though. 
- m_features.primitive_id = features.geometryShader || GSConfig.OverrideGeometryShaders == 1 || g_vulkan_context->GetOptionalExtensions().vk_khr_fragment_shader_barycentric; + m_features.primitive_id = features.geometryShader || g_vulkan_context->GetOptionalExtensions().vk_khr_fragment_shader_barycentric; m_features.prefer_new_textures = true; m_features.provoking_vertex_last = g_vulkan_context->GetOptionalExtensions().vk_ext_provoking_vertex; m_features.dual_source_blend = features.dualSrcBlend && !GSConfig.DisableDualSourceBlend; m_features.clip_control = true; + m_features.vs_expand = g_vulkan_context->GetOptionalExtensions().vk_khr_shader_draw_parameters; if (!m_features.dual_source_blend) Console.Warning("Vulkan driver is missing dual-source blending. This will have an impact on performance."); @@ -624,9 +624,10 @@ bool GSDeviceVK::CheckFeatures() (features.largePoints && limits.pointSizeRange[0] <= f_upscale && limits.pointSizeRange[1] >= f_upscale); m_features.line_expand = (features.wideLines && limits.lineWidthRange[0] <= f_upscale && limits.lineWidthRange[1] >= f_upscale); + DevCon.WriteLn("Using %s for point expansion and %s for line expansion.", - m_features.point_expand ? "hardware" : "geometry shaders", - m_features.line_expand ? "hardware" : "geometry shaders"); + m_features.point_expand ? "hardware" : "vertex expanding", + m_features.line_expand ? "hardware" : "vertex expanding"); // Check texture format support before we try to create them. for (u32 fmt = static_cast(GSTexture::Format::Color); fmt < static_cast(GSTexture::Format::PrimID); fmt++) @@ -1004,6 +1005,7 @@ void GSDeviceVK::DoMultiStretchRects( m_index.count = icount; m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1)); m_index_stream_buffer.CommitMemory(icount * sizeof(u32)); + SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32); // Even though we're batching, a cmdbuffer submit could've messed this up. 
const GSVector4i rc(dTex->GetRect()); @@ -1379,6 +1381,8 @@ void GSDeviceVK::IASetIndexBuffer(const void* index, size_t count) std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size); m_index_stream_buffer.CommitMemory(size); + + SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32); } void GSDeviceVK::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, FeedbackLoopFlag feedback_loop) @@ -1493,10 +1497,14 @@ static void AddMacro(std::stringstream& ss, const char* name, int value) static void AddShaderHeader(std::stringstream& ss) { + const GSDevice::FeatureSupport features(g_gs_device->Features()); + ss << "#version 460 core\n"; ss << "#extension GL_EXT_samplerless_texture_functions : require\n"; - const GSDevice::FeatureSupport features(g_gs_device->Features()); + if (features.vs_expand) + ss << "#extension GL_ARB_shader_draw_parameters : require\n"; + if (!features.texture_barrier) ss << "#define DISABLE_TEXTURE_BARRIER 1\n"; if (!features.dual_source_blend) @@ -1568,7 +1576,9 @@ bool GSDeviceVK::CreateNullTexture() bool GSDeviceVK::CreateBuffers() { - if (!m_vertex_stream_buffer.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_BUFFER_SIZE)) + if (!m_vertex_stream_buffer.Create( + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | (m_features.vs_expand ? 
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : 0), + VERTEX_BUFFER_SIZE)) { Host::ReportErrorAsync("GS", "Failed to allocate vertex buffer"); return false; @@ -1593,7 +1603,14 @@ bool GSDeviceVK::CreateBuffers() } SetVertexBuffer(m_vertex_stream_buffer.GetBuffer(), 0); - SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32); + + if (!g_vulkan_context->AllocatePreinitializedGPUBuffer(EXPAND_BUFFER_SIZE, &m_expand_index_buffer, + &m_expand_index_buffer_allocation, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + &GSDevice::GenerateExpansionIndexBuffer)) + { + Host::ReportErrorAsync("GS", "Failed to allocate expansion index buffer"); + return false; + } return true; } @@ -1625,6 +1642,8 @@ bool GSDeviceVK::CreatePipelineLayouts() dslb.AddBinding( 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT); dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + if (m_features.vs_expand) + dslb.AddBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT); if ((m_tfx_ubo_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE) return false; Vulkan::Util::SetObjectName(dev, m_tfx_ubo_ds_layout, "TFX UBO descriptor layout"); @@ -2366,9 +2385,6 @@ void GSDeviceVK::RenderImGui() g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size); } - - // normal draws use 32-bit indices - SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32); } bool GSDeviceVK::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) @@ -2449,8 +2465,6 @@ void GSDeviceVK::DestroyResources() Vulkan::Util::SafeDestroyPipeline(it.second); for (auto& it : m_tfx_fragment_shaders) Vulkan::Util::SafeDestroyShaderModule(it.second); - for (auto& it : m_tfx_geometry_shaders) - Vulkan::Util::SafeDestroyShaderModule(it.second); for (auto& it : m_tfx_vertex_shaders) Vulkan::Util::SafeDestroyShaderModule(it.second); for (VkPipeline& it : m_interlace) @@ -2506,6 
+2520,12 @@ void GSDeviceVK::DestroyResources() m_vertex_uniform_stream_buffer.Destroy(false); m_index_stream_buffer.Destroy(false); m_vertex_stream_buffer.Destroy(false); + if (m_expand_index_buffer != VK_NULL_HANDLE) + { + vmaDestroyBuffer(g_vulkan_context->GetAllocator(), m_expand_index_buffer, m_expand_index_buffer_allocation); + m_expand_index_buffer = VK_NULL_HANDLE; + m_expand_index_buffer_allocation = VK_NULL_HANDLE; + } Vulkan::Util::SafeDestroyPipelineLayout(m_tfx_pipeline_layout); Vulkan::Util::SafeDestroyDescriptorSetLayout(m_tfx_rt_texture_ds_layout); @@ -2530,6 +2550,8 @@ VkShaderModule GSDeviceVK::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel) AddMacro(ss, "VS_FST", sel.fst); AddMacro(ss, "VS_IIP", sel.iip); AddMacro(ss, "VS_POINT_SIZE", sel.point_size); + AddMacro(ss, "VS_EXPAND", static_cast(sel.expand)); + AddMacro(ss, "VS_PROVOKING_VERTEX_LAST", static_cast(m_features.provoking_vertex_last)); ss << m_tfx_source; VkShaderModule mod = g_vulkan_shader_cache->GetVertexShader(ss.str()); @@ -2540,29 +2562,6 @@ VkShaderModule GSDeviceVK::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel) return mod; } -VkShaderModule GSDeviceVK::GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel) -{ - const auto it = m_tfx_geometry_shaders.find(sel.key); - if (it != m_tfx_geometry_shaders.end()) - return it->second; - - std::stringstream ss; - AddShaderHeader(ss); - AddShaderStageMacro(ss, false, true, false); - AddMacro(ss, "GS_IIP", sel.iip); - AddMacro(ss, "GS_PRIM", static_cast(sel.topology)); - AddMacro(ss, "GS_EXPAND", sel.expand); - AddMacro(ss, "GS_FORWARD_PRIMID", sel.forward_primid); - ss << m_tfx_source; - - VkShaderModule mod = g_vulkan_shader_cache->GetGeometryShader(ss.str()); - if (mod) - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), mod, "TFX Geometry %08X", sel.key); - - m_tfx_geometry_shaders.emplace(sel.key, mod); - return mod; -} - VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector& sel) { const auto it 
= m_tfx_fragment_shaders.find(sel); @@ -2651,9 +2650,8 @@ VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p) } VkShaderModule vs = GetTFXVertexShader(p.vs); - VkShaderModule gs = p.gs.expand ? GetTFXGeometryShader(p.gs) : VK_NULL_HANDLE; VkShaderModule fs = GetTFXFragmentShader(pps); - if (vs == VK_NULL_HANDLE || (p.gs.expand && gs == VK_NULL_HANDLE) || fs == VK_NULL_HANDLE) + if (vs == VK_NULL_HANDLE || fs == VK_NULL_HANDLE) return VK_NULL_HANDLE; Vulkan::GraphicsPipelineBuilder gpb; @@ -2685,19 +2683,20 @@ VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p) // Shaders gpb.SetVertexShader(vs); - if (gs != VK_NULL_HANDLE) - gpb.SetGeometryShader(gs); gpb.SetFragmentShader(fs); // IA - gpb.AddVertexBuffer(0, sizeof(GSVertex)); - gpb.AddVertexAttribute(0, 0, VK_FORMAT_R32G32_SFLOAT, 0); // ST - gpb.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UINT, 8); // RGBA - gpb.AddVertexAttribute(2, 0, VK_FORMAT_R32_SFLOAT, 12); // Q - gpb.AddVertexAttribute(3, 0, VK_FORMAT_R16G16_UINT, 16); // XY - gpb.AddVertexAttribute(4, 0, VK_FORMAT_R32_UINT, 20); // Z - gpb.AddVertexAttribute(5, 0, VK_FORMAT_R16G16_UINT, 24); // UV - gpb.AddVertexAttribute(6, 0, VK_FORMAT_R8G8B8A8_UNORM, 28); // FOG + if (p.vs.expand == GSHWDrawConfig::VSExpand::None) + { + gpb.AddVertexBuffer(0, sizeof(GSVertex)); + gpb.AddVertexAttribute(0, 0, VK_FORMAT_R32G32_SFLOAT, 0); // ST + gpb.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UINT, 8); // RGBA + gpb.AddVertexAttribute(2, 0, VK_FORMAT_R32_SFLOAT, 12); // Q + gpb.AddVertexAttribute(3, 0, VK_FORMAT_R16G16_UINT, 16); // XY + gpb.AddVertexAttribute(4, 0, VK_FORMAT_R32_UINT, 20); // Z + gpb.AddVertexAttribute(5, 0, VK_FORMAT_R16G16_UINT, 24); // UV + gpb.AddVertexAttribute(6, 0, VK_FORMAT_R8G8B8A8_UNORM, 28); // FOG + } // DepthStencil static const VkCompareOp ztst[] = { @@ -2753,7 +2752,7 @@ VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p) if (pipeline) { Vulkan::Util::SetObjectName( - 
g_vulkan_context->GetDevice(), pipeline, "TFX Pipeline %08X/%08X/%" PRIX64 "%08X", p.vs.key, p.gs.key, p.ps.key_hi, p.ps.key_lo); + g_vulkan_context->GetDevice(), pipeline, "TFX Pipeline %08X/%" PRIX64 "%08X", p.vs.key, p.ps.key_hi, p.ps.key_lo); } return pipeline; @@ -2822,6 +2821,11 @@ bool GSDeviceVK::CreatePersistentDescriptorSets() m_vertex_uniform_stream_buffer.GetBuffer(), 0, sizeof(GSHWDrawConfig::VSConstantBuffer)); dsub.AddBufferDescriptorWrite(m_tfx_descriptor_sets[0], 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, m_fragment_uniform_stream_buffer.GetBuffer(), 0, sizeof(GSHWDrawConfig::PSConstantBuffer)); + if (m_features.vs_expand) + { + dsub.AddBufferDescriptorWrite(m_tfx_descriptor_sets[0], 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + m_vertex_stream_buffer.GetBuffer(), 0, VERTEX_BUFFER_SIZE); + } dsub.Update(dev); Vulkan::Util::SetObjectName(dev, m_tfx_descriptor_sets[0], "Persistent TFX UBO set"); return true; @@ -3476,8 +3480,7 @@ GSTextureVK* GSDeviceVK::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config) DrawPrimitive(); // image is now filled with either -1 or INT_MAX, so now we can do the prepass - IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts); - IASetIndexBuffer(config.indices, config.nindices); + UploadHWDrawVerticesAndIndices(config); // cut down the configuration for the prepass, we don't need blending or any feedback loop PipelineSelector& pipe = m_pipeline_selector; @@ -3722,10 +3725,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) // VB/IB upload, if we did DATE setup and it's not HDR this has already been done if (!date_image || hdr_rt) - { - IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts); - IASetIndexBuffer(config.indices, config.nindices); - } + UploadHWDrawVerticesAndIndices(config); // now we can do the actual draw if (BindDrawPipeline(pipe)) @@ -3818,7 +3818,6 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) void GSDeviceVK::UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& 
pipe) { pipe.vs.key = config.vs.key; - pipe.gs.key = config.gs.key; pipe.ps.key_hi = config.ps.key_hi; pipe.ps.key_lo = config.ps.key_lo; pipe.dss.key = config.depth.key; @@ -3840,6 +3839,22 @@ void GSDeviceVK::UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelect pipe.vs.point_size |= (config.topology == GSHWDrawConfig::Topology::Point); } +void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config) +{ + IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts); + + if (config.vs.UseExpandIndexBuffer()) + { + m_index.start = 0; + m_index.count = config.nindices; + SetIndexBuffer(m_expand_index_buffer, 0, VK_INDEX_TYPE_UINT32); + } + else + { + IASetIndexBuffer(config.indices, config.nindices); + } +} + void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier) { if (config.drawlist) @@ -3847,23 +3862,25 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, GL_PUSH("Split the draw (SPRITE)"); g_perfmon.Put(GSPerfMon::Barriers, static_cast(config.drawlist->size()) - static_cast(skip_first_barrier)); - u32 count = 0; + const u32 indices_per_prim = config.indices_per_prim; + const u32 draw_list_size = static_cast(config.drawlist->size()); u32 p = 0; u32 n = 0; if (skip_first_barrier) { - count = (*config.drawlist)[n] * config.indices_per_prim; + const u32 count = (*config.drawlist)[n] * indices_per_prim; DrawIndexedPrimitive(p, count); p += count; ++n; } - for (; n < static_cast(config.drawlist->size()); p += count, ++n) + for (; n < draw_list_size; n++) { - count = (*config.drawlist)[n] * config.indices_per_prim; + const u32 count = (*config.drawlist)[n] * indices_per_prim; ColorBufferBarrier(draw_rt); DrawIndexedPrimitive(p, count); + p += count; } return; @@ -3873,21 +3890,22 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, { if (config.require_full_barrier) { - GL_PUSH("Split single draw in %d draw", config.nindices / 
config.indices_per_prim); - g_perfmon.Put(GSPerfMon::Barriers, (config.nindices / config.indices_per_prim) - static_cast(skip_first_barrier)); + const u32 indices_per_prim = config.indices_per_prim; + + GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim); + g_perfmon.Put(GSPerfMon::Barriers, (config.nindices / indices_per_prim) - static_cast(skip_first_barrier)); - const u32 ipp = config.indices_per_prim; u32 p = 0; if (skip_first_barrier) { - DrawIndexedPrimitive(p, ipp); - p += ipp; + DrawIndexedPrimitive(p, indices_per_prim); + p += indices_per_prim; } - for (; p < config.nindices; p += ipp) + for (; p < config.nindices; p += indices_per_prim) { ColorBufferBarrier(draw_rt); - DrawIndexedPrimitive(p, ipp); + DrawIndexedPrimitive(p, indices_per_prim); } return; @@ -3897,11 +3915,8 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, { g_perfmon.Put(GSPerfMon::Barriers, 1); ColorBufferBarrier(draw_rt); - DrawIndexedPrimitive(); - return; } } - // Don't need any barrier DrawIndexedPrimitive(); } diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h index 16344c6786..f0375a0f96 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -58,7 +58,6 @@ public: }; GSHWDrawConfig::VSSelector vs; - GSHWDrawConfig::GSSelector gs; GSHWDrawConfig::DepthStencilSelector dss; GSHWDrawConfig::ColorMaskSelector cms; GSHWDrawConfig::BlendState bs; @@ -78,7 +77,7 @@ public: std::size_t operator()(const PipelineSelector& e) const noexcept { std::size_t hash = 0; - HashCombine(hash, e.vs.key, e.gs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key); + HashCombine(hash, e.vs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key); return hash; } }; @@ -123,6 +122,8 @@ private: Vulkan::StreamBuffer m_index_stream_buffer; Vulkan::StreamBuffer m_vertex_uniform_stream_buffer; Vulkan::StreamBuffer 
m_fragment_uniform_stream_buffer; + VkBuffer m_expand_index_buffer = VK_NULL_HANDLE; + VmaAllocation m_expand_index_buffer_allocation = VK_NULL_HANDLE; VkSampler m_point_sampler = VK_NULL_HANDLE; VkSampler m_linear_sampler = VK_NULL_HANDLE; @@ -142,7 +143,6 @@ private: VkPipeline m_shadeboost_pipeline = {}; std::unordered_map m_tfx_vertex_shaders; - std::unordered_map m_tfx_geometry_shaders; std::unordered_map m_tfx_fragment_shaders; std::unordered_map m_tfx_pipelines; @@ -183,7 +183,6 @@ private: void ClearSamplerCache() final; VkShaderModule GetTFXVertexShader(GSHWDrawConfig::VSSelector sel); - VkShaderModule GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel); VkShaderModule GetTFXFragmentShader(const GSHWDrawConfig::PSSelector& sel); VkPipeline CreateTFXPipeline(const PipelineSelector& p); VkPipeline GetTFXPipeline(const PipelineSelector& p); @@ -307,6 +306,7 @@ public: void RenderHW(GSHWDrawConfig& config) override; void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe); + void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config); void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier); ////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 9ed6bdde7c..208b3048e1 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -529,7 +529,6 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const OpEqu(UserHacks_GPUTargetCLUTMode) && OpEqu(UserHacks_TextureInsideRt) && OpEqu(OverrideTextureBarriers) && - OpEqu(OverrideGeometryShaders) && OpEqu(CAS_Sharpness) && OpEqu(ShadeBoost_Brightness) && @@ -574,8 +573,7 @@ bool Pcsx2Config::GSOptions::RestartOptionsAreEqual(const GSOptions& right) cons OpEqu(DisableDualSourceBlend) && OpEqu(DisableFramebufferFetch) && OpEqu(DisableThreadedPresentation) && - OpEqu(OverrideTextureBarriers) && - OpEqu(OverrideGeometryShaders); + 
OpEqu(OverrideTextureBarriers); } void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap) @@ -724,7 +722,6 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap) GSSettingIntEnumEx(UserHacks_GPUTargetCLUTMode, "UserHacks_GPUTargetCLUTMode"); GSSettingIntEnumEx(TriFilter, "TriFilter"); GSSettingIntEx(OverrideTextureBarriers, "OverrideTextureBarriers"); - GSSettingIntEx(OverrideGeometryShaders, "OverrideGeometryShaders"); GSSettingInt(ShadeBoost_Brightness); GSSettingInt(ShadeBoost_Contrast); diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index 4b7670e1f9..7ad59e8e89 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -15,4 +15,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 23; +static constexpr u32 SHADER_CACHE_VERSION = 24;