mirror of https://github.com/PCSX2/pcsx2.git
GS/HW: VS expand instead of GS for DX/GL/Vulkan
This commit is contained in:
parent
6877abb2ec
commit
7f7dd60587
|
@ -1,5 +1,3 @@
|
|||
#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency
|
||||
|
||||
#define FMT_32 0
|
||||
#define FMT_24 1
|
||||
#define FMT_16 2
|
||||
|
@ -113,6 +111,8 @@ struct PS_INPUT
|
|||
#endif
|
||||
};
|
||||
|
||||
#ifdef PIXEL_SHADER
|
||||
|
||||
struct PS_OUTPUT
|
||||
{
|
||||
#if !PS_NO_COLOR
|
||||
|
@ -136,21 +136,6 @@ Texture2D<float4> RtTexture : register(t2);
|
|||
Texture2D<float> PrimMinTexture : register(t3);
|
||||
SamplerState TextureSampler : register(s0);
|
||||
|
||||
// Constant buffer cb0. Under DX12 it is pinned to register b0; on the other
// path no explicit register is given.
#ifdef DX12
|
||||
cbuffer cb0 : register(b0)
|
||||
#else
|
||||
cbuffer cb0
|
||||
#endif
|
||||
{
|
||||
float2 VertexScale;
|
||||
float2 VertexOffset;
|
||||
float2 TextureScale;
|
||||
float2 TextureOffset;
|
||||
// Extent added to a point's position, and scale applied to the line normal, by the geometry-shader expansion.
float2 PointSize;
|
||||
// NOTE(review): presumably the limit referred to by the "Clamp to max depth" comment in vs_main - confirm.
uint MaxDepth;
|
||||
// NOTE(review): name suggests an unused filler slot; not referenced in the visible code.
uint pad_cb0;
|
||||
};
|
||||
|
||||
#ifdef DX12
|
||||
cbuffer cb1 : register(b1)
|
||||
#else
|
||||
|
@ -1062,10 +1047,29 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
|||
return output;
|
||||
}
|
||||
|
||||
#endif // PIXEL_SHADER
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Vertex Shader
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef VERTEX_SHADER
|
||||
|
||||
// Constants read by the vertex stage (declared inside the VERTEX_SHADER section).
// Under DX12 the buffer is pinned to register b0; on the other path no explicit
// register is given.
#ifdef DX12
|
||||
cbuffer cb0 : register(b0)
|
||||
#else
|
||||
cbuffer cb0
|
||||
#endif
|
||||
{
|
||||
float2 VertexScale;
|
||||
float2 VertexOffset;
|
||||
float2 TextureScale;
|
||||
float2 TextureOffset;
|
||||
// Added to expanded point corners and used to scale the line normal in vs_main_expand.
float2 PointSize;
|
||||
// NOTE(review): presumably the limit referred to by the "Clamp to max depth" comment in vs_main - confirm.
uint MaxDepth;
|
||||
// Added to the vertex index by load_vertex() on the non-DX12 path.
uint BaseVertex; // Only used in DX11.
|
||||
};
|
||||
|
||||
VS_OUTPUT vs_main(VS_INPUT input)
|
||||
{
|
||||
// Clamp to max depth, gs doesn't wrap
|
||||
|
@ -1118,156 +1122,101 @@ VS_OUTPUT vs_main(VS_INPUT input)
|
|||
return output;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Geometry Shader
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
#if VS_EXPAND != 0
|
||||
|
||||
#if GS_FORWARD_PRIMID
|
||||
#define PRIMID_IN , uint primid : SV_PrimitiveID
|
||||
#define VS2PS(x) vs2ps_impl(x, primid)
|
||||
PS_INPUT vs2ps_impl(VS_OUTPUT vs, uint primid)
|
||||
struct VS_RAW_INPUT
|
||||
{
|
||||
PS_INPUT o;
|
||||
o.p = vs.p;
|
||||
o.t = vs.t;
|
||||
o.ti = vs.ti;
|
||||
o.c = vs.c;
|
||||
o.primid = primid;
|
||||
return o;
|
||||
}
|
||||
float2 ST;
|
||||
uint RGBA;
|
||||
float Q;
|
||||
uint XY;
|
||||
uint Z;
|
||||
uint UV;
|
||||
uint FOG;
|
||||
};
|
||||
|
||||
StructuredBuffer<VS_RAW_INPUT> vertices : register(t0);
|
||||
|
||||
VS_INPUT load_vertex(uint index)
|
||||
{
|
||||
#ifdef DX12
|
||||
VS_RAW_INPUT raw = vertices.Load(index);
|
||||
#else
|
||||
#define PRIMID_IN
|
||||
#define VS2PS(x) vs2ps_impl(x)
|
||||
PS_INPUT vs2ps_impl(VS_OUTPUT vs)
|
||||
{
|
||||
PS_INPUT o;
|
||||
o.p = vs.p;
|
||||
o.t = vs.t;
|
||||
o.ti = vs.ti;
|
||||
o.c = vs.c;
|
||||
return o;
|
||||
}
|
||||
VS_RAW_INPUT raw = vertices.Load(BaseVertex + index);
|
||||
#endif
|
||||
|
||||
#if GS_PRIM == 0
|
||||
|
||||
[maxvertexcount(6)]
|
||||
void gs_main(point VS_OUTPUT input[1], inout TriangleStream<PS_INPUT> stream PRIMID_IN)
|
||||
{
|
||||
// Transform a point to a NxN sprite
|
||||
PS_INPUT Point = VS2PS(input[0]);
|
||||
|
||||
// Get new position
|
||||
float4 lt_p = input[0].p;
|
||||
float4 rb_p = input[0].p + float4(PointSize.x, PointSize.y, 0.0f, 0.0f);
|
||||
float4 lb_p = rb_p;
|
||||
float4 rt_p = rb_p;
|
||||
lb_p.x = lt_p.x;
|
||||
rt_p.y = lt_p.y;
|
||||
|
||||
// Triangle 1
|
||||
Point.p = lt_p;
|
||||
stream.Append(Point);
|
||||
|
||||
Point.p = lb_p;
|
||||
stream.Append(Point);
|
||||
|
||||
Point.p = rt_p;
|
||||
stream.Append(Point);
|
||||
|
||||
// Triangle 2
|
||||
Point.p = lb_p;
|
||||
stream.Append(Point);
|
||||
|
||||
Point.p = rt_p;
|
||||
stream.Append(Point);
|
||||
|
||||
Point.p = rb_p;
|
||||
stream.Append(Point);
|
||||
VS_INPUT vert;
|
||||
vert.st = raw.ST;
|
||||
vert.c = uint4(raw.RGBA & 0xFFu, (raw.RGBA >> 8) & 0xFFu, (raw.RGBA >> 16) & 0xFFu, raw.RGBA >> 24);
|
||||
vert.q = raw.Q;
|
||||
vert.p = uint2(raw.XY & 0xFFFFu, raw.XY >> 16);
|
||||
vert.z = raw.Z;
|
||||
vert.uv = uint2(raw.UV & 0xFFFFu, raw.UV >> 16);
|
||||
vert.f = float4(float(raw.FOG & 0xFFu), float((raw.FOG >> 8) & 0xFFu), float((raw.FOG >> 16) & 0xFFu), float(raw.FOG >> 24)) / 255.0f;
|
||||
return vert;
|
||||
}
|
||||
|
||||
#elif GS_PRIM == 1
|
||||
|
||||
[maxvertexcount(6)]
|
||||
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<PS_INPUT> stream PRIMID_IN)
|
||||
VS_OUTPUT vs_main_expand(uint vid : SV_VertexID)
|
||||
{
|
||||
// Transform a line to a thick line-sprite
|
||||
PS_INPUT left = VS2PS(input[0]);
|
||||
PS_INPUT right = VS2PS(input[1]);
|
||||
float2 lt_p = input[0].p.xy;
|
||||
float2 rt_p = input[1].p.xy;
|
||||
#if VS_EXPAND == 1 // Point
|
||||
|
||||
// Potentially there is faster math
|
||||
float2 line_vector = normalize(rt_p.xy - lt_p.xy);
|
||||
VS_OUTPUT vtx = vs_main(load_vertex(vid >> 2));
|
||||
|
||||
vtx.p.x += ((vid & 1u) != 0u) ? PointSize.x : 0.0f;
|
||||
vtx.p.y += ((vid & 2u) != 0u) ? PointSize.y : 0.0f;
|
||||
|
||||
return vtx;
|
||||
|
||||
#elif VS_EXPAND == 2 // Line
|
||||
|
||||
uint vid_base = vid >> 2;
|
||||
bool is_bottom = vid & 2;
|
||||
bool is_right = vid & 1;
|
||||
// All lines will be a pair of vertices next to each other
|
||||
// Since DirectX uses provoking vertex first, the bottom point will be the lower of the two
|
||||
uint vid_other = is_bottom ? vid_base + 1 : vid_base - 1;
|
||||
VS_OUTPUT vtx = vs_main(load_vertex(vid_base));
|
||||
VS_OUTPUT other = vs_main(load_vertex(vid_other));
|
||||
|
||||
float2 line_vector = normalize(vtx.p.xy - other.p.xy);
|
||||
float2 line_normal = float2(line_vector.y, -line_vector.x);
|
||||
float2 line_width = (line_normal * PointSize) / 2;
|
||||
// line_normal is inverted for bottom point
|
||||
float2 offset = (is_bottom ^ is_right) ? line_width : -line_width;
|
||||
vtx.p.xy += offset;
|
||||
|
||||
lt_p -= line_width;
|
||||
rt_p -= line_width;
|
||||
float2 lb_p = input[0].p.xy + line_width;
|
||||
float2 rb_p = input[1].p.xy + line_width;
|
||||
// Lines will be run as (0 1 2) (1 2 3)
|
||||
// This means that both triangles will have a point based off the top line point as their first point
|
||||
// So we don't have to do anything for !IIP
|
||||
|
||||
#if GS_IIP == 0
|
||||
left.c = right.c;
|
||||
#endif
|
||||
return vtx;
|
||||
|
||||
// Triangle 1
|
||||
left.p.xy = lt_p;
|
||||
stream.Append(left);
|
||||
#elif VS_EXPAND == 3 // Sprite
|
||||
|
||||
left.p.xy = lb_p;
|
||||
stream.Append(left);
|
||||
// Sprite points are always in pairs
|
||||
uint vid_base = vid >> 1;
|
||||
uint vid_lt = vid_base & ~1u;
|
||||
uint vid_rb = vid_base | 1u;
|
||||
|
||||
right.p.xy = rt_p;
|
||||
stream.Append(right);
|
||||
stream.RestartStrip();
|
||||
VS_OUTPUT lt = vs_main(load_vertex(vid_lt));
|
||||
VS_OUTPUT rb = vs_main(load_vertex(vid_rb));
|
||||
VS_OUTPUT vtx = rb;
|
||||
|
||||
// Triangle 2
|
||||
left.p.xy = lb_p;
|
||||
stream.Append(left);
|
||||
bool is_right = ((vid & 1u) != 0u);
|
||||
vtx.p.x = is_right ? lt.p.x : vtx.p.x;
|
||||
vtx.t.x = is_right ? lt.t.x : vtx.t.x;
|
||||
vtx.ti.xz = is_right ? lt.ti.xz : vtx.ti.xz;
|
||||
|
||||
right.p.xy = rt_p;
|
||||
stream.Append(right);
|
||||
bool is_bottom = ((vid & 2u) != 0u);
|
||||
vtx.p.y = is_bottom ? lt.p.y : vtx.p.y;
|
||||
vtx.t.y = is_bottom ? lt.t.y : vtx.t.y;
|
||||
vtx.ti.yw = is_bottom ? lt.ti.yw : vtx.ti.yw;
|
||||
|
||||
right.p.xy = rb_p;
|
||||
stream.Append(right);
|
||||
stream.RestartStrip();
|
||||
}
|
||||
|
||||
#elif GS_PRIM == 3
|
||||
|
||||
// Sprite expansion: the incoming two-vertex line holds the left-top corner in
// input[0] and the right-bottom corner in input[1]. Four vertices are appended
// (lt, lb, rt, rb); depth, t.zw and colour of all of them come from input[1].
[maxvertexcount(4)]
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<PS_INPUT> stream PRIMID_IN)
{
	PS_INPUT corner_lt = VS2PS(input[0]);
	PS_INPUT corner_rb = VS2PS(input[1]);

	// Flat attributes (colour, fog / texture perspective, depth) follow the right-bottom vertex.
	corner_lt.c = corner_rb.c;
	corner_lt.t.zw = corner_rb.t.zw;
	corner_lt.p.z = corner_rb.p.z;

	// Left-bottom: right-bottom vertex carrying the x-related components of left-top.
	PS_INPUT corner_lb = corner_rb;
	corner_lb.ti.xz = corner_lt.ti.xz;
	corner_lb.t.x = corner_lt.t.x;
	corner_lb.p.x = corner_lt.p.x;

	// Right-top: right-bottom vertex carrying the y-related components of left-top.
	PS_INPUT corner_rt = corner_rb;
	corner_rt.ti.yw = corner_lt.ti.yw;
	corner_rt.t.y = corner_lt.t.y;
	corner_rt.p.y = corner_lt.p.y;

	stream.Append(corner_lt);
	stream.Append(corner_lb);
	stream.Append(corner_rt);
	stream.Append(corner_rb);
}
|
||||
return vtx;
|
||||
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // VS_EXPAND
|
||||
|
||||
#endif // VERTEX_SHADER
|
||||
|
|
|
@ -14,13 +14,6 @@ layout(std140, binding = 1) uniform cb20
|
|||
};
|
||||
|
||||
#ifdef VERTEX_SHADER
|
||||
layout(location = 0) in vec2 i_st;
|
||||
layout(location = 2) in vec4 i_c;
|
||||
layout(location = 3) in float i_q;
|
||||
layout(location = 4) in uvec2 i_p;
|
||||
layout(location = 5) in uint i_z;
|
||||
layout(location = 6) in uvec2 i_uv;
|
||||
layout(location = 7) in vec4 i_f;
|
||||
|
||||
out SHADER
|
||||
{
|
||||
|
@ -35,6 +28,16 @@ out SHADER
|
|||
|
||||
const float exp_min32 = exp2(-32.0f);
|
||||
|
||||
#if VS_EXPAND == 0
|
||||
|
||||
layout(location = 0) in vec2 i_st;
|
||||
layout(location = 2) in vec4 i_c;
|
||||
layout(location = 3) in float i_q;
|
||||
layout(location = 4) in uvec2 i_p;
|
||||
layout(location = 5) in uint i_z;
|
||||
layout(location = 6) in uvec2 i_uv;
|
||||
layout(location = 7) in vec4 i_f;
|
||||
|
||||
void texture_coord()
|
||||
{
|
||||
vec2 uv = vec2(i_uv) - TextureOffset;
|
||||
|
@ -91,163 +94,145 @@ void vs_main()
|
|||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
#else // VS_EXPAND
|
||||
|
||||
#ifdef GEOMETRY_SHADER
|
||||
|
||||
in SHADER
|
||||
struct RawVertex
|
||||
{
|
||||
vec4 t_float;
|
||||
vec4 t_int;
|
||||
#if GS_IIP != 0
|
||||
vec4 c;
|
||||
#else
|
||||
flat vec4 c;
|
||||
#endif
|
||||
} GSin[];
|
||||
vec2 ST;
|
||||
uint RGBA;
|
||||
float Q;
|
||||
uint XY;
|
||||
uint Z;
|
||||
uint UV;
|
||||
uint FOG;
|
||||
};
|
||||
|
||||
out SHADER
|
||||
{
|
||||
vec4 t_float;
|
||||
vec4 t_int;
|
||||
#if GS_IIP != 0
|
||||
vec4 c;
|
||||
#else
|
||||
flat vec4 c;
|
||||
#endif
|
||||
} GSout;
|
||||
layout(std140, binding = 2) readonly buffer VertexBuffer {
|
||||
RawVertex vertex_buffer[];
|
||||
};
|
||||
|
||||
struct vertex
|
||||
struct ProcessedVertex
|
||||
{
|
||||
vec4 p;
|
||||
vec4 t_float;
|
||||
vec4 t_int;
|
||||
vec4 c;
|
||||
};
|
||||
|
||||
void out_vertex(in vec4 position, in vertex v)
|
||||
ProcessedVertex load_vertex(uint index)
|
||||
{
|
||||
GSout.t_float = v.t_float;
|
||||
GSout.t_int = v.t_int;
|
||||
// Flat output
|
||||
#if GS_PRIM == 0
|
||||
GSout.c = GSin[0].c;
|
||||
#if defined(GL_ARB_shader_draw_parameters) && GL_ARB_shader_draw_parameters
|
||||
RawVertex rvtx = vertex_buffer[index + gl_BaseVertexARB];
|
||||
#else
|
||||
GSout.c = GSin[1].c;
|
||||
RawVertex rvtx = vertex_buffer[index];
|
||||
#endif
|
||||
gl_Position = position;
|
||||
gl_PrimitiveID = gl_PrimitiveIDIn;
|
||||
EmitVertex();
|
||||
|
||||
vec2 i_st = rvtx.ST;
|
||||
vec4 i_c = vec4(uvec4(rvtx.RGBA & 0xFFu, (rvtx.RGBA >> 8) & 0xFFu, (rvtx.RGBA >> 16) & 0xFFu, rvtx.RGBA >> 24));
|
||||
float i_q = rvtx.Q;
|
||||
uvec2 i_p = uvec2(rvtx.XY & 0xFFFFu, rvtx.XY >> 16);
|
||||
uint i_z = rvtx.Z;
|
||||
uvec2 i_uv = uvec2(rvtx.UV & 0xFFFFu, rvtx.UV >> 16);
|
||||
vec4 i_f = unpackUnorm4x8(rvtx.FOG);
|
||||
|
||||
ProcessedVertex vtx;
|
||||
|
||||
uint z = min(i_z, MaxDepth);
|
||||
vtx.p.xy = vec2(i_p) - vec2(0.05f, 0.05f);
|
||||
vtx.p.xy = vtx.p.xy * VertexScale - VertexOffset;
|
||||
vtx.p.w = 1.0f;
|
||||
|
||||
#if HAS_CLIP_CONTROL
|
||||
vtx.p.z = float(z) * exp_min32;
|
||||
#else
|
||||
vtx.p.z = min(float(z) * exp2(-23.0f), 2.0f) - 1.0f;
|
||||
#endif
|
||||
|
||||
vec2 uv = vec2(i_uv) - TextureOffset;
|
||||
vec2 st = i_st - TextureOffset;
|
||||
|
||||
vtx.t_float.xy = st;
|
||||
vtx.t_float.w = i_q;
|
||||
|
||||
vtx.t_int.xy = uv * TextureScale;
|
||||
#if VS_FST
|
||||
vtx.t_int.zw = uv;
|
||||
#else
|
||||
vtx.t_int.zw = st / TextureScale;
|
||||
#endif
|
||||
|
||||
vtx.c = i_c;
|
||||
vtx.t_float.z = i_f.x;
|
||||
|
||||
return vtx;
|
||||
}
|
||||
|
||||
#if GS_PRIM == 0
|
||||
layout(points) in;
|
||||
void main()
|
||||
{
|
||||
ProcessedVertex vtx;
|
||||
|
||||
#if defined(GL_ARB_shader_draw_parameters) && GL_ARB_shader_draw_parameters
|
||||
uint vid = uint(gl_VertexID - gl_BaseVertexARB);
|
||||
#else
|
||||
layout(lines) in;
|
||||
uint vid = uint(gl_VertexID);
|
||||
#endif
|
||||
layout(triangle_strip, max_vertices = 4) out;
|
||||
|
||||
#if GS_PRIM == 0
|
||||
#if VS_EXPAND == 1 // Point
|
||||
|
||||
void gs_main()
|
||||
{
|
||||
// Transform a point to a NxN sprite
|
||||
vertex point = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c);
|
||||
vtx = load_vertex(vid >> 2);
|
||||
|
||||
// Get new position
|
||||
vec4 lt_p = gl_in[0].gl_Position;
|
||||
vec4 rb_p = gl_in[0].gl_Position + vec4(PointSize.x, PointSize.y, 0.0f, 0.0f);
|
||||
vec4 lb_p = rb_p;
|
||||
vec4 rt_p = rb_p;
|
||||
lb_p.x = lt_p.x;
|
||||
rt_p.y = lt_p.y;
|
||||
vtx.p.x += ((vid & 1u) != 0u) ? PointSize.x : 0.0f;
|
||||
vtx.p.y += ((vid & 2u) != 0u) ? PointSize.y : 0.0f;
|
||||
|
||||
out_vertex(lt_p, point);
|
||||
#elif VS_EXPAND == 2 // Line
|
||||
|
||||
out_vertex(lb_p, point);
|
||||
uint vid_base = vid >> 2;
|
||||
bool is_bottom = (vid & 2u) != 0u;
|
||||
bool is_right = (vid & 1u) != 0u;
|
||||
uint vid_other = is_bottom ? vid_base - 1 : vid_base + 1;
|
||||
vtx = load_vertex(vid_base);
|
||||
ProcessedVertex other = load_vertex(vid_other);
|
||||
|
||||
out_vertex(rt_p, point);
|
||||
|
||||
out_vertex(rb_p, point);
|
||||
|
||||
EndPrimitive();
|
||||
}
|
||||
|
||||
#elif GS_PRIM == 1
|
||||
|
||||
void gs_main()
|
||||
{
|
||||
// Transform a line to a thick line-sprite
|
||||
vertex left = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c);
|
||||
vertex right = vertex(GSin[1].t_float, GSin[1].t_int, GSin[1].c);
|
||||
vec4 lt_p = gl_in[0].gl_Position;
|
||||
vec4 rt_p = gl_in[1].gl_Position;
|
||||
|
||||
// Potentially there is faster math
|
||||
vec2 line_vector = normalize(rt_p.xy - lt_p.xy);
|
||||
vec2 line_vector = normalize(vtx.p.xy - other.p.xy);
|
||||
vec2 line_normal = vec2(line_vector.y, -line_vector.x);
|
||||
vec2 line_width = (line_normal * PointSize) / 2.0f;
|
||||
vec2 line_width = (line_normal * PointSize) / 2;
|
||||
// line_normal is inverted for bottom point
|
||||
vec2 offset = ((uint(is_bottom) ^ uint(is_right)) != 0u) ? line_width : -line_width;
|
||||
vtx.p.xy += offset;
|
||||
|
||||
lt_p.xy -= line_width;
|
||||
rt_p.xy -= line_width;
|
||||
vec4 lb_p = gl_in[0].gl_Position + vec4(line_width, 0.0f, 0.0f);
|
||||
vec4 rb_p = gl_in[1].gl_Position + vec4(line_width, 0.0f, 0.0f);
|
||||
// Lines will be run as (0 1 2) (1 2 3)
|
||||
// This means that both triangles will have a point based off the top line point as their first point
|
||||
// So we don't have to do anything for !IIP
|
||||
|
||||
out_vertex(lt_p, left);
|
||||
#elif VS_EXPAND == 3 // Sprite
|
||||
|
||||
out_vertex(lb_p, left);
|
||||
// Sprite points are always in pairs
|
||||
uint vid_base = vid >> 1;
|
||||
uint vid_lt = vid_base & ~1u;
|
||||
uint vid_rb = vid_base | 1u;
|
||||
|
||||
out_vertex(rt_p, right);
|
||||
ProcessedVertex lt = load_vertex(vid_lt);
|
||||
ProcessedVertex rb = load_vertex(vid_rb);
|
||||
vtx = rb;
|
||||
|
||||
out_vertex(rb_p, right);
|
||||
bool is_right = ((vid & 1u) != 0u);
|
||||
vtx.p.x = is_right ? lt.p.x : vtx.p.x;
|
||||
vtx.t_float.x = is_right ? lt.t_float.x : vtx.t_float.x;
|
||||
vtx.t_int.xz = is_right ? lt.t_int.xz : vtx.t_int.xz;
|
||||
|
||||
EndPrimitive();
|
||||
}
|
||||
|
||||
#else // GS_PRIM == 3
|
||||
|
||||
// Sprite expansion: input 0 is the left-top corner, input 1 the right-bottom
// corner. Emits one four-vertex strip (lt, lb, rt, rb); depth, t_float.zw and
// colour of every vertex are taken from input 1.
void gs_main()
{
	vec4 pos_rb = gl_in[1].gl_Position;
	vec4 pos_lt = gl_in[0].gl_Position;
	vertex v_lt = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c);
	vertex v_rb = vertex(GSin[1].t_float, GSin[1].t_int, GSin[1].c);

	// Flat colour, fog / texture perspective and depth follow the right-bottom vertex.
	v_lt.c = v_rb.c;
	v_lt.t_float.zw = v_rb.t_float.zw;
	pos_lt.z = pos_rb.z;

	// Left-bottom: right-bottom vertex with the x-related components of left-top.
	vec4 pos_lb = vec4(pos_lt.x, pos_rb.yzw);
	vertex v_lb = v_rb;
	v_lb.t_float.x = v_lt.t_float.x;
	v_lb.t_int.xz = v_lt.t_int.xz;

	// Right-top: right-bottom vertex with the y-related components of left-top.
	vec4 pos_rt = vec4(pos_rb.x, pos_lt.y, pos_rb.zw);
	vertex v_rt = v_rb;
	v_rt.t_float.y = v_lt.t_float.y;
	v_rt.t_int.yw = v_lt.t_int.yw;

	out_vertex(pos_lt, v_lt);
	out_vertex(pos_lb, v_lb);
	out_vertex(pos_rt, v_rt);
	out_vertex(pos_rb, v_rb);

	EndPrimitive();
}
|
||||
bool is_bottom = ((vid & 2u) != 0u);
|
||||
vtx.p.y = is_bottom ? lt.p.y : vtx.p.y;
|
||||
vtx.t_float.y = is_bottom ? lt.t_float.y : vtx.t_float.y;
|
||||
vtx.t_int.yw = is_bottom ? lt.t_int.yw : vtx.t_int.yw;
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
gl_Position = vtx.p;
|
||||
VSout.t_float = vtx.t_float;
|
||||
VSout.t_int = vtx.t_int;
|
||||
VSout.c = vtx.c;
|
||||
}
|
||||
|
||||
#endif // VS_EXPAND
|
||||
|
||||
#endif // VERTEX_SHADER
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// Vertex Shader
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(VERTEX_SHADER) || defined(GEOMETRY_SHADER)
|
||||
#if defined(VERTEX_SHADER)
|
||||
|
||||
layout(std140, set = 0, binding = 0) uniform cb0
|
||||
{
|
||||
|
@ -15,18 +15,6 @@ layout(std140, set = 0, binding = 0) uniform cb0
|
|||
uint pad_cb0;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef VERTEX_SHADER
|
||||
|
||||
layout(location = 0) in vec2 a_st;
|
||||
layout(location = 1) in uvec4 a_c;
|
||||
layout(location = 2) in float a_q;
|
||||
layout(location = 3) in uvec2 a_p;
|
||||
layout(location = 4) in uint a_z;
|
||||
layout(location = 5) in uvec2 a_uv;
|
||||
layout(location = 6) in vec4 a_f;
|
||||
|
||||
layout(location = 0) out VSOutput
|
||||
{
|
||||
vec4 t;
|
||||
|
@ -39,17 +27,27 @@ layout(location = 0) out VSOutput
|
|||
#endif
|
||||
} vsOut;
|
||||
|
||||
#if VS_EXPAND == 0
|
||||
|
||||
layout(location = 0) in vec2 a_st;
|
||||
layout(location = 1) in uvec4 a_c;
|
||||
layout(location = 2) in float a_q;
|
||||
layout(location = 3) in uvec2 a_p;
|
||||
layout(location = 4) in uint a_z;
|
||||
layout(location = 5) in uvec2 a_uv;
|
||||
layout(location = 6) in vec4 a_f;
|
||||
|
||||
void main()
|
||||
{
|
||||
// Clamp to max depth, gs doesn't wrap
|
||||
float z = min(a_z, MaxDepth);
|
||||
uint z = min(a_z, MaxDepth);
|
||||
|
||||
// pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
|
||||
// example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty
|
||||
// input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel
|
||||
// example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133
|
||||
|
||||
gl_Position = vec4(a_p, z, 1.0f) - vec4(0.05f, 0.05f, 0, 0);
|
||||
gl_Position = vec4(a_p, float(z), 1.0f) - vec4(0.05f, 0.05f, 0, 0);
|
||||
gl_Position.xy = gl_Position.xy * vec2(VertexScale.x, -VertexScale.y) - vec2(VertexOffset.x, -VertexOffset.y);
|
||||
gl_Position.z *= exp2(-32.0f); // integer->float depth
|
||||
gl_Position.y = -gl_Position.y;
|
||||
|
@ -81,214 +79,149 @@ void main()
|
|||
gl_PointSize = PointSize.x;
|
||||
#endif
|
||||
|
||||
vsOut.c = a_c;
|
||||
vsOut.c = vec4(a_c);
|
||||
vsOut.t.z = a_f.r;
|
||||
}
|
||||
|
||||
#endif
|
||||
#else // VS_EXPAND
|
||||
|
||||
#ifdef GEOMETRY_SHADER
|
||||
|
||||
layout(location = 0) in VSOutput
|
||||
// One vertex as stored in the vertex storage buffer; load_vertex() unpacks the packed fields.
struct RawVertex
|
||||
{
|
||||
vec2 ST;
|
||||
// Four 8-bit colour channels, lowest byte first.
uint RGBA;
|
||||
float Q;
|
||||
// Two 16-bit position components: x in the low half, y in the high half.
uint XY;
|
||||
// Depth; load_vertex() clamps it to MaxDepth.
uint Z;
|
||||
// Two 16-bit texel coordinates: low half first, then high half.
uint UV;
|
||||
// Decoded with unpackUnorm4x8(); load_vertex() only consumes the first component.
uint FOG;
|
||||
};
|
||||
|
||||
// Read-only storage block (set 0, binding 2) holding the raw vertices.
// load_vertex() indexes it with gl_BaseVertexARB + index.
layout(std140, set = 0, binding = 2) readonly buffer VertexBuffer {
|
||||
RawVertex vertex_buffer[];
|
||||
};
|
||||
|
||||
struct ProcessedVertex
|
||||
{
|
||||
vec4 p;
|
||||
vec4 t;
|
||||
vec4 ti;
|
||||
#if GS_IIP != 0
|
||||
vec4 c;
|
||||
vec4 c;
|
||||
};
|
||||
|
||||
ProcessedVertex load_vertex(uint index)
|
||||
{
|
||||
RawVertex rvtx = vertex_buffer[gl_BaseVertexARB + index];
|
||||
|
||||
vec2 a_st = rvtx.ST;
|
||||
uvec4 a_c = uvec4(rvtx.RGBA & 0xFFu, (rvtx.RGBA >> 8) & 0xFFu, (rvtx.RGBA >> 16) & 0xFFu, rvtx.RGBA >> 24);
|
||||
float a_q = rvtx.Q;
|
||||
uvec2 a_p = uvec2(rvtx.XY & 0xFFFFu, rvtx.XY >> 16);
|
||||
uint a_z = rvtx.Z;
|
||||
uvec2 a_uv = uvec2(rvtx.UV & 0xFFFFu, rvtx.UV >> 16);
|
||||
vec4 a_f = unpackUnorm4x8(rvtx.FOG);
|
||||
|
||||
ProcessedVertex vtx;
|
||||
|
||||
uint z = min(a_z, MaxDepth);
|
||||
vtx.p = vec4(a_p, float(z), 1.0f) - vec4(0.05f, 0.05f, 0, 0);
|
||||
vtx.p.xy = vtx.p.xy * vec2(VertexScale.x, -VertexScale.y) - vec2(VertexOffset.x, -VertexOffset.y);
|
||||
vtx.p.z *= exp2(-32.0f); // integer->float depth
|
||||
vtx.p.y = -vtx.p.y;
|
||||
|
||||
#if VS_TME
|
||||
vec2 uv = a_uv - TextureOffset;
|
||||
vec2 st = a_st - TextureOffset;
|
||||
vtx.ti.xy = uv * TextureScale;
|
||||
|
||||
#if VS_FST
|
||||
vtx.ti.zw = uv;
|
||||
#else
|
||||
vtx.ti.zw = st / TextureScale;
|
||||
#endif
|
||||
|
||||
vtx.t.xy = st;
|
||||
vtx.t.w = a_q;
|
||||
#else
|
||||
flat vec4 c;
|
||||
vtx.t = vec4(0.0f, 0.0f, 0.0f, 1.0f);
|
||||
vtx.ti = vec4(0.0f);
|
||||
#endif
|
||||
} gsIn[];
|
||||
|
||||
layout(location = 0) out GSOutput
|
||||
{
|
||||
vec4 t;
|
||||
vec4 ti;
|
||||
#if GS_IIP != 0
|
||||
vec4 c;
|
||||
#else
|
||||
flat vec4 c;
|
||||
#endif
|
||||
} gsOut;
|
||||
vtx.c = a_c;
|
||||
vtx.t.z = a_f.r;
|
||||
|
||||
void WriteVertex(vec4 pos, vec4 t, vec4 ti, vec4 c)
|
||||
{
|
||||
#if GS_FORWARD_PRIMID
|
||||
gl_PrimitiveID = gl_PrimitiveIDIn;
|
||||
#endif
|
||||
gl_Position = pos;
|
||||
gsOut.t = t;
|
||||
gsOut.ti = ti;
|
||||
gsOut.c = c;
|
||||
EmitVertex();
|
||||
return vtx;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Geometry Shader
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if GS_PRIM == 0 && GS_POINT == 0
|
||||
|
||||
layout(points) in;
|
||||
layout(points, max_vertices = 1) out;
|
||||
void main()
|
||||
{
|
||||
WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
|
||||
EndPrimitive();
|
||||
}
|
||||
|
||||
#elif GS_PRIM == 0 && GS_POINT == 1
|
||||
|
||||
layout(points) in;
|
||||
layout(triangle_strip, max_vertices = 4) out;
|
||||
|
||||
void main()
|
||||
{
|
||||
// Transform a point to a NxN sprite
|
||||
ProcessedVertex vtx;
|
||||
uint vid = uint(gl_VertexIndex - gl_BaseVertexARB);
|
||||
|
||||
// Get new position
|
||||
vec4 lt_p = gl_in[0].gl_Position;
|
||||
vec4 rb_p = gl_in[0].gl_Position + vec4(PointSize.x, PointSize.y, 0.0f, 0.0f);
|
||||
vec4 lb_p = rb_p;
|
||||
vec4 rt_p = rb_p;
|
||||
lb_p.x = lt_p.x;
|
||||
rt_p.y = lt_p.y;
|
||||
#if VS_EXPAND == 1 // Point
|
||||
|
||||
WriteVertex(lt_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
|
||||
WriteVertex(lb_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
|
||||
WriteVertex(rt_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
|
||||
WriteVertex(rb_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
|
||||
vtx = load_vertex(vid >> 2);
|
||||
|
||||
EndPrimitive();
|
||||
}
|
||||
vtx.p.x += ((vid & 1u) != 0u) ? PointSize.x : 0.0f;
|
||||
vtx.p.y += ((vid & 2u) != 0u) ? PointSize.y : 0.0f;
|
||||
|
||||
#elif GS_PRIM == 1 && GS_LINE == 0
|
||||
#elif VS_EXPAND == 2 // Line
|
||||
|
||||
layout(lines) in;
|
||||
layout(line_strip, max_vertices = 2) out;
|
||||
uint vid_base = vid >> 2;
|
||||
|
||||
void main()
|
||||
{
|
||||
#if GS_IIP == 0
|
||||
WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[1].c);
|
||||
WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[1].c);
|
||||
bool is_bottom = (vid & 2u) != 0u;
|
||||
bool is_right = (vid & 1u) != 0u;
|
||||
#ifdef VS_PROVOKING_VERTEX_LAST
|
||||
uint vid_other = is_bottom ? vid_base - 1 : vid_base + 1;
|
||||
#else
|
||||
WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
|
||||
WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[1].c);
|
||||
uint vid_other = is_bottom ? vid_base + 1 : vid_base - 1;
|
||||
#endif
|
||||
EndPrimitive();
|
||||
}
|
||||
|
||||
vtx = load_vertex(vid_base);
|
||||
ProcessedVertex other = load_vertex(vid_other);
|
||||
|
||||
#elif GS_PRIM == 1 && GS_LINE == 1
|
||||
|
||||
layout(lines) in;
|
||||
layout(triangle_strip, max_vertices = 4) out;
|
||||
|
||||
void main()
|
||||
{
|
||||
// Transform a line to a thick line-sprite
|
||||
vec4 left_t = gsIn[0].t;
|
||||
vec4 left_ti = gsIn[0].ti;
|
||||
vec4 left_c = gsIn[0].c;
|
||||
vec4 right_t = gsIn[1].t;
|
||||
vec4 right_ti = gsIn[1].ti;
|
||||
vec4 right_c = gsIn[1].c;
|
||||
vec4 lt_p = gl_in[0].gl_Position;
|
||||
vec4 rt_p = gl_in[1].gl_Position;
|
||||
|
||||
// Potentially there is faster math
|
||||
vec2 line_vector = normalize(rt_p.xy - lt_p.xy);
|
||||
vec2 line_vector = normalize(vtx.p.xy - other.p.xy);
|
||||
vec2 line_normal = vec2(line_vector.y, -line_vector.x);
|
||||
vec2 line_width = (line_normal * PointSize) / 2.0;
|
||||
vec2 line_width = (line_normal * PointSize) / 2;
|
||||
// line_normal is inverted for bottom point
|
||||
vec2 offset = ((uint(is_bottom) ^ uint(is_right)) != 0u) ? line_width : -line_width;
|
||||
vtx.p.xy += offset;
|
||||
|
||||
lt_p.xy -= line_width;
|
||||
rt_p.xy -= line_width;
|
||||
vec4 lb_p = gl_in[0].gl_Position + vec4(line_width, 0.0, 0.0);
|
||||
vec4 rb_p = gl_in[1].gl_Position + vec4(line_width, 0.0, 0.0);
|
||||
// Lines will be run as (0 1 2) (1 2 3)
|
||||
// This means that both triangles will have a point based off the top line point as their first point
|
||||
// So we don't have to do anything for !IIP
|
||||
|
||||
#if GS_IIP == 0
|
||||
left_c = right_c;
|
||||
#endif
|
||||
#elif VS_EXPAND == 3 // Sprite
|
||||
|
||||
WriteVertex(lt_p, left_t, left_ti, left_c);
|
||||
WriteVertex(lb_p, left_t, left_ti, left_c);
|
||||
WriteVertex(rt_p, right_t, right_ti, right_c);
|
||||
WriteVertex(rb_p, right_t, right_ti, right_c);
|
||||
EndPrimitive();
|
||||
}
|
||||
// Sprite points are always in pairs
|
||||
uint vid_base = vid >> 1;
|
||||
uint vid_lt = vid_base & ~1u;
|
||||
uint vid_rb = vid_base | 1u;
|
||||
|
||||
#elif GS_PRIM == 2
|
||||
ProcessedVertex lt = load_vertex(vid_lt);
|
||||
ProcessedVertex rb = load_vertex(vid_rb);
|
||||
vtx = rb;
|
||||
|
||||
layout(triangles) in;
|
||||
layout(triangle_strip, max_vertices = 3) out;
|
||||
bool is_right = ((vid & 1u) != 0u);
|
||||
vtx.p.x = is_right ? lt.p.x : vtx.p.x;
|
||||
vtx.t.x = is_right ? lt.t.x : vtx.t.x;
|
||||
vtx.ti.xz = is_right ? lt.ti.xz : vtx.ti.xz;
|
||||
|
||||
// Triangle pass-through: re-emits the three incoming vertices unchanged apart
// from colour selection, then closes the strip.
void main()
{
#if GS_IIP == 0
	// Flat shading: all three vertices carry the colour of the last vertex.
	WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[2].c);
	WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[2].c);
	WriteVertex(gl_in[2].gl_Position, gsIn[2].t, gsIn[2].ti, gsIn[2].c);
#else
	// Interpolated shading: gsOut.c is not flat in this configuration, so each
	// vertex must forward its own colour (the line pass-through does the same).
	// Writing gsIn[0].c for every vertex would collapse the triangle to one colour.
	WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c);
	WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[1].c);
	WriteVertex(gl_in[2].gl_Position, gsIn[2].t, gsIn[2].ti, gsIn[2].c);
#endif

	EndPrimitive();
}
|
||||
|
||||
#elif GS_PRIM == 3
|
||||
|
||||
layout(lines) in;
|
||||
layout(triangle_strip, max_vertices = 4) out;
|
||||
|
||||
// Sprite expansion: input 0 is the left-top corner, input 1 the right-bottom
// corner. Emits one four-vertex strip (lt, lb, rt, rb); depth, t.zw and colour
// of every vertex are taken from input 1.
void main()
{
	vec4 p_rb = gl_in[1].gl_Position;
	vec4 t_rb = gsIn[1].t;
	vec4 ti_rb = gsIn[1].ti;
	// Flat colour: the right-bottom vertex's colour is used for all four corners.
	vec4 quad_c = gsIn[1].c;

	// Left-top keeps its own xy / w but takes depth and fog / texture perspective from right-bottom.
	vec4 p_lt = vec4(gl_in[0].gl_Position.xy, p_rb.z, gl_in[0].gl_Position.w);
	vec4 t_lt = vec4(gsIn[0].t.xy, t_rb.zw);
	vec4 ti_lt = gsIn[0].ti;

	// Left-bottom: right-bottom values with the x-related components of left-top.
	vec4 p_lb = vec4(p_lt.x, p_rb.yzw);
	vec4 t_lb = vec4(t_lt.x, t_rb.yzw);
	vec4 ti_lb = vec4(ti_lt.x, ti_rb.y, ti_lt.z, ti_rb.w);

	// Right-top: right-bottom values with the y-related components of left-top.
	vec4 p_rt = vec4(p_rb.x, p_lt.y, p_rb.zw);
	vec4 t_rt = vec4(t_rb.x, t_lt.y, t_rb.zw);
	vec4 ti_rt = vec4(ti_rb.x, ti_lt.y, ti_rb.z, ti_lt.w);

	WriteVertex(p_lt, t_lt, ti_lt, quad_c);
	WriteVertex(p_lb, t_lb, ti_lb, quad_c);
	WriteVertex(p_rt, t_rt, ti_rt, quad_c);
	WriteVertex(p_rb, t_rb, ti_rb, quad_c);
	EndPrimitive();
}
|
||||
bool is_bottom = ((vid & 2u) != 0u);
|
||||
vtx.p.y = is_bottom ? lt.p.y : vtx.p.y;
|
||||
vtx.t.y = is_bottom ? lt.t.y : vtx.t.y;
|
||||
vtx.ti.yw = is_bottom ? lt.ti.yw : vtx.ti.yw;
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
gl_Position = vtx.p;
|
||||
vsOut.t = vtx.t;
|
||||
vsOut.ti = vtx.ti;
|
||||
vsOut.c = vtx.c;
|
||||
}
|
||||
|
||||
#endif // VS_EXPAND
|
||||
|
||||
#endif // VERTEX_SHADER
|
||||
|
||||
#ifdef FRAGMENT_SHADER
|
||||
|
||||
|
|
|
@ -337,16 +337,6 @@ bool D3D11::ShaderCache::GetVertexShaderAndInputLayout(ID3D11Device* device,
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Returns a geometry shader built from @p shader_code.
///
/// The bytecode blob is requested from GetShaderBlob() with the Geometry shader
/// type; when a blob is returned it is passed to ShaderCompiler::CreateGeometryShader().
///
/// @param device       Device the shader is created on.
/// @param shader_code  Shader source text.
/// @param macros       Optional preprocessor macros forwarded to GetShaderBlob().
/// @param entry_point  Entry point name forwarded to GetShaderBlob().
/// @return The created shader, or an empty pointer when no blob was obtained.
wil::com_ptr_nothrow<ID3D11GeometryShader> D3D11::ShaderCache::GetGeometryShader(ID3D11Device* device,
	const std::string_view& shader_code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */)
{
	wil::com_ptr_nothrow<ID3DBlob> bytecode =
		GetShaderBlob(ShaderCompiler::Type::Geometry, shader_code, macros, entry_point);

	if (bytecode)
		return D3D11::ShaderCompiler::CreateGeometryShader(device, bytecode.get());

	// No blob available: hand back an empty pointer.
	return {};
}
|
||||
|
||||
wil::com_ptr_nothrow<ID3D11PixelShader> D3D11::ShaderCache::GetPixelShader(ID3D11Device* device,
|
||||
const std::string_view& shader_code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */)
|
||||
{
|
||||
|
|
|
@ -51,9 +51,6 @@ namespace D3D11
|
|||
const D3D11_INPUT_ELEMENT_DESC* layout, size_t layout_size,
|
||||
const std::string_view& shader_code, const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
|
||||
|
||||
wil::com_ptr_nothrow<ID3D11GeometryShader> GetGeometryShader(ID3D11Device* device, const std::string_view& shader_code,
|
||||
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
|
||||
|
||||
wil::com_ptr_nothrow<ID3D11PixelShader> GetPixelShader(ID3D11Device* device, const std::string_view& shader_code,
|
||||
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
|
||||
|
||||
|
|
|
@ -31,21 +31,21 @@ wil::com_ptr_nothrow<ID3DBlob> D3D11::ShaderCompiler::CompileShader(Type type, D
|
|||
{
|
||||
case D3D_FEATURE_LEVEL_10_0:
|
||||
{
|
||||
static constexpr std::array<const char*, 4> targets = {{"vs_4_0", "gs_4_0", "ps_4_0", "cs_4_0"}};
|
||||
static constexpr std::array<const char*, 4> targets = {{"vs_4_0", "ps_4_0", "cs_4_0"}};
|
||||
target = targets[static_cast<int>(type)];
|
||||
}
|
||||
break;
|
||||
|
||||
case D3D_FEATURE_LEVEL_10_1:
|
||||
{
|
||||
static constexpr std::array<const char*, 4> targets = {{"vs_4_1", "gs_4_1", "ps_4_1", "cs_4_1"}};
|
||||
static constexpr std::array<const char*, 4> targets = {{"vs_4_1", "ps_4_1", "cs_4_1"}};
|
||||
target = targets[static_cast<int>(type)];
|
||||
}
|
||||
break;
|
||||
|
||||
case D3D_FEATURE_LEVEL_11_0:
|
||||
{
|
||||
static constexpr std::array<const char*, 4> targets = {{"vs_5_0", "gs_5_0", "ps_5_0", "cs_5_0"}};
|
||||
static constexpr std::array<const char*, 4> targets = {{"vs_5_0", "ps_5_0", "cs_5_0"}};
|
||||
target = targets[static_cast<int>(type)];
|
||||
}
|
||||
break;
|
||||
|
@ -53,7 +53,7 @@ wil::com_ptr_nothrow<ID3DBlob> D3D11::ShaderCompiler::CompileShader(Type type, D
|
|||
case D3D_FEATURE_LEVEL_11_1:
|
||||
default:
|
||||
{
|
||||
static constexpr std::array<const char*, 4> targets = {{"vs_5_1", "gs_5_1", "ps_5_1", "cs_5_1"}};
|
||||
static constexpr std::array<const char*, 4> targets = {{"vs_5_1", "ps_5_1", "cs_5_1"}};
|
||||
target = targets[static_cast<int>(type)];
|
||||
}
|
||||
break;
|
||||
|
@ -108,16 +108,6 @@ wil::com_ptr_nothrow<ID3D11VertexShader> D3D11::ShaderCompiler::CompileAndCreate
|
|||
return CreateVertexShader(device, blob.get());
|
||||
}
|
||||
|
||||
// Compiles `code` as a geometry shader for the device's feature level and, if compilation
// yields a blob, creates the D3D11 geometry shader object from it.
// Returns an empty pointer when compilation produces no blob.
wil::com_ptr_nothrow<ID3D11GeometryShader> D3D11::ShaderCompiler::CompileAndCreateGeometryShader(ID3D11Device* device, bool debug,
	const std::string_view& code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */)
{
	wil::com_ptr_nothrow<ID3DBlob> compiled =
		CompileShader(Type::Geometry, device->GetFeatureLevel(), debug, code, macros, entry_point);

	// Only hand the bytecode to the device when the compile step actually produced something.
	if (compiled)
		return CreateGeometryShader(device, compiled.get());

	return {};
}
|
||||
|
||||
wil::com_ptr_nothrow<ID3D11PixelShader> D3D11::ShaderCompiler::CompileAndCreatePixelShader(ID3D11Device* device, bool debug,
|
||||
const std::string_view& code, const D3D_SHADER_MACRO* macros /* = nullptr */, const char* entry_point /* = "main" */)
|
||||
{
|
||||
|
@ -157,25 +147,6 @@ wil::com_ptr_nothrow<ID3D11VertexShader> D3D11::ShaderCompiler::CreateVertexShad
|
|||
const_cast<ID3DBlob*>(blob)->GetBufferSize());
|
||||
}
|
||||
|
||||
// Creates a geometry shader object from already-compiled bytecode.
// Logs the HRESULT and returns an empty pointer if the device rejects the bytecode.
wil::com_ptr_nothrow<ID3D11GeometryShader> D3D11::ShaderCompiler::CreateGeometryShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length)
{
	wil::com_ptr_nothrow<ID3D11GeometryShader> gs;
	const HRESULT result = device->CreateGeometryShader(bytecode, bytecode_length, nullptr, gs.put());
	if (SUCCEEDED(result))
		return gs;

	Console.Error("Failed to create geometry shader: 0x%08X", result);
	return {};
}
|
||||
|
||||
// Convenience overload: creates a geometry shader from the bytecode held in `blob`.
wil::com_ptr_nothrow<ID3D11GeometryShader> D3D11::ShaderCompiler::CreateGeometryShader(ID3D11Device* device, const ID3DBlob* blob)
{
	// The ID3DBlob accessors are not const-qualified, so constness is cast away to query them.
	ID3DBlob* const mutable_blob = const_cast<ID3DBlob*>(blob);
	const void* const data = mutable_blob->GetBufferPointer();
	const size_t length = mutable_blob->GetBufferSize();
	return CreateGeometryShader(device, data, length);
}
|
||||
|
||||
wil::com_ptr_nothrow<ID3D11PixelShader> D3D11::ShaderCompiler::CreatePixelShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length)
|
||||
{
|
||||
wil::com_ptr_nothrow<ID3D11PixelShader> shader;
|
||||
|
|
|
@ -27,7 +27,6 @@ namespace D3D11::ShaderCompiler
|
|||
enum class Type
|
||||
{
|
||||
Vertex,
|
||||
Geometry,
|
||||
Pixel,
|
||||
Compute
|
||||
};
|
||||
|
@ -37,8 +36,6 @@ namespace D3D11::ShaderCompiler
|
|||
|
||||
wil::com_ptr_nothrow<ID3D11VertexShader> CompileAndCreateVertexShader(ID3D11Device* device, bool debug, const std::string_view& code,
|
||||
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
|
||||
wil::com_ptr_nothrow<ID3D11GeometryShader> CompileAndCreateGeometryShader(ID3D11Device* device, bool debug, const std::string_view& code,
|
||||
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
|
||||
wil::com_ptr_nothrow<ID3D11PixelShader> CompileAndCreatePixelShader(ID3D11Device* device, bool debug, const std::string_view& code,
|
||||
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main");
|
||||
wil::com_ptr_nothrow<ID3D11ComputeShader> CompileAndCreateComputeShader(ID3D11Device* device, bool debug, const std::string_view& code,
|
||||
|
@ -46,8 +43,6 @@ namespace D3D11::ShaderCompiler
|
|||
|
||||
wil::com_ptr_nothrow<ID3D11VertexShader> CreateVertexShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
|
||||
wil::com_ptr_nothrow<ID3D11VertexShader> CreateVertexShader(ID3D11Device* device, const ID3DBlob* blob);
|
||||
wil::com_ptr_nothrow<ID3D11GeometryShader> CreateGeometryShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
|
||||
wil::com_ptr_nothrow<ID3D11GeometryShader> CreateGeometryShader(ID3D11Device* device, const ID3DBlob* blob);
|
||||
wil::com_ptr_nothrow<ID3D11PixelShader> CreatePixelShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
|
||||
wil::com_ptr_nothrow<ID3D11PixelShader> CreatePixelShader(ID3D11Device* device, const ID3DBlob* blob);
|
||||
wil::com_ptr_nothrow<ID3D11ComputeShader> CreateComputeShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length);
|
||||
|
|
|
@ -665,3 +665,57 @@ void Context::SetEnableGPUTiming(bool enabled)
|
|||
{
|
||||
m_gpu_timing_enabled = enabled;
|
||||
}
|
||||
|
||||
// Creates a default-heap buffer of `size` bytes whose initial contents are written by `fill_callback`.
//
// An upload-heap staging buffer is created and mapped, `fill_callback` is invoked with the mapped
// pointer, and a copy into the destination buffer is recorded on the init command list, followed by
// a COPY_DEST -> INDEX_BUFFER transition. The staging buffer is handed to DeferResourceDestruction().
//
// On success the new resource/allocation are stored through `gpu_buffer` / `gpu_allocation` and true
// is returned. Returns false if either allocation or the map fails.
bool Context::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer,
	D3D12MA::Allocation** gpu_allocation, const std::function<void(void*)>& fill_callback)
{
	// The same plain-buffer description is used for both the staging and the destination resource.
	const D3D12_RESOURCE_DESC buffer_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1,
		DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
		D3D12_RESOURCE_FLAG_NONE};

	// --- Staging buffer (upload heap) ---
	const D3D12MA::ALLOCATION_DESC staging_alloc_desc = {
		D3D12MA::ALLOCATION_FLAG_NONE,
		D3D12_HEAP_TYPE_UPLOAD};

	ComPtr<ID3D12Resource> staging_buffer;
	ComPtr<D3D12MA::Allocation> staging_allocation;
	HRESULT hr = m_allocator->CreateResource(&staging_alloc_desc, &buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ,
		nullptr, staging_allocation.put(), IID_PPV_ARGS(staging_buffer.put()));
	pxAssertMsg(SUCCEEDED(hr), "Allocate CPU buffer");
	if (FAILED(hr))
		return false;

	// Map with an empty read range (nothing is read back), let the caller populate the
	// memory, then unmap reporting the whole buffer as written.
	static constexpr const D3D12_RANGE nothing_read = {};
	const D3D12_RANGE everything_written = {0, size};
	void* staging_ptr;
	hr = staging_buffer->Map(0, &nothing_read, &staging_ptr);
	pxAssertMsg(SUCCEEDED(hr), "Map CPU buffer");
	if (FAILED(hr))
		return false;

	fill_callback(staging_ptr);
	staging_buffer->Unmap(0, &everything_written);

	// --- Destination buffer (default heap, committed) ---
	const D3D12MA::ALLOCATION_DESC dest_alloc_desc = {
		D3D12MA::ALLOCATION_FLAG_COMMITTED,
		D3D12_HEAP_TYPE_DEFAULT};

	hr = m_allocator->CreateResource(&dest_alloc_desc, &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST,
		nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer));
	pxAssertMsg(SUCCEEDED(hr), "Allocate GPU buffer");
	if (FAILED(hr))
		return false;

	// Record the upload, then move the destination out of the copy state.
	GetInitCommandList()->CopyBufferRegion(*gpu_buffer, 0, staging_buffer.get(), 0, size);

	D3D12_RESOURCE_BARRIER to_index_buffer = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE};
	to_index_buffer.Transition.pResource = *gpu_buffer;
	to_index_buffer.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
	to_index_buffer.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
	to_index_buffer.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER;
	GetInitCommandList()->ResourceBarrier(1, &to_index_buffer);

	// The staging resources are released through the deferred-destruction path.
	DeferResourceDestruction(staging_allocation.get(), staging_buffer.get());
	return true;
}
|
||||
|
|
|
@ -152,6 +152,10 @@ namespace D3D12
|
|||
float GetAndResetAccumulatedGPUTime();
|
||||
void SetEnableGPUTiming(bool enabled);
|
||||
|
||||
// Allocates a temporary CPU staging buffer, fires the callback with it to populate, then copies to a GPU buffer.
|
||||
bool AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer, D3D12MA::Allocation** gpu_allocation,
|
||||
const std::function<void(void*)>& fill_callback);
|
||||
|
||||
private:
|
||||
struct CommandListResources
|
||||
{
|
||||
|
|
|
@ -521,9 +521,6 @@ ShaderCache::ComPtr<ID3DBlob> ShaderCache::CompileAndAddShaderBlob(const CacheIn
|
|||
case EntryType::VertexShader:
|
||||
blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Vertex, m_feature_level, m_debug, shader_code, macros, entry_point);
|
||||
break;
|
||||
case EntryType::GeometryShader:
|
||||
blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Geometry, m_feature_level, m_debug, shader_code, macros, entry_point);
|
||||
break;
|
||||
case EntryType::PixelShader:
|
||||
blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Pixel, m_feature_level, m_debug, shader_code, macros, entry_point);
|
||||
break;
|
||||
|
|
|
@ -37,7 +37,6 @@ namespace D3D12
|
|||
enum class EntryType
|
||||
{
|
||||
VertexShader,
|
||||
GeometryShader,
|
||||
PixelShader,
|
||||
ComputeShader,
|
||||
GraphicsPipeline,
|
||||
|
@ -59,11 +58,6 @@ namespace D3D12
|
|||
{
|
||||
return GetShaderBlob(EntryType::VertexShader, shader_code, macros, entry_point);
|
||||
}
|
||||
// Looks up (or compiles) the blob for a geometry shader; forwards to GetShaderBlob()
// with the geometry-shader entry type.
__fi ComPtr<ID3DBlob> GetGeometryShader(std::string_view shader_code,
	const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main")
{
	ComPtr<ID3DBlob> geometry_blob = GetShaderBlob(EntryType::GeometryShader, shader_code, macros, entry_point);
	return geometry_blob;
}
|
||||
__fi ComPtr<ID3DBlob> GetPixelShader(std::string_view shader_code,
|
||||
const D3D_SHADER_MACRO* macros = nullptr, const char* entry_point = "main")
|
||||
{
|
||||
|
|
|
@ -35,8 +35,6 @@ namespace GL
|
|||
prog.m_program_id = 0;
|
||||
m_vertex_shader_id = prog.m_vertex_shader_id;
|
||||
prog.m_vertex_shader_id = 0;
|
||||
m_geometry_shader_id = prog.m_geometry_shader_id;
|
||||
prog.m_geometry_shader_id = 0;
|
||||
m_fragment_shader_id = prog.m_fragment_shader_id;
|
||||
prog.m_fragment_shader_id = 0;
|
||||
m_uniform_locations = std::move(prog.m_uniform_locations);
|
||||
|
@ -102,8 +100,7 @@ namespace GL
|
|||
s_last_program_id = 0;
|
||||
}
|
||||
|
||||
bool Program::Compile(const std::string_view vertex_shader, const std::string_view geometry_shader,
|
||||
const std::string_view fragment_shader)
|
||||
bool Program::Compile(const std::string_view vertex_shader, const std::string_view fragment_shader)
|
||||
{
|
||||
if (!vertex_shader.empty())
|
||||
{
|
||||
|
@ -112,13 +109,6 @@ namespace GL
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!geometry_shader.empty())
|
||||
{
|
||||
m_geometry_shader_id = CompileShader(GL_GEOMETRY_SHADER, geometry_shader);
|
||||
if (m_geometry_shader_id == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!fragment_shader.empty())
|
||||
{
|
||||
m_fragment_shader_id = CompileShader(GL_FRAGMENT_SHADER, fragment_shader);
|
||||
|
@ -129,8 +119,6 @@ namespace GL
|
|||
m_program_id = glCreateProgram();
|
||||
if (m_vertex_shader_id != 0)
|
||||
glAttachShader(m_program_id, m_vertex_shader_id);
|
||||
if (m_geometry_shader_id != 0)
|
||||
glAttachShader(m_program_id, m_geometry_shader_id);
|
||||
if (m_fragment_shader_id != 0)
|
||||
glAttachShader(m_program_id, m_fragment_shader_id);
|
||||
return true;
|
||||
|
@ -240,9 +228,6 @@ namespace GL
|
|||
if (m_vertex_shader_id != 0)
|
||||
glDeleteShader(m_vertex_shader_id);
|
||||
m_vertex_shader_id = 0;
|
||||
if (m_geometry_shader_id != 0)
|
||||
glDeleteShader(m_geometry_shader_id);
|
||||
m_geometry_shader_id = 0;
|
||||
if (m_fragment_shader_id != 0)
|
||||
glDeleteShader(m_fragment_shader_id);
|
||||
m_fragment_shader_id = 0;
|
||||
|
@ -541,8 +526,6 @@ namespace GL
|
|||
prog.m_program_id = 0;
|
||||
m_vertex_shader_id = prog.m_vertex_shader_id;
|
||||
prog.m_vertex_shader_id = 0;
|
||||
m_geometry_shader_id = prog.m_geometry_shader_id;
|
||||
prog.m_geometry_shader_id = 0;
|
||||
m_fragment_shader_id = prog.m_fragment_shader_id;
|
||||
prog.m_fragment_shader_id = 0;
|
||||
m_uniform_locations = std::move(prog.m_uniform_locations);
|
||||
|
|
|
@ -34,8 +34,7 @@ namespace GL
|
|||
|
||||
bool IsValid() const { return m_program_id != 0; }
|
||||
|
||||
bool Compile(const std::string_view vertex_shader, const std::string_view geometry_shader,
|
||||
const std::string_view fragment_shader);
|
||||
bool Compile(const std::string_view vertex_shader, const std::string_view fragment_shader);
|
||||
|
||||
bool CompileCompute(const std::string_view glsl);
|
||||
|
||||
|
@ -99,7 +98,6 @@ namespace GL
|
|||
|
||||
GLuint m_program_id = 0;
|
||||
GLuint m_vertex_shader_id = 0;
|
||||
GLuint m_geometry_shader_id = 0;
|
||||
GLuint m_fragment_shader_id = 0;
|
||||
|
||||
std::vector<GLint> m_uniform_locations;
|
||||
|
|
|
@ -28,9 +28,6 @@ namespace GL
|
|||
u64 vertex_source_hash_low;
|
||||
u64 vertex_source_hash_high;
|
||||
u32 vertex_source_length;
|
||||
u64 geometry_source_hash_low;
|
||||
u64 geometry_source_hash_high;
|
||||
u32 geometry_source_length;
|
||||
u64 fragment_source_hash_low;
|
||||
u64 fragment_source_hash_high;
|
||||
u32 fragment_source_length;
|
||||
|
@ -51,9 +48,7 @@ namespace GL
|
|||
{
|
||||
return (
|
||||
vertex_source_hash_low == key.vertex_source_hash_low && vertex_source_hash_high == key.vertex_source_hash_high &&
|
||||
vertex_source_length == key.vertex_source_length && geometry_source_hash_low == key.geometry_source_hash_low &&
|
||||
geometry_source_hash_high == key.geometry_source_hash_high &&
|
||||
geometry_source_length == key.geometry_source_length && fragment_source_hash_low == key.fragment_source_hash_low &&
|
||||
vertex_source_length == key.vertex_source_length && fragment_source_hash_low == key.fragment_source_hash_low &&
|
||||
fragment_source_hash_high == key.fragment_source_hash_high && fragment_source_length == key.fragment_source_length);
|
||||
}
|
||||
|
||||
|
@ -61,9 +56,7 @@ namespace GL
|
|||
{
|
||||
return (
|
||||
vertex_source_hash_low != key.vertex_source_hash_low || vertex_source_hash_high != key.vertex_source_hash_high ||
|
||||
vertex_source_length != key.vertex_source_length || geometry_source_hash_low != key.geometry_source_hash_low ||
|
||||
geometry_source_hash_high != key.geometry_source_hash_high ||
|
||||
geometry_source_length != key.geometry_source_length || fragment_source_hash_low != key.fragment_source_hash_low ||
|
||||
vertex_source_length != key.vertex_source_length || fragment_source_hash_low != key.fragment_source_hash_low ||
|
||||
fragment_source_hash_high != key.fragment_source_hash_high || fragment_source_length != key.fragment_source_length);
|
||||
}
|
||||
|
||||
|
@ -204,7 +197,6 @@ namespace GL
|
|||
|
||||
const CacheIndexKey key{
|
||||
entry.vertex_source_hash_low, entry.vertex_source_hash_high, entry.vertex_source_length,
|
||||
entry.geometry_source_hash_low, entry.geometry_source_hash_high, entry.geometry_source_length,
|
||||
entry.fragment_source_hash_low, entry.fragment_source_hash_high, entry.fragment_source_length};
|
||||
const CacheIndexData data{entry.file_offset, entry.blob_size, entry.blob_format};
|
||||
m_index.emplace(key, data);
|
||||
|
@ -242,7 +234,6 @@ namespace GL
|
|||
}
|
||||
|
||||
ShaderCache::CacheIndexKey ShaderCache::GetCacheKey(const std::string_view& vertex_shader,
|
||||
const std::string_view& geometry_shader,
|
||||
const std::string_view& fragment_shader)
|
||||
{
|
||||
union ShaderHash
|
||||
|
@ -256,7 +247,6 @@ namespace GL
|
|||
};
|
||||
|
||||
ShaderHash vertex_hash = {};
|
||||
ShaderHash geometry_hash = {};
|
||||
ShaderHash fragment_hash = {};
|
||||
|
||||
MD5Digest digest;
|
||||
|
@ -266,13 +256,6 @@ namespace GL
|
|||
digest.Final(vertex_hash.bytes);
|
||||
}
|
||||
|
||||
if (!geometry_shader.empty())
|
||||
{
|
||||
digest.Reset();
|
||||
digest.Update(geometry_shader.data(), static_cast<u32>(geometry_shader.length()));
|
||||
digest.Final(geometry_hash.bytes);
|
||||
}
|
||||
|
||||
if (!fragment_shader.empty())
|
||||
{
|
||||
digest.Reset();
|
||||
|
@ -281,7 +264,6 @@ namespace GL
|
|||
}
|
||||
|
||||
return CacheIndexKey{vertex_hash.low, vertex_hash.high, static_cast<u32>(vertex_shader.length()),
|
||||
geometry_hash.low, geometry_hash.high, static_cast<u32>(geometry_shader.length()),
|
||||
fragment_hash.low, fragment_hash.high, static_cast<u32>(fragment_shader.length())};
|
||||
}
|
||||
|
||||
|
@ -296,7 +278,6 @@ namespace GL
|
|||
}
|
||||
|
||||
std::optional<Program> ShaderCache::GetProgram(const std::string_view vertex_shader,
|
||||
const std::string_view geometry_shader,
|
||||
const std::string_view fragment_shader, const PreLinkCallback& callback)
|
||||
{
|
||||
if (!m_program_binary_supported || !m_blob_file)
|
||||
|
@ -305,7 +286,7 @@ namespace GL
|
|||
Common::Timer timer;
|
||||
#endif
|
||||
|
||||
std::optional<Program> res = CompileProgram(vertex_shader, geometry_shader, fragment_shader, callback, false);
|
||||
std::optional<Program> res = CompileProgram(vertex_shader, fragment_shader, callback, false);
|
||||
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
Console.WriteLn("Time to compile shader without caching: %.2fms", timer.GetTimeMilliseconds());
|
||||
|
@ -313,10 +294,10 @@ namespace GL
|
|||
return res;
|
||||
}
|
||||
|
||||
const auto key = GetCacheKey(vertex_shader, geometry_shader, fragment_shader);
|
||||
const auto key = GetCacheKey(vertex_shader, fragment_shader);
|
||||
auto iter = m_index.find(key);
|
||||
if (iter == m_index.end())
|
||||
return CompileAndAddProgram(key, vertex_shader, geometry_shader, fragment_shader, callback);
|
||||
return CompileAndAddProgram(key, vertex_shader, fragment_shader, callback);
|
||||
|
||||
std::vector<u8> data(iter->second.blob_size);
|
||||
if (std::fseek(m_blob_file, iter->second.file_offset, SEEK_SET) != 0 ||
|
||||
|
@ -343,16 +324,15 @@ namespace GL
|
|||
Console.Warning(
|
||||
"Failed to create program from binary, this may be due to a driver or GPU Change. Recreating cache.");
|
||||
if (!Recreate())
|
||||
return CompileProgram(vertex_shader, geometry_shader, fragment_shader, callback, false);
|
||||
return CompileProgram(vertex_shader, fragment_shader, callback, false);
|
||||
else
|
||||
return CompileAndAddProgram(key, vertex_shader, geometry_shader, fragment_shader, callback);
|
||||
return CompileAndAddProgram(key, vertex_shader, fragment_shader, callback);
|
||||
}
|
||||
|
||||
bool ShaderCache::GetProgram(Program* out_program, const std::string_view vertex_shader,
|
||||
const std::string_view geometry_shader, const std::string_view fragment_shader,
|
||||
const PreLinkCallback& callback /* = */)
|
||||
const std::string_view fragment_shader, const PreLinkCallback& callback /* = */)
|
||||
{
|
||||
auto prog = GetProgram(vertex_shader, geometry_shader, fragment_shader, callback);
|
||||
auto prog = GetProgram(vertex_shader, fragment_shader, callback);
|
||||
if (!prog)
|
||||
return false;
|
||||
|
||||
|
@ -374,9 +354,6 @@ namespace GL
|
|||
entry.vertex_source_hash_low = key.vertex_source_hash_low;
|
||||
entry.vertex_source_hash_high = key.vertex_source_hash_high;
|
||||
entry.vertex_source_length = key.vertex_source_length;
|
||||
entry.geometry_source_hash_low = key.geometry_source_hash_low;
|
||||
entry.geometry_source_hash_high = key.geometry_source_hash_high;
|
||||
entry.geometry_source_length = key.geometry_source_length;
|
||||
entry.fragment_source_hash_low = key.fragment_source_hash_low;
|
||||
entry.fragment_source_hash_high = key.fragment_source_hash_high;
|
||||
entry.fragment_source_length = key.fragment_source_length;
|
||||
|
@ -397,12 +374,10 @@ namespace GL
|
|||
}
|
||||
|
||||
std::optional<Program> ShaderCache::CompileProgram(const std::string_view& vertex_shader,
|
||||
const std::string_view& geometry_shader,
|
||||
const std::string_view& fragment_shader,
|
||||
const PreLinkCallback& callback, bool set_retrievable)
|
||||
const std::string_view& fragment_shader, const PreLinkCallback& callback, bool set_retrievable)
|
||||
{
|
||||
Program prog;
|
||||
if (!prog.Compile(vertex_shader, geometry_shader, fragment_shader))
|
||||
if (!prog.Compile(vertex_shader, fragment_shader))
|
||||
return std::nullopt;
|
||||
|
||||
if (callback)
|
||||
|
@ -437,16 +412,14 @@ namespace GL
|
|||
}
|
||||
|
||||
std::optional<Program> ShaderCache::CompileAndAddProgram(const CacheIndexKey& key,
|
||||
const std::string_view& vertex_shader,
|
||||
const std::string_view& geometry_shader,
|
||||
const std::string_view& fragment_shader,
|
||||
const std::string_view& vertex_shader, const std::string_view& fragment_shader,
|
||||
const PreLinkCallback& callback)
|
||||
{
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
Common::Timer timer;
|
||||
#endif
|
||||
|
||||
std::optional<Program> prog = CompileProgram(vertex_shader, geometry_shader, fragment_shader, callback, true);
|
||||
std::optional<Program> prog = CompileProgram(vertex_shader, fragment_shader, callback, true);
|
||||
if (!prog)
|
||||
return std::nullopt;
|
||||
|
||||
|
@ -491,7 +464,7 @@ namespace GL
|
|||
return res;
|
||||
}
|
||||
|
||||
const auto key = GetCacheKey(glsl, std::string_view(), std::string_view());
|
||||
const auto key = GetCacheKey(glsl, std::string_view());
|
||||
auto iter = m_index.find(key);
|
||||
if (iter == m_index.end())
|
||||
return CompileAndAddComputeProgram(key, glsl, callback);
|
||||
|
|
|
@ -38,10 +38,8 @@ namespace GL
|
|||
bool Open(bool is_gles, std::string_view base_path, u32 version);
|
||||
void Close();
|
||||
|
||||
std::optional<Program> GetProgram(const std::string_view vertex_shader, const std::string_view geometry_shader,
|
||||
const std::string_view fragment_shader, const PreLinkCallback& callback = {});
|
||||
bool GetProgram(Program* out_program, const std::string_view vertex_shader, const std::string_view geometry_shader,
|
||||
const std::string_view fragment_shader, const PreLinkCallback& callback = {});
|
||||
std::optional<Program> GetProgram(const std::string_view vertex_shader, const std::string_view fragment_shader, const PreLinkCallback& callback = {});
|
||||
bool GetProgram(Program* out_program, const std::string_view vertex_shader, const std::string_view fragment_shader, const PreLinkCallback& callback = {});
|
||||
|
||||
std::optional<Program> GetComputeProgram(const std::string_view glsl, const PreLinkCallback& callback = {});
|
||||
bool GetComputeProgram(Program* out_program, const std::string_view glsl, const PreLinkCallback& callback = {});
|
||||
|
@ -54,9 +52,6 @@ namespace GL
|
|||
u64 vertex_source_hash_low;
|
||||
u64 vertex_source_hash_high;
|
||||
u32 vertex_source_length;
|
||||
u64 geometry_source_hash_low;
|
||||
u64 geometry_source_hash_high;
|
||||
u32 geometry_source_length;
|
||||
u64 fragment_source_hash_low;
|
||||
u64 fragment_source_hash_high;
|
||||
u32 fragment_source_length;
|
||||
|
@ -72,7 +67,6 @@ namespace GL
|
|||
std::size_t h = 0;
|
||||
HashCombine(h,
|
||||
e.vertex_source_hash_low, e.vertex_source_hash_high, e.vertex_source_length,
|
||||
e.geometry_source_hash_low, e.geometry_source_hash_high, e.geometry_source_length,
|
||||
e.fragment_source_hash_low, e.fragment_source_hash_high, e.fragment_source_length);
|
||||
return h;
|
||||
}
|
||||
|
@ -87,8 +81,7 @@ namespace GL
|
|||
|
||||
using CacheIndex = std::unordered_map<CacheIndexKey, CacheIndexData, CacheIndexEntryHasher>;
|
||||
|
||||
static CacheIndexKey GetCacheKey(const std::string_view& vertex_shader, const std::string_view& geometry_shader,
|
||||
const std::string_view& fragment_shader);
|
||||
static CacheIndexKey GetCacheKey(const std::string_view& vertex_shader, const std::string_view& fragment_shader);
|
||||
|
||||
std::string GetIndexFileName() const;
|
||||
std::string GetBlobFileName() const;
|
||||
|
@ -99,11 +92,10 @@ namespace GL
|
|||
|
||||
bool WriteToBlobFile(const CacheIndexKey& key, const std::vector<u8>& prog_data, u32 prog_format);
|
||||
|
||||
std::optional<Program> CompileProgram(const std::string_view& vertex_shader, const std::string_view& geometry_shader,
|
||||
std::optional<Program> CompileProgram(const std::string_view& vertex_shader,
|
||||
const std::string_view& fragment_shader, const PreLinkCallback& callback,
|
||||
bool set_retrievable);
|
||||
std::optional<Program> CompileAndAddProgram(const CacheIndexKey& key, const std::string_view& vertex_shader,
|
||||
const std::string_view& geometry_shader,
|
||||
const std::string_view& fragment_shader, const PreLinkCallback& callback);
|
||||
|
||||
std::optional<Program> CompileComputeProgram(const std::string_view& glsl, const PreLinkCallback& callback, bool set_retrievable);
|
||||
|
|
|
@ -698,11 +698,6 @@ namespace Vulkan
|
|||
{
|
||||
pxAssert(m_num_writes < MAX_WRITES && (m_num_image_infos + num_views) < MAX_IMAGE_INFOS);
|
||||
|
||||
#if 1
|
||||
// NOTE: This is deliberately split up - updating multiple descriptors in one write is broken on Adreno.
|
||||
for (u32 i = 0; i < num_views; i++)
|
||||
AddCombinedImageSamplerDescriptorWrite(set, binding + i, views[i], samplers[i], layout);
|
||||
#else
|
||||
VkWriteDescriptorSet& dw = m_writes[m_num_writes++];
|
||||
dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
dw.dstSet = set;
|
||||
|
@ -718,7 +713,6 @@ namespace Vulkan
|
|||
ii.sampler = samplers[i];
|
||||
ii.imageLayout = layout;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void DescriptorSetUpdateBuilder::AddBufferDescriptorWrite(
|
||||
|
|
|
@ -474,6 +474,8 @@ namespace Vulkan
|
|||
SupportsExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false);
|
||||
m_optional_extensions.vk_khr_fragment_shader_barycentric =
|
||||
SupportsExtension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME, false);
|
||||
m_optional_extensions.vk_khr_shader_draw_parameters =
|
||||
SupportsExtension(VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, false);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -956,11 +958,9 @@ namespace Vulkan
|
|||
|
||||
bool Context::CreateGlobalDescriptorPool()
|
||||
{
|
||||
// TODO: A better way to choose the number of descriptors.
|
||||
VkDescriptorPoolSize pool_sizes[] = {
|
||||
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1024},
|
||||
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1024},
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1},
|
||||
static constexpr const VkDescriptorPoolSize pool_sizes[] = {
|
||||
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 2},
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2},
|
||||
};
|
||||
|
||||
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr,
|
||||
|
@ -2077,4 +2077,52 @@ void main()
|
|||
return static_cast<u64>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Creates a device-local buffer of `size` bytes whose initial contents are written by `fill_callback`.
//
// A mapped staging buffer is created, the destination buffer is created with the caller's
// `gpu_usage` flags (plus TRANSFER_DST so it can be copied into), `fill_callback` is invoked with
// the staging buffer's mapped pointer, and a copy is recorded on the current init command buffer.
// The staging buffer is handed to DeferBufferDestruction().
//
// On success the new buffer/allocation are stored through `gpu_buffer` / `gpu_allocation` and true
// is returned. Returns false (after logging) if either buffer cannot be created; the staging
// buffer is destroyed immediately if the destination allocation fails.
bool Context::AllocatePreinitializedGPUBuffer(u32 size, VkBuffer* gpu_buffer, VmaAllocation* gpu_allocation,
	VkBufferUsageFlags gpu_usage, const std::function<void(void*)>& fill_callback)
{
	// Staging buffer: created already mapped, used only as a transfer source.
	const VkBufferCreateInfo cpu_bci = {
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		nullptr,
		0, size,
		VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE};
	const VmaAllocationCreateInfo cpu_aci = {
		VMA_ALLOCATION_CREATE_MAPPED_BIT, VMA_MEMORY_USAGE_CPU_ONLY, 0, 0};
	VkBuffer cpu_buffer;
	VmaAllocation cpu_allocation;
	VmaAllocationInfo cpu_ai;
	VkResult res = vmaCreateBuffer(m_allocator, &cpu_bci, &cpu_aci, &cpu_buffer,
		&cpu_allocation, &cpu_ai);
	if (res != VK_SUCCESS)
	{
		LOG_VULKAN_ERROR(res, "vmaCreateBuffer() for CPU expand buffer failed: ");
		return false;
	}

	// Destination buffer: honour the usage requested by the caller instead of assuming an
	// index buffer; TRANSFER_DST is always added because the data arrives via vkCmdCopyBuffer.
	const VkBufferCreateInfo gpu_bci = {
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		nullptr,
		0, size,
		gpu_usage | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_SHARING_MODE_EXCLUSIVE};
	const VmaAllocationCreateInfo gpu_aci = {
		0, VMA_MEMORY_USAGE_GPU_ONLY, 0, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT};
	VmaAllocationInfo ai;
	res = vmaCreateBuffer(m_allocator, &gpu_bci, &gpu_aci, gpu_buffer, gpu_allocation, &ai);
	if (res != VK_SUCCESS)
	{
		LOG_VULKAN_ERROR(res, "vmaCreateBuffer() for expand buffer failed: ");
		// Nothing has been recorded against the staging buffer yet, so it can go right away.
		vmaDestroyBuffer(m_allocator, cpu_buffer, cpu_allocation);
		return false;
	}

	// Populate the staging memory, flush it, and record the upload.
	const VkBufferCopy buf_copy = {0u, 0u, size};
	fill_callback(cpu_ai.pMappedData);
	vmaFlushAllocation(m_allocator, cpu_allocation, 0, size);
	vkCmdCopyBuffer(GetCurrentInitCommandBuffer(), cpu_buffer, *gpu_buffer, 1, &buf_copy);

	// The recorded copy still references the staging buffer, so its destruction is deferred.
	DeferBufferDestruction(cpu_buffer, cpu_allocation);
	return true;
}
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -56,6 +56,7 @@ namespace Vulkan
|
|||
bool vk_khr_driver_properties : 1;
|
||||
bool vk_arm_rasterization_order_attachment_access : 1;
|
||||
bool vk_khr_fragment_shader_barycentric : 1;
|
||||
bool vk_khr_shader_draw_parameters : 1;
|
||||
};
|
||||
|
||||
~Context();
|
||||
|
@ -238,6 +239,10 @@ namespace Vulkan
|
|||
void CountRenderPass() { m_command_buffer_render_passes++; }
|
||||
void NotifyOfReadback();
|
||||
|
||||
// Allocates a temporary CPU staging buffer, fires the callback with it to populate, then copies to a GPU buffer.
|
||||
bool AllocatePreinitializedGPUBuffer(u32 size, VkBuffer* gpu_buffer, VmaAllocation* gpu_allocation,
|
||||
VkBufferUsageFlags gpu_usage, const std::function<void(void*)>& fill_callback);
|
||||
|
||||
private:
|
||||
Context(VkInstance instance, VkPhysicalDevice physical_device);
|
||||
|
||||
|
|
|
@ -493,11 +493,6 @@ namespace Vulkan
|
|||
return GetShaderModule(ShaderCompiler::Type::Vertex, std::move(shader_code));
|
||||
}
|
||||
|
||||
// Returns the shader module for `shader_code` treated as a geometry shader;
// forwards to GetShaderModule() with the geometry stage type.
VkShaderModule ShaderCache::GetGeometryShader(std::string_view shader_code)
{
	const VkShaderModule geometry_module = GetShaderModule(ShaderCompiler::Type::Geometry, std::move(shader_code));
	return geometry_module;
}
|
||||
|
||||
VkShaderModule ShaderCache::GetFragmentShader(std::string_view shader_code)
|
||||
{
|
||||
return GetShaderModule(ShaderCompiler::Type::Fragment, std::move(shader_code));
|
||||
|
|
|
@ -47,7 +47,6 @@ namespace Vulkan
|
|||
VkShaderModule GetShaderModule(ShaderCompiler::Type type, std::string_view shader_code);
|
||||
|
||||
VkShaderModule GetVertexShader(std::string_view shader_code);
|
||||
VkShaderModule GetGeometryShader(std::string_view shader_code);
|
||||
VkShaderModule GetFragmentShader(std::string_view shader_code);
|
||||
VkShaderModule GetComputeShader(std::string_view shader_code);
|
||||
|
||||
|
|
|
@ -154,11 +154,6 @@ namespace Vulkan::ShaderCompiler
|
|||
return CompileShaderToSPV(EShLangVertex, "vs", source_code, debug);
|
||||
}
|
||||
|
||||
std::optional<SPIRVCodeVector> CompileGeometryShader(std::string_view source_code, bool debug)
|
||||
{
|
||||
return CompileShaderToSPV(EShLangGeometry, "gs", source_code, debug);
|
||||
}
|
||||
|
||||
std::optional<SPIRVCodeVector> CompileFragmentShader(std::string_view source_code, bool debug)
|
||||
{
|
||||
return CompileShaderToSPV(EShLangFragment, "ps", source_code, debug);
|
||||
|
@ -176,9 +171,6 @@ namespace Vulkan::ShaderCompiler
|
|||
case Type::Vertex:
|
||||
return CompileShaderToSPV(EShLangVertex, "vs", source_code, debug);
|
||||
|
||||
case Type::Geometry:
|
||||
return CompileShaderToSPV(EShLangGeometry, "gs", source_code, debug);
|
||||
|
||||
case Type::Fragment:
|
||||
return CompileShaderToSPV(EShLangFragment, "ps", source_code, debug);
|
||||
|
||||
|
|
|
@ -26,7 +26,6 @@ namespace Vulkan::ShaderCompiler
|
|||
enum class Type
|
||||
{
|
||||
Vertex,
|
||||
Geometry,
|
||||
Fragment,
|
||||
Compute
|
||||
};
|
||||
|
@ -40,9 +39,6 @@ namespace Vulkan::ShaderCompiler
|
|||
// Compile a vertex shader to SPIR-V.
|
||||
std::optional<SPIRVCodeVector> CompileVertexShader(std::string_view source_code, bool debug);
|
||||
|
||||
// Compile a geometry shader to SPIR-V.
|
||||
std::optional<SPIRVCodeVector> CompileGeometryShader(std::string_view source_code, bool debug);
|
||||
|
||||
// Compile a fragment shader to SPIR-V.
|
||||
std::optional<SPIRVCodeVector> CompileFragmentShader(std::string_view source_code, bool debug);
|
||||
|
||||
|
|
|
@ -245,7 +245,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
|
|||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.skipPresentingDuplicateFrames, "EmuCore/GS", "SkipDuplicateFrames", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.threadedPresentation, "EmuCore/GS", "DisableThreadedPresentation", false);
|
||||
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideTextureBarriers, "EmuCore/GS", "OverrideTextureBarriers", -1, -1);
|
||||
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideGeometryShader, "EmuCore/GS", "OverrideGeometryShaders", -1, -1);
|
||||
SettingWidgetBinder::BindWidgetToIntSetting(
|
||||
sif, m_ui.gsDumpCompression, "EmuCore/GS", "GSDumpCompression", static_cast<int>(GSDumpCompressionMethod::Zstandard));
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.disableFramebufferFetch, "EmuCore/GS", "DisableFramebufferFetch", false);
|
||||
|
@ -693,10 +692,6 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
|
|||
{
|
||||
dialog->registerWidgetHelp(m_ui.overrideTextureBarriers, tr("Override Texture Barriers"), tr("Automatic (Default)"), tr(""));
|
||||
|
||||
dialog->registerWidgetHelp(m_ui.overrideGeometryShader, tr("Override Geometry Shader"), tr("Automatic (Default)"),
|
||||
tr("Allows the GPU instead of just the CPU to transform lines into sprites. "
|
||||
"This reduces CPU load and bandwidth requirement, but it is heavier on the GPU."));
|
||||
|
||||
dialog->registerWidgetHelp(m_ui.gsDumpCompression, tr("GS Dump Compression"), tr("Zstandard (zst)"),
|
||||
tr("Change the compression algorithm used when creating a GS dump."));
|
||||
|
||||
|
@ -948,7 +943,6 @@ void GraphicsSettingsWidget::updateRendererDependentOptions()
|
|||
m_ui.useBlitSwapChain->setEnabled(is_dx11);
|
||||
|
||||
m_ui.overrideTextureBarriers->setDisabled(is_sw_dx);
|
||||
m_ui.overrideGeometryShader->setDisabled(is_sw_dx);
|
||||
|
||||
m_ui.disableFramebufferFetch->setDisabled(is_sw_dx);
|
||||
|
||||
|
|
|
@ -2133,33 +2133,7 @@
|
|||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QLabel" name="label_30">
|
||||
<property name="text">
|
||||
<string>Override Geometry Shader:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1">
|
||||
<widget class="QComboBox" name="overrideGeometryShader">
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Automatic (Default)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Force Disabled</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Force Enabled</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0" colspan="2">
|
||||
<item row="2" column="0" colspan="2">
|
||||
<layout class="QGridLayout" name="gridLayout_7">
|
||||
<item row="1" column="0">
|
||||
<widget class="QCheckBox" name="useDebugDevice">
|
||||
|
|
|
@ -760,7 +760,6 @@ struct Pcsx2Config
|
|||
GSTextureInRtMode UserHacks_TextureInsideRt{GSTextureInRtMode::Disabled};
|
||||
TriFiltering TriFilter{TriFiltering::Automatic};
|
||||
int OverrideTextureBarriers{-1};
|
||||
int OverrideGeometryShaders{-1};
|
||||
|
||||
int CAS_Sharpness{50};
|
||||
int ShadeBoost_Brightness{50};
|
||||
|
|
|
@ -3325,8 +3325,6 @@ void FullscreenUI::DrawGraphicsSettingsPage()
|
|||
}
|
||||
DrawIntListSetting(bsi, "Override Texture Barriers", "Forces texture barrier functionality to the specified value.", "EmuCore/GS",
|
||||
"OverrideTextureBarriers", -1, s_generic_options, std::size(s_generic_options), -1);
|
||||
DrawIntListSetting(bsi, "Override Geometry Shaders", "Forces geometry shader functionality to the specified value.", "EmuCore/GS",
|
||||
"OverrideGeometryShaders", -1, s_generic_options, std::size(s_generic_options), -1);
|
||||
DrawIntListSetting(bsi, "GS Dump Compression", "Sets the compression algorithm for GS dumps.", "EmuCore/GS", "GSDumpCompression",
|
||||
static_cast<int>(GSDumpCompressionMethod::LZMA), s_gsdump_compression, std::size(s_gsdump_compression));
|
||||
DrawToggleSetting(bsi, "Disable Framebuffer Fetch", "Prevents the usage of framebuffer fetch when supported by host GPU.", "EmuCore/GS",
|
||||
|
|
|
@ -3037,6 +3037,28 @@ static constexpr u32 NumIndicesForPrim(u32 prim)
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the vertex-count threshold for the given primitive type, or 0 when
/// the primitive has no threshold. VertexKick flushes once the queued vertex
/// tail reaches a non-zero threshold.
static constexpr u32 MaxVerticesForPrim(u32 prim)
{
	switch (prim)
	{
		// Needed due to expansion in hardware renderers.
		case GS_POINTLIST:
		case GS_INVALID:
			return (std::numeric_limits<u16>::max() / 4) - 4;

		case GS_SPRITE:
			return (std::numeric_limits<u16>::max() / 2) - 2;

		// Lines, triangles and anything unrecognised are not capped.
		default:
			return 0;
	}
}
|
||||
|
||||
template <u32 prim, bool auto_flush, bool index_swap>
|
||||
__forceinline void GSState::VertexKick(u32 skip)
|
||||
{
|
||||
|
@ -3305,6 +3327,10 @@ __forceinline void GSState::VertexKick(u32 skip)
|
|||
}
|
||||
|
||||
CLUTAutoFlush(prim);
|
||||
|
||||
constexpr u32 max_vertices = MaxVerticesForPrim(prim);
|
||||
if (max_vertices != 0 && m_vertex.tail >= max_vertices)
|
||||
Flush(VERTEXCOUNT);
|
||||
}
|
||||
|
||||
/// Checks if region repeat is used (applying it does something to at least one of the values in min...max)
|
||||
|
|
|
@ -278,6 +278,7 @@ public:
|
|||
AUTOFLUSH = 1 << 11,
|
||||
VSYNC = 1 << 12,
|
||||
GSREOPEN = 1 << 13,
|
||||
VERTEXCOUNT = 1 << 14,
|
||||
};
|
||||
|
||||
GSFlushReason m_state_flush_reason = UNKNOWN;
|
||||
|
|
|
@ -163,6 +163,23 @@ std::string GSDevice::GetFullscreenModeString(u32 width, u32 height, float refre
|
|||
return StringUtil::StdStringFromFormat("%u x %u @ %f hz", width, height, refresh_rate);
|
||||
}
|
||||
|
||||
void GSDevice::GenerateExpansionIndexBuffer(void* buffer)
|
||||
{
|
||||
static constexpr u32 MAX_INDEX = std::numeric_limits<u16>::max();
|
||||
|
||||
u32* idx_buffer = static_cast<u32*>(buffer);
|
||||
for (u32 i = 0; i < MAX_INDEX; i++)
|
||||
{
|
||||
const u32 base = i * 4;
|
||||
*(idx_buffer++) = base + 0;
|
||||
*(idx_buffer++) = base + 1;
|
||||
*(idx_buffer++) = base + 2;
|
||||
*(idx_buffer++) = base + 1;
|
||||
*(idx_buffer++) = base + 2;
|
||||
*(idx_buffer++) = base + 3;
|
||||
}
|
||||
}
|
||||
|
||||
bool GSDevice::Create(const WindowInfo& wi, VsyncMode vsync)
|
||||
{
|
||||
m_window_info = wi;
|
||||
|
|
|
@ -235,13 +235,6 @@ struct alignas(16) GSHWDrawConfig
|
|||
Line,
|
||||
Triangle,
|
||||
};
|
||||
enum class GSTopology: u8
|
||||
{
|
||||
Point,
|
||||
Line,
|
||||
Triangle,
|
||||
Sprite,
|
||||
};
|
||||
enum class VSExpand: u8
|
||||
{
|
||||
None,
|
||||
|
@ -250,22 +243,6 @@ struct alignas(16) GSHWDrawConfig
|
|||
Sprite,
|
||||
};
|
||||
#pragma pack(push, 1)
|
||||
struct GSSelector
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
GSTopology topology : 2;
|
||||
bool expand : 1;
|
||||
bool iip : 1;
|
||||
bool forward_primid : 1;
|
||||
};
|
||||
u8 key;
|
||||
};
|
||||
GSSelector(): key(0) {}
|
||||
GSSelector(u8 k): key(k) {}
|
||||
};
|
||||
struct VSSelector
|
||||
{
|
||||
union
|
||||
|
@ -275,7 +252,7 @@ struct alignas(16) GSHWDrawConfig
|
|||
u8 fst : 1;
|
||||
u8 tme : 1;
|
||||
u8 iip : 1;
|
||||
u8 point_size : 1; ///< Set when points need to be expanded without geometry shader.
|
||||
u8 point_size : 1; ///< Set when points need to be expanded without VS expanding.
|
||||
VSExpand expand : 2;
|
||||
u8 _free : 2;
|
||||
};
|
||||
|
@ -283,6 +260,9 @@ struct alignas(16) GSHWDrawConfig
|
|||
};
|
||||
VSSelector(): key(0) {}
|
||||
VSSelector(u8 k): key(k) {}
|
||||
|
||||
/// Returns true if the fixed index buffer should be used.
|
||||
__fi bool UseExpandIndexBuffer() const { return (expand == VSExpand::Point || expand == VSExpand::Sprite); }
|
||||
};
|
||||
#pragma pack(pop)
|
||||
#pragma pack(push, 4)
|
||||
|
@ -657,7 +637,6 @@ struct alignas(16) GSHWDrawConfig
|
|||
Topology topology; ///< Draw topology
|
||||
|
||||
alignas(8) PSSelector ps;
|
||||
GSSelector gs;
|
||||
VSSelector vs;
|
||||
|
||||
BlendState blend;
|
||||
|
@ -713,13 +692,12 @@ public:
|
|||
struct FeatureSupport
|
||||
{
|
||||
bool broken_point_sampler : 1; ///< Issue with AMD cards, see tfx shader for details
|
||||
bool geometry_shader : 1; ///< Supports geometry shader
|
||||
bool vs_expand : 1; ///< Supports expanding points/lines/sprites in the vertex shader
|
||||
bool primitive_id : 1; ///< Supports primitive ID for use with prim tracking destination alpha algorithm
|
||||
bool texture_barrier : 1; ///< Supports sampling rt and hopefully texture barrier
|
||||
bool provoking_vertex_last: 1; ///< Supports using the last vertex in a primitive as the value for flat shading.
|
||||
bool point_expand : 1; ///< Supports point expansion in hardware without using geometry shaders.
|
||||
bool line_expand : 1; ///< Supports line expansion in hardware without using geometry shaders.
|
||||
bool point_expand : 1; ///< Supports point expansion in hardware.
|
||||
bool line_expand : 1; ///< Supports line expansion in hardware.
|
||||
bool prefer_new_textures : 1; ///< Allocate textures up to the pool size before reusing them, to avoid render pass restarts.
|
||||
bool dxt_textures : 1; ///< Supports DXTn texture compression, i.e. S3TC and BC1-3.
|
||||
bool bptc_textures : 1; ///< Supports BC6/7 texture compression.
|
||||
|
@ -771,6 +749,7 @@ protected:
|
|||
static constexpr float MAD_SENSITIVITY = 0.08f;
|
||||
static constexpr u32 MAX_POOLED_TEXTURES = 300;
|
||||
static constexpr u32 NUM_CAS_CONSTANTS = 12; // 8 plus src offset x/y, 16 byte alignment
|
||||
static constexpr u32 EXPAND_BUFFER_SIZE = sizeof(u32) * std::numeric_limits<u16>::max() * 6;
|
||||
|
||||
WindowInfo m_window_info;
|
||||
VsyncMode m_vsync_mode = VsyncMode::Off;
|
||||
|
@ -824,6 +803,9 @@ public:
|
|||
/// Converts a fullscreen mode to a string.
|
||||
static std::string GetFullscreenModeString(u32 width, u32 height, float refresh_rate);
|
||||
|
||||
/// Generates a fixed index buffer for expanding points and sprites. Buffer is assumed to be at least EXPAND_BUFFER_SIZE in size.
|
||||
static void GenerateExpansionIndexBuffer(void* buffer);
|
||||
|
||||
__fi unsigned int GetFrameNumber() const { return m_frame; }
|
||||
__fi u64 GetPoolMemoryUsage() const { return m_pool_memory_usage; }
|
||||
|
||||
|
|
|
@ -53,7 +53,6 @@ GSDevice11::GSDevice11()
|
|||
m_state.topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
|
||||
m_state.bf = -1;
|
||||
|
||||
m_features.geometry_shader = true;
|
||||
m_features.primitive_id = true;
|
||||
m_features.texture_barrier = false;
|
||||
m_features.provoking_vertex_last = false;
|
||||
|
@ -365,7 +364,46 @@ bool GSDevice11::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
Console.Error("Failed to create index buffer.");
|
||||
return false;
|
||||
}
|
||||
m_ctx->IASetIndexBuffer(m_ib.get(), DXGI_FORMAT_R32_UINT, 0);
|
||||
IASetIndexBuffer(m_ib.get());
|
||||
|
||||
if (m_features.vs_expand)
|
||||
{
|
||||
bd.ByteWidth = VERTEX_BUFFER_SIZE;
|
||||
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
bd.StructureByteStride = sizeof(GSVertex);
|
||||
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
|
||||
if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_expand_vb.put())))
|
||||
{
|
||||
Console.Error("Failed to create expand vertex buffer.");
|
||||
return false;
|
||||
}
|
||||
|
||||
const CD3D11_SHADER_RESOURCE_VIEW_DESC vb_srv_desc(
|
||||
D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 0, VERTEX_BUFFER_SIZE / sizeof(GSVertex));
|
||||
if (FAILED(m_dev->CreateShaderResourceView(m_expand_vb.get(), &vb_srv_desc, m_expand_vb_srv.put())))
|
||||
{
|
||||
Console.Error("Failed to create expand vertex buffer SRV.");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_ctx->VSSetShaderResources(0, 1, m_expand_vb_srv.addressof());
|
||||
|
||||
bd.ByteWidth = EXPAND_BUFFER_SIZE;
|
||||
bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
|
||||
bd.StructureByteStride = 0;
|
||||
bd.MiscFlags = 0;
|
||||
|
||||
std::unique_ptr<u8[]> expand_data = std::make_unique<u8[]>(EXPAND_BUFFER_SIZE);
|
||||
GenerateExpansionIndexBuffer(expand_data.get());
|
||||
|
||||
const D3D11_SUBRESOURCE_DATA srd = {expand_data.get()};
|
||||
if (FAILED(m_dev->CreateBuffer(&bd, &srd, m_expand_ib.put())))
|
||||
{
|
||||
Console.Error("Failed to create expand index buffer.");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
|
@ -466,6 +504,9 @@ void GSDevice11::Destroy()
|
|||
|
||||
m_vb.reset();
|
||||
m_ib.reset();
|
||||
m_expand_vb_srv.reset();
|
||||
m_expand_vb.reset();
|
||||
m_expand_ib.reset();
|
||||
|
||||
m_vs.clear();
|
||||
m_vs_cb.reset();
|
||||
|
@ -508,6 +549,9 @@ void GSDevice11::SetFeatures()
|
|||
SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC3_UNORM);
|
||||
|
||||
m_features.bptc_textures = SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC7_UNORM);
|
||||
|
||||
const D3D_FEATURE_LEVEL feature_level = m_dev->GetFeatureLevel();
|
||||
m_features.vs_expand = (feature_level >= D3D_FEATURE_LEVEL_11_0);
|
||||
}
|
||||
|
||||
bool GSDevice11::HasSurface() const
|
||||
|
@ -1234,11 +1278,6 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
|
|||
VSSetShader(m_convert.vs.get(), nullptr);
|
||||
|
||||
|
||||
// gs
|
||||
|
||||
GSSetShader(nullptr, nullptr);
|
||||
|
||||
|
||||
// ps
|
||||
|
||||
PSSetShaderResources(sTex, nullptr);
|
||||
|
@ -1307,11 +1346,6 @@ void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
|
|||
VSSetShader(m_present.vs.get(), nullptr);
|
||||
|
||||
|
||||
// gs
|
||||
|
||||
GSSetShader(nullptr, nullptr);
|
||||
|
||||
|
||||
// ps
|
||||
|
||||
PSSetShaderResources(sTex, nullptr);
|
||||
|
@ -1368,7 +1402,6 @@ void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_re
|
|||
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
|
||||
VSSetShader(m_convert.vs.get(), nullptr);
|
||||
GSSetShader(nullptr, nullptr);
|
||||
PSSetShader(m_convert.ps[static_cast<int>(shader)].get(), nullptr);
|
||||
|
||||
OMSetDepthStencilState(dTex->IsRenderTarget() ? m_convert.dss.get() : m_convert.dss_write.get(), 0);
|
||||
|
@ -1437,6 +1470,7 @@ void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rect
|
|||
|
||||
IAUnmapVertexBuffer(sizeof(GSVertexPT1), vcount);
|
||||
IAUnmapIndexBuffer(icount);
|
||||
IASetIndexBuffer(m_ib.get());
|
||||
|
||||
PSSetShaderResource(0, rects[0].src);
|
||||
PSSetSamplerState(rects[0].linear ? m_convert.ln.get() : m_convert.pt.get());
|
||||
|
@ -1682,7 +1716,6 @@ void GSDevice11::RenderImGui()
|
|||
IASetInputLayout(m_imgui.il.get());
|
||||
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
VSSetShader(m_imgui.vs.get(), m_imgui.vs_cb.get());
|
||||
GSSetShader(nullptr, nullptr);
|
||||
PSSetShader(m_imgui.ps.get(), nullptr);
|
||||
OMSetBlendState(m_imgui.bs.get(), 0.0f);
|
||||
OMSetDepthStencilState(m_convert.dss.get(), 0);
|
||||
|
@ -1761,7 +1794,7 @@ void GSDevice11::RenderImGui()
|
|||
}
|
||||
|
||||
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &m_state.vb_stride, &vb_offset);
|
||||
m_ctx->IASetIndexBuffer(m_ib.get(), DXGI_FORMAT_R32_UINT, 0);
|
||||
m_ctx->IASetIndexBuffer(m_state.index_buffer, DXGI_FORMAT_R32_UINT, 0);
|
||||
}
|
||||
|
||||
void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm)
|
||||
|
@ -1786,10 +1819,6 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
|
|||
|
||||
VSSetShader(m_convert.vs.get(), nullptr);
|
||||
|
||||
// gs
|
||||
|
||||
GSSetShader(nullptr, nullptr);
|
||||
|
||||
// ps
|
||||
PSSetShaderResources(rt, nullptr);
|
||||
PSSetSamplerState(m_convert.pt.get());
|
||||
|
@ -1852,6 +1881,37 @@ bool GSDevice11::IASetVertexBuffer(const void* vertex, u32 stride, u32 count)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool GSDevice11::IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count)
|
||||
{
|
||||
const u32 size = stride * count;
|
||||
if (size > VERTEX_BUFFER_SIZE)
|
||||
return false;
|
||||
|
||||
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
|
||||
|
||||
m_vertex.start = (m_structured_vb_pos + (stride - 1)) / stride;
|
||||
m_structured_vb_pos = (m_vertex.start * stride) + size;
|
||||
if (m_structured_vb_pos > VERTEX_BUFFER_SIZE)
|
||||
{
|
||||
m_vertex.start = 0;
|
||||
m_structured_vb_pos = size;
|
||||
type = D3D11_MAP_WRITE_DISCARD;
|
||||
}
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE m;
|
||||
if (FAILED(m_ctx->Map(m_expand_vb.get(), 0, type, 0, &m)))
|
||||
return false;
|
||||
|
||||
void* map = static_cast<u8*>(m.pData) + (m_vertex.start * stride);
|
||||
|
||||
GSVector4i::storent(map, vertex, count * stride);
|
||||
|
||||
m_ctx->Unmap(m_expand_vb.get(), 0);
|
||||
|
||||
m_vertex.count = count;
|
||||
return true;
|
||||
}
|
||||
|
||||
u32* GSDevice11::IAMapIndexBuffer(u32 count)
|
||||
{
|
||||
if (count > (INDEX_BUFFER_SIZE / sizeof(u32)))
|
||||
|
@ -1890,9 +1950,19 @@ bool GSDevice11::IASetIndexBuffer(const void* index, u32 count)
|
|||
|
||||
std::memcpy(map, index, count * sizeof(u32));
|
||||
IAUnmapIndexBuffer(count);
|
||||
IASetIndexBuffer(m_ib.get());
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Binds `buffer` as the index buffer (R32_UINT, offset 0). The device context
/// is only touched when `buffer` differs from the binding cached in m_state.
void GSDevice11::IASetIndexBuffer(ID3D11Buffer* buffer)
{
	if (m_state.index_buffer == buffer)
		return;

	m_state.index_buffer = buffer;
	m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R32_UINT, 0);
}
|
||||
|
||||
void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
|
||||
{
|
||||
if (m_state.layout != layout)
|
||||
|
@ -1930,23 +2000,6 @@ void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb)
|
|||
}
|
||||
}
|
||||
|
||||
/// Binds the geometry shader and its constant buffer (slot 0). Each of the two
/// is compared against the value cached in m_state and only re-bound when it
/// has changed.
void GSDevice11::GSSetShader(ID3D11GeometryShader* gs, ID3D11Buffer* gs_cb)
{
	const bool shader_changed = (m_state.gs != gs);
	if (shader_changed)
	{
		m_ctx->GSSetShader(gs, nullptr, 0);
		m_state.gs = gs;
	}

	const bool cb_changed = (m_state.gs_cb != gs_cb);
	if (cb_changed)
	{
		m_ctx->GSSetConstantBuffers(0, 1, &gs_cb);
		m_state.gs_cb = gs_cb;
	}
}
|
||||
|
||||
void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
|
||||
{
|
||||
PSSetShaderResource(0, sr0);
|
||||
|
@ -2172,12 +2225,40 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
|
||||
}
|
||||
|
||||
if (!IASetVertexBuffer(config.verts, sizeof(*config.verts), config.nverts) ||
|
||||
!IASetIndexBuffer(config.indices, config.nindices))
|
||||
if (config.vs.expand != GSHWDrawConfig::VSExpand::None)
|
||||
{
|
||||
Console.Error("Failed to upload vertices/indices (%u/%u)", config.nverts, config.nindices);
|
||||
return;
|
||||
if (!IASetExpandVertexBuffer(config.verts, sizeof(*config.verts), config.nverts))
|
||||
{
|
||||
Console.Error("Failed to upload structured vertices (%u)", config.nverts);
|
||||
return;
|
||||
}
|
||||
|
||||
config.cb_vs.max_depth.y = m_vertex.start;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!IASetVertexBuffer(config.verts, sizeof(*config.verts), config.nverts))
|
||||
{
|
||||
Console.Error("Failed to upload vertices (%u)", config.nverts);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (config.vs.UseExpandIndexBuffer())
|
||||
{
|
||||
IASetIndexBuffer(m_expand_ib.get());
|
||||
m_index.start = 0;
|
||||
m_index.count = config.nindices;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!IASetIndexBuffer(config.indices, config.nindices))
|
||||
{
|
||||
Console.Error("Failed to upload indices (%u)", config.nindices);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
D3D11_PRIMITIVE_TOPOLOGY topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
|
||||
switch (config.topology)
|
||||
{
|
||||
|
@ -2207,7 +2288,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||
}
|
||||
|
||||
SetupVS(config.vs, &config.cb_vs);
|
||||
SetupGS(config.gs);
|
||||
SetupPS(config.ps, &config.cb_ps, config.sampler);
|
||||
|
||||
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
|
||||
|
@ -2223,7 +2303,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||
blend.blend_op = 3; // MIN
|
||||
SetupOM(dss, blend, 0);
|
||||
OMSetRenderTargets(primid_tex, config.ds, &config.scissor);
|
||||
|
||||
DrawIndexedPrimitive();
|
||||
|
||||
config.ps.date = 3;
|
||||
|
@ -2234,7 +2313,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||
|
||||
SetupOM(config.depth, convertSel(config.colormask, config.blend), config.blend.constant);
|
||||
OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor);
|
||||
|
||||
DrawIndexedPrimitive();
|
||||
|
||||
if (config.separate_alpha_pass)
|
||||
|
@ -2243,7 +2321,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||
SetHWDrawConfigForAlphaPass(&config.ps, &config.colormask, &sap_blend, &config.depth);
|
||||
SetupOM(config.depth, convertSel(config.colormask, sap_blend), config.blend.constant);
|
||||
SetupPS(config.ps, &config.cb_ps, config.sampler);
|
||||
|
||||
DrawIndexedPrimitive();
|
||||
}
|
||||
|
||||
|
@ -2262,7 +2339,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||
}
|
||||
|
||||
SetupOM(config.alpha_second_pass.depth, convertSel(config.alpha_second_pass.colormask, config.blend), config.blend.constant);
|
||||
|
||||
DrawIndexedPrimitive();
|
||||
|
||||
if (config.second_separate_alpha_pass)
|
||||
|
@ -2271,7 +2347,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
|
|||
SetHWDrawConfigForAlphaPass(&config.alpha_second_pass.ps, &config.alpha_second_pass.colormask, &sap_blend, &config.alpha_second_pass.depth);
|
||||
SetupOM(config.alpha_second_pass.depth, convertSel(config.alpha_second_pass.colormask, sap_blend), config.blend.constant);
|
||||
SetupPS(config.alpha_second_pass.ps, &config.cb_ps, config.sampler);
|
||||
|
||||
DrawIndexedPrimitive();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,7 +34,6 @@ class GSDevice11 final : public GSDevice
|
|||
{
|
||||
public:
|
||||
using VSSelector = GSHWDrawConfig::VSSelector;
|
||||
using GSSelector = GSHWDrawConfig::GSSelector;
|
||||
using PSSelector = GSHWDrawConfig::PSSelector;
|
||||
using PSSamplerSelector = GSHWDrawConfig::SamplerSelector;
|
||||
using OMDepthStencilSelector = GSHWDrawConfig::DepthStencilSelector;
|
||||
|
@ -150,8 +149,12 @@ private:
|
|||
|
||||
wil::com_ptr_nothrow<ID3D11Buffer> m_vb;
|
||||
wil::com_ptr_nothrow<ID3D11Buffer> m_ib;
|
||||
wil::com_ptr_nothrow<ID3D11Buffer> m_expand_vb;
|
||||
wil::com_ptr_nothrow<ID3D11Buffer> m_expand_ib;
|
||||
wil::com_ptr_nothrow<ID3D11ShaderResourceView> m_expand_vb_srv;
|
||||
u32 m_vb_pos = 0; // bytes
|
||||
u32 m_ib_pos = 0; // indices/sizeof(u32)
|
||||
u32 m_structured_vb_pos = 0; // bytes
|
||||
int m_d3d_texsize = 0;
|
||||
|
||||
bool m_allow_tearing_supported = false;
|
||||
|
@ -162,10 +165,9 @@ private:
|
|||
{
|
||||
ID3D11InputLayout* layout;
|
||||
D3D11_PRIMITIVE_TOPOLOGY topology;
|
||||
ID3D11Buffer* index_buffer;
|
||||
ID3D11VertexShader* vs;
|
||||
ID3D11Buffer* vs_cb;
|
||||
ID3D11GeometryShader* gs;
|
||||
ID3D11Buffer* gs_cb;
|
||||
std::array<ID3D11ShaderResourceView*, MAX_TEXTURES> ps_sr_views;
|
||||
ID3D11PixelShader* ps;
|
||||
ID3D11Buffer* ps_cb;
|
||||
|
@ -339,16 +341,17 @@ public:
|
|||
void* IAMapVertexBuffer(u32 stride, u32 count);
|
||||
void IAUnmapVertexBuffer(u32 stride, u32 count);
|
||||
bool IASetVertexBuffer(const void* vertex, u32 stride, u32 count);
|
||||
bool IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count);
|
||||
|
||||
u32* IAMapIndexBuffer(u32 count);
|
||||
void IAUnmapIndexBuffer(u32 count);
|
||||
bool IASetIndexBuffer(const void* index, u32 count);
|
||||
void IASetIndexBuffer(ID3D11Buffer* buffer);
|
||||
|
||||
void IASetInputLayout(ID3D11InputLayout* layout);
|
||||
void IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology);
|
||||
|
||||
void VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb);
|
||||
void GSSetShader(ID3D11GeometryShader* gs, ID3D11Buffer* gs_cb = nullptr);
|
||||
|
||||
void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1);
|
||||
void PSSetShaderResource(int i, GSTexture* sr);
|
||||
|
@ -364,7 +367,6 @@ public:
|
|||
|
||||
bool CreateTextureFX();
|
||||
void SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* cb);
|
||||
void SetupGS(GSSelector sel);
|
||||
void SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel);
|
||||
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix);
|
||||
|
||||
|
|
|
@ -53,8 +53,6 @@ bool GSDevice11::CreateTextureFX()
|
|||
|
||||
SetupVS(sel, &cb);
|
||||
|
||||
SetupGS(GSSelector(1));
|
||||
|
||||
//
|
||||
|
||||
return true;
|
||||
|
@ -68,11 +66,13 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer*
|
|||
{
|
||||
ShaderMacro sm(m_shader_cache.GetFeatureLevel());
|
||||
|
||||
sm.AddMacro("VERTEX_SHADER", 1);
|
||||
sm.AddMacro("VS_TME", sel.tme);
|
||||
sm.AddMacro("VS_FST", sel.fst);
|
||||
sm.AddMacro("VS_IIP", sel.iip);
|
||||
sm.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));
|
||||
|
||||
D3D11_INPUT_ELEMENT_DESC layout[] =
|
||||
static constexpr const D3D11_INPUT_ELEMENT_DESC layout[] =
|
||||
{
|
||||
{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
|
@ -84,8 +84,16 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer*
|
|||
};
|
||||
|
||||
GSVertexShader11 vs;
|
||||
m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(),
|
||||
vs.vs.put(), vs.il.put(), layout, std::size(layout), m_tfx_source, sm.GetPtr(), "vs_main");
|
||||
if (sel.expand == GSHWDrawConfig::VSExpand::None)
|
||||
{
|
||||
m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), vs.vs.put(), vs.il.put(), layout,
|
||||
std::size(layout), m_tfx_source, sm.GetPtr(), "vs_main");
|
||||
}
|
||||
else
|
||||
{
|
||||
vs.vs = m_shader_cache.GetVertexShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "vs_main_expand");
|
||||
}
|
||||
|
||||
i = m_vs.try_emplace(sel.key, std::move(vs)).first;
|
||||
}
|
||||
|
||||
|
@ -99,37 +107,6 @@ void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer*
|
|||
IASetInputLayout(i->second.il.get());
|
||||
}
|
||||
|
||||
/// Selects the geometry shader for `sel` and binds it together with m_vs_cb.
/// When sel.expand is not set, no geometry shader is bound (null). Otherwise
/// the shader is taken from m_gs, compiling and caching it on first use.
void GSDevice11::SetupGS(GSSelector sel)
{
	wil::com_ptr_nothrow<ID3D11GeometryShader> shader;

	if (sel.expand)
	{
		const auto cached = std::as_const(m_gs).find(sel.key);
		if (cached == m_gs.end())
		{
			// Not compiled yet: build the permutation described by the selector.
			ShaderMacro sm(m_shader_cache.GetFeatureLevel());
			sm.AddMacro("GS_IIP", sel.iip);
			sm.AddMacro("GS_PRIM", static_cast<int>(sel.topology));
			sm.AddMacro("GS_EXPAND", sel.expand);
			sm.AddMacro("GS_FORWARD_PRIMID", sel.forward_primid);

			shader = m_shader_cache.GetGeometryShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "gs_main");
			m_gs[sel.key] = shader;
		}
		else
		{
			shader = cached->second;
		}
	}

	GSSetShader(shader.get(), m_vs_cb.get());
}
|
||||
|
||||
void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel)
|
||||
{
|
||||
auto i = std::as_const(m_ps).find(sel);
|
||||
|
@ -138,6 +115,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
|
|||
{
|
||||
ShaderMacro sm(m_shader_cache.GetFeatureLevel());
|
||||
|
||||
sm.AddMacro("PIXEL_SHADER", 1);
|
||||
sm.AddMacro("PS_FST", sel.fst);
|
||||
sm.AddMacro("PS_WMS", sel.wms);
|
||||
sm.AddMacro("PS_WMT", sel.wmt);
|
||||
|
|
|
@ -602,7 +602,6 @@ bool GSDevice12::CheckFeatures()
|
|||
|
||||
m_features.texture_barrier = false;
|
||||
m_features.broken_point_sampler = isAMD;
|
||||
m_features.geometry_shader = true;
|
||||
m_features.primitive_id = true;
|
||||
m_features.prefer_new_textures = true;
|
||||
m_features.provoking_vertex_last = false;
|
||||
|
@ -613,6 +612,7 @@ bool GSDevice12::CheckFeatures()
|
|||
m_features.clip_control = true;
|
||||
m_features.stencil_buffer = true;
|
||||
m_features.test_and_sample_depth = false;
|
||||
m_features.vs_expand = true;
|
||||
|
||||
m_features.dxt_textures = g_d3d12_context->SupportsTextureFormat(DXGI_FORMAT_BC1_UNORM) &&
|
||||
g_d3d12_context->SupportsTextureFormat(DXGI_FORMAT_BC2_UNORM) &&
|
||||
|
@ -1729,6 +1729,13 @@ bool GSDevice12::CreateBuffers()
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!g_d3d12_context->AllocatePreinitializedGPUBuffer(EXPAND_BUFFER_SIZE, &m_expand_index_buffer,
|
||||
&m_expand_index_buffer_allocation, &GSDevice::GenerateExpansionIndexBuffer))
|
||||
{
|
||||
Host::ReportErrorAsync("GS", "Failed to allocate expansion index buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1753,6 +1760,7 @@ bool GSDevice12::CreateRootSignatures()
|
|||
rsb.SetInputAssemblerFlag();
|
||||
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddSRVParameter(0, D3D12_SHADER_VISIBILITY_VERTEX);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
|
@ -2096,7 +2104,6 @@ void GSDevice12::DestroyResources()
|
|||
g_d3d12_context->DeferObjectDestruction(it.second.get());
|
||||
m_tfx_pipelines.clear();
|
||||
m_tfx_pixel_shaders.clear();
|
||||
m_tfx_geometry_shaders.clear();
|
||||
m_tfx_vertex_shaders.clear();
|
||||
m_interlace = {};
|
||||
m_merge = {};
|
||||
|
@ -2119,6 +2126,8 @@ void GSDevice12::DestroyResources()
|
|||
g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetSamplerHeapManager(), &m_point_sampler_cpu);
|
||||
g_d3d12_context->InvalidateSamplerGroups();
|
||||
|
||||
m_expand_index_buffer.reset();
|
||||
m_expand_index_buffer_allocation.reset();
|
||||
m_pixel_constant_buffer.Destroy(false);
|
||||
m_vertex_constant_buffer.Destroy(false);
|
||||
m_index_stream_buffer.Destroy(false);
|
||||
|
@ -2139,32 +2148,18 @@ const ID3DBlob* GSDevice12::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
|
|||
return it->second.get();
|
||||
|
||||
ShaderMacro sm(m_shader_cache.GetFeatureLevel());
|
||||
sm.AddMacro("VERTEX_SHADER", 1);
|
||||
sm.AddMacro("VS_TME", sel.tme);
|
||||
sm.AddMacro("VS_FST", sel.fst);
|
||||
sm.AddMacro("VS_IIP", sel.iip);
|
||||
sm.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));
|
||||
|
||||
ComPtr<ID3DBlob> vs(m_shader_cache.GetVertexShader(m_tfx_source, sm.GetPtr(), "vs_main"));
|
||||
const char* entry_point = (sel.expand != GSHWDrawConfig::VSExpand::None) ? "vs_main_expand" : "vs_main";
|
||||
ComPtr<ID3DBlob> vs(m_shader_cache.GetVertexShader(m_tfx_source, sm.GetPtr(), entry_point));
|
||||
it = m_tfx_vertex_shaders.emplace(sel.key, std::move(vs)).first;
|
||||
return it->second.get();
|
||||
}
|
||||
|
||||
/// Returns the geometry shader blob for `sel`, looking it up in
/// m_tfx_geometry_shaders first and compiling + inserting it on a miss.
/// The result of the compile call is stored as-is under sel.key.
const ID3DBlob* GSDevice12::GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel)
{
	if (auto cached = m_tfx_geometry_shaders.find(sel.key); cached != m_tfx_geometry_shaders.end())
		return cached->second.get();

	ShaderMacro sm(m_shader_cache.GetFeatureLevel());
	sm.AddMacro("GS_IIP", sel.iip);
	sm.AddMacro("GS_PRIM", static_cast<int>(sel.topology));
	sm.AddMacro("GS_EXPAND", sel.expand);
	sm.AddMacro("GS_FORWARD_PRIMID", sel.forward_primid);

	ComPtr<ID3DBlob> blob(m_shader_cache.GetGeometryShader(m_tfx_source, sm.GetPtr(), "gs_main"));
	const auto inserted = m_tfx_geometry_shaders.emplace(sel.key, std::move(blob)).first;
	return inserted->second.get();
}
|
||||
|
||||
const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sel)
|
||||
{
|
||||
auto it = m_tfx_pixel_shaders.find(sel);
|
||||
|
@ -2172,6 +2167,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
|
|||
return it->second.get();
|
||||
|
||||
ShaderMacro sm(m_shader_cache.GetFeatureLevel());
|
||||
sm.AddMacro("PIXEL_SHADER", 1);
|
||||
sm.AddMacro("PS_FST", sel.fst);
|
||||
sm.AddMacro("PS_WMS", sel.wms);
|
||||
sm.AddMacro("PS_WMT", sel.wmt);
|
||||
|
@ -2246,9 +2242,8 @@ GSDevice12::ComPtr<ID3D12PipelineState> GSDevice12::CreateTFXPipeline(const Pipe
|
|||
}
|
||||
|
||||
const ID3DBlob* vs = GetTFXVertexShader(p.vs);
|
||||
const ID3DBlob* gs = p.gs.expand ? GetTFXGeometryShader(p.gs) : nullptr;
|
||||
const ID3DBlob* ps = GetTFXPixelShader(pps);
|
||||
if (!vs || (p.gs.expand && !gs) || !ps)
|
||||
if (!vs || !ps)
|
||||
return nullptr;
|
||||
|
||||
// Common state
|
||||
|
@ -2271,18 +2266,19 @@ GSDevice12::ComPtr<ID3D12PipelineState> GSDevice12::CreateTFXPipeline(const Pipe
|
|||
|
||||
// Shaders
|
||||
gpb.SetVertexShader(vs);
|
||||
if (gs)
|
||||
gpb.SetGeometryShader(gs);
|
||||
gpb.SetPixelShader(ps);
|
||||
|
||||
// IA
|
||||
gpb.AddVertexAttribute("TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0);
|
||||
gpb.AddVertexAttribute("COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8);
|
||||
gpb.AddVertexAttribute("TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12);
|
||||
gpb.AddVertexAttribute("POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16);
|
||||
gpb.AddVertexAttribute("POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20);
|
||||
gpb.AddVertexAttribute("TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24);
|
||||
gpb.AddVertexAttribute("COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28);
|
||||
if (p.vs.expand == GSHWDrawConfig::VSExpand::None)
|
||||
{
|
||||
gpb.AddVertexAttribute("TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0);
|
||||
gpb.AddVertexAttribute("COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8);
|
||||
gpb.AddVertexAttribute("TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12);
|
||||
gpb.AddVertexAttribute("POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16);
|
||||
gpb.AddVertexAttribute("POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20);
|
||||
gpb.AddVertexAttribute("TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24);
|
||||
gpb.AddVertexAttribute("COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28);
|
||||
}
|
||||
|
||||
// DepthStencil
|
||||
if (p.ds)
|
||||
|
@ -2337,7 +2333,7 @@ GSDevice12::ComPtr<ID3D12PipelineState> GSDevice12::CreateTFXPipeline(const Pipe
|
|||
if (pipeline)
|
||||
{
|
||||
D3D12::SetObjectNameFormatted(
|
||||
pipeline.get(), "TFX Pipeline %08X/%08X/%" PRIX64 "%08X", p.vs.key, p.gs.key, p.ps.key_hi, p.ps.key_lo);
|
||||
pipeline.get(), "TFX Pipeline %08X/%" PRIX64 "%08X", p.vs.key, p.ps.key_hi, p.ps.key_lo);
|
||||
}
|
||||
|
||||
return pipeline;
|
||||
|
@ -2941,6 +2937,11 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
|
|||
cmdlist->SetGraphicsRootConstantBufferView(TFX_ROOT_SIGNATURE_PARAM_VS_CBV, m_tfx_constant_buffers[0]);
|
||||
if (flags & DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING)
|
||||
cmdlist->SetGraphicsRootConstantBufferView(TFX_ROOT_SIGNATURE_PARAM_PS_CBV, m_tfx_constant_buffers[1]);
|
||||
if (flags & DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING)
|
||||
{
|
||||
cmdlist->SetGraphicsRootShaderResourceView(TFX_ROOT_SIGNATURE_PARAM_VS_SRV,
|
||||
m_vertex_stream_buffer.GetGPUPointer() + m_vertex.start * sizeof(GSVertex));
|
||||
}
|
||||
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE)
|
||||
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES, m_tfx_textures_handle_gpu);
|
||||
if (flags & DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE)
|
||||
|
@ -3070,8 +3071,7 @@ GSTexture12* GSDevice12::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config, Pipe
|
|||
|
||||
// image is now filled with either -1 or INT_MAX, so now we can do the prepass
|
||||
SetPrimitiveTopology(s_primitive_topology_mapping[static_cast<u8>(config.topology)]);
|
||||
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
|
||||
IASetIndexBuffer(config.indices, config.nindices);
|
||||
UploadHWDrawVerticesAndIndices(config);
|
||||
|
||||
// cut down the configuration for the prepass, we don't need blending or any feedback loop
|
||||
PipelineSelector init_pipe(m_pipeline_selector);
|
||||
|
@ -3252,10 +3252,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
|||
// VB/IB upload, if we did DATE setup and it's not HDR this has already been done
|
||||
SetPrimitiveTopology(s_primitive_topology_mapping[static_cast<u8>(config.topology)]);
|
||||
if (!date_image || hdr_rt)
|
||||
{
|
||||
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
|
||||
IASetIndexBuffer(config.indices, config.nindices);
|
||||
}
|
||||
UploadHWDrawVerticesAndIndices(config);
|
||||
|
||||
// now we can do the actual draw
|
||||
if (BindDrawPipeline(pipe))
|
||||
|
@ -3333,7 +3330,6 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
|||
void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config)
|
||||
{
|
||||
m_pipeline_selector.vs.key = config.vs.key;
|
||||
m_pipeline_selector.gs.key = config.gs.key;
|
||||
m_pipeline_selector.ps.key_hi = config.ps.key_hi;
|
||||
m_pipeline_selector.ps.key_lo = config.ps.key_lo;
|
||||
m_pipeline_selector.dss.key = config.depth.key;
|
||||
|
@ -3344,3 +3340,23 @@ void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config)
|
|||
m_pipeline_selector.rt = config.rt != nullptr;
|
||||
m_pipeline_selector.ds = config.ds != nullptr;
|
||||
}
|
||||
|
||||
void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
||||
{
|
||||
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
|
||||
|
||||
// Update SRV in root signature directly, rather than using a uniform for base vertex.
|
||||
if (config.vs.expand != GSHWDrawConfig::VSExpand::None)
|
||||
m_dirty_flags |= DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING;
|
||||
|
||||
if (config.vs.UseExpandIndexBuffer())
|
||||
{
|
||||
m_index.start = 0;
|
||||
m_index.count = config.nindices;
|
||||
SetIndexBuffer(m_expand_index_buffer->GetGPUVirtualAddress(), EXPAND_BUFFER_SIZE, DXGI_FORMAT_R32_UINT);
|
||||
}
|
||||
else
|
||||
{
|
||||
IASetIndexBuffer(config.indices, config.nindices);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,7 +52,6 @@ public:
|
|||
};
|
||||
|
||||
GSHWDrawConfig::VSSelector vs;
|
||||
GSHWDrawConfig::GSSelector gs;
|
||||
GSHWDrawConfig::DepthStencilSelector dss;
|
||||
GSHWDrawConfig::ColorMaskSelector cms;
|
||||
GSHWDrawConfig::BlendState bs;
|
||||
|
@ -69,7 +68,7 @@ public:
|
|||
std::size_t operator()(const PipelineSelector& e) const noexcept
|
||||
{
|
||||
std::size_t hash = 0;
|
||||
HashCombine(hash, e.vs.key, e.gs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key);
|
||||
HashCombine(hash, e.vs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key);
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
|
@ -124,9 +123,10 @@ public:
|
|||
|
||||
TFX_ROOT_SIGNATURE_PARAM_VS_CBV = 0,
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_CBV = 1,
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 2,
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 3,
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 4,
|
||||
TFX_ROOT_SIGNATURE_PARAM_VS_SRV = 2,
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 3,
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 4,
|
||||
TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 5,
|
||||
|
||||
UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS = 0,
|
||||
UTILITY_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 1,
|
||||
|
@ -154,6 +154,8 @@ private:
|
|||
D3D12::StreamBuffer m_index_stream_buffer;
|
||||
D3D12::StreamBuffer m_vertex_constant_buffer;
|
||||
D3D12::StreamBuffer m_pixel_constant_buffer;
|
||||
ComPtr<ID3D12Resource> m_expand_index_buffer;
|
||||
ComPtr<D3D12MA::Allocation> m_expand_index_buffer_allocation;
|
||||
|
||||
D3D12::DescriptorHandle m_point_sampler_cpu;
|
||||
D3D12::DescriptorHandle m_linear_sampler_cpu;
|
||||
|
@ -173,7 +175,6 @@ private:
|
|||
ComPtr<ID3D12PipelineState> m_imgui_pipeline;
|
||||
|
||||
std::unordered_map<u32, ComPtr<ID3DBlob>> m_tfx_vertex_shaders;
|
||||
std::unordered_map<u32, ComPtr<ID3DBlob>> m_tfx_geometry_shaders;
|
||||
std::unordered_map<GSHWDrawConfig::PSSelector, ComPtr<ID3DBlob>, GSHWDrawConfig::PSSelectorHash> m_tfx_pixel_shaders;
|
||||
std::unordered_map<PipelineSelector, ComPtr<ID3D12PipelineState>, PipelineSelectorHash> m_tfx_pipelines;
|
||||
|
||||
|
@ -209,7 +210,6 @@ private:
|
|||
bool GetTextureGroupDescriptors(D3D12::DescriptorHandle* gpu_handle, const D3D12::DescriptorHandle* cpu_handles, u32 count);
|
||||
|
||||
const ID3DBlob* GetTFXVertexShader(GSHWDrawConfig::VSSelector sel);
|
||||
const ID3DBlob* GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel);
|
||||
const ID3DBlob* GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sel);
|
||||
ComPtr<ID3D12PipelineState> CreateTFXPipeline(const PipelineSelector& p);
|
||||
const ID3D12PipelineState* GetTFXPipeline(const PipelineSelector& p);
|
||||
|
@ -317,6 +317,7 @@ public:
|
|||
|
||||
void RenderHW(GSHWDrawConfig& config) override;
|
||||
void UpdateHWPipelineSelector(GSHWDrawConfig& config);
|
||||
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
|
||||
|
||||
public:
|
||||
/// Ends any render pass, executes the command buffer, and invalidates cached state.
|
||||
|
@ -377,25 +378,27 @@ private:
|
|||
|
||||
DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 5),
|
||||
DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 6),
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 7),
|
||||
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 8),
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 9),
|
||||
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 7),
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 8),
|
||||
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 9),
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 10),
|
||||
|
||||
DIRTY_FLAG_VERTEX_BUFFER = (1 << 10),
|
||||
DIRTY_FLAG_INDEX_BUFFER = (1 << 11),
|
||||
DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 12),
|
||||
DIRTY_FLAG_VIEWPORT = (1 << 13),
|
||||
DIRTY_FLAG_SCISSOR = (1 << 14),
|
||||
DIRTY_FLAG_RENDER_TARGET = (1 << 15),
|
||||
DIRTY_FLAG_PIPELINE = (1 << 16),
|
||||
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 17),
|
||||
DIRTY_FLAG_STENCIL_REF = (1 << 18),
|
||||
DIRTY_FLAG_VERTEX_BUFFER = (1 << 11),
|
||||
DIRTY_FLAG_INDEX_BUFFER = (1 << 12),
|
||||
DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 13),
|
||||
DIRTY_FLAG_VIEWPORT = (1 << 14),
|
||||
DIRTY_FLAG_SCISSOR = (1 << 15),
|
||||
DIRTY_FLAG_RENDER_TARGET = (1 << 16),
|
||||
DIRTY_FLAG_PIPELINE = (1 << 17),
|
||||
DIRTY_FLAG_BLEND_CONSTANTS = (1 << 18),
|
||||
DIRTY_FLAG_STENCIL_REF = (1 << 19),
|
||||
|
||||
DIRTY_BASE_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
|
||||
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 |
|
||||
DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE |
|
||||
DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 |
|
||||
DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PRIMITIVE_TOPOLOGY |
|
||||
DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET |
|
||||
DIRTY_FLAG_PIPELINE | DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF,
|
||||
DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_PIPELINE |
|
||||
DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF,
|
||||
|
||||
DIRTY_TFX_STATE = DIRTY_BASE_STATE | DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS | DIRTY_FLAG_TFX_RT_TEXTURES,
|
||||
DIRTY_UTILITY_STATE = DIRTY_BASE_STATE,
|
||||
|
|
|
@ -249,6 +249,7 @@ void GSRendererHW::Lines2Sprites()
|
|||
}
|
||||
|
||||
// assume vertices are tightly packed and sequentially indexed (it should be the case)
|
||||
const bool predivide_q = PRIM->TME && !PRIM->FST && m_vt.m_accurate_stq;
|
||||
|
||||
if (m_vertex.next >= 2)
|
||||
{
|
||||
|
@ -275,7 +276,7 @@ void GSRendererHW::Lines2Sprites()
|
|||
v0.XYZ.Z = v1.XYZ.Z;
|
||||
v0.FOG = v1.FOG;
|
||||
|
||||
if (PRIM->TME && !PRIM->FST)
|
||||
if (predivide_q)
|
||||
{
|
||||
const GSVector4 st0 = GSVector4::loadl(&v0.ST.U64);
|
||||
const GSVector4 st1 = GSVector4::loadl(&v1.ST.U64);
|
||||
|
@ -319,65 +320,28 @@ void GSRendererHW::Lines2Sprites()
|
|||
}
|
||||
}
|
||||
|
||||
template <GSHWDrawConfig::VSExpand Expand>
|
||||
void GSRendererHW::ExpandIndices()
|
||||
void GSRendererHW::ExpandLineIndices()
|
||||
{
|
||||
u32 process_count = (m_index.tail + 3) / 4 * 4;
|
||||
if (Expand == GSHWDrawConfig::VSExpand::Point)
|
||||
{
|
||||
// Make sure we have space for writing off the end slightly
|
||||
while (process_count > m_vertex.maxcount)
|
||||
GrowVertexBuffer();
|
||||
}
|
||||
|
||||
u32 expansion_factor = Expand == GSHWDrawConfig::VSExpand::Point ? 6 : 3;
|
||||
const u32 process_count = (m_index.tail + 3) / 4 * 4;
|
||||
const u32 expansion_factor = 3;
|
||||
m_index.tail *= expansion_factor;
|
||||
GSVector4i* end = reinterpret_cast<GSVector4i*>(m_index.buff);
|
||||
GSVector4i* read = reinterpret_cast<GSVector4i*>(m_index.buff + process_count);
|
||||
GSVector4i* write = reinterpret_cast<GSVector4i*>(m_index.buff + process_count * expansion_factor);
|
||||
|
||||
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
|
||||
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
|
||||
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
|
||||
|
||||
while (read > end)
|
||||
{
|
||||
read -= 1;
|
||||
write -= expansion_factor;
|
||||
switch (Expand)
|
||||
{
|
||||
case GSHWDrawConfig::VSExpand::None:
|
||||
break;
|
||||
case GSHWDrawConfig::VSExpand::Point:
|
||||
{
|
||||
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
|
||||
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
|
||||
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
|
||||
const GSVector4i in = read->sll32(2);
|
||||
write[0] = in.xxxx() | low0;
|
||||
write[1] = in.xxyy() | low1;
|
||||
write[2] = in.yyyy() | low2;
|
||||
write[3] = in.zzzz() | low0;
|
||||
write[4] = in.zzww() | low1;
|
||||
write[5] = in.wwww() | low2;
|
||||
break;
|
||||
}
|
||||
case GSHWDrawConfig::VSExpand::Line:
|
||||
{
|
||||
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
|
||||
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
|
||||
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
|
||||
const GSVector4i in = read->sll32(2);
|
||||
write[0] = in.xxyx() | low0;
|
||||
write[1] = in.yyzz() | low1;
|
||||
write[2] = in.wzww() | low2;
|
||||
break;
|
||||
}
|
||||
case GSHWDrawConfig::VSExpand::Sprite:
|
||||
{
|
||||
constexpr GSVector4i low = GSVector4i::cxpr(0, 1, 0, 1);
|
||||
const GSVector4i in = read->sll32(1);
|
||||
write[0] = in.xxyx() | low;
|
||||
write[1] = in.yyzz() | low;
|
||||
write[2] = in.wzww() | low;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const GSVector4i in = read->sll32(2);
|
||||
write[0] = in.xxyx() | low0;
|
||||
write[1] = in.yyzz() | low1;
|
||||
write[2] = in.wzww() | low2;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2453,110 +2417,90 @@ void GSRendererHW::SetupIA(float target_scale, float sx, float sy)
|
|||
switch (m_vt.m_primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Point;
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Point;
|
||||
m_conf.indices_per_prim = 1;
|
||||
if (unscale_pt_ln)
|
||||
{
|
||||
if (features.point_expand)
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Point;
|
||||
m_conf.indices_per_prim = 1;
|
||||
if (unscale_pt_ln)
|
||||
{
|
||||
if (features.point_expand)
|
||||
{
|
||||
m_conf.vs.point_size = true;
|
||||
m_conf.cb_vs.point_size = GSVector2(target_scale);
|
||||
}
|
||||
else if (features.vs_expand)
|
||||
{
|
||||
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Point;
|
||||
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||
m_conf.verts = m_vertex.buff;
|
||||
m_conf.nverts = m_vertex.next;
|
||||
m_conf.nindices = m_index.tail * 6;
|
||||
m_conf.indices_per_prim = 6;
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Vulkan/GL still need to set point size.
|
||||
m_conf.cb_vs.point_size = target_scale;
|
||||
|
||||
// M1 requires point size output on *all* points.
|
||||
m_conf.vs.point_size = true;
|
||||
m_conf.cb_vs.point_size = GSVector2(target_scale);
|
||||
}
|
||||
else if (features.geometry_shader)
|
||||
{
|
||||
m_conf.gs.expand = true;
|
||||
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
|
||||
}
|
||||
else if (features.vs_expand)
|
||||
{
|
||||
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Point;
|
||||
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||
m_conf.indices_per_prim = 6;
|
||||
ExpandIndices<GSHWDrawConfig::VSExpand::Point>();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Vulkan/GL still need to set point size.
|
||||
m_conf.cb_vs.point_size = target_scale;
|
||||
}
|
||||
break;
|
||||
|
||||
case GS_LINE_CLASS:
|
||||
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Line;
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Line;
|
||||
m_conf.indices_per_prim = 2;
|
||||
if (unscale_pt_ln)
|
||||
{
|
||||
if (features.line_expand)
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Line;
|
||||
m_conf.indices_per_prim = 2;
|
||||
if (unscale_pt_ln)
|
||||
{
|
||||
m_conf.line_expand = true;
|
||||
}
|
||||
else if (features.geometry_shader)
|
||||
{
|
||||
m_conf.gs.expand = true;
|
||||
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
|
||||
}
|
||||
else if (features.vs_expand)
|
||||
{
|
||||
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Line;
|
||||
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||
m_conf.indices_per_prim = 6;
|
||||
ExpandIndices<GSHWDrawConfig::VSExpand::Line>();
|
||||
if (features.line_expand)
|
||||
{
|
||||
m_conf.line_expand = true;
|
||||
}
|
||||
else if (features.vs_expand)
|
||||
{
|
||||
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Line;
|
||||
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||
m_conf.indices_per_prim = 6;
|
||||
ExpandLineIndices();
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case GS_SPRITE_CLASS:
|
||||
// Heuristics: trade-off
|
||||
// Lines: GPU conversion => ofc, more GPU. And also more CPU due to extra shader validation stage.
|
||||
// Triangles: CPU conversion => ofc, more CPU ;) more bandwidth (72 bytes / sprite)
|
||||
//
|
||||
// Note: severals openGL operation does draw call under the wood like texture upload. So even if
|
||||
// you do 10 consecutive draw with the geometry shader, you will still pay extra validation if new
|
||||
// texture are uploaded. (game Shadow Hearts)
|
||||
//
|
||||
// Note2: Due to MultiThreaded driver, Nvidia suffers less of the previous issue. Still it isn't free
|
||||
// Shadow Heart is 90 fps (gs) vs 113 fps (no gs)
|
||||
//
|
||||
// Note3: Some GPUs (Happens on GT 750m, not on Intel 5200) don't properly divide by large floats (e.g. FLT_MAX/FLT_MAX == 0)
|
||||
// Lines2Sprites predivides by Q, avoiding this issue, so always use it if m_vt.m_accurate_stq
|
||||
|
||||
// If the draw calls contains few primitives. Geometry Shader gain with be rather small versus
|
||||
// the extra validation cost of the extra stage.
|
||||
//
|
||||
// Note: keep Geometry Shader in the replayer to ease debug.
|
||||
if (g_gs_device->Features().geometry_shader && !m_vt.m_accurate_stq && m_vertex.next > 32) // <=> 16 sprites (based on Shadow Hearts)
|
||||
{
|
||||
m_conf.gs.expand = true;
|
||||
// Need to pre-divide ST by Q if Q is very large, to avoid precision issues on some GPUs.
|
||||
// May as well just expand the whole thing out with the CPU path in such a case.
|
||||
if (features.vs_expand && !m_vt.m_accurate_stq)
|
||||
{
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Sprite;
|
||||
m_conf.verts = m_vertex.buff;
|
||||
m_conf.nverts = m_vertex.next;
|
||||
m_conf.nindices = m_index.tail * 3;
|
||||
m_conf.indices_per_prim = 6;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
Lines2Sprites();
|
||||
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Line;
|
||||
m_conf.indices_per_prim = 2;
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||
m_conf.indices_per_prim = 6;
|
||||
}
|
||||
}
|
||||
else if (features.vs_expand && !m_vt.m_accurate_stq)
|
||||
{
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Sprite;
|
||||
m_conf.indices_per_prim = 6;
|
||||
ExpandIndices<GSHWDrawConfig::VSExpand::Sprite>();
|
||||
}
|
||||
else
|
||||
{
|
||||
Lines2Sprites();
|
||||
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||
m_conf.indices_per_prim = 6;
|
||||
}
|
||||
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Sprite;
|
||||
break;
|
||||
|
||||
case GS_TRIANGLE_CLASS:
|
||||
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Triangle;
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||
m_conf.indices_per_prim = 3;
|
||||
{
|
||||
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
|
||||
m_conf.indices_per_prim = 3;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -4443,7 +4387,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
|||
|
||||
// GS_SPRITE_CLASS are already flat (either by CPU or the GS)
|
||||
m_conf.ps.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 0 : PRIM->IIP;
|
||||
m_conf.gs.iip = m_conf.ps.iip;
|
||||
m_conf.vs.iip = m_conf.ps.iip;
|
||||
|
||||
if (DATE_BARRIER)
|
||||
|
@ -4463,7 +4406,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
|||
else if (DATE_PRIMID)
|
||||
{
|
||||
m_conf.ps.date = 1 + m_cached_ctx.TEST.DATM;
|
||||
m_conf.gs.forward_primid = 1;
|
||||
}
|
||||
else if (DATE)
|
||||
{
|
||||
|
|
|
@ -168,7 +168,7 @@ public:
|
|||
float GetUpscaleMultiplier() override;
|
||||
void Lines2Sprites();
|
||||
bool VerifyIndices();
|
||||
template <GSHWDrawConfig::VSExpand Expand> void ExpandIndices();
|
||||
void ExpandLineIndices();
|
||||
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba);
|
||||
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
|
||||
GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale);
|
||||
|
|
|
@ -2609,7 +2609,6 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx
|
|||
config.vs.tme = true;
|
||||
config.vs.iip = true;
|
||||
config.vs.fst = true;
|
||||
config.gs.key = 0;
|
||||
config.ps.key_lo = 0;
|
||||
config.ps.key_hi = 0;
|
||||
config.ps.read_ba = read_ba;
|
||||
|
|
|
@ -281,7 +281,7 @@ public:
|
|||
MRCOwned<id<MTLDepthStencilState>> m_dss_stencil_write;
|
||||
MRCOwned<id<MTLDepthStencilState>> m_dss_hw[1 << 5];
|
||||
|
||||
MRCOwned<id<MTLBuffer>> m_texture_download_buf;
|
||||
MRCOwned<id<MTLBuffer>> m_expand_index_buffer;
|
||||
UploadBuffer m_texture_upload_buf;
|
||||
BufferPair m_vertex_upload_buf;
|
||||
|
||||
|
|
|
@ -748,6 +748,21 @@ void GSDeviceMTL::DetachSurfaceOnMainThread()
|
|||
m_layer = nullptr;
|
||||
}
|
||||
|
||||
// Metal is fun and won't let you use newBufferWithBytes for private buffers,
// so the content is written into a shared staging buffer and blitted across.
//
// dev     - device both buffers are allocated from
// cb      - command buffer the staging -> private copy is encoded into
// options - extra resource options for the returned buffer (private storage mode is OR'd in)
// length  - size in bytes of both buffers
// fill    - callback handed the staging buffer's contents pointer to write the data
//
// Returns the private-storage buffer. The copy is only encoded here; nothing in
// this function commits `cb`.
// NOTE(review): presumably the caller commits `cb` before the buffer is read - confirm.
static MRCOwned<id<MTLBuffer>> CreatePrivateBufferWithContent(
	id<MTLDevice> dev, id<MTLCommandBuffer> cb,
	MTLResourceOptions options, NSUInteger length,
	std::function<void(void*)> fill)
{
	const MTLResourceOptions private_options = options | MTLResourceStorageModePrivate;

	MRCOwned<id<MTLBuffer>> staging = MRCTransfer([dev newBufferWithLength:length options:MTLResourceStorageModeShared]);
	MRCOwned<id<MTLBuffer>> gpu_only = MRCTransfer([dev newBufferWithLength:length options:private_options]);

	// Let the caller write the payload into the staging memory.
	void* const staging_ptr = [staging contents];
	fill(staging_ptr);

	// Encode the staging -> private copy.
	id<MTLBlitCommandEncoder> copy_encoder = [cb blitCommandEncoder];
	[copy_encoder copyFromBuffer:staging
	                sourceOffset:0
	                    toBuffer:gpu_only
	           destinationOffset:0
	                        size:length];
	[copy_encoder endEncoding];

	return gpu_only;
}
|
||||
|
||||
bool GSDeviceMTL::Create(const WindowInfo& wi, VsyncMode vsync)
|
||||
{ @autoreleasepool {
|
||||
if (!GSDevice::Create(wi, vsync))
|
||||
|
@ -808,7 +823,6 @@ bool GSDeviceMTL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
MTLPixelFormat layer_px_fmt = [m_layer pixelFormat];
|
||||
|
||||
m_features.broken_point_sampler = [[m_dev.dev name] containsString:@"AMD"];
|
||||
m_features.geometry_shader = false;
|
||||
m_features.vs_expand = true;
|
||||
m_features.primitive_id = m_dev.features.primid;
|
||||
m_features.texture_barrier = true;
|
||||
|
@ -852,6 +866,9 @@ bool GSDeviceMTL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
m_cas_pipeline[sharpen_only] = MakeComputePipeline(LoadShader(shader), sharpen_only ? @"CAS Sharpen" : @"CAS Upscale");
|
||||
}
|
||||
|
||||
m_expand_index_buffer = CreatePrivateBufferWithContent(m_dev.dev, initCommands, MTLResourceHazardTrackingModeUntracked, EXPAND_BUFFER_SIZE, GenerateExpansionIndexBuffer);
|
||||
[m_expand_index_buffer setLabel:@"Point/Sprite Expand Indices"];
|
||||
|
||||
m_hw_vertex = MRCTransfer([MTLVertexDescriptor new]);
|
||||
[[[m_hw_vertex layouts] objectAtIndexedSubscript:GSMTLBufferIndexHWVertices] setStride:sizeof(GSVertex)];
|
||||
applyAttribute(m_hw_vertex, GSMTLAttributeIndexST, MTLVertexFormatFloat2, offsetof(GSVertex, ST), GSMTLBufferIndexHWVertices);
|
||||
|
@ -1987,17 +2004,27 @@ void GSDeviceMTL::MREInitHWDraw(GSHWDrawConfig& config, const Map& verts)
|
|||
|
||||
void GSDeviceMTL::RenderHW(GSHWDrawConfig& config)
|
||||
{ @autoreleasepool {
|
||||
if (config.topology == GSHWDrawConfig::Topology::Point)
|
||||
config.vs.point_size = 1; // M1 requires point size output on *all* points
|
||||
|
||||
if (config.tex && config.ds == config.tex)
|
||||
EndRenderPass(); // Barrier
|
||||
|
||||
size_t vertsize = config.nverts * sizeof(*config.verts);
|
||||
size_t idxsize = config.nindices * sizeof(*config.indices);
|
||||
size_t idxsize = config.vs.UseExpandIndexBuffer() ? 0 : (config.nindices * sizeof(*config.indices));
|
||||
Map allocation = Allocate(m_vertex_upload_buf, vertsize + idxsize);
|
||||
memcpy(allocation.cpu_buffer, config.verts, vertsize);
|
||||
memcpy(static_cast<u8*>(allocation.cpu_buffer) + vertsize, config.indices, idxsize);
|
||||
|
||||
id<MTLBuffer> index_buffer;
|
||||
size_t index_buffer_offset;
|
||||
if (!config.vs.UseExpandIndexBuffer())
|
||||
{
|
||||
memcpy(static_cast<u8*>(allocation.cpu_buffer) + vertsize, config.indices, idxsize);
|
||||
index_buffer = allocation.gpu_buffer;
|
||||
index_buffer_offset = allocation.gpu_offset + vertsize;
|
||||
}
|
||||
else
|
||||
{
|
||||
index_buffer = m_expand_index_buffer;
|
||||
index_buffer_offset = 0;
|
||||
}
|
||||
|
||||
FlushClears(config.tex);
|
||||
FlushClears(config.pal);
|
||||
|
@ -2028,7 +2055,7 @@ void GSDeviceMTL::RenderHW(GSHWDrawConfig& config)
|
|||
ASSERT(config.require_full_barrier == false && config.drawlist == nullptr);
|
||||
MRESetHWPipelineState(config.vs, config.ps, {}, {});
|
||||
MREInitHWDraw(config, allocation);
|
||||
SendHWDraw(config, m_current_render.encoder, allocation.gpu_buffer, allocation.gpu_offset + vertsize);
|
||||
SendHWDraw(config, m_current_render.encoder, index_buffer, index_buffer_offset);
|
||||
config.ps.date = 3;
|
||||
break;
|
||||
}
|
||||
|
@ -2084,7 +2111,7 @@ void GSDeviceMTL::RenderHW(GSHWDrawConfig& config)
|
|||
MRESetHWPipelineState(config.vs, config.ps, config.blend, config.colormask);
|
||||
MRESetDSS(config.depth);
|
||||
|
||||
SendHWDraw(config, mtlenc, allocation.gpu_buffer, allocation.gpu_offset + vertsize);
|
||||
SendHWDraw(config, mtlenc, index_buffer, index_buffer_offset);
|
||||
|
||||
if (config.alpha_second_pass.enable)
|
||||
{
|
||||
|
@ -2095,7 +2122,7 @@ void GSDeviceMTL::RenderHW(GSHWDrawConfig& config)
|
|||
}
|
||||
MRESetHWPipelineState(config.vs, config.alpha_second_pass.ps, config.blend, config.alpha_second_pass.colormask);
|
||||
MRESetDSS(config.alpha_second_pass.depth);
|
||||
SendHWDraw(config, mtlenc, allocation.gpu_buffer, allocation.gpu_offset + vertsize);
|
||||
SendHWDraw(config, mtlenc, index_buffer, index_buffer_offset);
|
||||
}
|
||||
|
||||
if (hdr_rt)
|
||||
|
@ -2141,25 +2168,34 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
|
|||
|
||||
g_perfmon.Put(GSPerfMon::DrawCalls, config.drawlist->size());
|
||||
g_perfmon.Put(GSPerfMon::Barriers, config.drawlist->size());
|
||||
for (size_t count = 0, p = 0, n = 0; n < config.drawlist->size(); p += count, ++n)
|
||||
|
||||
const u32 indices_per_prim = config.indices_per_prim;
|
||||
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
|
||||
|
||||
for (u32 n = 0, p = 0; n < draw_list_size; n++)
|
||||
{
|
||||
count = (*config.drawlist)[n] * config.indices_per_prim;
|
||||
const u32 count = (*config.drawlist)[n] * indices_per_prim;
|
||||
textureBarrier(enc);
|
||||
[enc drawIndexedPrimitives:topology
|
||||
indexCount:count
|
||||
indexType:MTLIndexTypeUInt32
|
||||
indexBuffer:buffer
|
||||
indexBufferOffset:off + p * sizeof(*config.indices)];
|
||||
p += count;
|
||||
}
|
||||
|
||||
[enc popDebugGroup];
|
||||
return;
|
||||
}
|
||||
else if (config.require_full_barrier)
|
||||
{
|
||||
const u32 ndraws = config.nindices / config.indices_per_prim;
|
||||
const u32 indices_per_prim = config.indices_per_prim;
|
||||
const u32 ndraws = config.nindices / indices_per_prim;
|
||||
g_perfmon.Put(GSPerfMon::DrawCalls, ndraws);
|
||||
g_perfmon.Put(GSPerfMon::Barriers, ndraws);
|
||||
[enc pushDebugGroup:[NSString stringWithFormat:@"Full barrier split draw (%d prims)", ndraws]];
|
||||
for (size_t p = 0; p < config.nindices; p += config.indices_per_prim)
|
||||
|
||||
for (u32 p = 0; p < config.nindices; p += indices_per_prim)
|
||||
{
|
||||
textureBarrier(enc);
|
||||
[enc drawIndexedPrimitives:topology
|
||||
|
@ -2168,30 +2204,24 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
|
|||
indexBuffer:buffer
|
||||
indexBufferOffset:off + p * sizeof(*config.indices)];
|
||||
}
|
||||
|
||||
[enc popDebugGroup];
|
||||
return;
|
||||
}
|
||||
else if (config.require_one_barrier)
|
||||
{
|
||||
// One barrier needed
|
||||
textureBarrier(enc);
|
||||
[enc drawIndexedPrimitives:topology
|
||||
indexCount:config.nindices
|
||||
indexType:MTLIndexTypeUInt32
|
||||
indexBuffer:buffer
|
||||
indexBufferOffset:off];
|
||||
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
||||
g_perfmon.Put(GSPerfMon::Barriers, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// No barriers needed
|
||||
[enc drawIndexedPrimitives:topology
|
||||
indexCount:config.nindices
|
||||
indexType:MTLIndexTypeUInt32
|
||||
indexBuffer:buffer
|
||||
indexBufferOffset:off];
|
||||
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
||||
}
|
||||
|
||||
[enc drawIndexedPrimitives:topology
|
||||
indexCount:config.nindices
|
||||
indexType:MTLIndexTypeUInt32
|
||||
indexBuffer:buffer
|
||||
indexBufferOffset:off];
|
||||
|
||||
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
||||
}
|
||||
|
||||
// tbh I'm not a fan of the current debug groups
|
||||
|
|
|
@ -116,15 +116,6 @@ namespace GLLoader
|
|||
bool buggy_pbo = false;
|
||||
bool disable_download_pbo = false;
|
||||
|
||||
bool is_gles = false;
|
||||
bool has_dual_source_blend = false;
|
||||
bool has_clip_control = true;
|
||||
bool found_framebuffer_fetch = false;
|
||||
bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default
|
||||
// DX11 GPU
|
||||
bool found_GL_ARB_gpu_shader5 = false; // Require IvyBridge
|
||||
bool found_GL_ARB_texture_barrier = false;
|
||||
|
||||
static bool check_gl_version()
|
||||
{
|
||||
const char* vendor = (const char*)glGetString(GL_VENDOR);
|
||||
|
@ -135,13 +126,6 @@ namespace GLLoader
|
|||
else if (strstr(vendor, "Intel"))
|
||||
vendor_id_intel = true;
|
||||
|
||||
if (GSConfig.OverrideGeometryShaders != -1)
|
||||
{
|
||||
found_geometry_shader = GSConfig.OverrideGeometryShaders != 0 &&
|
||||
(GLAD_GL_VERSION_3_2 || GL_ARB_geometry_shader4 || GSConfig.OverrideGeometryShaders == 1);
|
||||
Console.Warning("Overriding geometry shaders detection to %s", found_geometry_shader ? "true" : "false");
|
||||
}
|
||||
|
||||
GLint major_gl = 0;
|
||||
GLint minor_gl = 0;
|
||||
glGetIntegerv(GL_MAJOR_VERSION, &major_gl);
|
||||
|
@ -157,32 +141,13 @@ namespace GLLoader
|
|||
|
||||
static bool check_gl_supported_extension()
|
||||
{
|
||||
if (GLAD_GL_VERSION_3_3 && !GLAD_GL_ARB_shading_language_420pack)
|
||||
if (!GLAD_GL_ARB_shading_language_420pack)
|
||||
{
|
||||
Host::ReportFormattedErrorAsync("GS",
|
||||
"GL_ARB_shading_language_420pack is not supported, this is required for the OpenGL renderer.");
|
||||
return false;
|
||||
}
|
||||
|
||||
// GLES doesn't have ARB_clip_control.
|
||||
has_clip_control = GLAD_GL_ARB_clip_control;
|
||||
if (!has_clip_control && !is_gles)
|
||||
{
|
||||
Host::AddOSDMessage("GL_ARB_clip_control is not supported, this will cause rendering issues.",
|
||||
Host::OSD_ERROR_DURATION);
|
||||
}
|
||||
|
||||
found_GL_ARB_gpu_shader5 = GLAD_GL_ARB_gpu_shader5;
|
||||
found_GL_ARB_texture_barrier = GLAD_GL_ARB_texture_barrier;
|
||||
|
||||
has_dual_source_blend = GLAD_GL_VERSION_3_2 || GLAD_GL_ARB_blend_func_extended;
|
||||
found_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
|
||||
if (found_framebuffer_fetch && GSConfig.DisableFramebufferFetch)
|
||||
{
|
||||
Console.Warning("Framebuffer fetch was found but is disabled. This will reduce performance.");
|
||||
found_framebuffer_fetch = false;
|
||||
}
|
||||
|
||||
if (!GLAD_GL_ARB_viewport_array)
|
||||
{
|
||||
glScissorIndexed = ReplaceGL::ScissorIndexed;
|
||||
|
@ -203,21 +168,6 @@ namespace GLLoader
|
|||
Emulate_DSA::Init();
|
||||
}
|
||||
|
||||
if (is_gles)
|
||||
{
|
||||
has_dual_source_blend = GLAD_GL_EXT_blend_func_extended || GLAD_GL_ARB_blend_func_extended;
|
||||
if (!has_dual_source_blend && !found_framebuffer_fetch)
|
||||
{
|
||||
Host::AddOSDMessage("Both dual source blending and framebuffer fetch are missing, things will be broken.",
|
||||
Host::OSD_ERROR_DURATION);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Core in GL3.2, so everything supports it.
|
||||
has_dual_source_blend = true;
|
||||
}
|
||||
|
||||
// Don't use PBOs when we don't have ARB_buffer_storage, orphaning buffers probably ends up worse than just
|
||||
// using the normal texture update routines and letting the driver take care of it.
|
||||
buggy_pbo = !GLAD_GL_VERSION_4_4 && !GLAD_GL_ARB_buffer_storage && !GLAD_GL_EXT_buffer_storage;
|
||||
|
|
|
@ -31,13 +31,4 @@ namespace GLLoader
|
|||
extern bool vendor_id_intel;
|
||||
extern bool buggy_pbo;
|
||||
extern bool disable_download_pbo;
|
||||
|
||||
// GL
|
||||
extern bool is_gles;
|
||||
extern bool has_clip_control;
|
||||
extern bool has_dual_source_blend;
|
||||
extern bool found_framebuffer_fetch;
|
||||
extern bool found_geometry_shader;
|
||||
extern bool found_GL_ARB_gpu_shader5;
|
||||
extern bool found_GL_ARB_texture_barrier;
|
||||
} // namespace GLLoader
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
namespace GLState
|
||||
{
|
||||
GLuint vao;
|
||||
GLuint fbo;
|
||||
GSVector2i viewport;
|
||||
GSVector4i scissor;
|
||||
|
@ -49,6 +50,7 @@ namespace GLState
|
|||
|
||||
void Clear()
|
||||
{
|
||||
vao = 0;
|
||||
fbo = 0;
|
||||
viewport = GSVector2i(1, 1);
|
||||
scissor = GSVector4i(0, 0, 1, 1);
|
||||
|
|
|
@ -22,6 +22,7 @@ class GSTextureOGL;
|
|||
|
||||
namespace GLState
|
||||
{
|
||||
extern GLuint vao; // vertex array object
|
||||
extern GLuint fbo; // frame buffer object
|
||||
extern GSVector2i viewport;
|
||||
extern GSVector4i scissor;
|
||||
|
|
|
@ -90,7 +90,12 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
if (!GSDevice::Create(wi, vsync))
|
||||
return false;
|
||||
|
||||
m_gl_context = GL::Context::Create(wi);
|
||||
// We need at least GL3.3.
|
||||
static constexpr const GL::Context::Version version_list[] = {{GL::Context::Profile::Core, 4, 6},
|
||||
{GL::Context::Profile::Core, 4, 5}, {GL::Context::Profile::Core, 4, 4}, {GL::Context::Profile::Core, 4, 3},
|
||||
{GL::Context::Profile::Core, 4, 2}, {GL::Context::Profile::Core, 4, 1}, {GL::Context::Profile::Core, 4, 0},
|
||||
{GL::Context::Profile::Core, 3, 3}};
|
||||
m_gl_context = GL::Context::Create(wi, version_list);
|
||||
if (!m_gl_context)
|
||||
{
|
||||
Console.Error("Failed to create any GL context");
|
||||
|
@ -104,9 +109,6 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
return false;
|
||||
}
|
||||
|
||||
// Check openGL requirement as soon as possible so we can switch to another
|
||||
// renderer/device
|
||||
GLLoader::is_gles = m_gl_context->IsGLES();
|
||||
if (!GLLoader::check_gl_requirements())
|
||||
return false;
|
||||
|
||||
|
@ -124,41 +126,57 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
|
||||
// optional features based on context
|
||||
m_features.broken_point_sampler = GLLoader::vendor_id_amd;
|
||||
m_features.geometry_shader = GLLoader::found_geometry_shader;
|
||||
m_features.primitive_id = true;
|
||||
|
||||
m_features.framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch;
|
||||
if (m_features.framebuffer_fetch && GSConfig.DisableFramebufferFetch)
|
||||
{
|
||||
Host::AddOSDMessage("Framebuffer fetch was found but is disabled. This will reduce performance.", Host::OSD_ERROR_DURATION);
|
||||
m_features.framebuffer_fetch = false;
|
||||
}
|
||||
|
||||
if (GSConfig.OverrideTextureBarriers == 0)
|
||||
m_features.texture_barrier = GLLoader::found_framebuffer_fetch; // Force Disabled
|
||||
m_features.texture_barrier = m_features.framebuffer_fetch; // Force Disabled
|
||||
else if (GSConfig.OverrideTextureBarriers == 1)
|
||||
m_features.texture_barrier = true; // Force Enabled
|
||||
else
|
||||
m_features.texture_barrier = GLLoader::found_framebuffer_fetch || GLLoader::found_GL_ARB_texture_barrier;
|
||||
m_features.texture_barrier = m_features.framebuffer_fetch || GLAD_GL_ARB_texture_barrier;
|
||||
if (!m_features.texture_barrier)
|
||||
{
|
||||
Host::AddOSDMessage(
|
||||
"GL_ARB_texture_barrier is not supported, blending will not be accurate.", Host::OSD_ERROR_DURATION);
|
||||
}
|
||||
|
||||
m_features.provoking_vertex_last = true;
|
||||
m_features.dxt_textures = GLAD_GL_EXT_texture_compression_s3tc;
|
||||
m_features.bptc_textures = GLAD_GL_VERSION_4_2 || GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_EXT_texture_compression_bptc;
|
||||
m_features.prefer_new_textures = GLLoader::is_gles;
|
||||
m_features.framebuffer_fetch = GLLoader::found_framebuffer_fetch;
|
||||
m_features.dual_source_blend = GLLoader::has_dual_source_blend && !GSConfig.DisableDualSourceBlend;
|
||||
m_features.clip_control = GLLoader::has_clip_control;
|
||||
m_features.prefer_new_textures = false;
|
||||
m_features.dual_source_blend = !GSConfig.DisableDualSourceBlend;
|
||||
m_features.clip_control = GLAD_GL_ARB_clip_control;
|
||||
if (!m_features.clip_control)
|
||||
Host::AddOSDMessage("GL_ARB_clip_control is not supported, this will cause rendering issues.", Host::OSD_ERROR_DURATION);
|
||||
m_features.stencil_buffer = true;
|
||||
m_features.test_and_sample_depth = m_features.texture_barrier && !GLLoader::is_gles;
|
||||
m_features.test_and_sample_depth = m_features.texture_barrier;
|
||||
|
||||
if (GLAD_GL_ARB_shader_storage_buffer_object)
|
||||
{
|
||||
GLint max_vertex_ssbos = 0;
|
||||
glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &max_vertex_ssbos);
|
||||
DevCon.WriteLn("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: %d", max_vertex_ssbos);
|
||||
m_features.vs_expand = (max_vertex_ssbos > 0);
|
||||
}
|
||||
if (!m_features.vs_expand)
|
||||
Console.Warning("Vertex expansion is not supported. This will reduce performance.");
|
||||
|
||||
GLint point_range[2] = {};
|
||||
glGetIntegerv(GL_ALIASED_POINT_SIZE_RANGE, point_range);
|
||||
m_features.point_expand = (point_range[0] <= GSConfig.UpscaleMultiplier && point_range[1] >= GSConfig.UpscaleMultiplier);
|
||||
m_features.line_expand = false;
|
||||
|
||||
if (GLLoader::is_gles)
|
||||
{
|
||||
GLint line_range[2] = {};
|
||||
glGetIntegerv(GL_ALIASED_LINE_WIDTH_RANGE, line_range);
|
||||
m_features.line_expand = (line_range[0] <= static_cast<GLint>(GSConfig.UpscaleMultiplier) && line_range[1] >= static_cast<GLint>(GSConfig.UpscaleMultiplier));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_features.line_expand = false;
|
||||
}
|
||||
|
||||
DevCon.WriteLn("Using %s for point expansion and %s for line expansion.",
|
||||
m_features.point_expand ? "hardware" : "geometry shaders", m_features.line_expand ? "hardware" : "geometry shaders");
|
||||
Console.WriteLn("Using %s for point expansion, %s for line expansion and %s for sprite expansion.",
|
||||
m_features.point_expand ? "hardware" : (m_features.vs_expand ? "vertex expanding" : "UNSUPPORTED"),
|
||||
m_features.line_expand ? "hardware" : (m_features.vs_expand ? "vertex expanding" : "UNSUPPORTED"),
|
||||
m_features.vs_expand ? "vertex expanding" : "CPU");
|
||||
|
||||
// because of fbo bindings below...
|
||||
GLState::Clear();
|
||||
|
@ -168,20 +186,12 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
// ****************************************************************
|
||||
if (GSConfig.UseDebugDevice)
|
||||
{
|
||||
if (!GLLoader::is_gles)
|
||||
{
|
||||
glDebugMessageCallback(DebugMessageCallback, NULL);
|
||||
glDebugMessageCallback(DebugMessageCallback, NULL);
|
||||
|
||||
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
|
||||
// Useless info message on Nvidia driver
|
||||
GLuint ids[] = { 0x20004 };
|
||||
glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, std::size(ids), ids, false);
|
||||
}
|
||||
else if (GLAD_GL_KHR_debug)
|
||||
{
|
||||
glDebugMessageCallbackKHR(DebugMessageCallback, NULL);
|
||||
glDebugMessageControlKHR(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
|
||||
}
|
||||
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
|
||||
// Useless info message on Nvidia driver
|
||||
static constexpr const GLuint ids[] = { 0x20004 };
|
||||
glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, std::size(ids), ids, false);
|
||||
|
||||
// Uncomment synchronous if you want callstacks which match where the error occurred.
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
|
@ -219,8 +229,8 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
{
|
||||
GL_PUSH("GSDeviceOGL::Vertex Buffer");
|
||||
|
||||
glGenVertexArrays(1, &m_vertex_array_object);
|
||||
glBindVertexArray(m_vertex_array_object);
|
||||
glGenVertexArrays(1, &m_vao);
|
||||
IASetVAO(m_vao);
|
||||
|
||||
m_vertex_stream_buffer = GL::StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE);
|
||||
m_index_stream_buffer = GL::StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE);
|
||||
|
@ -233,14 +243,13 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
return false;
|
||||
}
|
||||
|
||||
m_vertex_stream_buffer->Bind();
|
||||
m_index_stream_buffer->Bind();
|
||||
|
||||
// Force UBOs to be uploaded on first use.
|
||||
std::memset(&m_vs_cb_cache, 0xFF, sizeof(m_vs_cb_cache));
|
||||
std::memset(&m_ps_cb_cache, 0xFF, sizeof(m_ps_cb_cache));
|
||||
|
||||
// rebind because of VAO state
|
||||
m_vertex_stream_buffer->Bind();
|
||||
m_index_stream_buffer->Bind();
|
||||
|
||||
static_assert(sizeof(GSVertexPT1) == sizeof(GSVertex), "wrong GSVertex size");
|
||||
for (u32 i = 0; i < 8; i++)
|
||||
glEnableVertexAttribArray(i);
|
||||
|
@ -253,6 +262,29 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
glVertexAttribIPointer(5, 1, GL_UNSIGNED_INT, sizeof(GSVertex), (const GLvoid*)(20));
|
||||
glVertexAttribIPointer(6, 2, GL_UNSIGNED_SHORT, sizeof(GSVertex), (const GLvoid*)(24));
|
||||
glVertexAttribPointer(7, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(GSVertex), (const GLvoid*)(28));
|
||||
|
||||
if (m_features.vs_expand)
|
||||
{
|
||||
glGenVertexArrays(1, &m_expand_vao);
|
||||
glBindVertexArray(m_expand_vao);
|
||||
IASetVAO(m_expand_vao);
|
||||
|
||||
// Still need the vertex buffer bound, because uploads happen to GL_ARRAY_BUFFER.
|
||||
m_vertex_stream_buffer->Bind();
|
||||
|
||||
std::unique_ptr<u8[]> expand_data = std::make_unique<u8[]>(EXPAND_BUFFER_SIZE);
|
||||
GenerateExpansionIndexBuffer(expand_data.get());
|
||||
glGenBuffers(1, &m_expand_ibo);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_expand_ibo);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, EXPAND_BUFFER_SIZE, expand_data.get(), GL_STATIC_DRAW);
|
||||
|
||||
// We can bind it once when using gl_BaseVertexARB.
|
||||
if (GLAD_GL_ARB_shader_draw_parameters)
|
||||
{
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_vertex_stream_buffer->GetGLBufferId(),
|
||||
0, VERTEX_BUFFER_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ****************************************************************
|
||||
|
@ -289,7 +321,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
{
|
||||
const char* name = shaderName(static_cast<ShaderConvert>(i));
|
||||
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, *convert_glsl));
|
||||
if (!m_shader_cache.GetProgram(&m_convert.ps[i], m_convert.vs, {}, ps))
|
||||
if (!m_shader_cache.GetProgram(&m_convert.ps[i], m_convert.vs, ps))
|
||||
return false;
|
||||
m_convert.ps[i].SetFormattedName("Convert pipe %s", name);
|
||||
|
||||
|
@ -343,7 +375,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
{
|
||||
const char* name = shaderName(static_cast<PresentShader>(i));
|
||||
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, *shader));
|
||||
if (!m_shader_cache.GetProgram(&m_present[i], present_vs, {}, ps))
|
||||
if (!m_shader_cache.GetProgram(&m_present[i], present_vs, ps))
|
||||
return false;
|
||||
m_present[i].SetFormattedName("Present pipe %s", name);
|
||||
|
||||
|
@ -376,7 +408,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
for (size_t i = 0; i < std::size(m_merge_obj.ps); i++)
|
||||
{
|
||||
const std::string ps(GetShaderSource(fmt::format("ps_main{}", i), GL_FRAGMENT_SHADER, *shader));
|
||||
if (!m_shader_cache.GetProgram(&m_merge_obj.ps[i], m_convert.vs, {}, ps))
|
||||
if (!m_shader_cache.GetProgram(&m_merge_obj.ps[i], m_convert.vs, ps))
|
||||
return false;
|
||||
m_merge_obj.ps[i].SetFormattedName("Merge pipe %zu", i);
|
||||
m_merge_obj.ps[i].RegisterUniform("BGColor");
|
||||
|
@ -399,7 +431,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
for (size_t i = 0; i < std::size(m_interlace.ps); i++)
|
||||
{
|
||||
const std::string ps(GetShaderSource(fmt::format("ps_main{}", i), GL_FRAGMENT_SHADER, *shader));
|
||||
if (!m_shader_cache.GetProgram(&m_interlace.ps[i], m_convert.vs, {}, ps))
|
||||
if (!m_shader_cache.GetProgram(&m_interlace.ps[i], m_convert.vs, ps))
|
||||
return false;
|
||||
m_interlace.ps[i].SetFormattedName("Merge pipe %zu", i);
|
||||
m_interlace.ps[i].RegisterUniform("ZrH");
|
||||
|
@ -421,12 +453,10 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
{
|
||||
GL_PUSH("GSDeviceOGL::Rasterization");
|
||||
|
||||
if (!GLLoader::is_gles)
|
||||
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
|
||||
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
|
||||
glDisable(GL_CULL_FACE);
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
if (!GLLoader::is_gles)
|
||||
glDisable(GL_MULTISAMPLE);
|
||||
glDisable(GL_MULTISAMPLE);
|
||||
|
||||
glDisable(GL_DITHER); // Honestly I don't know!
|
||||
}
|
||||
|
@ -446,7 +476,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
const std::string ps(GetShaderSource(
|
||||
fmt::format("ps_stencil_image_init_{}", i),
|
||||
GL_FRAGMENT_SHADER, *convert_glsl));
|
||||
m_shader_cache.GetProgram(&m_date.primid_ps[i], m_convert.vs, {}, ps);
|
||||
m_shader_cache.GetProgram(&m_date.primid_ps[i], m_convert.vs, ps);
|
||||
m_date.primid_ps[i].SetFormattedName("PrimID Destination Alpha Init %d", i);
|
||||
}
|
||||
}
|
||||
|
@ -461,7 +491,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi, VsyncMode vsync)
|
|||
// This extension allow FS depth to range from -1 to 1. So
|
||||
// gl_position.z could range from [0, 1]
|
||||
// Change depth convention
|
||||
if (GLLoader::has_clip_control)
|
||||
if (m_features.clip_control)
|
||||
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
|
||||
|
||||
// ****************************************************************
|
||||
|
@ -602,12 +632,16 @@ void GSDeviceOGL::DestroyResources()
|
|||
m_vertex_uniform_stream_buffer.reset();
|
||||
|
||||
glBindVertexArray(0);
|
||||
if (m_vertex_array_object != 0)
|
||||
glDeleteVertexArrays(1, &m_vertex_array_object);
|
||||
// Release the expand vertex array object. m_expand_ibo is a buffer object, not a VAO,
// and is freed separately with glDeleteBuffers further down in this function.
if (m_expand_vao != 0)
	glDeleteVertexArrays(1, &m_expand_vao);
|
||||
if (m_vao != 0)
|
||||
glDeleteVertexArrays(1, &m_vao);
|
||||
|
||||
m_index_stream_buffer.reset();
|
||||
m_vertex_stream_buffer.reset();
|
||||
s_texture_upload_buffer.reset();
|
||||
if (m_expand_ibo)
|
||||
glDeleteBuffers(1, &m_expand_ibo);
|
||||
|
||||
if (m_fbo != 0)
|
||||
glDeleteFramebuffers(1, &m_fbo);
|
||||
|
@ -1092,56 +1126,28 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ
|
|||
{
|
||||
std::string header;
|
||||
|
||||
if (GLLoader::is_gles)
|
||||
// Intel's GL driver doesn't like the readonly qualifier with 3.3 GLSL.
|
||||
if (m_features.vs_expand)
|
||||
{
|
||||
if (GLAD_GL_ES_VERSION_3_2)
|
||||
header = "#version 320 es\n";
|
||||
else if (GLAD_GL_ES_VERSION_3_1)
|
||||
header = "#version 310 es\n";
|
||||
|
||||
if (GLAD_GL_EXT_blend_func_extended)
|
||||
header += "#extension GL_EXT_blend_func_extended : require\n";
|
||||
if (GLAD_GL_ARB_blend_func_extended)
|
||||
header += "#extension GL_ARB_blend_func_extended : require\n";
|
||||
if (m_features.framebuffer_fetch)
|
||||
{
|
||||
if (GLAD_GL_EXT_shader_framebuffer_fetch)
|
||||
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
|
||||
else if (GLAD_GL_ARM_shader_framebuffer_fetch)
|
||||
header += "#extension GL_ARM_shader_framebuffer_fetch : require\n";
|
||||
}
|
||||
|
||||
header += "precision highp float;\n";
|
||||
header += "precision highp int;\n";
|
||||
header += "precision highp sampler2D;\n";
|
||||
if (GLAD_GL_ES_VERSION_3_1)
|
||||
header += "precision highp sampler2DMS;\n";
|
||||
if (GLAD_GL_ES_VERSION_3_2)
|
||||
header += "precision highp usamplerBuffer;\n";
|
||||
|
||||
if (!GLAD_GL_EXT_blend_func_extended && !GLAD_GL_ARB_blend_func_extended)
|
||||
header += "#define DISABLE_DUAL_SOURCE\n";
|
||||
header = "#version 430 core\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
header = "#version 330 core\n";
|
||||
|
||||
// Need GL version 420
|
||||
header += "#extension GL_ARB_shading_language_420pack: require\n";
|
||||
|
||||
if (m_features.framebuffer_fetch && GLAD_GL_EXT_shader_framebuffer_fetch)
|
||||
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
|
||||
|
||||
if (GLLoader::found_GL_ARB_gpu_shader5)
|
||||
header += "#extension GL_ARB_gpu_shader5 : enable\n";
|
||||
}
|
||||
|
||||
if (GLAD_GL_ARB_shader_draw_parameters)
|
||||
header += "#extension GL_ARB_shader_draw_parameters : require\n";
|
||||
if (m_features.framebuffer_fetch && GLAD_GL_EXT_shader_framebuffer_fetch)
|
||||
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
|
||||
|
||||
if (m_features.framebuffer_fetch)
|
||||
header += "#define HAS_FRAMEBUFFER_FETCH 1\n";
|
||||
else
|
||||
header += "#define HAS_FRAMEBUFFER_FETCH 0\n";
|
||||
|
||||
if (GLLoader::has_clip_control)
|
||||
if (m_features.clip_control)
|
||||
header += "#define HAS_CLIP_CONTROL 1\n";
|
||||
else
|
||||
header += "#define HAS_CLIP_CONTROL 0\n";
|
||||
|
@ -1178,26 +1184,14 @@ std::string GSDeviceOGL::GetVSSource(VSSelector sel)
|
|||
|
||||
std::string macro = fmt::format("#define VS_FST {}\n", static_cast<u32>(sel.fst))
|
||||
+ fmt::format("#define VS_IIP {}\n", static_cast<u32>(sel.iip))
|
||||
+ fmt::format("#define VS_POINT_SIZE {}\n", static_cast<u32>(sel.point_size));
|
||||
+ fmt::format("#define VS_POINT_SIZE {}\n", static_cast<u32>(sel.point_size))
|
||||
+ fmt::format("#define VS_EXPAND {}\n", static_cast<int>(sel.expand));
|
||||
|
||||
std::string src = GenGlslHeader("vs_main", GL_VERTEX_SHADER, macro);
|
||||
src += m_shader_tfx_vgs;
|
||||
return src;
|
||||
}
|
||||
|
||||
std::string GSDeviceOGL::GetGSSource(GSSelector sel)
|
||||
{
|
||||
DevCon.WriteLn("Compiling new geometry shader with selector 0x%" PRIX64, sel.key);
|
||||
|
||||
std::string macro = fmt::format("#define GS_PRIM {}\n", static_cast<u32>(sel.topology))
|
||||
+ fmt::format("#define GS_EXPAND {}\n", static_cast<u32>(sel.expand))
|
||||
+ fmt::format("#define GS_IIP {}\n", static_cast<u32>(sel.iip));
|
||||
|
||||
std::string src = GenGlslHeader("gs_main", GL_GEOMETRY_SHADER, macro);
|
||||
src += m_shader_tfx_vgs;
|
||||
return src;
|
||||
}
|
||||
|
||||
std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
|
||||
{
|
||||
DevCon.WriteLn("Compiling new pixel shader with selector 0x%" PRIX64 "%08X", sel.key_hi, sel.key_lo);
|
||||
|
@ -1502,6 +1496,7 @@ void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect
|
|||
{GSVector4(right , bottom, 0.0f, 0.0f) , GSVector2(sRect.z , sRect.w)} ,
|
||||
};
|
||||
|
||||
IASetVAO(m_vao);
|
||||
IASetVertexBuffer(vertices, 4);
|
||||
IASetPrimitiveTopology(GL_TRIANGLE_STRIP);
|
||||
DrawPrimitive();
|
||||
|
@ -1510,6 +1505,7 @@ void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect
|
|||
void GSDeviceOGL::DrawMultiStretchRects(
|
||||
const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
|
||||
{
|
||||
IASetVAO(m_vao);
|
||||
IASetPrimitiveTopology(GL_TRIANGLE_STRIP);
|
||||
OMSetDepthStencilState(m_convert.dss);
|
||||
OMSetBlendState(false);
|
||||
|
@ -1672,7 +1668,7 @@ void GSDeviceOGL::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture
|
|||
bool GSDeviceOGL::CompileFXAAProgram()
|
||||
{
|
||||
// Needs ARB_gpu_shader5 for gather.
|
||||
if (!GLLoader::is_gles && !GLLoader::found_GL_ARB_gpu_shader5)
|
||||
if (!GLAD_GL_ARB_gpu_shader5)
|
||||
{
|
||||
Console.Warning("FXAA is not supported with the current GPU");
|
||||
return true;
|
||||
|
@ -1687,7 +1683,7 @@ bool GSDeviceOGL::CompileFXAAProgram()
|
|||
}
|
||||
|
||||
const std::string ps(GetShaderSource("ps_main", GL_FRAGMENT_SHADER, shader->c_str(), fxaa_macro));
|
||||
std::optional<GL::Program> prog = m_shader_cache.GetProgram(m_convert.vs, {}, ps);
|
||||
std::optional<GL::Program> prog = m_shader_cache.GetProgram(m_convert.vs, ps);
|
||||
if (!prog.has_value())
|
||||
{
|
||||
Console.Error("Failed to compile FXAA fragment shader");
|
||||
|
@ -1725,7 +1721,7 @@ bool GSDeviceOGL::CompileShadeBoostProgram()
|
|||
}
|
||||
|
||||
const std::string ps(GetShaderSource("ps_main", GL_FRAGMENT_SHADER, *shader));
|
||||
if (!m_shader_cache.GetProgram(&m_shadeboost.ps, m_convert.vs, {}, ps))
|
||||
if (!m_shader_cache.GetProgram(&m_shadeboost.ps, m_convert.vs, ps))
|
||||
return false;
|
||||
m_shadeboost.ps.RegisterUniform("params");
|
||||
m_shadeboost.ps.SetName("Shadeboost pipe");
|
||||
|
@ -1770,6 +1766,7 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
|
|||
|
||||
// ia
|
||||
|
||||
IASetVAO(m_vao);
|
||||
IASetVertexBuffer(vertices, 4);
|
||||
IASetPrimitiveTopology(GL_TRIANGLE_STRIP);
|
||||
|
||||
|
@ -1787,6 +1784,15 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
|
|||
}
|
||||
}
|
||||
|
||||
// Binds `vao` as the current vertex array object. The GL call is skipped when
// GLState::vao already records `vao` as the bound object.
void GSDeviceOGL::IASetVAO(GLuint vao)
{
	if (GLState::vao != vao)
	{
		// Record the new binding in the cached state, then issue the bind.
		GLState::vao = vao;
		glBindVertexArray(vao);
	}
}
|
||||
|
||||
void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count)
|
||||
{
|
||||
const u32 size = static_cast<u32>(count) * sizeof(GSVertexPT1);
|
||||
|
@ -1862,12 +1868,6 @@ bool GSDeviceOGL::CreateCASPrograms()
|
|||
}
|
||||
|
||||
const char* header =
|
||||
GLLoader::is_gles ?
|
||||
"#version 320 es\n"
|
||||
"precision highp float;\n"
|
||||
"precision highp int;\n"
|
||||
"precision highp sampler2D;\n"
|
||||
"precision highp image2D;\n" :
|
||||
"#version 420\n"
|
||||
"#extension GL_ARB_compute_shader : require\n";
|
||||
const char* sharpen_params[2] = {
|
||||
|
@ -1921,7 +1921,7 @@ bool GSDeviceOGL::CreateImGuiProgram()
|
|||
}
|
||||
|
||||
std::optional<GL::Program> prog = m_shader_cache.GetProgram(
|
||||
GetShaderSource("vs_main", GL_VERTEX_SHADER, glsl.value()), {},
|
||||
GetShaderSource("vs_main", GL_VERTEX_SHADER, glsl.value()),
|
||||
GetShaderSource("ps_main", GL_FRAGMENT_SHADER, glsl.value()));
|
||||
if (!prog.has_value())
|
||||
{
|
||||
|
@ -1937,6 +1937,7 @@ bool GSDeviceOGL::CreateImGuiProgram()
|
|||
glGenVertexArrays(1, &m_imgui.vao);
|
||||
glBindVertexArray(m_imgui.vao);
|
||||
m_vertex_stream_buffer->Bind();
|
||||
m_index_stream_buffer->Bind();
|
||||
|
||||
glEnableVertexAttribArray(0);
|
||||
glEnableVertexAttribArray(1);
|
||||
|
@ -1945,7 +1946,7 @@ bool GSDeviceOGL::CreateImGuiProgram()
|
|||
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(ImDrawVert), (GLvoid*)IM_OFFSETOF(ImDrawVert, uv));
|
||||
glVertexAttribPointer(2, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(ImDrawVert), (GLvoid*)IM_OFFSETOF(ImDrawVert, col));
|
||||
|
||||
glBindVertexArray(m_vertex_array_object);
|
||||
glBindVertexArray(GLState::vao);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1973,7 +1974,7 @@ void GSDeviceOGL::RenderImGui()
|
|||
|
||||
m_imgui.ps.Bind();
|
||||
m_imgui.ps.UniformMatrix4fv(0, &ortho_projection[0][0]);
|
||||
glBindVertexArray(m_imgui.vao);
|
||||
IASetVAO(m_imgui.vao);
|
||||
OMSetBlendState(true, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_FUNC_ADD);
|
||||
OMSetDepthStencilState(m_convert.dss);
|
||||
PSSetSamplerState(m_convert.ln);
|
||||
|
@ -2041,7 +2042,7 @@ void GSDeviceOGL::RenderImGui()
|
|||
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
|
||||
}
|
||||
|
||||
glBindVertexArray(m_vertex_array_object);
|
||||
IASetVAO(m_vao);
|
||||
glScissor(GLState::scissor.x, GLState::scissor.y, GLState::scissor.width(), GLState::scissor.height());
|
||||
}
|
||||
|
||||
|
@ -2224,10 +2225,9 @@ void GSDeviceOGL::SetupPipeline(const ProgramSelector& psel)
|
|||
|
||||
const std::string vs(GetVSSource(psel.vs));
|
||||
const std::string ps(GetPSSource(psel.ps));
|
||||
const std::string gs((psel.gs.key != 0) ? GetGSSource(psel.gs) : std::string());
|
||||
|
||||
GL::Program prog;
|
||||
m_shader_cache.GetProgram(&prog, vs, gs, ps);
|
||||
m_shader_cache.GetProgram(&prog, vs, ps);
|
||||
it = m_programs.emplace(psel, std::move(prog)).first;
|
||||
it->second.Bind();
|
||||
}
|
||||
|
@ -2330,7 +2330,26 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
|||
}
|
||||
|
||||
IASetVertexBuffer(config.verts, config.nverts);
|
||||
IASetIndexBuffer(config.indices, config.nindices);
|
||||
if (config.vs.expand != GSHWDrawConfig::VSExpand::None && !GLAD_GL_ARB_shader_draw_parameters)
|
||||
{
|
||||
// Need to offset the buffer.
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_vertex_stream_buffer->GetGLBufferId(),
|
||||
m_vertex.start * sizeof(GSVertex), config.nverts * sizeof(GSVertex));
|
||||
m_vertex.start = 0;
|
||||
}
|
||||
|
||||
if (config.vs.UseExpandIndexBuffer())
|
||||
{
|
||||
IASetVAO(m_expand_vao);
|
||||
m_index.start = 0;
|
||||
m_index.count = config.nindices;
|
||||
}
|
||||
else
|
||||
{
|
||||
IASetVAO(m_vao);
|
||||
IASetIndexBuffer(config.indices, config.nindices);
|
||||
}
|
||||
|
||||
GLenum topology = 0;
|
||||
switch (config.topology)
|
||||
{
|
||||
|
@ -2363,15 +2382,12 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
|||
psel.vs = config.vs;
|
||||
psel.ps.key_hi = config.ps.key_hi;
|
||||
psel.ps.key_lo = config.ps.key_lo;
|
||||
psel.gs.key = 0;
|
||||
psel.pad = 0;
|
||||
if (config.gs.expand)
|
||||
psel.gs.key = config.gs.key;
|
||||
|
||||
SetupPipeline(psel);
|
||||
|
||||
// additional non-pipeline config stuff
|
||||
const bool point_size_enabled = config.vs.point_size && !GLLoader::is_gles;
|
||||
const bool point_size_enabled = config.vs.point_size;
|
||||
if (GLState::point_size != point_size_enabled)
|
||||
{
|
||||
if (point_size_enabled)
|
||||
|
@ -2516,47 +2532,45 @@ void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, bool needs_barrier)
|
|||
|
||||
g_perfmon.Put(GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()));
|
||||
|
||||
for (size_t count = 0, p = 0, n = 0; n < config.drawlist->size(); p += count, ++n)
|
||||
const u32 indices_per_prim = config.indices_per_prim;
|
||||
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
|
||||
|
||||
for (u32 n = 0, p = 0; n < draw_list_size; n++)
|
||||
{
|
||||
count = (*config.drawlist)[n] * config.indices_per_prim;
|
||||
const u32 count = (*config.drawlist)[n] * indices_per_prim;
|
||||
glTextureBarrier();
|
||||
DrawIndexedPrimitive(p, count);
|
||||
p += count;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const bool tex_is_ds = config.tex && config.tex == config.ds;
|
||||
if ((needs_barrier && m_features.texture_barrier) || tex_is_ds)
|
||||
if (needs_barrier && m_features.texture_barrier)
|
||||
{
|
||||
if (config.require_full_barrier)
|
||||
{
|
||||
GL_PUSH("Split the draw");
|
||||
const u32 indices_per_prim = config.indices_per_prim;
|
||||
|
||||
GL_PERF("Split single draw in %d draw", config.nindices / config.indices_per_prim);
|
||||
GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim);
|
||||
g_perfmon.Put(GSPerfMon::Barriers, config.nindices / config.indices_per_prim);
|
||||
|
||||
for (size_t p = 0; p < config.nindices; p += config.indices_per_prim)
|
||||
for (u32 p = 0; p < config.nindices; p += indices_per_prim)
|
||||
{
|
||||
glTextureBarrier();
|
||||
DrawIndexedPrimitive(p, config.indices_per_prim);
|
||||
DrawIndexedPrimitive(p, indices_per_prim);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (config.require_one_barrier || tex_is_ds)
|
||||
if (config.require_one_barrier)
|
||||
{
|
||||
// The common renderer code doesn't put a barrier here because D3D/VK need to copy the DS, so we need to check it.
|
||||
// One barrier needed for non-overlapping draw.
|
||||
g_perfmon.Put(GSPerfMon::Barriers, 1);
|
||||
glTextureBarrier();
|
||||
DrawIndexedPrimitive();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// No barriers needed
|
||||
DrawIndexedPrimitive();
|
||||
}
|
||||
|
||||
|
|
|
@ -121,7 +121,6 @@ class GSDeviceOGL final : public GSDevice
|
|||
{
|
||||
public:
|
||||
using VSSelector = GSHWDrawConfig::VSSelector;
|
||||
using GSSelector = GSHWDrawConfig::GSSelector;
|
||||
using PSSelector = GSHWDrawConfig::PSSelector;
|
||||
using PSSamplerSelector = GSHWDrawConfig::SamplerSelector;
|
||||
using OMDepthStencilSelector = GSHWDrawConfig::DepthStencilSelector;
|
||||
|
@ -131,7 +130,6 @@ public:
|
|||
{
|
||||
PSSelector ps;
|
||||
VSSelector vs;
|
||||
GSSelector gs;
|
||||
u16 pad;
|
||||
|
||||
__fi bool operator==(const ProgramSelector& p) const { return (std::memcmp(this, &p, sizeof(*this)) == 0); }
|
||||
|
@ -144,7 +142,7 @@ public:
|
|||
__fi std::size_t operator()(const ProgramSelector& p) const noexcept
|
||||
{
|
||||
std::size_t h = 0;
|
||||
HashCombine(h, p.vs.key, p.gs.key, p.ps.key_hi, p.ps.key_lo);
|
||||
HashCombine(h, p.vs.key, p.ps.key_hi, p.ps.key_lo);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
@ -160,7 +158,9 @@ private:
|
|||
|
||||
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
|
||||
std::unique_ptr<GL::StreamBuffer> m_index_stream_buffer;
|
||||
GLuint m_vertex_array_object = 0;
|
||||
GLuint m_expand_ibo = 0;
|
||||
GLuint m_vao = 0;
|
||||
GLuint m_expand_vao = 0;
|
||||
GLenum m_draw_topology = 0;
|
||||
|
||||
std::unique_ptr<GL::StreamBuffer> m_vertex_uniform_stream_buffer;
|
||||
|
@ -344,6 +344,7 @@ public:
|
|||
|
||||
void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm);
|
||||
|
||||
void IASetVAO(GLuint vao);
|
||||
void IASetPrimitiveTopology(GLenum topology);
|
||||
void IASetVertexBuffer(const void* vertices, size_t count);
|
||||
void IASetIndexBuffer(const void* index, size_t count);
|
||||
|
@ -367,7 +368,6 @@ public:
|
|||
const std::string_view& macro_sel = std::string_view());
|
||||
std::string GenGlslHeader(const std::string_view& entry, GLenum type, const std::string_view& macro);
|
||||
std::string GetVSSource(VSSelector sel);
|
||||
std::string GetGSSource(GSSelector sel);
|
||||
std::string GetPSSource(const PSSelector& sel);
|
||||
GLuint CreateSampler(PSSamplerSelector sel);
|
||||
GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel);
|
||||
|
|
|
@ -94,7 +94,7 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
|
|||
// Depth buffer
|
||||
case Format::DepthStencil:
|
||||
{
|
||||
if (!GLLoader::found_framebuffer_fetch)
|
||||
if (!g_gs_device->Features().framebuffer_fetch)
|
||||
{
|
||||
gl_fmt = GL_DEPTH32F_STENCIL8;
|
||||
m_int_format = GL_DEPTH_STENCIL;
|
||||
|
|
|
@ -581,17 +581,17 @@ bool GSDeviceVK::CheckFeatures()
|
|||
m_features.framebuffer_fetch = g_vulkan_context->GetOptionalExtensions().vk_arm_rasterization_order_attachment_access && !GSConfig.DisableFramebufferFetch;
|
||||
m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0;
|
||||
m_features.broken_point_sampler = isAMD;
|
||||
m_features.geometry_shader = features.geometryShader && GSConfig.OverrideGeometryShaders != 0;
|
||||
// Usually, geometry shader indicates primid support
|
||||
// However on Metal (MoltenVK), geometry shader is never available, but primid sometimes is
|
||||
// Officially, it's available on GPUs that support barycentric coordinates (Newer AMD and Apple)
|
||||
// Unofficially, it seems to work on older Intel GPUs (but breaks other things on newer Intel GPUs, see GSMTLDeviceInfo.mm for details)
|
||||
// We'll only enable for the officially supported GPUs here. We'll leave in the option of force-enabling it with OverrideGeometryShaders though.
|
||||
m_features.primitive_id = features.geometryShader || GSConfig.OverrideGeometryShaders == 1 || g_vulkan_context->GetOptionalExtensions().vk_khr_fragment_shader_barycentric;
|
||||
m_features.primitive_id = features.geometryShader || g_vulkan_context->GetOptionalExtensions().vk_khr_fragment_shader_barycentric;
|
||||
m_features.prefer_new_textures = true;
|
||||
m_features.provoking_vertex_last = g_vulkan_context->GetOptionalExtensions().vk_ext_provoking_vertex;
|
||||
m_features.dual_source_blend = features.dualSrcBlend && !GSConfig.DisableDualSourceBlend;
|
||||
m_features.clip_control = true;
|
||||
m_features.vs_expand = g_vulkan_context->GetOptionalExtensions().vk_khr_shader_draw_parameters;
|
||||
|
||||
if (!m_features.dual_source_blend)
|
||||
Console.Warning("Vulkan driver is missing dual-source blending. This will have an impact on performance.");
|
||||
|
@ -624,9 +624,10 @@ bool GSDeviceVK::CheckFeatures()
|
|||
(features.largePoints && limits.pointSizeRange[0] <= f_upscale && limits.pointSizeRange[1] >= f_upscale);
|
||||
m_features.line_expand =
|
||||
(features.wideLines && limits.lineWidthRange[0] <= f_upscale && limits.lineWidthRange[1] >= f_upscale);
|
||||
|
||||
DevCon.WriteLn("Using %s for point expansion and %s for line expansion.",
|
||||
m_features.point_expand ? "hardware" : "geometry shaders",
|
||||
m_features.line_expand ? "hardware" : "geometry shaders");
|
||||
m_features.point_expand ? "hardware" : "vertex expanding",
|
||||
m_features.line_expand ? "hardware" : "vertex expanding");
|
||||
|
||||
// Check texture format support before we try to create them.
|
||||
for (u32 fmt = static_cast<u32>(GSTexture::Format::Color); fmt < static_cast<u32>(GSTexture::Format::PrimID); fmt++)
|
||||
|
@ -1004,6 +1005,7 @@ void GSDeviceVK::DoMultiStretchRects(
|
|||
m_index.count = icount;
|
||||
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
|
||||
m_index_stream_buffer.CommitMemory(icount * sizeof(u32));
|
||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
|
||||
|
||||
// Even though we're batching, a cmdbuffer submit could've messed this up.
|
||||
const GSVector4i rc(dTex->GetRect());
|
||||
|
@ -1379,6 +1381,8 @@ void GSDeviceVK::IASetIndexBuffer(const void* index, size_t count)
|
|||
|
||||
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
|
||||
m_index_stream_buffer.CommitMemory(size);
|
||||
|
||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
|
||||
}
|
||||
|
||||
void GSDeviceVK::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, FeedbackLoopFlag feedback_loop)
|
||||
|
@ -1493,10 +1497,14 @@ static void AddMacro(std::stringstream& ss, const char* name, int value)
|
|||
|
||||
static void AddShaderHeader(std::stringstream& ss)
|
||||
{
|
||||
const GSDevice::FeatureSupport features(g_gs_device->Features());
|
||||
|
||||
ss << "#version 460 core\n";
|
||||
ss << "#extension GL_EXT_samplerless_texture_functions : require\n";
|
||||
|
||||
const GSDevice::FeatureSupport features(g_gs_device->Features());
|
||||
if (features.vs_expand)
|
||||
ss << "#extension GL_ARB_shader_draw_parameters : require\n";
|
||||
|
||||
if (!features.texture_barrier)
|
||||
ss << "#define DISABLE_TEXTURE_BARRIER 1\n";
|
||||
if (!features.dual_source_blend)
|
||||
|
@ -1568,7 +1576,9 @@ bool GSDeviceVK::CreateNullTexture()
|
|||
|
||||
bool GSDeviceVK::CreateBuffers()
|
||||
{
|
||||
if (!m_vertex_stream_buffer.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_BUFFER_SIZE))
|
||||
if (!m_vertex_stream_buffer.Create(
|
||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | (m_features.vs_expand ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : 0),
|
||||
VERTEX_BUFFER_SIZE))
|
||||
{
|
||||
Host::ReportErrorAsync("GS", "Failed to allocate vertex buffer");
|
||||
return false;
|
||||
|
@ -1593,7 +1603,14 @@ bool GSDeviceVK::CreateBuffers()
|
|||
}
|
||||
|
||||
SetVertexBuffer(m_vertex_stream_buffer.GetBuffer(), 0);
|
||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
|
||||
|
||||
if (!g_vulkan_context->AllocatePreinitializedGPUBuffer(EXPAND_BUFFER_SIZE, &m_expand_index_buffer,
|
||||
&m_expand_index_buffer_allocation, VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
|
||||
&GSDevice::GenerateExpansionIndexBuffer))
|
||||
{
|
||||
Host::ReportErrorAsync("GS", "Failed to allocate expansion index buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -1625,6 +1642,8 @@ bool GSDeviceVK::CreatePipelineLayouts()
|
|||
dslb.AddBinding(
|
||||
0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT);
|
||||
dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
if (m_features.vs_expand)
|
||||
dslb.AddBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT);
|
||||
if ((m_tfx_ubo_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::Util::SetObjectName(dev, m_tfx_ubo_ds_layout, "TFX UBO descriptor layout");
|
||||
|
@ -2366,9 +2385,6 @@ void GSDeviceVK::RenderImGui()
|
|||
|
||||
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
|
||||
}
|
||||
|
||||
// normal draws use 32-bit indices
|
||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
|
||||
}
|
||||
|
||||
bool GSDeviceVK::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array<u32, NUM_CAS_CONSTANTS>& constants)
|
||||
|
@ -2449,8 +2465,6 @@ void GSDeviceVK::DestroyResources()
|
|||
Vulkan::Util::SafeDestroyPipeline(it.second);
|
||||
for (auto& it : m_tfx_fragment_shaders)
|
||||
Vulkan::Util::SafeDestroyShaderModule(it.second);
|
||||
for (auto& it : m_tfx_geometry_shaders)
|
||||
Vulkan::Util::SafeDestroyShaderModule(it.second);
|
||||
for (auto& it : m_tfx_vertex_shaders)
|
||||
Vulkan::Util::SafeDestroyShaderModule(it.second);
|
||||
for (VkPipeline& it : m_interlace)
|
||||
|
@ -2506,6 +2520,12 @@ void GSDeviceVK::DestroyResources()
|
|||
m_vertex_uniform_stream_buffer.Destroy(false);
|
||||
m_index_stream_buffer.Destroy(false);
|
||||
m_vertex_stream_buffer.Destroy(false);
|
||||
if (m_expand_index_buffer != VK_NULL_HANDLE)
|
||||
{
|
||||
vmaDestroyBuffer(g_vulkan_context->GetAllocator(), m_expand_index_buffer, m_expand_index_buffer_allocation);
|
||||
m_expand_index_buffer = VK_NULL_HANDLE;
|
||||
m_expand_index_buffer_allocation = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
Vulkan::Util::SafeDestroyPipelineLayout(m_tfx_pipeline_layout);
|
||||
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_tfx_rt_texture_ds_layout);
|
||||
|
@ -2530,6 +2550,8 @@ VkShaderModule GSDeviceVK::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
|
|||
AddMacro(ss, "VS_FST", sel.fst);
|
||||
AddMacro(ss, "VS_IIP", sel.iip);
|
||||
AddMacro(ss, "VS_POINT_SIZE", sel.point_size);
|
||||
AddMacro(ss, "VS_EXPAND", static_cast<int>(sel.expand));
|
||||
AddMacro(ss, "VS_PROVOKING_VERTEX_LAST", static_cast<int>(m_features.provoking_vertex_last));
|
||||
ss << m_tfx_source;
|
||||
|
||||
VkShaderModule mod = g_vulkan_shader_cache->GetVertexShader(ss.str());
|
||||
|
@ -2540,29 +2562,6 @@ VkShaderModule GSDeviceVK::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
|
|||
return mod;
|
||||
}
|
||||
|
||||
VkShaderModule GSDeviceVK::GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel)
|
||||
{
|
||||
const auto it = m_tfx_geometry_shaders.find(sel.key);
|
||||
if (it != m_tfx_geometry_shaders.end())
|
||||
return it->second;
|
||||
|
||||
std::stringstream ss;
|
||||
AddShaderHeader(ss);
|
||||
AddShaderStageMacro(ss, false, true, false);
|
||||
AddMacro(ss, "GS_IIP", sel.iip);
|
||||
AddMacro(ss, "GS_PRIM", static_cast<int>(sel.topology));
|
||||
AddMacro(ss, "GS_EXPAND", sel.expand);
|
||||
AddMacro(ss, "GS_FORWARD_PRIMID", sel.forward_primid);
|
||||
ss << m_tfx_source;
|
||||
|
||||
VkShaderModule mod = g_vulkan_shader_cache->GetGeometryShader(ss.str());
|
||||
if (mod)
|
||||
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), mod, "TFX Geometry %08X", sel.key);
|
||||
|
||||
m_tfx_geometry_shaders.emplace(sel.key, mod);
|
||||
return mod;
|
||||
}
|
||||
|
||||
VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector& sel)
|
||||
{
|
||||
const auto it = m_tfx_fragment_shaders.find(sel);
|
||||
|
@ -2651,9 +2650,8 @@ VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p)
|
|||
}
|
||||
|
||||
VkShaderModule vs = GetTFXVertexShader(p.vs);
|
||||
VkShaderModule gs = p.gs.expand ? GetTFXGeometryShader(p.gs) : VK_NULL_HANDLE;
|
||||
VkShaderModule fs = GetTFXFragmentShader(pps);
|
||||
if (vs == VK_NULL_HANDLE || (p.gs.expand && gs == VK_NULL_HANDLE) || fs == VK_NULL_HANDLE)
|
||||
if (vs == VK_NULL_HANDLE || fs == VK_NULL_HANDLE)
|
||||
return VK_NULL_HANDLE;
|
||||
|
||||
Vulkan::GraphicsPipelineBuilder gpb;
|
||||
|
@ -2685,19 +2683,20 @@ VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p)
|
|||
|
||||
// Shaders
|
||||
gpb.SetVertexShader(vs);
|
||||
if (gs != VK_NULL_HANDLE)
|
||||
gpb.SetGeometryShader(gs);
|
||||
gpb.SetFragmentShader(fs);
|
||||
|
||||
// IA
|
||||
gpb.AddVertexBuffer(0, sizeof(GSVertex));
|
||||
gpb.AddVertexAttribute(0, 0, VK_FORMAT_R32G32_SFLOAT, 0); // ST
|
||||
gpb.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UINT, 8); // RGBA
|
||||
gpb.AddVertexAttribute(2, 0, VK_FORMAT_R32_SFLOAT, 12); // Q
|
||||
gpb.AddVertexAttribute(3, 0, VK_FORMAT_R16G16_UINT, 16); // XY
|
||||
gpb.AddVertexAttribute(4, 0, VK_FORMAT_R32_UINT, 20); // Z
|
||||
gpb.AddVertexAttribute(5, 0, VK_FORMAT_R16G16_UINT, 24); // UV
|
||||
gpb.AddVertexAttribute(6, 0, VK_FORMAT_R8G8B8A8_UNORM, 28); // FOG
|
||||
if (p.vs.expand == GSHWDrawConfig::VSExpand::None)
|
||||
{
|
||||
gpb.AddVertexBuffer(0, sizeof(GSVertex));
|
||||
gpb.AddVertexAttribute(0, 0, VK_FORMAT_R32G32_SFLOAT, 0); // ST
|
||||
gpb.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UINT, 8); // RGBA
|
||||
gpb.AddVertexAttribute(2, 0, VK_FORMAT_R32_SFLOAT, 12); // Q
|
||||
gpb.AddVertexAttribute(3, 0, VK_FORMAT_R16G16_UINT, 16); // XY
|
||||
gpb.AddVertexAttribute(4, 0, VK_FORMAT_R32_UINT, 20); // Z
|
||||
gpb.AddVertexAttribute(5, 0, VK_FORMAT_R16G16_UINT, 24); // UV
|
||||
gpb.AddVertexAttribute(6, 0, VK_FORMAT_R8G8B8A8_UNORM, 28); // FOG
|
||||
}
|
||||
|
||||
// DepthStencil
|
||||
static const VkCompareOp ztst[] = {
|
||||
|
@ -2753,7 +2752,7 @@ VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p)
|
|||
if (pipeline)
|
||||
{
|
||||
Vulkan::Util::SetObjectName(
|
||||
g_vulkan_context->GetDevice(), pipeline, "TFX Pipeline %08X/%08X/%" PRIX64 "%08X", p.vs.key, p.gs.key, p.ps.key_hi, p.ps.key_lo);
|
||||
g_vulkan_context->GetDevice(), pipeline, "TFX Pipeline %08X/%" PRIX64 "%08X", p.vs.key, p.ps.key_hi, p.ps.key_lo);
|
||||
}
|
||||
|
||||
return pipeline;
|
||||
|
@ -2822,6 +2821,11 @@ bool GSDeviceVK::CreatePersistentDescriptorSets()
|
|||
m_vertex_uniform_stream_buffer.GetBuffer(), 0, sizeof(GSHWDrawConfig::VSConstantBuffer));
|
||||
dsub.AddBufferDescriptorWrite(m_tfx_descriptor_sets[0], 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
|
||||
m_fragment_uniform_stream_buffer.GetBuffer(), 0, sizeof(GSHWDrawConfig::PSConstantBuffer));
|
||||
if (m_features.vs_expand)
|
||||
{
|
||||
dsub.AddBufferDescriptorWrite(m_tfx_descriptor_sets[0], 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
m_vertex_stream_buffer.GetBuffer(), 0, VERTEX_BUFFER_SIZE);
|
||||
}
|
||||
dsub.Update(dev);
|
||||
Vulkan::Util::SetObjectName(dev, m_tfx_descriptor_sets[0], "Persistent TFX UBO set");
|
||||
return true;
|
||||
|
@ -3476,8 +3480,7 @@ GSTextureVK* GSDeviceVK::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config)
|
|||
DrawPrimitive();
|
||||
|
||||
// image is now filled with either -1 or INT_MAX, so now we can do the prepass
|
||||
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
|
||||
IASetIndexBuffer(config.indices, config.nindices);
|
||||
UploadHWDrawVerticesAndIndices(config);
|
||||
|
||||
// cut down the configuration for the prepass, we don't need blending or any feedback loop
|
||||
PipelineSelector& pipe = m_pipeline_selector;
|
||||
|
@ -3722,10 +3725,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
|||
|
||||
// VB/IB upload, if we did DATE setup and it's not HDR this has already been done
|
||||
if (!date_image || hdr_rt)
|
||||
{
|
||||
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
|
||||
IASetIndexBuffer(config.indices, config.nindices);
|
||||
}
|
||||
UploadHWDrawVerticesAndIndices(config);
|
||||
|
||||
// now we can do the actual draw
|
||||
if (BindDrawPipeline(pipe))
|
||||
|
@ -3818,7 +3818,6 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
|||
void GSDeviceVK::UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe)
|
||||
{
|
||||
pipe.vs.key = config.vs.key;
|
||||
pipe.gs.key = config.gs.key;
|
||||
pipe.ps.key_hi = config.ps.key_hi;
|
||||
pipe.ps.key_lo = config.ps.key_lo;
|
||||
pipe.dss.key = config.depth.key;
|
||||
|
@ -3840,6 +3839,22 @@ void GSDeviceVK::UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelect
|
|||
pipe.vs.point_size |= (config.topology == GSHWDrawConfig::Topology::Point);
|
||||
}
|
||||
|
||||
void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
||||
{
|
||||
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
|
||||
|
||||
if (config.vs.UseExpandIndexBuffer())
|
||||
{
|
||||
m_index.start = 0;
|
||||
m_index.count = config.nindices;
|
||||
SetIndexBuffer(m_expand_index_buffer, 0, VK_INDEX_TYPE_UINT32);
|
||||
}
|
||||
else
|
||||
{
|
||||
IASetIndexBuffer(config.indices, config.nindices);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier)
|
||||
{
|
||||
if (config.drawlist)
|
||||
|
@ -3847,23 +3862,25 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
|||
GL_PUSH("Split the draw (SPRITE)");
|
||||
g_perfmon.Put(GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()) - static_cast<u32>(skip_first_barrier));
|
||||
|
||||
u32 count = 0;
|
||||
const u32 indices_per_prim = config.indices_per_prim;
|
||||
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
|
||||
u32 p = 0;
|
||||
u32 n = 0;
|
||||
|
||||
if (skip_first_barrier)
|
||||
{
|
||||
count = (*config.drawlist)[n] * config.indices_per_prim;
|
||||
const u32 count = (*config.drawlist)[n] * indices_per_prim;
|
||||
DrawIndexedPrimitive(p, count);
|
||||
p += count;
|
||||
++n;
|
||||
}
|
||||
|
||||
for (; n < static_cast<u32>(config.drawlist->size()); p += count, ++n)
|
||||
for (; n < draw_list_size; n++)
|
||||
{
|
||||
count = (*config.drawlist)[n] * config.indices_per_prim;
|
||||
const u32 count = (*config.drawlist)[n] * indices_per_prim;
|
||||
ColorBufferBarrier(draw_rt);
|
||||
DrawIndexedPrimitive(p, count);
|
||||
p += count;
|
||||
}
|
||||
|
||||
return;
|
||||
|
@ -3873,21 +3890,22 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
|||
{
|
||||
if (config.require_full_barrier)
|
||||
{
|
||||
GL_PUSH("Split single draw in %d draw", config.nindices / config.indices_per_prim);
|
||||
g_perfmon.Put(GSPerfMon::Barriers, (config.nindices / config.indices_per_prim) - static_cast<u32>(skip_first_barrier));
|
||||
const u32 indices_per_prim = config.indices_per_prim;
|
||||
|
||||
GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim);
|
||||
g_perfmon.Put(GSPerfMon::Barriers, (config.nindices / indices_per_prim) - static_cast<u32>(skip_first_barrier));
|
||||
|
||||
const u32 ipp = config.indices_per_prim;
|
||||
u32 p = 0;
|
||||
if (skip_first_barrier)
|
||||
{
|
||||
DrawIndexedPrimitive(p, ipp);
|
||||
p += ipp;
|
||||
DrawIndexedPrimitive(p, indices_per_prim);
|
||||
p += indices_per_prim;
|
||||
}
|
||||
|
||||
for (; p < config.nindices; p += ipp)
|
||||
for (; p < config.nindices; p += indices_per_prim)
|
||||
{
|
||||
ColorBufferBarrier(draw_rt);
|
||||
DrawIndexedPrimitive(p, ipp);
|
||||
DrawIndexedPrimitive(p, indices_per_prim);
|
||||
}
|
||||
|
||||
return;
|
||||
|
@ -3897,11 +3915,8 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
|
|||
{
|
||||
g_perfmon.Put(GSPerfMon::Barriers, 1);
|
||||
ColorBufferBarrier(draw_rt);
|
||||
DrawIndexedPrimitive();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't need any barrier
|
||||
DrawIndexedPrimitive();
|
||||
}
|
||||
|
|
|
@ -58,7 +58,6 @@ public:
|
|||
};
|
||||
|
||||
GSHWDrawConfig::VSSelector vs;
|
||||
GSHWDrawConfig::GSSelector gs;
|
||||
GSHWDrawConfig::DepthStencilSelector dss;
|
||||
GSHWDrawConfig::ColorMaskSelector cms;
|
||||
GSHWDrawConfig::BlendState bs;
|
||||
|
@ -78,7 +77,7 @@ public:
|
|||
std::size_t operator()(const PipelineSelector& e) const noexcept
|
||||
{
|
||||
std::size_t hash = 0;
|
||||
HashCombine(hash, e.vs.key, e.gs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key);
|
||||
HashCombine(hash, e.vs.key, e.ps.key_hi, e.ps.key_lo, e.dss.key, e.cms.key, e.bs.key, e.key);
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
|
@ -123,6 +122,8 @@ private:
|
|||
Vulkan::StreamBuffer m_index_stream_buffer;
|
||||
Vulkan::StreamBuffer m_vertex_uniform_stream_buffer;
|
||||
Vulkan::StreamBuffer m_fragment_uniform_stream_buffer;
|
||||
VkBuffer m_expand_index_buffer = VK_NULL_HANDLE;
|
||||
VmaAllocation m_expand_index_buffer_allocation = VK_NULL_HANDLE;
|
||||
|
||||
VkSampler m_point_sampler = VK_NULL_HANDLE;
|
||||
VkSampler m_linear_sampler = VK_NULL_HANDLE;
|
||||
|
@ -142,7 +143,6 @@ private:
|
|||
VkPipeline m_shadeboost_pipeline = {};
|
||||
|
||||
std::unordered_map<u32, VkShaderModule> m_tfx_vertex_shaders;
|
||||
std::unordered_map<u32, VkShaderModule> m_tfx_geometry_shaders;
|
||||
std::unordered_map<GSHWDrawConfig::PSSelector, VkShaderModule, GSHWDrawConfig::PSSelectorHash> m_tfx_fragment_shaders;
|
||||
std::unordered_map<PipelineSelector, VkPipeline, PipelineSelectorHash> m_tfx_pipelines;
|
||||
|
||||
|
@ -183,7 +183,6 @@ private:
|
|||
void ClearSamplerCache() final;
|
||||
|
||||
VkShaderModule GetTFXVertexShader(GSHWDrawConfig::VSSelector sel);
|
||||
VkShaderModule GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel);
|
||||
VkShaderModule GetTFXFragmentShader(const GSHWDrawConfig::PSSelector& sel);
|
||||
VkPipeline CreateTFXPipeline(const PipelineSelector& p);
|
||||
VkPipeline GetTFXPipeline(const PipelineSelector& p);
|
||||
|
@ -307,6 +306,7 @@ public:
|
|||
|
||||
void RenderHW(GSHWDrawConfig& config) override;
|
||||
void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe);
|
||||
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
|
||||
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -529,7 +529,6 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const
|
|||
OpEqu(UserHacks_GPUTargetCLUTMode) &&
|
||||
OpEqu(UserHacks_TextureInsideRt) &&
|
||||
OpEqu(OverrideTextureBarriers) &&
|
||||
OpEqu(OverrideGeometryShaders) &&
|
||||
|
||||
OpEqu(CAS_Sharpness) &&
|
||||
OpEqu(ShadeBoost_Brightness) &&
|
||||
|
@ -574,8 +573,7 @@ bool Pcsx2Config::GSOptions::RestartOptionsAreEqual(const GSOptions& right) cons
|
|||
OpEqu(DisableDualSourceBlend) &&
|
||||
OpEqu(DisableFramebufferFetch) &&
|
||||
OpEqu(DisableThreadedPresentation) &&
|
||||
OpEqu(OverrideTextureBarriers) &&
|
||||
OpEqu(OverrideGeometryShaders);
|
||||
OpEqu(OverrideTextureBarriers);
|
||||
}
|
||||
|
||||
void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap)
|
||||
|
@ -724,7 +722,6 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap)
|
|||
GSSettingIntEnumEx(UserHacks_GPUTargetCLUTMode, "UserHacks_GPUTargetCLUTMode");
|
||||
GSSettingIntEnumEx(TriFilter, "TriFilter");
|
||||
GSSettingIntEx(OverrideTextureBarriers, "OverrideTextureBarriers");
|
||||
GSSettingIntEx(OverrideGeometryShaders, "OverrideGeometryShaders");
|
||||
|
||||
GSSettingInt(ShadeBoost_Brightness);
|
||||
GSSettingInt(ShadeBoost_Contrast);
|
||||
|
|
|
@ -15,4 +15,4 @@
|
|||
|
||||
/// Version number for GS and other shaders. Increment whenever any of the contents of the
|
||||
/// shaders change, to invalidate the cache.
|
||||
static constexpr u32 SHADER_CACHE_VERSION = 23;
|
||||
static constexpr u32 SHADER_CACHE_VERSION = 24;
|
||||
|
|
Loading…
Reference in New Issue