diff --git a/bin/resources/shaders/vulkan/convert.glsl b/bin/resources/shaders/vulkan/convert.glsl new file mode 100644 index 0000000000..7bb24ab99a --- /dev/null +++ b/bin/resources/shaders/vulkan/convert.glsl @@ -0,0 +1,412 @@ +#ifndef PS_SCALE_FACTOR +#define PS_SCALE_FACTOR 1 +#endif + +#ifdef VERTEX_SHADER + +layout(location = 0) in vec4 a_pos; +layout(location = 1) in vec2 a_tex; +layout(location = 2) in vec4 a_color; + +layout(location = 0) out vec2 v_tex; +layout(location = 1) out vec4 v_color; + +void main() +{ + gl_Position = vec4(a_pos.x, -a_pos.y, a_pos.z, a_pos.w); + v_tex = a_tex; + v_color = a_color; +} + +#endif + +#ifdef FRAGMENT_SHADER + +layout(location = 0) in vec2 v_tex; +layout(location = 1) in vec4 v_color; + +#if defined(ps_convert_rgba8_16bits) || defined(ps_convert_float32_32bits) +layout(location = 0) out uint o_col0; +#else +layout(location = 0) out vec4 o_col0; +#endif + +layout(set = 0, binding = 0) uniform sampler2D samp0; + +vec4 sample_c(vec2 uv) +{ + return texture(samp0, uv); +} + +vec4 ps_crt(uint i) +{ + vec4 mask[4] = vec4[4] + ( + vec4(1, 0, 0, 0), + vec4(0, 1, 0, 0), + vec4(0, 0, 1, 0), + vec4(1, 1, 1, 0) + ); + return sample_c(v_tex) * clamp((mask[i] + 0.5f), 0.0f, 1.0f); +} + +vec4 ps_scanlines(uint i) +{ + vec4 mask[2] = + { + vec4(1, 1, 1, 0), + vec4(0, 0, 0, 0) + }; + + return sample_c(v_tex) * clamp((mask[i] + 0.5f), 0.0f, 1.0f); +} + +#ifdef ps_copy +void ps_copy() +{ + o_col0 = sample_c(v_tex); +} +#endif + +#ifdef ps_filter_transparency +void ps_filter_transparency() +{ + vec4 c = sample_c(v_tex); + + c.a = dot(c.rgb, vec3(0.299, 0.587, 0.114)); + + o_col0 = c; +} +#endif + +#ifdef ps_convert_rgba8_16bits +void ps_convert_rgba8_16bits() +{ + vec4 c = sample_c(v_tex); + + c.a *= 256.0f / 127; // hm, 0.5 won't give us 1.0 if we just multiply with 2 + + uvec4 i = uvec4(c * vec4(0x001f, 0x03e0, 0x7c00, 0x8000)); + + o_col0 = (i.x & 0x001fu) | (i.y & 0x03e0u) | (i.z & 0x7c00u) | (i.w & 0x8000u); +} +#endif + +#ifdef 
ps_datm1 +void ps_datm1() +{ + o_col0 = vec4(0, 0, 0, 0); + + if(sample_c(v_tex).a < (127.5f / 255.0f)) // >= 0x80 pass + discard; + +} +#endif + +#ifdef ps_datm0 +void ps_datm0() +{ + o_col0 = vec4(0, 0, 0, 0); + + if((127.5f / 255.0f) < sample_c(v_tex).a) // < 0x80 pass (== 0x80 should not pass) + discard; +} +#endif + +#ifdef ps_mod256 +void ps_mod256() +{ + vec4 c = roundEven(sample_c(v_tex) * 255); + // We use 2 fmod to avoid negative value. + vec4 fmod1 = mod(c, 256) + 256; + vec4 fmod2 = mod(fmod1, 256); + + o_col0 = fmod2 / 255.0f; +} +#endif + +#ifdef ps_filter_scanlines +void ps_filter_scanlines() // scanlines +{ + uvec4 p = uvec4(gl_FragCoord); + + o_col0 = ps_scanlines(p.y % 2); +} +#endif + +#ifdef ps_filter_diagonal +void ps_filter_diagonal() // diagonal +{ + uvec4 p = uvec4(gl_FragCoord); + o_col0 = ps_crt((p.x + (p.y % 3)) % 3); +} +#endif + +#ifdef ps_filter_triangular +void ps_filter_triangular() // triangular +{ + uvec4 p = uvec4(gl_FragCoord); + + // output.c = ps_crt(input, ((p.x + (p.y & 1) * 3) >> 1) % 3); + o_col0 = ps_crt(((p.x + ((p.y >> 1) & 1) * 3) >> 1) % 3); +} +#endif + +#ifdef ps_filter_complex +void ps_filter_complex() // triangular +{ + const float PI = 3.14159265359f; + vec2 texdim = vec2(textureSize(samp0, 0)); + if (dFdy(v_tex.y) * texdim.y > 0.5) + o_col0 = sample_c(v_tex); + else + o_col0 = (0.9 - 0.4 * cos(2 * PI * v_tex.y * texdim.y)) * sample_c(vec2(v_tex.x, (floor(v_tex.y * texdim.y) + 0.5) / texdim.y)); +} +#endif + +#ifdef ps_convert_float32_32bits +void ps_convert_float32_32bits() +{ + // Convert a vec32 depth texture into a 32 bits UINT texture + o_col0 = uint(exp2(32.0f) * sample_c(v_tex).r); +} +#endif + +#ifdef ps_convert_float32_rgba8 +void ps_convert_float32_rgba8() +{ + // Convert a vec32 depth texture into a RGBA color texture + const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f)); + const vec4 bitMsk = vec4(0.0, 1.0 / 256.0, 1.0 / 256.0, 1.0 / 256.0); + + vec4 res = 
fract(vec4(sample_c(v_tex).rrrr) * bitSh); + + o_col0 = (res - res.xxyz * bitMsk) * 256.0f / 255.0f; +} +#endif + +#ifdef ps_convert_float16_rgb5a1 +void ps_convert_float16_rgb5a1() +{ + // Convert a vec32 (only 16 lsb) depth into a RGB5A1 color texture + const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f)); + const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1); + uvec4 color = uvec4(vec4(sample_c(v_tex).rrrr) * bitSh) & bitMsk; + + o_col0 = vec4(color) / vec4(32.0f, 32.0f, 32.0f, 1.0f); +} +#endif + +#ifdef ps_convert_rgba8_float32 +void ps_convert_rgba8_float32() +{ + // Convert a RRGBA texture into a float depth texture + // FIXME: I'm afraid of the accuracy + const vec4 bitSh = vec4(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f), exp2(-8.0f)) * vec4(255.0); + + gl_FragDepth = dot(sample_c(v_tex), bitSh); +} +#endif + +#ifdef ps_convert_rgba8_float24 +void ps_convert_rgba8_float24() +{ + // Same as above but without the alpha channel (24 bits Z) + + // Convert a RRGBA texture into a float depth texture + const vec3 bitSh = vec3(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f)) * vec3(255.0); + + gl_FragDepth = dot(sample_c(v_tex).rgb, bitSh); +} +#endif + +#ifdef ps_convert_rgba8_float16 +void ps_convert_rgba8_float16() +{ + // Same as above but without the A/B channels (16 bits Z) + + // Convert a RRGBA texture into a float depth texture + // FIXME: I'm afraid of the accuracy + const vec2 bitSh = vec2(exp2(-32.0f), exp2(-24.0f)) * vec2(255.0); + + gl_FragDepth = dot(sample_c(v_tex).rg, bitSh); +} +#endif + +#ifdef ps_convert_rgb5a1_float16 +void ps_convert_rgb5a1_float16() +{ + // Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z + // FIXME: I'm afraid of the accuracy + const vec4 bitSh = vec4(exp2(-32.0f), exp2(-27.0f), exp2(-22.0f), exp2(-17.0f)); + // Trunc color to drop useless lsb + vec4 color = trunc(sample_c(v_tex) * vec4(255.0f) / vec4(8.0f, 8.0f, 8.0f, 128.0f)); + + gl_FragDepth = dot(vec4(color), bitSh); +} +#endif + +#ifdef 
ps_convert_rgba_8i +void ps_convert_rgba_8i() +{ + // Potential speed optimization. There is a high probability that + // game only want to extract a single channel (blue). It will allow + // to remove most of the conditional operation and yield a +2/3 fps + // boost on MGS3 + // + // Hypothesis wrong in Prince of Persia ... Seriously WTF ! + //#define ONLY_BLUE; + + // Convert a RGBA texture into a 8 bits packed texture + // Input column: 8x2 RGBA pixels + // 0: 8 RGBA + // 1: 8 RGBA + // Output column: 16x4 Index pixels + // 0: 8 R | 8 B + // 1: 8 R | 8 B + // 2: 8 G | 8 A + // 3: 8 G | 8 A + float c; + + uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u); + ivec2 tb = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1); + + int ty = tb.y | (int(gl_FragCoord.y) & 1); + int txN = tb.x | (int(gl_FragCoord.x) & 7); + int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7); + + txN *= PS_SCALE_FACTOR; + txH *= PS_SCALE_FACTOR; + ty *= PS_SCALE_FACTOR; + + // TODO investigate texture gather + vec4 cN = texelFetch(samp0, ivec2(txN, ty), 0); + vec4 cH = texelFetch(samp0, ivec2(txH, ty), 0); + + + if ((sel.y & 4u) == 0u) + { +#ifdef ONLY_BLUE + c = cN.b; +#else + // Column 0 and 2 + if ((sel.y & 3u) < 2u) + { + // First 2 lines of the col + if (sel.x < 8u) + c = cN.r; + else + c = cN.b; + } + else + { + if (sel.x < 8u) + c = cH.g; + else + c = cH.a; + } +#endif + } + else + { +#ifdef ONLY_BLUE + c = cH.b; +#else + // Column 1 and 3 + if ((sel.y & 3u) < 2u) + { + // First 2 lines of the col + if (sel.x < 8u) + c = cH.r; + else + c = cH.b; + } + else + { + if (sel.x < 8u) + c = cN.g; + else + c = cN.a; + } +#endif + } + + o_col0 = vec4(c); // Divide by something here? 
+} +#endif + +#ifdef ps_yuv +layout(push_constant) uniform cb10 +{ + int EMODA; + int EMODC; +}; + +void ps_yuv() +{ + vec4 i = sample_c(v_tex); + vec4 o; + + mat3 rgb2yuv; + rgb2yuv[0] = vec3(0.587, -0.311, -0.419); + rgb2yuv[1] = vec3(0.114, 0.500, -0.081); + rgb2yuv[2] = vec3(0.299, -0.169, 0.500); + + vec3 yuv = rgb2yuv * i.gbr; + + float Y = float(0xDB)/255.0f * yuv.x + float(0x10)/255.0f; + float Cr = float(0xE0)/255.0f * yuv.y + float(0x80)/255.0f; + float Cb = float(0xE0)/255.0f * yuv.z + float(0x80)/255.0f; + + switch(EMODA) { + case 0: + o.a = i.a; + break; + case 1: + o.a = Y; + break; + case 2: + o.a = Y/2.0f; + break; + case 3: + o.a = 0.0f; + break; + } + + switch(EMODC) { + case 0: + o.rgb = i.rgb; + break; + case 1: + o.rgb = vec3(Y); + break; + case 2: + o.rgb = vec3(Y, Cb, Cr); + break; + case 3: + o.rgb = vec3(i.a); + break; + } + + o_col0 = o; +} +#endif + +#if defined(ps_stencil_image_init_0) || defined(ps_stencil_image_init_1) + +void main() +{ + o_col0 = vec4(0x7FFFFFFF); + + #ifdef ps_stencil_image_init_0 + if((127.5f / 255.0f) < sample_c(v_tex).a) // < 0x80 pass (== 0x80 should not pass) + o_col0 = vec4(-1); + #endif + #ifdef ps_stencil_image_init_1 + if(sample_c(v_tex).a < (127.5f / 255.0f)) // >= 0x80 pass + o_col0 = vec4(-1); + #endif +} +#endif + +#endif \ No newline at end of file diff --git a/bin/resources/shaders/vulkan/interlace.glsl b/bin/resources/shaders/vulkan/interlace.glsl new file mode 100644 index 0000000000..d44cf021b4 --- /dev/null +++ b/bin/resources/shaders/vulkan/interlace.glsl @@ -0,0 +1,65 @@ +#ifdef VERTEX_SHADER + +layout(location = 0) in vec4 a_pos; +layout(location = 1) in vec2 a_tex; + +layout(location = 0) out vec2 v_tex; + +void main() +{ + gl_Position = vec4(a_pos.x, -a_pos.y, a_pos.z, a_pos.w); + v_tex = a_tex; +} + +#endif + +#ifdef FRAGMENT_SHADER + +layout(location = 0) in vec2 v_tex; +layout(location = 0) out vec4 o_col0; + +layout(push_constant) uniform cb0 +{ + vec2 ZrH; + float hH; +}; + +layout(set = 
0, binding = 0) uniform sampler2D samp0; + +#ifdef ps_main0 +void ps_main0() +{ + o_col0 = texture(samp0, v_tex); + if (fract(v_tex.y * hH) - 0.5 < 0.0) + discard; +} +#endif + +#ifdef ps_main1 +void ps_main1() +{ + o_col0 = texture(samp0, v_tex); + if (0.5 - fract(v_tex.y * hH) < 0.0) + discard; +} +#endif + +#ifdef ps_main2 +void ps_main2() +{ + vec4 c0 = texture(samp0, v_tex - ZrH); + vec4 c1 = texture(samp0, v_tex); + vec4 c2 = texture(samp0, v_tex + ZrH); + + o_col0 = (c0 + c1 * 2.0f + c2) / 4.0f; +} +#endif + +#ifdef ps_main3 +void ps_main3() +{ + o_col0 = texture(samp0, v_tex); +} +#endif + +#endif diff --git a/bin/resources/shaders/vulkan/merge.glsl b/bin/resources/shaders/vulkan/merge.glsl new file mode 100644 index 0000000000..6b73a4f08a --- /dev/null +++ b/bin/resources/shaders/vulkan/merge.glsl @@ -0,0 +1,43 @@ +#ifdef VERTEX_SHADER + +layout(location = 0) in vec4 a_pos; +layout(location = 1) in vec2 a_tex; + +layout(location = 0) out vec2 v_tex; + +void main() +{ + gl_Position = vec4(a_pos.x, -a_pos.y, a_pos.z, a_pos.w); + v_tex = a_tex; +} + +#endif + +#ifdef FRAGMENT_SHADER + +layout(location = 0) in vec2 v_tex; +layout(location = 0) out vec4 o_col0; + +layout(push_constant) uniform cb10 +{ + vec4 BGColor; +}; + +layout(set = 0, binding = 0) uniform sampler2D samp0; + +void ps_main0() +{ + vec4 c = texture(samp0, v_tex); + // Note: clamping will be done by fixed unit + c.a *= 2.0f; + o_col0 = c; +} + +void ps_main1() +{ + vec4 c = texture(samp0, v_tex); + c.a = BGColor.a; + o_col0 = c; +} + +#endif diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl new file mode 100644 index 0000000000..7655f51575 --- /dev/null +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -0,0 +1,1184 @@ +////////////////////////////////////////////////////////////////////// +// Vertex Shader +////////////////////////////////////////////////////////////////////// + +#if defined(VERTEX_SHADER) || defined(GEOMETRY_SHADER) + +layout(std140, set 
= 0, binding = 0) uniform cb0 +{ + vec2 VertexScale; + vec2 VertexOffset; + vec2 TextureScale; + vec2 TextureOffset; + vec2 PointSize; + uint MaxDepth; + uint pad_cb0; +}; + +#endif + +#ifdef VERTEX_SHADER + +layout(location = 0) in vec2 a_st; +layout(location = 1) in uvec4 a_c; +layout(location = 2) in float a_q; +layout(location = 3) in uvec2 a_p; +layout(location = 4) in uint a_z; +layout(location = 5) in uvec2 a_uv; +layout(location = 6) in vec4 a_f; + +layout(location = 0) out VSOutput +{ + vec4 t; + vec4 ti; + + #if VS_IIP != 0 + vec4 c; + #else + flat vec4 c; + #endif +} vsOut; + +void main() +{ + // Clamp to max depth, gs doesn't wrap + float z = min(a_z, MaxDepth); + + // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go) + // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty + // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel + // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133 + + gl_Position = vec4(a_p, z, 1.0f) - vec4(0.05f, 0.05f, 0, 0); + gl_Position.xy = gl_Position.xy * vec2(VertexScale.x, -VertexScale.y) - vec2(VertexOffset.x, -VertexOffset.y); + gl_Position.z *= exp2(-32.0f); // integer->float depth + gl_Position.y = -gl_Position.y; + + #if VS_TME + vec2 uv = a_uv - TextureOffset; + vec2 st = a_st - TextureOffset; + + // Integer nomalized + vsOut.ti.xy = uv * TextureScale; + + #if VS_FST + // Integer integral + vsOut.ti.zw = uv; + #else + // float for post-processing in some games + vsOut.ti.zw = st / TextureScale; + #endif + + // Float coords + vsOut.t.xy = st; + vsOut.t.w = a_q; + #else + vsOut.t = vec4(0.0f, 0.0f, 0.0f, 1.0f); + vsOut.ti = vec4(0.0f); + #endif + + #if VS_POINT_SIZE + gl_PointSize = float(VS_POINT_SIZE_VALUE); + #endif + + vsOut.c = a_c; + vsOut.t.z = a_f.r; +} + +#endif + +#ifdef GEOMETRY_SHADER + 
+layout(location = 0) in VSOutput +{ + vec4 t; + vec4 ti; + #if GS_IIP != 0 + vec4 c; + #else + flat vec4 c; + #endif +} gsIn[]; + +layout(location = 0) out GSOutput +{ + vec4 t; + vec4 ti; + #if GS_IIP != 0 + vec4 c; + #else + flat vec4 c; + #endif +} gsOut; + +void WriteVertex(vec4 pos, vec4 t, vec4 ti, vec4 c) +{ + gl_Position = pos; + gsOut.t = t; + gsOut.ti = ti; + gsOut.c = c; + EmitVertex(); +} + +////////////////////////////////////////////////////////////////////// +// Geometry Shader +////////////////////////////////////////////////////////////////////// + +#if GS_PRIM == 0 && GS_POINT == 0 + +layout(points) in; +layout(points, max_vertices = 1) out; +void main() +{ + WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c); + EndPrimitive(); +} + +#elif GS_PRIM == 0 && GS_POINT == 1 + +layout(points) in; +layout(triangle_strip, max_vertices = 4) out; + +void main() +{ + // Transform a point to a NxN sprite + + // Get new position + vec4 lt_p = gl_in[0].gl_Position; + vec4 rb_p = gl_in[0].gl_Position + vec4(PointSize.x, PointSize.y, 0.0f, 0.0f); + vec4 lb_p = rb_p; + vec4 rt_p = rb_p; + lb_p.x = lt_p.x; + rt_p.y = lt_p.y; + + WriteVertex(lt_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c); + WriteVertex(lb_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c); + WriteVertex(rt_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c); + WriteVertex(rb_p, gsIn[0].t, gsIn[0].ti, gsIn[0].c); + + EndPrimitive(); +} + +#elif GS_PRIM == 1 && GS_LINE == 0 + +layout(lines) in; +layout(line_strip, max_vertices = 2) out; + +void main() +{ +#if GS_IIP == 0 + WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[1].c); + WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[1].c); +#else + WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c); + WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[1].c); +#endif + EndPrimitive(); +} + +#elif GS_PRIM == 1 && GS_LINE == 1 + +layout(lines) in; +layout(triangle_strip, max_vertices = 4) out; + +void main() +{ + // 
Transform a line to a thick line-sprite + vec4 left_t = gsIn[0].t; + vec4 left_ti = gsIn[0].ti; + vec4 left_c = gsIn[0].c; + vec4 right_t = gsIn[1].t; + vec4 right_ti = gsIn[1].ti; + vec4 right_c = gsIn[1].c; + vec4 lt_p = gl_in[0].gl_Position; + vec4 rt_p = gl_in[1].gl_Position; + + // Potentially there is faster math + vec2 line_vector = normalize(rt_p.xy - lt_p.xy); + vec2 line_normal = vec2(line_vector.y, -line_vector.x); + vec2 line_width = (line_normal * PointSize) / 2.0; + + lt_p.xy -= line_width; + rt_p.xy -= line_width; + vec4 lb_p = gl_in[0].gl_Position + vec4(line_width, 0.0, 0.0); + vec4 rb_p = gl_in[1].gl_Position + vec4(line_width, 0.0, 0.0); + + #if GS_IIP == 0 + left_c = right_c; + #endif + + WriteVertex(lt_p, left_t, left_ti, left_c); + WriteVertex(lb_p, left_t, left_ti, left_c); + WriteVertex(rt_p, right_t, right_ti, right_c); + WriteVertex(rb_p, right_t, right_ti, right_c); + EndPrimitive(); +} + +#elif GS_PRIM == 2 + +layout(triangles) in; +layout(triangle_strip, max_vertices = 3) out; + +void main() +{ + #if GS_IIP == 0 + WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[2].c); + WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[2].c); + WriteVertex(gl_in[2].gl_Position, gsIn[2].t, gsIn[2].ti, gsIn[2].c); + #else + WriteVertex(gl_in[0].gl_Position, gsIn[0].t, gsIn[0].ti, gsIn[0].c); + WriteVertex(gl_in[1].gl_Position, gsIn[1].t, gsIn[1].ti, gsIn[0].c); + WriteVertex(gl_in[2].gl_Position, gsIn[2].t, gsIn[2].ti, gsIn[0].c); + #endif + + EndPrimitive(); +} + +#elif GS_PRIM == 3 + +layout(lines) in; +layout(triangle_strip, max_vertices = 4) out; + +void main() +{ + vec4 lt_p = gl_in[0].gl_Position; + vec4 lt_t = gsIn[0].t; + vec4 lt_ti = gsIn[0].ti; + vec4 lt_c = gsIn[0].c; + vec4 rb_p = gl_in[1].gl_Position; + vec4 rb_t = gsIn[1].t; + vec4 rb_ti = gsIn[1].ti; + vec4 rb_c = gsIn[1].c; + + // flat depth + lt_p.z = rb_p.z; + // flat fog and texture perspective + lt_t.zw = rb_t.zw; + + // flat color + lt_c = rb_c; + + // Swap 
texture and position coordinate + vec4 lb_p = rb_p; + vec4 lb_t = rb_t; + vec4 lb_ti = rb_ti; + vec4 lb_c = rb_c; + lb_p.x = lt_p.x; + lb_t.x = lt_t.x; + lb_ti.x = lt_ti.x; + lb_ti.z = lt_ti.z; + + vec4 rt_p = rb_p; + vec4 rt_t = rb_t; + vec4 rt_ti = rb_ti; + vec4 rt_c = rb_c; + rt_p.y = lt_p.y; + rt_t.y = lt_t.y; + rt_ti.y = lt_ti.y; + rt_ti.w = lt_ti.w; + + WriteVertex(lt_p, lt_t, lt_ti, lt_c); + WriteVertex(lb_p, lb_t, lb_ti, lb_c); + WriteVertex(rt_p, rt_t, rt_ti, rt_c); + WriteVertex(rb_p, rb_t, rb_ti, rb_c); + EndPrimitive(); +} + +#endif +#endif + +#ifdef FRAGMENT_SHADER + +#define FMT_32 0 +#define FMT_24 1 +#define FMT_16 2 + +#ifndef VS_TME +#define VS_TME 1 +#define VS_FST 1 +#endif + +#ifndef GS_IIP +#define GS_IIP 0 +#define GS_PRIM 3 +#define GS_POINT 0 +#define GS_LINE 0 +#endif + +#ifndef PS_FST +#define PS_FST 0 +#define PS_WMS 0 +#define PS_WMT 0 +#define PS_FMT FMT_32 +#define PS_AEM 0 +#define PS_TFX 0 +#define PS_TCC 1 +#define PS_ATST 1 +#define PS_FOG 0 +#define PS_CLR1 0 +#define PS_FBA 0 +#define PS_FBMASK 0 +#define PS_LTF 1 +#define PS_TCOFFSETHACK 0 +#define PS_POINT_SAMPLER 0 +#define PS_SHUFFLE 0 +#define PS_READ_BA 0 +#define PS_DFMT 0 +#define PS_DEPTH_FMT 0 +#define PS_PAL_FMT 0 +#define PS_CHANNEL_FETCH 0 +#define PS_TALES_OF_ABYSS_HLE 0 +#define PS_URBAN_CHAOS_HLE 0 +#define PS_INVALID_TEX0 0 +#define PS_SCALE_FACTOR 1 +#define PS_HDR 0 +#define PS_COLCLIP 0 +#define PS_BLEND_A 0 +#define PS_BLEND_B 0 +#define PS_BLEND_C 0 +#define PS_BLEND_D 0 +#define PS_PABE 0 +#define PS_DITHER 0 +#define PS_ZCLAMP 0 +#define PS_FEEDBACK_LOOP 0 +#define PS_TEX_IS_FB 0 +#endif + +#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D) +#define SW_BLEND_NEEDS_RT (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1) + +#define PS_FEEDBACK_LOOP_IS_NEEDED (PS_TEX_IS_FB == 1 || PS_FBMASK || SW_BLEND_NEEDS_RT || (PS_DATE < 10 && (((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2)))) + +layout(std140, set = 0, binding = 1) uniform cb1 
+{ + vec3 FogColor; + float AREF; + vec4 WH; + vec2 TA; + float MaxDepthPS; + float Af; + uvec4 MskFix; + uvec4 FbMask; + vec4 HalfTexel; + vec4 MinMax; + ivec4 ChannelShuffle; + vec2 TC_OffsetHack; + vec2 pad_cb1; + mat4 DitherMatrix; +}; + +layout(location = 0) in VSOutput +{ + vec4 t; + vec4 ti; + #if PS_IIP != 0 + vec4 c; + #else + flat vec4 c; + #endif +} vsIn; + +#ifndef DISABLE_DUAL_SOURCE +layout(location = 0, index = 0) out vec4 o_col0; +layout(location = 0, index = 1) out vec4 o_col1; +#else +layout(location = 0) out vec4 o_col0; +#endif + +layout(set = 1, binding = 0) uniform sampler2D Texture; +layout(set = 1, binding = 1) uniform sampler2D Palette; +layout(set = 2, binding = 0) uniform texture2D RawTexture; + +#if PS_FEEDBACK_LOOP_IS_NEEDED +layout(input_attachment_index = 0, set = 2, binding = 1) uniform subpassInput RtSampler; +#endif + +#if PS_DATE > 0 +layout(set = 2, binding = 2) uniform texture2D PrimMinTexture; +#endif + +vec4 sample_c(vec2 uv) +{ +#if PS_TEX_IS_FB + return subpassLoad(RtSampler); +#else +#if PS_POINT_SAMPLER + // Weird issue with ATI/AMD cards, + // it looks like they add 127/128 of a texel to sampling coordinates + // occasionally causing point sampling to erroneously round up. + // I'm manually adjusting coordinates to the centre of texels here, + // though the centre is just paranoia, the top left corner works fine. + // As of 2018 this issue is still present. + uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw; +#endif + +#if PS_AUTOMATIC_LOD == 1 + return texture(Texture, uv); +#elif PS_MANUAL_LOD == 1 + // FIXME add LOD: K - ( LOG2(Q) * (1 << L)) + float K = MinMax.x; + float L = MinMax.y; + float bias = MinMax.z; + float max_lod = MinMax.w; + + float gs_lod = K - log2(abs(vsIn.t.w)) * L; + // FIXME max useful ? 
+ //float lod = max(min(gs_lod, max_lod) - bias, 0.0f); + float lod = min(gs_lod, max_lod) - bias; + + return textureLod(Texture, uv, lod); +#else + return textureLod(Texture, uv, 0); // No lod +#endif +#endif +} + +vec4 sample_p(float u) +{ + return texture(Palette, vec2(u, 0.0f)); +} + +vec4 clamp_wrap_uv(vec4 uv) +{ + vec4 tex_size; + + #if PS_INVALID_TEX0 + tex_size = WH.zwzw; + #else + tex_size = WH.xyxy; + #endif + + #if PS_WMS == PS_WMT + { + #if PS_WMS == 2 + { + uv = clamp(uv, MinMax.xyxy, MinMax.zwzw); + } + #elif PS_WMS == 3 + { + #if PS_FST == 0 + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + uv = fract(uv); + #endif + uv = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size; + } + #endif + } + #else + { + #if PS_WMS == 2 + { + uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); + } + #elif PS_WMS == 3 + { + #if PS_FST == 0 + uv.xz = fract(uv.xz); + #endif + uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; + } + #endif + #if PS_WMT == 2 + { + uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + } + #elif PS_WMT == 3 + { + #if PS_FST == 0 + uv.yw = fract(uv.yw); + #endif + uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; + } + #endif + } + #endif + + return uv; +} + +mat4 sample_4c(vec4 uv) +{ + mat4 c; + + c[0] = sample_c(uv.xy); + c[1] = sample_c(uv.zy); + c[2] = sample_c(uv.xw); + c[3] = sample_c(uv.zw); + + return c; +} + +vec4 sample_4_index(vec4 uv) +{ + vec4 c; + + c.x = sample_c(uv.xy).a; + c.y = sample_c(uv.zy).a; + c.z = sample_c(uv.xw).a; + c.w = sample_c(uv.zw).a; + + // Denormalize value + uvec4 i = uvec4(c * 255.0f + 0.5f); + + #if PS_PAL_FMT == 1 + // 4HL + c = vec4(i & 0xFu) / 255.0f; + #elif PS_PAL_FMT == 2 + // 4HH + c = vec4(i >> 4u) / 255.0f; + #endif + + // Most of texture will hit this code so keep normalized float value + // 8 bits + return c * 255./256 + 0.5/256; +} + +mat4 sample_4p(vec4 u) +{ + 
mat4 c; + + c[0] = sample_p(u.x); + c[1] = sample_p(u.y); + c[2] = sample_p(u.z); + c[3] = sample_p(u.w); + + return c; +} + +int fetch_raw_depth(ivec2 xy) +{ + vec4 col = texelFetch(RawTexture, xy, 0); + return int(col.r * exp2(32.0f)); +} + +vec4 fetch_raw_color(ivec2 xy) +{ + return texelFetch(RawTexture, xy, 0); +} + +vec4 fetch_c(ivec2 uv) +{ + return texelFetch(Texture, uv, 0); +} + +////////////////////////////////////////////////////////////////////// +// Depth sampling +////////////////////////////////////////////////////////////////////// + +ivec2 clamp_wrap_uv_depth(ivec2 uv) +{ + ivec4 mask = ivec4(MskFix << 4); + #if (PS_WMS == PS_WMT) + { + #if (PS_WMS == 2) + { + uv = clamp(uv, mask.xy, mask.zw); + } + #elif (PS_WMS == 3) + { + uv = (uv & mask.xy) | mask.zw; + } + #endif + } + #else + { + #if (PS_WMS == 2) + { + uv.x = clamp(uv.x, mask.x, mask.z); + } + #elif (PS_WMS == 3) + { + uv.x = (uv.x & mask.x) | mask.z; + } + #endif + #if (PS_WMT == 2) + { + uv.y = clamp(uv.y, mask.y, mask.w); + } + #elif (PS_WMT == 3) + { + uv.y = (uv.y & mask.y) | mask.w; + } + #endif + } + #endif + return uv; +} + +vec4 sample_depth(vec2 st, ivec2 pos) +{ + vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(PS_SCALE_FACTOR) * vec2(1.0f / 16.0f); + ivec2 uv = ivec2(uv_f); + + vec4 t = vec4(0.0f); + + #if (PS_TALES_OF_ABYSS_HLE == 1) + { + // Warning: UV can't be used in channel effect + int depth = fetch_raw_depth(pos); + + // Convert msb based on the palette + t = texelFetch(Palette, ivec2((depth >> 8) & 0xFF, 0), 0) * 255.0f; + } + #elif (PS_URBAN_CHAOS_HLE == 1) + { + // Depth buffer is read as a RGB5A1 texture. The game try to extract the green channel. + // So it will do a first channel trick to extract lsb, value is right-shifted. + // Then a new channel trick to extract msb which will shifted to the left. + // OpenGL uses a vec32 format for the depth so it requires a couple of conversion. + // To be faster both steps (msb&lsb) are done in a single pass. 
 + + // Warning: UV can't be used in channel effect + int depth = fetch_raw_depth(pos); + + // Convert lsb based on the palette + t = texelFetch(Palette, ivec2(depth & 0xFF, 0), 0) * 255.0f; + + // Msb is easier + float green = float(((depth >> 8) & 0xFF) * 36.0f); + green = min(green, 255.0f); + t.g += green; + } + #elif (PS_DEPTH_FMT == 1) + { + // Based on ps_main11 of convert + + // Convert a vec32 depth texture into a RGBA color texture + const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f)); + const vec4 bitMsk = vec4(0.0, 1.0f / 256.0f, 1.0f / 256.0f, 1.0f / 256.0f); + + vec4 res = fract(vec4(fetch_c(uv).r) * bitSh); + + t = (res - res.xxyz * bitMsk) * 256.0f; + } + #elif (PS_DEPTH_FMT == 2) + { + // Based on ps_main12 of convert + + // Convert a vec32 (only 16 lsb) depth into a RGB5A1 color texture + const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f)); + const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1); + uvec4 color = uvec4(vec4(fetch_c(uv).r) * bitSh) & bitMsk; + + t = vec4(color) * vec4(8.0f, 8.0f, 8.0f, 128.0f); + } + #elif (PS_DEPTH_FMT == 3) + { + // Convert a RGBA/RGB5A1 color texture into a RGBA/RGB5A1 color texture + t = fetch_c(uv) * 255.0f; + } + #endif + + #if (PS_AEM_FMT == FMT_24) + { + t.a = ((PS_AEM == 0) || any(bvec3(t.rgb))) ? 255.0f * TA.x : 0.0f; + } + #elif (PS_AEM_FMT == FMT_16) + { + t.a = t.a >= 128.0f ? 255.0f * TA.y : ((PS_AEM == 0) || any(bvec3(t.rgb))) ? 
255.0f * TA.x : 0.0f; + } + #endif + + return t; +} + +////////////////////////////////////////////////////////////////////// +// Fetch a Single Channel +////////////////////////////////////////////////////////////////////// + +vec4 fetch_red(ivec2 xy) +{ + vec4 rt; + + #if (PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2) + int depth = (fetch_raw_depth(xy)) & 0xFF; + rt = vec4(float(depth) / 255.0f); + #else + rt = fetch_raw_color(xy); + #endif + + return sample_p(rt.r) * 255.0f; +} + +vec4 fetch_green(ivec2 xy) +{ + vec4 rt; + + #if (PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2) + int depth = (fetch_raw_depth(xy) >> 8) & 0xFF; + rt = vec4(float(depth) / 255.0f); + #else + rt = fetch_raw_color(xy); + #endif + + return sample_p(rt.g) * 255.0f; +} + +vec4 fetch_blue(ivec2 xy) +{ + vec4 rt; + + #if (PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2) + int depth = (fetch_raw_depth(xy) >> 16) & 0xFF; + rt = vec4(float(depth) / 255.0f); + #else + rt = fetch_raw_color(xy); + #endif + + return sample_p(rt.b) * 255.0f; +} + +vec4 fetch_alpha(ivec2 xy) +{ + vec4 rt = fetch_raw_color(xy); + return sample_p(rt.a) * 255.0f; +} + +vec4 fetch_rgb(ivec2 xy) +{ + vec4 rt = fetch_raw_color(xy); + vec4 c = vec4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1.0); + return c * 255.0f; +} + +vec4 fetch_gXbY(ivec2 xy) +{ + #if (PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2) + int depth = fetch_raw_depth(xy); + int bg = (depth >> (8 + ChannelShuffle.w)) & 0xFF; + return vec4(bg); + #else + ivec4 rt = ivec4(fetch_raw_color(xy) * 255.0); + int green = (rt.g >> ChannelShuffle.w) & ChannelShuffle.z; + int blue = (rt.b << ChannelShuffle.y) & ChannelShuffle.x; + return vec4(float(green | blue)); + #endif +} + +vec4 sample_color(vec2 st) +{ + #if PS_TCOFFSETHACK + st += TC_OffsetHack.xy; + #endif + + vec4 t; + mat4 c; + vec2 dd; + + #if PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2 + { + c[0] = sample_c(st); + } + #else + { + vec4 uv; + + #if PS_LTF + { + uv = st.xyxy + 
HalfTexel; + dd = fract(uv.xy * WH.zw); + + #if PS_FST == 0 + { + dd = clamp(dd, vec2(0.0f), vec2(0.9999999f)); + } + #endif + } + #else + { + uv = st.xyxy; + } + #endif + + uv = clamp_wrap_uv(uv); + +#if PS_PAL_FMT != 0 + c = sample_4p(sample_4_index(uv)); +#else + c = sample_4c(uv); +#endif + } + #endif + + for (uint i = 0; i < 4; i++) + { + #if (PS_AEM_FMT == FMT_24) + c[i].a = (PS_AEM == 0 || any(bvec3(c[i].rgb))) ? TA.x : 0.0f; + #elif (PS_AEM_FMT == FMT_16) + c[i].a = (c[i].a >= 0.5) ? TA.y : ((PS_AEM == 0 || any(bvec3(c[i].rgb))) ? TA.x : 0.0f); + #endif + } + + #if PS_LTF + { + t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y); + } + #else + { + t = c[0]; + } + #endif + + return trunc(t * 255.0f + 0.05f); +} + +vec4 tfx(vec4 T, vec4 C) +{ + vec4 C_out; + vec4 FxT = trunc(trunc(C) * T / 128.0f); + +#if (PS_TFX == 0) + C_out = FxT; +#elif (PS_TFX == 1) + C_out = T; +#elif (PS_TFX == 2) + C_out.rgb = FxT.rgb + C.a; + C_out.a = T.a + C.a; +#elif (PS_TFX == 3) + C_out.rgb = FxT.rgb + C.a; + C_out.a = T.a; +#else + C_out = C; +#endif + +#if (PS_TCC == 0) + C_out.a = C.a; +#endif + +#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3) + // Clamp only when it is useful + C_out = min(C_out, 255.0f); +#endif + + return C_out; +} + +void atst(vec4 C) +{ + float a = C.a; + + #if (PS_ATST == 0) + { + // nothing to do + } + #elif (PS_ATST == 1) + { + if (a > AREF) discard; + } + #elif (PS_ATST == 2) + { + if (a < AREF) discard; + } + #elif (PS_ATST == 3) + { + if (abs(a - AREF) > 0.5f) discard; + } + #elif (PS_ATST == 4) + { + if (abs(a - AREF) < 0.5f) discard; + } + #endif +} + +vec4 fog(vec4 c, float f) +{ + #if PS_FOG + c.rgb = trunc(mix(FogColor, c.rgb, f)); + #endif + + return c; +} + +vec4 ps_color() +{ +#if PS_FST == 0 && PS_INVALID_TEX0 == 1 + // Re-normalize coordinate from invalid GS to corrected texture size + vec2 st = (vsIn.t.xy * WH.xy) / (vsIn.t.w * WH.zw); + // no st_int yet +#elif PS_FST == 0 + vec2 st = vsIn.t.xy / vsIn.t.w; + vec2 st_int = 
vsIn.ti.zw / vsIn.t.w; +#else + vec2 st = vsIn.ti.xy; + vec2 st_int = vsIn.ti.zw; +#endif + +#if PS_CHANNEL_FETCH == 1 + vec4 T = fetch_red(ivec2(gl_FragCoord.xy)); +#elif PS_CHANNEL_FETCH == 2 + vec4 T = fetch_green(ivec2(gl_FragCoord.xy)); +#elif PS_CHANNEL_FETCH == 3 + vec4 T = fetch_blue(ivec2(gl_FragCoord.xy)); +#elif PS_CHANNEL_FETCH == 4 + vec4 T = fetch_alpha(ivec2(gl_FragCoord.xy)); +#elif PS_CHANNEL_FETCH == 5 + vec4 T = fetch_rgb(ivec2(gl_FragCoord.xy)); +#elif PS_CHANNEL_FETCH == 6 + vec4 T = fetch_gXbY(ivec2(gl_FragCoord.xy)); +#elif PS_DEPTH_FMT > 0 + vec4 T = sample_depth(st_int, ivec2(gl_FragCoord.xy)); +#else + vec4 T = sample_color(st); +#endif + + vec4 C = tfx(T, vsIn.c); + + atst(C); + + C = fog(C, vsIn.t.z); + + #if PS_CLR1 // needed for Cd * (As/Ad/F + 1) blending modes + C.rgb = vec3(255.0f); + #endif + + return C; +} + +void ps_fbmask(inout vec4 C) +{ + #if PS_FBMASK + vec4 RT = trunc(subpassLoad(RtSampler) * 255.0f + 0.1f); + C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)); + #endif +} + +void ps_dither(inout vec3 C) +{ + #if PS_DITHER + ivec2 fpos; + + #if PS_DITHER == 2 + fpos = ivec2(gl_FragCoord.xy); + #else + fpos = ivec2(gl_FragCoord.xy / float(PS_SCALE_FACTOR)); + #endif + + C += DitherMatrix[fpos.y & 3][fpos.x & 3]; + #endif +} + +void ps_color_clamp_wrap(inout vec3 C) +{ + // When dithering the bottom 3 bits become meaningless and cause lines in the picture + // so we need to limit the color depth on dithered items +#if SW_BLEND || PS_DITHER + + // Correct the Color value based on the output format +#if PS_COLCLIP == 0 && PS_HDR == 0 + // Standard Clamp + C = clamp(C, vec3(0.0f), vec3(255.0f)); +#endif + + // FIXME rouding of negative float? + // compiler uses trunc but it might need floor + + // Warning: normally blending equation is mult(A, B) = A * B >> 7. 
GPUs have the full accuracy + // GS: Color = 1, Alpha = 255 => output 1 + // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 +#if PS_DFMT == FMT_16 + // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania + C = vec3(ivec3(C) & ivec3(0xF8)); +#elif PS_COLCLIP == 1 && PS_HDR == 0 + C = vec3(ivec3(C) & ivec3(0xFF)); +#endif + +#endif +} + +void ps_blend(inout vec4 Color, float As) +{ + #if SW_BLEND + #if PS_FEEDBACK_LOOP_IS_NEEDED + vec4 RT = trunc(subpassLoad(RtSampler) * 255.0f + 0.1f); + #else + // Not used, but we define it to make the selection below simpler. + vec4 RT = vec4(0.0f); + #endif + + #if PS_DFMT == FMT_24 + float Ad = 1.0f; + #else + // FIXME FMT_16 case + // FIXME Ad or Ad * 2? + float Ad = RT.a / 128.0f; + #endif + + // Let the compiler do its job! + vec3 Cd = RT.rgb; + vec3 Cs = Color.rgb; + + #if PS_BLEND_A == 0 + vec3 A = Cs; + #elif PS_BLEND_A == 1 + vec3 A = Cd; + #else + vec3 A = vec3(0.0f); + #endif + + #if PS_BLEND_B == 0 + vec3 B = Cs; + #elif PS_BLEND_B == 1 + vec3 B = Cd; + #else + vec3 B = vec3(0.0f); + #endif + + #if PS_BLEND_C == 0 + float C = As; + #elif PS_BLEND_C == 1 + float C = Ad; + #else + float C = Af; + #endif + + #if PS_BLEND_D == 0 + vec3 D = Cs; + #elif PS_BLEND_D == 1 + vec3 D = Cd; + #else + vec3 D = vec3(0.0f); + #endif + + // As/Af clamp alpha for Blend mix + #if PS_ALPHA_CLAMP + C = min(C, 1.0f); + #endif + + #if PS_BLEND_A == PS_BLEND_B + Color.rgb = D; + #else + Color.rgb = trunc((A - B) * C + D); + #endif + + // PABE + #if PS_PABE + Color.rgb = (As >= 1.0f) ?
Color.rgb : Cs; + #endif + + #endif +} + +#if PS_DATE == 1 || PS_DATE == 2 || PS_DATE == 11 || PS_DATE == 12 +layout(early_fragment_tests) in; +#endif + +void main() +{ +#if PS_SCANMSK & 2 + // fail depth test on prohibited lines + if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1)) + discard; +#endif +#if PS_DATE < 10 && (((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2)) + +#if PS_WRITE_RG == 1 + // Pseudo 16 bits access. + float rt_a = subpassLoad(RtSampler).g; +#else + float rt_a = subpassLoad(RtSampler).a; +#endif + +#if (PS_DATE & 3) == 1 + // DATM == 0: Pixel with alpha equal to 1 will fail + bool bad = (127.5f / 255.0f) < rt_a; +#elif (PS_DATE & 3) == 2 + // DATM == 1: Pixel with alpha equal to 0 will fail + bool bad = rt_a < (127.5f / 255.0f); +#endif + + if (bad) { +#if PS_DATE >= 5 + discard; +#else + // imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1)); + return; +#endif + } + +#endif // PS_DATE < 10 && (((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2)) + +#if PS_DATE == 3 + int stencil_ceil = int(texelFetch(PrimMinTexture, ivec2(gl_FragCoord.xy), 0).r); + // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update + // the bad alpha value so we must keep it.
+ + if (gl_PrimitiveID > stencil_ceil) { + discard; + } +#endif + + vec4 C = ps_color(); + + #if PS_SHUFFLE + uvec4 denorm_c = uvec4(C); + uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); + + // Mask will take care of the correct destination + #if PS_READ_BA + C.rb = C.bb; + #else + C.rb = C.rr; + #endif + + #if PS_READ_BA + if ((denorm_c.a & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + #else + if ((denorm_c.g & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + #endif + #endif + + // Must be done before alpha correction + float alpha_blend = C.a / 128.0f; + + // Correct the ALPHA value based on the output format +#if (PS_DFMT == FMT_16) + float A_one = 128.0f; // alpha output will be 0x80 + C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one; +#elif (PS_DFMT == FMT_32) && (PS_FBA != 0) + if(C.a < 128.0f) C.a += 128.0f; +#endif + + // Get first primitive that will write a failing alpha value +#if PS_DATE == 1 || PS_DATE == 11 + + // DATM == 0 + // Pixel with alpha equal to 1 will fail (128-255) + o_col0 = (C.a > 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF); + +#elif PS_DATE == 2 || PS_DATE == 12 + + // DATM == 1 + // Pixel with alpha equal to 0 will fail (0-127) + o_col0 = (C.a < 127.5f) ?
vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF); + +#else + + ps_blend(C, alpha_blend); + + ps_dither(C.rgb); + + // Color clamp/wrap needs to be done after sw blending and dithering + ps_color_clamp_wrap(C.rgb); + + ps_fbmask(C); + + o_col0 = C / 255.0f; +#ifndef DISABLE_DUAL_SOURCE + o_col1 = vec4(alpha_blend); +#endif + +#if PS_ZCLAMP + gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS); +#endif + +#endif // PS_DATE +} + +#endif diff --git a/common/Vulkan/Context.cpp b/common/Vulkan/Context.cpp index fadd3f30a2..b4cc683131 100644 --- a/common/Vulkan/Context.cpp +++ b/common/Vulkan/Context.cpp @@ -32,7 +32,6 @@ enum : u32 MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME = 2 * MAX_DRAW_CALLS_PER_FRAME, MAX_SAMPLED_IMAGE_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, // assume at least half our draws aren't going to be shuffle/blending - MAX_STORAGE_IMAGE_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, MAX_INPUT_ATTACHMENT_IMAGE_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME * 2 }; @@ -406,7 +405,9 @@ namespace Vulkan if (g_vulkan_context->m_debug_messenger_callback != VK_NULL_HANDLE) g_vulkan_context->DisableDebugUtils(); - vkDestroyInstance(g_vulkan_context->m_instance, nullptr); + if (g_vulkan_context->m_instance != VK_NULL_HANDLE) + vkDestroyInstance(g_vulkan_context->m_instance, nullptr); + Vulkan::UnloadVulkanLibrary(); g_vulkan_context.reset(); @@ -701,8 +702,7 @@ namespace Vulkan VkDescriptorPoolSize pool_sizes[] = { {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME}, {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, MAX_SAMPLED_IMAGE_DESCRIPTORS_PER_FRAME}, - {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, MAX_STORAGE_IMAGE_DESCRIPTORS_PER_FRAME}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_STORAGE_IMAGE_DESCRIPTORS_PER_FRAME}, + {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, MAX_INPUT_ATTACHMENT_IMAGE_DESCRIPTORS_PER_FRAME}, }; VkDescriptorPoolCreateInfo pool_create_info = 
{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0, diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 73ab1a1f54..926e913156 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -709,6 +709,17 @@ set(pcsx2GSHeaders GS/Window/GSwxDialog.h ) +if(USE_VULKAN) + list(APPEND pcsx2GSSources + GS/Renderers/Vulkan/GSDeviceVK.cpp + GS/Renderers/Vulkan/GSTextureVK.cpp + ) + list(APPEND pcsx2GSHeaders + GS/Renderers/Vulkan/GSDeviceVK.h + GS/Renderers/Vulkan/GSTextureVK.h + ) +endif() + if(WIN32) list(APPEND pcsx2SPU2Sources SPU2/Windows/CfgHelpers.cpp diff --git a/pcsx2/Config.h b/pcsx2/Config.h index 785280326a..0540a226bb 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -108,6 +108,7 @@ enum class GSRendererType : s8 Null = 11, OGL = 12, SW = 13, + VK = 14, }; enum class GSInterlaceMode : u8 diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp index f473d6e33c..bcaf1bdde1 100644 --- a/pcsx2/GS/GS.cpp +++ b/pcsx2/GS/GS.cpp @@ -41,6 +41,10 @@ #include "pcsx2/HostSettings.h" #endif +#ifdef ENABLE_VULKAN +#include "Renderers/Vulkan/GSDeviceVK.h" +#endif + #ifdef _WIN32 #include "Renderers/DX11/GSDevice11.h" @@ -154,6 +158,9 @@ static HostDisplay::RenderAPI GetAPIForRenderer(GSRendererType renderer) #endif return HostDisplay::RenderAPI::OpenGL; + case GSRendererType::VK: + return HostDisplay::RenderAPI::Vulkan; + #ifdef _WIN32 case GSRendererType::DX11: case GSRendererType::SW: @@ -183,6 +190,12 @@ static bool DoGSOpen(GSRendererType renderer, u8* basemem) g_gs_device = std::make_unique(); break; +#ifdef ENABLE_VULKAN + case HostDisplay::RenderAPI::Vulkan: + g_gs_device = std::make_unique(); + break; +#endif + default: Console.Error("Unknown render API %u", static_cast(display->GetRenderAPI())); return false; @@ -1123,6 +1136,9 @@ void GSApp::Init() m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::DX11), "Direct3D 11", "")); #endif m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::OGL), "OpenGL", "")); +#ifdef ENABLE_VULKAN 
+ m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::VK), "Vulkan", "")); +#endif m_gs_renderers.push_back(GSSetting(static_cast(GSRendererType::SW), "Software", "")); // The null renderer goes last, it has use for benchmarking purposes in a release build @@ -1216,9 +1232,12 @@ void GSApp::Init() // clang-format off // Avoid to clutter the ini file with useless options +#if defined(ENABLE_VULKAN) || defined(_WIN32) + m_default_configuration["Adapter"] = ""; +#endif + #ifdef _WIN32 // Per OS option. - m_default_configuration["Adapter"] = ""; m_default_configuration["CaptureFileName"] = ""; m_default_configuration["CaptureVideoCodecDisplayName"] = ""; m_default_configuration["dx_break_on_severity"] = "0"; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index 57e5cbaa58..7c128cbb14 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -51,13 +51,13 @@ const char* shaderName(ShaderConvert value) std::unique_ptr g_gs_device; GSDevice::GSDevice() - : m_rbswapped(false) - , m_merge(NULL) + : m_merge(NULL) , m_weavebob(NULL) , m_blend(NULL) , m_target_tmp(NULL) , m_current(NULL) , m_frame(0) + , m_rbswapped(false) { memset(&m_vertex, 0, sizeof(m_vertex)); memset(&m_index, 0, sizeof(m_index)); @@ -104,11 +104,13 @@ void GSDevice::RestoreAPIState() { } -GSTexture* GSDevice::FetchSurface(GSTexture::Type type, int w, int h, GSTexture::Format format, bool clear) +GSTexture* GSDevice::FetchSurface(GSTexture::Type type, int w, int h, GSTexture::Format format, bool clear, bool prefer_reuse) { const GSVector2i size(w, h); + const bool prefer_new_texture = (m_features.prefer_new_textures && type == GSTexture::Type::Texture && !prefer_reuse); GSTexture* t = nullptr; + auto fallback = m_pool.end(); for (auto i = m_pool.begin(); i != m_pool.end(); ++i) { @@ -118,21 +120,37 @@ GSTexture* GSDevice::FetchSurface(GSTexture::Type type, int w, int h, GSTexture: if (t->GetType() == type && 
t->GetFormat() == format && t->GetSize() == size) { - m_pool.erase(i); - break; + if (!prefer_new_texture) + { + m_pool.erase(i); + break; + } + else if (fallback == m_pool.end()) + { + fallback = i; + } } t = nullptr; } if (!t) - t = CreateSurface(type, w, h, format); - - if (!t) - throw std::bad_alloc(); + { + if (m_pool.size() >= MAX_POOLED_TEXTURES && fallback != m_pool.end()) + { + t = *fallback; + m_pool.erase(fallback); + } + else + { + t = CreateSurface(type, w, h, format); + if (!t) + throw std::bad_alloc(); + } + } t->Commit(); // Clear won't be done if the texture isn't committed. - + switch (type) { case GSTexture::Type::RenderTarget: @@ -195,7 +213,7 @@ void GSDevice::Recycle(GSTexture* t) //printf("%d\n",m_pool.size()); - while (m_pool.size() > 300) + while (m_pool.size() > MAX_POOLED_TEXTURES) { delete m_pool.back(); @@ -225,32 +243,32 @@ void GSDevice::PurgePool() GSTexture* GSDevice::CreateSparseRenderTarget(int w, int h, GSTexture::Format format, bool clear) { - return FetchSurface(HasColorSparse() ? GSTexture::Type::SparseRenderTarget : GSTexture::Type::RenderTarget, w, h, format, clear); + return FetchSurface(HasColorSparse() ? GSTexture::Type::SparseRenderTarget : GSTexture::Type::RenderTarget, w, h, format, clear, true); } GSTexture* GSDevice::CreateSparseDepthStencil(int w, int h, GSTexture::Format format, bool clear) { - return FetchSurface(HasDepthSparse() ? GSTexture::Type::SparseDepthStencil : GSTexture::Type::DepthStencil, w, h, format, clear); + return FetchSurface(HasDepthSparse() ? 
GSTexture::Type::SparseDepthStencil : GSTexture::Type::DepthStencil, w, h, format, clear, true); } GSTexture* GSDevice::CreateRenderTarget(int w, int h, GSTexture::Format format, bool clear) { - return FetchSurface(GSTexture::Type::RenderTarget, w, h, format, clear); + return FetchSurface(GSTexture::Type::RenderTarget, w, h, format, clear, true); } GSTexture* GSDevice::CreateDepthStencil(int w, int h, GSTexture::Format format, bool clear) { - return FetchSurface(GSTexture::Type::DepthStencil, w, h, format, clear); + return FetchSurface(GSTexture::Type::DepthStencil, w, h, format, clear, true); } -GSTexture* GSDevice::CreateTexture(int w, int h, GSTexture::Format format) +GSTexture* GSDevice::CreateTexture(int w, int h, GSTexture::Format format, bool prefer_reuse) { - return FetchSurface(GSTexture::Type::Texture, w, h, format, false); + return FetchSurface(GSTexture::Type::Texture, w, h, format, false, prefer_reuse); } GSTexture* GSDevice::CreateOffscreen(int w, int h, GSTexture::Format format) { - return FetchSurface(GSTexture::Type::Offscreen, w, h, format, false); + return FetchSurface(GSTexture::Type::Offscreen, w, h, format, false, true); } GSTexture::Format GSDevice::GetDefaultTextureFormat(GSTexture::Type type) @@ -398,7 +416,7 @@ void GSDevice::ShadeBoost() } } -bool GSDevice::ResizeTexture(GSTexture** t, GSTexture::Type type, int w, int h, bool clear) +bool GSDevice::ResizeTexture(GSTexture** t, GSTexture::Type type, int w, int h, bool clear, bool prefer_reuse) { if (t == NULL) { @@ -413,7 +431,7 @@ bool GSDevice::ResizeTexture(GSTexture** t, GSTexture::Type type, int w, int h, GSTexture::Format fmt = t2 ? 
t2->GetFormat() : GetDefaultTextureFormat(type); delete t2; - t2 = FetchSurface(type, w, h, fmt, clear); + t2 = FetchSurface(type, w, h, fmt, clear, prefer_reuse); *t = t2; } @@ -421,9 +439,9 @@ bool GSDevice::ResizeTexture(GSTexture** t, GSTexture::Type type, int w, int h, return t2 != NULL; } -bool GSDevice::ResizeTexture(GSTexture** t, int w, int h, bool clear) +bool GSDevice::ResizeTexture(GSTexture** t, int w, int h, bool prefer_reuse) { - return ResizeTexture(t, GSTexture::Type::Texture, w, h, clear); + return ResizeTexture(t, GSTexture::Type::Texture, w, h, false, prefer_reuse); } bool GSDevice::ResizeTarget(GSTexture** t, int w, int h) diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 23aa90cbaa..a18e081a47 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -197,7 +197,7 @@ struct alignas(16) GSHWDrawConfig // Flat/goround shading u32 iip : 1; // Pixel test - u32 date : 3; + u32 date : 4; u32 atst : 3; // Color sampling u32 fst : 1; // Investigate to do it on the VS @@ -248,12 +248,17 @@ struct alignas(16) GSHWDrawConfig // Scan mask u32 scanmsk : 2; - u32 _free2 : 3; + u32 _free2 : 2; }; u64 key; }; PSSelector(): key(0) {} + + __fi bool IsFeedbackLoop() const + { + return tex_is_fb || fbmask || date > 0 || blend_a == 1 || blend_b == 1 || blend_c == 1 || blend_d == 1; + } }; struct SamplerSelector { @@ -501,6 +506,7 @@ public: bool provoking_vertex_last: 1; ///< Supports using the last vertex in a primitive as the value for flat shading. bool point_expand : 1; ///< Supports point expansion in hardware without using geometry shaders. bool line_expand : 1; ///< Supports line expansion in hardware without using geometry shaders. + bool prefer_new_textures : 1; ///< Allocate textures up to the pool size before reusing them, to avoid render pass restarts. 
FeatureSupport() { memset(this, 0, sizeof(*this)); @@ -527,7 +533,8 @@ protected: static const int m_NO_BLEND = 0; static const int m_MERGE_BLEND = m_blendMap.size() - 1; - bool m_rbswapped; + static constexpr u32 MAX_POOLED_TEXTURES = 300; + HostDisplay* m_display; GSTexture* m_merge; GSTexture* m_weavebob; @@ -543,11 +550,11 @@ protected: size_t start, count, limit; } m_index; unsigned int m_frame; // for ageing the pool - bool m_linear_present; + bool m_rbswapped; FeatureSupport m_features; virtual GSTexture* CreateSurface(GSTexture::Type type, int w, int h, GSTexture::Format format) = 0; - virtual GSTexture* FetchSurface(GSTexture::Type type, int w, int h, GSTexture::Format format, bool clear); + virtual GSTexture* FetchSurface(GSTexture::Type type, int w, int h, GSTexture::Format format, bool clear, bool prefer_reuse); virtual void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) = 0; virtual void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) = 0; @@ -561,6 +568,7 @@ public: virtual ~GSDevice(); __fi HostDisplay* GetDisplay() const { return m_display; } + __fi unsigned int GetFrameNumber() const { return m_frame; } void Recycle(GSTexture* t); @@ -606,7 +614,7 @@ public: GSTexture* CreateSparseDepthStencil(int w, int h, GSTexture::Format format, bool clear = true); GSTexture* CreateRenderTarget(int w, int h, GSTexture::Format format, bool clear = true); GSTexture* CreateDepthStencil(int w, int h, GSTexture::Format format, bool clear = true); - GSTexture* CreateTexture(int w, int h, GSTexture::Format format); + GSTexture* CreateTexture(int w, int h, GSTexture::Format format, bool prefer_reuse = false); GSTexture* CreateOffscreen(int w, int h, GSTexture::Format format); GSTexture::Format GetDefaultTextureFormat(GSTexture::Type type); @@ -638,8 +646,8 @@ public: void ShadeBoost(); void ExternalFX(); - bool 
ResizeTexture(GSTexture** t, GSTexture::Type type, int w, int h, bool clear = true); - bool ResizeTexture(GSTexture** t, int w, int h, bool clear = true); + bool ResizeTexture(GSTexture** t, GSTexture::Type type, int w, int h, bool clear = true, bool prefer_reuse = false); + bool ResizeTexture(GSTexture** t, int w, int h, bool prefer_reuse = false); bool ResizeTarget(GSTexture** t, int w, int h); bool ResizeTarget(GSTexture** t); diff --git a/pcsx2/GS/Renderers/Common/GSTexture.cpp b/pcsx2/GS/Renderers/Common/GSTexture.cpp index d1fa28e3c0..65fdf99b35 100644 --- a/pcsx2/GS/Renderers/Common/GSTexture.cpp +++ b/pcsx2/GS/Renderers/Common/GSTexture.cpp @@ -27,6 +27,7 @@ GSTexture::GSTexture() , m_mipmap_levels(0) , m_type(Type::Invalid) , m_format(Format::Invalid) + , m_state(State::Dirty) , m_sparse(false) , m_needs_mipmaps_generated(true) , last_frame_used(0) diff --git a/pcsx2/GS/Renderers/Common/GSTexture.h b/pcsx2/GS/Renderers/Common/GSTexture.h index 150bb5d9f5..1202d2f74a 100644 --- a/pcsx2/GS/Renderers/Common/GSTexture.h +++ b/pcsx2/GS/Renderers/Common/GSTexture.h @@ -49,6 +49,13 @@ public: Int32, ///< Int32 texture for date emulation }; + enum class State : u8 + { + Dirty, + Cleared, + Invalidated + }; + protected: GSVector2 m_scale; GSVector2i m_size; @@ -57,6 +64,7 @@ protected: int m_mipmap_levels; Type m_type; Format m_format; + State m_state; bool m_sparse; bool m_needs_mipmaps_generated; @@ -91,6 +99,23 @@ public: Type GetType() const { return m_type; } Format GetFormat() const { return m_format; } + bool IsRenderTargetOrDepthStencil() const + { + return (m_type >= Type::RenderTarget && m_type <= Type::DepthStencil) || + (m_type >= Type::SparseRenderTarget && m_type <= Type::SparseDepthStencil); + } + bool IsRenderTarget() const + { + return (m_type == Type::RenderTarget || m_type == Type::SparseRenderTarget); + } + bool IsDepthStencil() const + { + return (m_type == Type::DepthStencil || m_type == Type::SparseDepthStencil); + } + + State GetState() 
const { return m_state; } + void SetState(State state) { m_state = state; } + void GenerateMipmapsIfNeeded(); void ClearMipmapGenerationFlag() { m_needs_mipmaps_generated = false; } diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index 9428b84d0b..a46157f37f 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -43,6 +43,7 @@ GSDevice11::GSDevice11() m_features.provoking_vertex_last = false; m_features.point_expand = false; m_features.line_expand = false; + m_features.prefer_new_textures = false; } bool GSDevice11::SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 028d8c6361..f6608ea8a9 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -185,6 +185,12 @@ GSRendererHW::~GSRendererHW() delete m_tc; } +void GSRendererHW::Destroy() +{ + m_tc->RemoveAll(); + GSRenderer::Destroy(); +} + void GSRendererHW::SetGameCRC(u32 crc, int options) { GSRenderer::SetGameCRC(crc, options); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 688a8771a2..67de7802dd 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -166,6 +166,8 @@ public: GSRendererHW(); virtual ~GSRendererHW() override; + void Destroy() override; + void SetGameCRC(u32 crc, int options) override; bool CanUpscale() override; int GetUpscaleMultiplier() override; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index bb368bc77d..e41372917d 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1297,7 +1297,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con int h = (int)(scale.y * th); GSTexture* sTex = dst->m_texture; - GSTexture* dTex = 
g_gs_device->CreateTexture(w, h, GSTexture::Format::Color); + GSTexture* dTex = g_gs_device->CreateTexture(w, h, GSTexture::Format::Color, true); GSVector4i area(x, y, x + w, y + h); g_gs_device->CopyRect(sTex, dTex, area); @@ -1329,7 +1329,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // So it could be tricky to put in the middle of the DrawPrims // Texture is created to keep code compatibility - GSTexture* dTex = g_gs_device->CreateTexture(tw, th, GSTexture::Format::Color); + GSTexture* dTex = g_gs_device->CreateTexture(tw, th, GSTexture::Format::Color, true); // Keep a trace of origin of the texture src->m_texture = dTex; @@ -1503,7 +1503,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // 'src' is the new texture cache entry (hence the output) GSTexture* sTex = dst->m_texture; GSTexture* dTex = use_texture ? - g_gs_device->CreateTexture(w, h, GSTexture::Format::Color) : + g_gs_device->CreateTexture(w, h, GSTexture::Format::Color, true) : g_gs_device->CreateRenderTarget(w, h, GSTexture::Format::Color, !texture_completely_overwritten); src->m_texture = dTex; diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index 458ab17daa..780c4d3732 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -238,6 +238,7 @@ bool GSDeviceOGL::Create(HostDisplay* display) m_features.image_load_store = GLLoader::found_GL_ARB_shader_image_load_store && GLLoader::found_GL_ARB_clear_texture; m_features.texture_barrier = true; m_features.provoking_vertex_last = true; + m_features.prefer_new_textures = false; GLint point_range[2] = {}; GLint line_range[2] = {}; diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp new file mode 100644 index 0000000000..7ac40d2e9e --- /dev/null +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -0,0 +1,2875 @@ +/* PCSX2 - PS2 Emulator for 
PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include "PrecompiledHeader.h" +#include "common/Vulkan/Builders.h" +#include "common/Vulkan/Context.h" +#include "common/Vulkan/ShaderCache.h" +#include "common/Vulkan/SwapChain.h" +#include "common/Vulkan/Util.h" +#include "common/Align.h" +#include "common/ScopedGuard.h" +#include "GS.h" +#include "GSDeviceVK.h" +#include "GS/GSGL.h" +#include "GS/GSPerfMon.h" +#include "GS/GSUtil.h" +#include "Host.h" +#include "HostDisplay.h" +#include +#include + +#ifdef ENABLE_OGL_DEBUG +static u32 s_debug_scope_depth = 0; +#endif + +static bool IsDepthConvertShader(ShaderConvert i) +{ + return (i == ShaderConvert::RGBA8_TO_FLOAT32 || i == ShaderConvert::RGBA8_TO_FLOAT24 || + i == ShaderConvert::RGBA8_TO_FLOAT16 || i == ShaderConvert::RGB5A1_TO_FLOAT16 || + i == ShaderConvert::DATM_0 || i == ShaderConvert::DATM_1); +} + +static bool IsIntConvertShader(ShaderConvert i) +{ + return (i == ShaderConvert::RGBA8_TO_16_BITS || i == ShaderConvert::FLOAT32_TO_16_BITS || + i == ShaderConvert::FLOAT32_TO_32_BITS); +} + +static bool IsDATMConvertShader(ShaderConvert i) { return (i == ShaderConvert::DATM_0 || i == ShaderConvert::DATM_1); } + +static bool IsPresentConvertShader(ShaderConvert i) +{ + return (i == ShaderConvert::COPY || (i >= ShaderConvert::SCANLINE && i <= ShaderConvert::COMPLEX_FILTER)); +} + +static VkAttachmentLoadOp 
GetLoadOpForTexture(GSTextureVK* tex) +{ + if (!tex) + return VK_ATTACHMENT_LOAD_OP_DONT_CARE; + + // clang-format off + switch (tex->GetState()) + { + case GSTextureVK::State::Cleared: tex->SetState(GSTexture::State::Dirty); return VK_ATTACHMENT_LOAD_OP_CLEAR; + case GSTextureVK::State::Invalidated: tex->SetState(GSTexture::State::Dirty); return VK_ATTACHMENT_LOAD_OP_DONT_CARE; + case GSTextureVK::State::Dirty: return VK_ATTACHMENT_LOAD_OP_LOAD; + default: return VK_ATTACHMENT_LOAD_OP_LOAD; + } + // clang-format on +} + +GSDeviceVK::GSDeviceVK() +{ +#ifdef ENABLE_OGL_DEBUG + s_debug_scope_depth = 0; +#endif + + m_mipmap = theApp.GetConfigI("mipmap"); + m_upscale_multiplier = static_cast(std::max(1, theApp.GetConfigI("upscale_multiplier"))); + + std::memset(&m_pipeline_selector, 0, sizeof(m_pipeline_selector)); +} + +GSDeviceVK::~GSDeviceVK() {} + +bool GSDeviceVK::Create(HostDisplay* display) +{ + if (!GSDevice::Create(display) || !CheckFeatures()) + return false; + + { + std::optional shader = Host::ReadResourceFileToString("shaders/vulkan/tfx.glsl"); + if (!shader.has_value()) + { + Host::ReportErrorAsync("GS", "Failed to read shaders/vulkan/tfx.glsl."); + return false; + } + + m_tfx_source = std::move(*shader); + } + + if (!CreateNullTexture()) + { + Host::ReportErrorAsync("GS", "Failed to create dummy texture"); + return false; + } + + if (!CreatePipelineLayouts()) + { + Host::ReportErrorAsync("GS", "Failed to create pipeline layouts"); + return false; + } + + if (!CreateRenderPasses()) + { + Host::ReportErrorAsync("GS", "Failed to create render passes"); + return false; + } + + if (!CreateBuffers()) + return false; + + if (!CompileConvertPipelines() || !CompileInterlacePipelines() || !CompileMergePipelines()) + { + Host::ReportErrorAsync("GS", "Failed to compile utility pipelines"); + return false; + } + + if (!CreatePersistentDescriptorSets()) + { + Host::ReportErrorAsync("GS", "Failed to create persistent descriptor sets"); + return false; + } + + 
InitializeState(); + return true; +} + +void GSDeviceVK::Destroy() +{ + if (!g_vulkan_context) + return; + + EndRenderPass(); + ExecuteCommandBuffer(true); + DestroyResources(); + GSDevice::Destroy(); +} + +void GSDeviceVK::ResetAPIState() { EndRenderPass(); } + +void GSDeviceVK::RestoreAPIState() { InvalidateCachedState(); } + +#ifdef ENABLE_OGL_DEBUG +static std::array Palette(float phase, const std::array& a, const std::array& b, + const std::array& c, const std::array& d) +{ + std::array result; + result[0] = a[0] + b[0] * std::cos(6.28318f * (c[0] * phase + d[0])); + result[1] = a[1] + b[1] * std::cos(6.28318f * (c[1] * phase + d[1])); + result[2] = a[2] + b[2] * std::cos(6.28318f * (c[2] * phase + d[2])); + return result; +} +#endif + +void GSDeviceVK::PushDebugGroup(const char* fmt, ...) +{ +#ifdef ENABLE_OGL_DEBUG + if (!vkCmdBeginDebugUtilsLabelEXT) + return; + + std::va_list ap; + va_start(ap, fmt); + const std::string buf(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + + const std::array color = Palette( + ++s_debug_scope_depth, {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f}, {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f}); + + const VkDebugUtilsLabelEXT label = { + VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + nullptr, + buf.c_str(), + {color[0], color[1], color[2], 1.0f}, + }; + vkCmdBeginDebugUtilsLabelEXT(g_vulkan_context->GetCurrentCommandBuffer(), &label); +#endif +} + +void GSDeviceVK::PopDebugGroup() +{ +#ifdef ENABLE_OGL_DEBUG + if (!vkCmdEndDebugUtilsLabelEXT) + return; + + s_debug_scope_depth = (s_debug_scope_depth == 0) ? 0 : (s_debug_scope_depth - 1u); + + vkCmdEndDebugUtilsLabelEXT(g_vulkan_context->GetCurrentCommandBuffer()); +#endif +} + +void GSDeviceVK::InsertDebugMessage(DebugMessageCategory category, const char* fmt, ...) 
+{ +#ifdef ENABLE_OGL_DEBUG + if (!vkCmdInsertDebugUtilsLabelEXT) + return; + + std::va_list ap; + va_start(ap, fmt); + const std::string buf(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + + if (buf.empty()) + return; + + static constexpr float colors[][3] = { + {0.1f, 0.1f, 0.0f}, // Cache + {0.1f, 0.1f, 0.0f}, // Reg + {0.5f, 0.0f, 0.5f}, // Debug + {0.0f, 0.5f, 0.5f}, // Message + {0.0f, 0.2f, 0.0f} // Performance + }; + + const VkDebugUtilsLabelEXT label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, buf.c_str(), + {colors[static_cast(category)][0], colors[static_cast(category)][1], + colors[static_cast(category)][2], 1.0f}}; + vkCmdInsertDebugUtilsLabelEXT(g_vulkan_context->GetCurrentCommandBuffer(), &label); +#endif +} + +bool GSDeviceVK::CheckFeatures() +{ + const VkPhysicalDeviceProperties& properties = g_vulkan_context->GetDeviceProperties(); + const VkPhysicalDeviceFeatures& features = g_vulkan_context->GetDeviceFeatures(); + const VkPhysicalDeviceLimits& limits = g_vulkan_context->GetDeviceLimits(); + const u32 vendorID = properties.vendorID; + const bool isAMD = (vendorID == 0x1002 || vendorID == 0x1022); + // const bool isNVIDIA = (vendorID == 0x10DE); + + m_features.broken_point_sampler = isAMD; + m_features.geometry_shader = features.geometryShader; + m_features.image_load_store = features.fragmentStoresAndAtomics; + m_features.texture_barrier = true; + m_features.prefer_new_textures = true; + + if (!features.dualSrcBlend) + { + Console.Error("Vulkan driver is missing dual-source blending."); + Host::AddOSDMessage( + "Dual-source blending is not supported by your driver. 
This will significantly slow performance.", 30.0f); + } + + // whether we can do point/line expand depends on the range of the device + const float f_upscale = static_cast(m_upscale_multiplier); + m_features.point_expand = + (features.largePoints && limits.pointSizeRange[0] <= f_upscale && limits.pointSizeRange[1] >= f_upscale); + m_features.line_expand = + (features.wideLines && limits.lineWidthRange[0] <= f_upscale && limits.lineWidthRange[1] >= f_upscale); + Console.WriteLn("Using %s for point expansion and %s for line expansion.", + m_features.point_expand ? "hardware" : "geometry shaders", + m_features.line_expand ? "hardware" : "geometry shaders"); + + return true; +} + +void GSDeviceVK::DrawPrimitive() +{ + g_perfmon.Put(GSPerfMon::DrawCalls, 1); + vkCmdDraw(g_vulkan_context->GetCurrentCommandBuffer(), m_vertex.count, 1, m_vertex.start, 0); +} + +void GSDeviceVK::DrawIndexedPrimitive() +{ + g_perfmon.Put(GSPerfMon::DrawCalls, 1); + vkCmdDrawIndexed(g_vulkan_context->GetCurrentCommandBuffer(), m_index.count, 1, m_index.start, m_vertex.start, 0); +} + +void GSDeviceVK::DrawIndexedPrimitive(int offset, int count) +{ + ASSERT(offset + count <= (int)m_index.count); + g_perfmon.Put(GSPerfMon::DrawCalls, 1); + vkCmdDrawIndexed(g_vulkan_context->GetCurrentCommandBuffer(), count, 1, m_index.start + offset, m_vertex.start, 0); +} + +void GSDeviceVK::ClearRenderTarget(GSTexture* t, const GSVector4& c) +{ + if (!t) + return; + + if (m_current_render_target == t) + EndRenderPass(); + + static_cast(t)->SetClearColor(c); +} + +void GSDeviceVK::ClearRenderTarget(GSTexture* t, u32 c) { ClearRenderTarget(t, GSVector4::rgba32(c) * (1.0f / 255)); } + +void GSDeviceVK::InvalidateRenderTarget(GSTexture* t) +{ + if (!t) + return; + + if (m_current_render_target == t || m_current_depth_target == t) + EndRenderPass(); + + t->SetState(GSTexture::State::Invalidated); +} + +void GSDeviceVK::ClearDepth(GSTexture* t) +{ + if (!t) + return; + + if (m_current_depth_target == t) + 
EndRenderPass(); + + static_cast(t)->SetClearDepth(0.0f); +} + +void GSDeviceVK::ClearStencil(GSTexture* t, u8 c) +{ + if (!t) + return; + + EndRenderPass(); + + static_cast(t)->TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + const VkClearDepthStencilValue dsv{0.0f, static_cast(c)}; + const VkImageSubresourceRange srr{VK_IMAGE_ASPECT_STENCIL_BIT, 0u, 1u, 0u, 1u}; + + vkCmdClearDepthStencilImage(g_vulkan_context->GetCurrentCommandBuffer(), static_cast(t)->GetImage(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &dsv, 1, &srr); + + static_cast(t)->TransitionToLayout(VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); +} + +GSTexture* GSDeviceVK::CreateSurface(GSTexture::Type type, int w, int h, GSTexture::Format format) +{ + pxAssert(type != GSTexture::Type::Offscreen && type != GSTexture::Type::SparseRenderTarget && + type != GSTexture::Type::SparseDepthStencil); + + const u32 width = std::max(1, std::min(w, g_vulkan_context->GetMaxImageDimension2D())); + const u32 height = std::max(1, std::min(h, g_vulkan_context->GetMaxImageDimension2D())); + + const bool mipmap = + type == GSTexture::Type::Texture && (m_mipmap > 1 || GSConfig.UserHacks_TriFilter == TriFiltering::Forced); + const u32 layers = mipmap && format == GSTexture::Format::Color ? 
static_cast(log2(std::max(w, h))) : 1u; + + return GSTextureVK::Create(type, width, height, layers, format).release(); +} + +bool GSDeviceVK::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) +{ + const u32 width = rect.width(); + const u32 height = rect.height(); + const u32 pitch = width * Vulkan::Util::GetTexelSize(static_cast(src)->GetNativeFormat()); + const u32 size = pitch * height; + const u32 level = 0; + if (!CheckStagingBufferSize(size)) + { + Console.Error("Can't read back %ux%u", width, height); + return false; + } + + g_perfmon.Put(GSPerfMon::Readbacks, 1); + EndRenderPass(); + { + const VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + GL_INS("ReadbackTexture: {%d,%d} %ux%u", rect.left, rect.top, width, height); + + GSTextureVK* vkSrc = static_cast(src); + VkImageLayout old_layout = vkSrc->GetTexture().GetLayout(); + if (old_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) + vkSrc->GetTexture().TransitionSubresourcesToLayout( + cmdbuf, level, 1, 0, 1, old_layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + VkBufferImageCopy image_copy = {}; + const VkImageAspectFlags aspect = Vulkan::Util::IsDepthFormat(static_cast(vkSrc->GetFormat())) ? + VK_IMAGE_ASPECT_DEPTH_BIT : + VK_IMAGE_ASPECT_COLOR_BIT; + image_copy.bufferOffset = 0; + image_copy.bufferRowLength = width; + image_copy.bufferImageHeight = 0; + image_copy.imageSubresource = {aspect, level, 0u, 1u}; + image_copy.imageOffset = {rect.left, rect.top, 0}; + image_copy.imageExtent = {width, height, 1u}; + + // invalidate gpu cache + // TODO: Needed? 
+ Vulkan::Util::BufferMemoryBarrier(cmdbuf, m_readback_staging_buffer, 0, VK_ACCESS_TRANSFER_WRITE_BIT, 0, size, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + + // do the copy + vkCmdCopyImageToBuffer(cmdbuf, vkSrc->GetTexture().GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + m_readback_staging_buffer, 1, &image_copy); + + // flush gpu cache + Vulkan::Util::BufferMemoryBarrier(cmdbuf, m_readback_staging_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_HOST_READ_BIT, 0, size, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT); + + if (old_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) + { + vkSrc->GetTexture().TransitionSubresourcesToLayout( + cmdbuf, level, 1, 0, 1, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, old_layout); + } + } + + ExecuteCommandBuffer(true); + + // invalidate cpu cache before reading + VkResult res = vmaInvalidateAllocation(g_vulkan_context->GetAllocator(), m_readback_staging_allocation, 0, size); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vmaInvalidateAllocation() failed, readback may be incorrect: "); + + out_map.bits = reinterpret_cast(m_readback_staging_buffer_map); + out_map.pitch = pitch; + + return true; +} + +void GSDeviceVK::DownloadTextureComplete() {} + +void GSDeviceVK::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) +{ + if (!sTex || !dTex) + { + ASSERT(0); + return; + } + + g_perfmon.Put(GSPerfMon::TextureCopies, 1); + + GSTextureVK* const sTexVK = static_cast(sTex); + GSTextureVK* const dTexVK = static_cast(dTex); + const GSVector4i dtex_rc(0, 0, dTexVK->GetWidth(), dTexVK->GetHeight()); + const GSVector4i dst_rc(r - r.xyxy()); + + if (sTexVK->GetState() == GSTexture::State::Cleared) + { + // source is cleared. 
if destination is a render target, we can carry the clear forward + if (dTexVK->IsRenderTargetOrDepthStencil()) + { + if (dtex_rc.eq(dst_rc)) + { + // pass it forward if we're clearing the whole thing + if (sTexVK->IsDepthStencil()) + dTexVK->SetClearDepth(sTexVK->GetClearDepth()); + else + dTexVK->SetClearColor(sTexVK->GetClearColor()); + + return; + } + else + { + // otherwise we need to do an attachment clear + const bool depth = (dTexVK->GetType() == GSTexture::Type::DepthStencil); + OMSetRenderTargets(depth ? nullptr : dTexVK, depth ? dTexVK : nullptr, dtex_rc, false); + BeginRenderPassForStretchRect(dTexVK, dtex_rc, dst_rc); + + // so use an attachment clear + VkClearAttachment ca; + ca.aspectMask = depth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + GSVector4::store(ca.clearValue.color.float32, sTexVK->GetClearColor()); + ca.clearValue.depthStencil.depth = sTexVK->GetClearDepth(); + ca.clearValue.depthStencil.stencil = 0; + ca.colorAttachment = 0; + + const VkClearRect cr = { {{0, 0}, {static_cast(dst_rc.width()), static_cast(dst_rc.height())}}, 0u, 1u }; + vkCmdClearAttachments(g_vulkan_context->GetCurrentCommandBuffer(), 1, &ca, 1, &cr); + return; + } + } + + // commit the clear to the source first, then do normal copy + sTexVK->CommitClear(); + } + + // if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first + // (the area outside of where we're copying to) + if (dTexVK->GetState() == GSTexture::State::Cleared && !dtex_rc.eq(dst_rc)) + dTexVK->CommitClear(); + + // *now* we can do a normal image copy. + const VkImageAspectFlags src_aspect = (sTexVK->IsDepthStencil()) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + const VkImageAspectFlags dst_aspect = (dTexVK->IsDepthStencil()) ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + const VkImageCopy ic = {{src_aspect, 0u, 0u, 1u}, {r.left, r.top, 0u}, {dst_aspect, 0u, 0u, 1u}, {0u, 0u, 0u}, + {static_cast(r.width()), static_cast(r.height()), 1u}}; + + EndRenderPass(); + + sTexVK->TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + dTexVK->TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + vkCmdCopyImage(g_vulkan_context->GetCurrentCommandBuffer(), sTexVK->GetImage(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dTexVK->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &ic); + + dTexVK->SetState(GSTexture::State::Dirty); +} + +void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, + ShaderConvert shader /* = ShaderConvert::COPY */, bool linear /* = true */) +{ + pxAssert(IsDepthConvertShader(shader) == (dTex && dTex->GetType() == GSTexture::Type::DepthStencil)); + + GL_INS("StretchRect(%d) {%d,%d} %dx%d -> {%d,%d) %dx%d", shader, int(sRect.left), int(sRect.top), + int(sRect.right - sRect.left), int(sRect.bottom - sRect.top), int(dRect.left), int(dRect.top), + int(dRect.right - dRect.left), int(dRect.bottom - dRect.top)); + + DoStretchRect(static_cast(sTex), sRect, static_cast(dTex), dRect, + dTex ? m_convert[static_cast(shader)] : m_present[static_cast(shader)], linear); +} + +void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, + bool green, bool blue, bool alpha) +{ + GL_PUSH("ColorCopy Red:%d Green:%d Blue:%d Alpha:%d", red, green, blue, alpha); + + const u32 index = (red ? 1 : 0) | (green ? 2 : 0) | (blue ? 4 : 0) | (alpha ? 
8 : 0); + DoStretchRect( + static_cast(sTex), sRect, static_cast(dTex), dRect, m_color_copy[index], false); +} + +void GSDeviceVK::BeginRenderPassForStretchRect(GSTextureVK* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc) +{ + const bool is_whole_target = dst_rc.eq(dtex_rc); + const VkAttachmentLoadOp load_op = + is_whole_target ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : GetLoadOpForTexture(dTex); + dTex->SetState(GSTexture::State::Dirty); + + if (dTex->GetType() == GSTexture::Type::DepthStencil) + { + if (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + BeginClearRenderPass(m_utility_depth_render_pass_clear, dtex_rc, dTex->GetClearDepth(), 0); + else + BeginRenderPass((load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) ? m_utility_depth_render_pass_discard : + m_utility_depth_render_pass_load, + dst_rc); + } + else if (dTex->GetFormat() == GSTexture::Format::Color) + { + if (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + BeginClearRenderPass(m_utility_color_render_pass_clear, dtex_rc, dTex->GetClearColor()); + else + BeginRenderPass((load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) ? m_utility_color_render_pass_discard : + m_utility_color_render_pass_load, + dst_rc); + } + else + { + // integer formats, etc + const VkRenderPass rp = g_vulkan_context->GetRenderPass(dTex->GetNativeFormat(), VK_FORMAT_UNDEFINED, + load_op, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE); + if (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + { + BeginClearRenderPass(rp, dtex_rc, dTex->GetClearColor()); + } + else + { + BeginRenderPass(rp, dst_rc); + } + } +} + +void GSDeviceVK::DoStretchRect(GSTextureVK* sTex, const GSVector4& sRect, GSTextureVK* dTex, const GSVector4& dRect, + VkPipeline pipeline, bool linear) +{ + if (sTex->GetLayout() != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + { + // can't transition in a render pass + EndRenderPass(); + sTex->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + SetUtilityTexture(sTex, linear ? 
m_linear_sampler : m_point_sampler); + SetPipeline(pipeline); + + const bool is_present = (!dTex); + const bool depth = (dTex && dTex->GetType() == GSTexture::Type::DepthStencil); + const GSVector2i size( + is_present ? GSVector2i(m_display->GetWindowWidth(), m_display->GetWindowHeight()) : dTex->GetSize()); + const GSVector4i dtex_rc(0, 0, size.x, size.y); + const GSVector4i dst_rc(GSVector4i(dRect).rintersect(dtex_rc)); + + // switch rts (which might not end the render pass), so check the bounds + if (!is_present) + { + OMSetRenderTargets(depth ? nullptr : dTex, depth ? dTex : nullptr, dst_rc, false); + if (InRenderPass() && !CheckRenderPassArea(dst_rc)) + EndRenderPass(); + } + + const bool drawing_to_current_rt = (is_present || InRenderPass()); + if (!drawing_to_current_rt) + BeginRenderPassForStretchRect(dTex, dtex_rc, dst_rc); + + DrawStretchRect(sRect, dRect, size); + + if (!drawing_to_current_rt) + { + EndRenderPass(); + static_cast(dTex)->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } +} + +void GSDeviceVK::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds) +{ + // ia + const float left = dRect.x * 2 / ds.x - 1.0f; + const float top = 1.0f - dRect.y * 2 / ds.y; + const float right = dRect.z * 2 / ds.x - 1.0f; + const float bottom = 1.0f - dRect.w * 2 / ds.y; + + GSVertexPT1 vertices[] = { + {GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)}, + {GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)}, + {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)}, + {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)}, + }; + IASetVertexBuffer(vertices, sizeof(vertices[0]), std::size(vertices)); + + if (ApplyUtilityState()) + DrawPrimitive(); +} + +void GSDeviceVK::BlitRect(GSTexture* sTex, const GSVector4i& sRect, u32 sLevel, GSTexture* dTex, + const GSVector4i& dRect, u32 dLevel, bool linear) +{ + GSTextureVK* sTexVK = static_cast(sTex); + GSTextureVK* dTexVK = 
static_cast(dTex); + + //const VkImageLayout old_src_layout = sTexVK->GetTexture().GetLayout(); + //const VkImageLayout old_dst_layout = dTexVK->GetTexture().GetLayout(); + + EndRenderPass(); + + sTexVK->TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + dTexVK->TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + pxAssert( + (sTexVK->GetType() == GSTexture::Type::DepthStencil) == (dTexVK->GetType() == GSTexture::Type::DepthStencil)); + const VkImageAspectFlags aspect = + (sTexVK->GetType() == GSTexture::Type::DepthStencil) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + const VkImageBlit ib{{aspect, sLevel, 0u, 1u}, {{sRect.left, sRect.top, 0}, {sRect.right, sRect.bottom, 1}}, + {aspect, dLevel, 0u, 1u}, {{dRect.left, dRect.top, 0}, {dRect.right, dRect.bottom, 1}}}; + + vkCmdBlitImage(g_vulkan_context->GetCurrentCommandBuffer(), sTexVK->GetTexture().GetImage(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dTexVK->GetTexture().GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, + &ib, linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); +} + +void GSDeviceVK::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, + const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) +{ + GL_PUSH("DoMerge"); + + const GSVector4 full_r(0.0f, 0.0f, 1.0f, 1.0f); + const u32 yuv_constants[4] = {EXTBUF.EMODA, EXTBUF.EMODC}; + const bool feedback_write_2 = PMODE.EN2 && sTex[2] != nullptr && EXTBUF.FBIN == 1; + const bool feedback_write_1 = PMODE.EN1 && sTex[2] != nullptr && EXTBUF.FBIN == 0; + const bool feedback_write_2_but_blend_bg = feedback_write_2 && PMODE.SLBG == 1; + + // Merge the 2 source textures (sTex[0],sTex[1]). Final results go to dTex. Feedback write will go to sTex[2]. + // If either 2nd output is disabled or SLBG is 1, a background color will be used. 
+ // Note: background color is also used when outside of the unit rectangle area + EndRenderPass(); + + // transition everything before starting the new render pass + static_cast(dTex)->TransitionToLayout(VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + if (sTex[0]) + static_cast(sTex[0])->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + const GSVector2i dsize(dTex->GetSize()); + const GSVector4i darea(0, 0, dsize.x, dsize.y); + bool dcleared = false; + if (sTex[1] && (PMODE.SLBG == 0 || feedback_write_2_but_blend_bg)) + { + // 2nd output is enabled and selected. Copy it to destination so we can blend it with 1st output + // Note: value outside of dRect must contains the background color (c) + if (sTex[1]->GetState() == GSTexture::State::Dirty) + { + static_cast(sTex[1])->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + OMSetRenderTargets(dTex, nullptr, darea, false); + SetUtilityTexture(sTex[1], m_linear_sampler); + BeginClearRenderPass(m_utility_color_render_pass_clear, darea, c); + SetPipeline(m_convert[static_cast(ShaderConvert::COPY)]); + DrawStretchRect(sRect[1], dRect[1], dsize); + dTex->SetState(GSTexture::State::Dirty); + dcleared = true; + } + } + + // Upload constant to select YUV algo + const GSVector2i fbsize(sTex[2] ? sTex[2]->GetSize() : GSVector2i(0, 0)); + const GSVector4i fbarea(0, 0, fbsize.x, fbsize.y); + if (feedback_write_2)// FIXME I'm not sure dRect[1] is always correct + { + EndRenderPass(); + OMSetRenderTargets(sTex[2], nullptr, fbarea, false); + if (dcleared) + SetUtilityTexture(dTex, m_linear_sampler); + + // sTex[2] can be sTex[0], in which case it might be cleared (e.g. Xenosaga). 
+ BeginRenderPassForStretchRect(static_cast(sTex[2]), fbarea, GSVector4i(dRect[1])); + if (dcleared) + { + SetPipeline(m_convert[static_cast(ShaderConvert::YUV)]); + SetUtilityPushConstants(yuv_constants, sizeof(yuv_constants)); + DrawStretchRect(full_r, dRect[1], fbsize); + } + EndRenderPass(); + + if (sTex[0] == sTex[2]) + { + // need a barrier here because of the render pass + static_cast(sTex[2])->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + } + + // Restore background color to process the normal merge + if (feedback_write_2_but_blend_bg || !dcleared) + { + EndRenderPass(); + OMSetRenderTargets(dTex, nullptr, darea, false); + BeginClearRenderPass(m_utility_color_render_pass_clear, darea, c); + } + else if (!InRenderPass()) + { + OMSetRenderTargets(dTex, nullptr, darea, false); + BeginRenderPass(m_utility_color_render_pass_load, darea); + } + + if (sTex[0] && sTex[0]->GetState() == GSTexture::State::Dirty) + { + // 1st output is enabled. It must be blended + SetUtilityTexture(sTex[0], m_linear_sampler); + SetPipeline(m_merge[PMODE.MMOD]); + SetUtilityPushConstants(&c, sizeof(c)); + DrawStretchRect(sRect[0], dRect[0], dTex->GetSize()); + } + + if (feedback_write_1) // FIXME I'm not sure dRect[0] is always correct + { + EndRenderPass(); + SetPipeline(m_convert[static_cast(ShaderConvert::YUV)]); + SetUtilityTexture(dTex, m_linear_sampler); + SetUtilityPushConstants(yuv_constants, sizeof(yuv_constants)); + OMSetRenderTargets(sTex[2], nullptr, fbarea, false); + BeginRenderPass(m_utility_color_render_pass_load, fbarea); + DrawStretchRect(full_r, dRect[0], dsize); + } + + EndRenderPass(); + + // this texture is going to get used as an input, so make sure we don't read undefined data + static_cast(dTex)->CommitClear(); + static_cast(dTex)->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); +} + +void GSDeviceVK::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) +{ + const GSVector2i 
size(dTex->GetSize()); + const GSVector4 s = GSVector4(size); + + const GSVector4 sRect(0, 0, 1, 1); + const GSVector4 dRect(0.0f, yoffset, s.x, s.y + yoffset); + + InterlaceConstantBuffer cb; + cb.ZrH = GSVector2(0, 1.0f / s.y); + cb.hH = s.y / 2; + + GL_PUSH("DoInterlace %dx%d Shader:%d Linear:%d", size.x, size.y, shader, linear); + + static_cast(dTex)->TransitionToLayout(VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + const VkFramebuffer fb = static_cast(dTex)->GetFramebuffer(false); + if (fb == VK_NULL_HANDLE) + return; + + const GSVector4i rc(0, 0, size.x, size.y); + EndRenderPass(); + OMSetRenderTargets(dTex, nullptr, rc, false); + SetUtilityTexture(sTex, linear ? m_linear_sampler : m_point_sampler); + BeginRenderPass(m_utility_color_render_pass_load, rc); + SetPipeline(m_interlace[shader]); + SetUtilityPushConstants(&cb, sizeof(cb)); + DrawStretchRect(sRect, dRect, dTex->GetSize()); + EndRenderPass(); + + // this texture is going to get used as an input, so make sure we don't read undefined data + static_cast(dTex)->CommitClear(); + static_cast(dTex)->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); +} + +void GSDeviceVK::IASetVertexBuffer(const void* vertex, size_t stride, size_t count) +{ + const u32 size = static_cast(stride) * static_cast(count); + if (!m_vertex_stream_buffer.ReserveMemory(size, static_cast(stride))) + { + ExecuteCommandBuffer(false, "Uploading %u bytes to vertex buffer", size); + if (!m_vertex_stream_buffer.ReserveMemory(size, static_cast(stride))) + pxFailRel("Failed to reserve space for vertices"); + } + + m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / stride; + m_vertex.limit = count; + m_vertex.stride = stride; + m_vertex.count = count; + SetVertexBuffer(m_vertex_stream_buffer.GetBuffer(), 0); + + GSVector4i::storent(m_vertex_stream_buffer.GetCurrentHostPointer(), vertex, count * stride); + m_vertex_stream_buffer.CommitMemory(size); +} + +bool GSDeviceVK::IAMapVertexBuffer(void** vertex, size_t stride, 
size_t count) +{ + const u32 size = static_cast(stride) * static_cast(count); + if (!m_vertex_stream_buffer.ReserveMemory(size, static_cast(stride))) + { + ExecuteCommandBuffer(false, "Mapping %u bytes to vertex buffer", size); + if (!m_vertex_stream_buffer.ReserveMemory(size, static_cast(stride))) + pxFailRel("Failed to reserve space for vertices"); + } + + m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / stride; + m_vertex.limit = m_vertex_stream_buffer.GetCurrentSpace() / stride; + m_vertex.stride = stride; + m_vertex.count = count; + SetVertexBuffer(m_vertex_stream_buffer.GetBuffer(), 0); + + *vertex = m_vertex_stream_buffer.GetCurrentHostPointer(); + return true; +} + +void GSDeviceVK::IAUnmapVertexBuffer() +{ + const u32 size = static_cast(m_vertex.stride) * static_cast(m_vertex.count); + m_vertex_stream_buffer.CommitMemory(size); +} + +void GSDeviceVK::IASetIndexBuffer(const void* index, size_t count) +{ + const u32 size = sizeof(u32) * static_cast(count); + if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32))) + { + ExecuteCommandBuffer(false, "Uploading %u bytes to index buffer", size); + if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32))) + pxFailRel("Failed to reserve space for vertices"); + } + + m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32); + m_index.limit = count; + m_index.count = count; + SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32); + + std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size); + m_index_stream_buffer.CommitMemory(size); +} + +void GSDeviceVK::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, bool feedback_loop) +{ + GSTextureVK* vkRt = static_cast(rt); + GSTextureVK* vkDs = static_cast(ds); + pxAssert(vkRt || vkDs); + + if (m_current_render_target != vkRt || m_current_depth_target != vkDs || + m_current_framebuffer_has_feedback_loop != feedback_loop) + { + // framebuffer change or feedback loop 
enabled/disabled + EndRenderPass(); + + if (vkRt) + m_current_framebuffer = vkRt->GetLinkedFramebuffer(vkDs, feedback_loop); + else + m_current_framebuffer = vkDs->GetLinkedFramebuffer(nullptr, feedback_loop); + } + + m_current_render_target = vkRt; + m_current_depth_target = vkDs; + m_current_framebuffer_has_feedback_loop = feedback_loop; + + if (!InRenderPass()) + { + if (vkRt) + vkRt->TransitionToLayout( + feedback_loop ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + if (vkDs) + vkDs->TransitionToLayout(VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + } + + // This is used to set/initialize the framebuffer for tfx rendering. + const GSVector2i size = vkRt ? vkRt->GetSize() : vkDs->GetSize(); + const VkViewport vp{0.0f, 0.0f, static_cast(size.x), static_cast(size.y), 0.0f, 1.0f}; + + SetViewport(vp); + SetScissor(scissor); +} + +u16 GSDeviceVK::ConvertBlendEnum(u16 generic) +{ + switch (generic) + { + case SRC_COLOR: + return VK_BLEND_FACTOR_SRC_COLOR; + case INV_SRC_COLOR: + return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; + case DST_COLOR: + return VK_BLEND_FACTOR_DST_COLOR; + case INV_DST_COLOR: + return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; + case SRC1_COLOR: + return VK_BLEND_FACTOR_SRC1_COLOR; + case INV_SRC1_COLOR: + return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR; + case SRC_ALPHA: + return VK_BLEND_FACTOR_SRC_ALPHA; + case INV_SRC_ALPHA: + return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case DST_ALPHA: + return VK_BLEND_FACTOR_DST_ALPHA; + case INV_DST_ALPHA: + return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; + case SRC1_ALPHA: + return VK_BLEND_FACTOR_SRC1_ALPHA; + case INV_SRC1_ALPHA: + return VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA; + case CONST_COLOR: + return VK_BLEND_FACTOR_CONSTANT_COLOR; + case INV_CONST_COLOR: + return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; + case CONST_ONE: + return VK_BLEND_FACTOR_ONE; + case CONST_ZERO: + return VK_BLEND_FACTOR_ZERO; + case OP_ADD: + return VK_BLEND_OP_ADD; + case OP_SUBTRACT: + return 
VK_BLEND_OP_SUBTRACT; + case OP_REV_SUBTRACT: + return VK_BLEND_OP_REVERSE_SUBTRACT; + default: + ASSERT(0); + return 0; + } +} + +VkSampler GSDeviceVK::GetSampler(GSHWDrawConfig::SamplerSelector ss) +{ + const auto it = m_samplers.find(ss.key); + if (it != m_samplers.end()) + return it->second; + + const bool aniso = (ss.aniso && GSConfig.MaxAnisotropy > 1); + + static constexpr std::array mipmap_modes = {{ + VK_SAMPLER_MIPMAP_MODE_NEAREST, // Nearest + VK_SAMPLER_MIPMAP_MODE_NEAREST, // Linear + VK_SAMPLER_MIPMAP_MODE_NEAREST, // Nearest_Mipmap_Nearest + VK_SAMPLER_MIPMAP_MODE_LINEAR, // Nearest_Mipmap_Linear + VK_SAMPLER_MIPMAP_MODE_NEAREST, // Linear_Mipmap_Nearest + VK_SAMPLER_MIPMAP_MODE_LINEAR, // Linear_Mipmap_Linear + }}; + + const VkSamplerCreateInfo ci = { + VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, nullptr, 0, + ss.biln ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, // min + ss.biln ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, // max + mipmap_modes[ss.triln], // mip + static_cast( + ss.tau ? VK_SAMPLER_ADDRESS_MODE_REPEAT : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE), // u + static_cast( + ss.tav ? VK_SAMPLER_ADDRESS_MODE_REPEAT : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE), // v + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // w + 0.0f, // lod bias + static_cast(aniso), // anisotropy enable + aniso ? static_cast(GSConfig.MaxAnisotropy) : 1.0f, // anisotropy + VK_FALSE, // compare enable + VK_COMPARE_OP_ALWAYS, // compare op + -1000.0f, // min lod + (ss.triln >= static_cast(GS_MIN_FILTER::Nearest_Mipmap_Nearest)) ? 
1000.0f : 0.0f, // max lod + VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // border + VK_FALSE // unnormalized coordinates + }; + VkSampler sampler = VK_NULL_HANDLE; + VkResult res = vkCreateSampler(g_vulkan_context->GetDevice(), &ci, nullptr, &sampler); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkCreateSampler() failed: "); + + m_samplers.emplace(ss.key, sampler); + return sampler; +} + +static void AddMacro(std::stringstream& ss, const char* name, const char* value) +{ + ss << "#define " << name << " " << value << "\n"; +} + +static void AddMacro(std::stringstream& ss, const char* name, int value) +{ + ss << "#define " << name << " " << value << "\n"; +} + +static void AddShaderHeader(std::stringstream& ss) +{ + ss << "#version 460 core\n"; + ss << "#extension GL_EXT_samplerless_texture_functions : require\n"; + + if (!g_vulkan_context->GetDeviceFeatures().dualSrcBlend) + ss << "#define DISABLE_DUAL_SOURCE 1\n"; +} + +static void AddShaderStageMacro(std::stringstream& ss, bool vs, bool gs, bool fs) +{ + if (vs) + ss << "#define VERTEX_SHADER 1\n"; + else if (gs) + ss << "#define GEOMETRY_SHADER 1\n"; + else if (fs) + ss << "#define FRAGMENT_SHADER 1\n"; +} + +static void AddUtilityVertexAttributes(Vulkan::GraphicsPipelineBuilder& gpb) +{ + gpb.AddVertexBuffer(0, sizeof(GSVertexPT1)); + gpb.AddVertexAttribute(0, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 0); + gpb.AddVertexAttribute(1, 0, VK_FORMAT_R32G32_SFLOAT, 16); + gpb.AddVertexAttribute(2, 0, VK_FORMAT_R8G8B8A8_UNORM, 28); + gpb.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP); +} + +VkShaderModule GSDeviceVK::GetUtilityVertexShader(const std::string& source, const char* replace_main = nullptr) +{ + std::stringstream ss; + AddShaderHeader(ss); + AddShaderStageMacro(ss, true, false, false); + AddMacro(ss, "PS_SCALE_FACTOR", m_upscale_multiplier); + if (replace_main) + ss << "#define " << replace_main << " main\n"; + ss << source; + + return g_vulkan_shader_cache->GetVertexShader(ss.str()); +} + 
+VkShaderModule GSDeviceVK::GetUtilityFragmentShader(const std::string& source, const char* replace_main = nullptr) +{ + std::stringstream ss; + AddShaderHeader(ss); + AddShaderStageMacro(ss, false, false, true); + AddMacro(ss, "PS_SCALE_FACTOR", m_upscale_multiplier); + if (replace_main) + ss << "#define " << replace_main << " main\n"; + ss << source; + + return g_vulkan_shader_cache->GetFragmentShader(ss.str()); +} + +bool GSDeviceVK::CreateNullTexture() +{ + if (!m_null_texture.Create(1, 1, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + { + return false; + } + + const VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + const VkImageSubresourceRange srr{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}; + const VkClearColorValue ccv{}; + m_null_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vkCmdClearColorImage(cmdbuf, m_null_texture.GetImage(), m_null_texture.GetLayout(), &ccv, 1, &srr); + m_null_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_null_texture.GetImage(), "Null texture"); + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_null_texture.GetView(), "Null texture view"); + + return true; +} + +bool GSDeviceVK::CreateBuffers() +{ + if (!m_vertex_stream_buffer.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_BUFFER_SIZE)) + { + Host::ReportErrorAsync("GS", "Failed to allocate vertex buffer"); + return false; + } + + if (!m_index_stream_buffer.Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_BUFFER_SIZE)) + { + Host::ReportErrorAsync("GS", "Failed to allocate index buffer"); + return false; + } + + if (!m_vertex_uniform_stream_buffer.Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VERTEX_UNIFORM_BUFFER_SIZE)) + { + Host::ReportErrorAsync("GS", "Failed to 
allocate vertex uniform buffer"); + return false; + } + + if (!m_fragment_uniform_stream_buffer.Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, FRAGMENT_UNIFORM_BUFFER_SIZE)) + { + Host::ReportErrorAsync("GS", "Failed to allocate fragment uniform buffer"); + return false; + } + + return true; +} + +bool GSDeviceVK::CreatePipelineLayouts() +{ + VkDevice dev = g_vulkan_context->GetDevice(); + Vulkan::DescriptorSetLayoutBuilder dslb; + Vulkan::PipelineLayoutBuilder plb; + + ////////////////////////////////////////////////////////////////////////// + // Convert Pipeline Layout + ////////////////////////////////////////////////////////////////////////// + + dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, NUM_CONVERT_SAMPLERS, VK_SHADER_STAGE_FRAGMENT_BIT); + if ((m_utility_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(dev, m_utility_ds_layout, "Convert descriptor layout"); + + plb.AddPushConstants(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, CONVERT_PUSH_CONSTANTS_SIZE); + plb.AddDescriptorSet(m_utility_ds_layout); + if ((m_utility_pipeline_layout = plb.Create(dev)) == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(dev, m_utility_ds_layout, "Convert pipeline layout"); + + ////////////////////////////////////////////////////////////////////////// + // Draw/TFX Pipeline Layout + ////////////////////////////////////////////////////////////////////////// + dslb.AddBinding( + 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT); + dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + if ((m_tfx_ubo_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(dev, m_tfx_ubo_ds_layout, "TFX UBO descriptor layout"); + for (u32 i = 0; i < NUM_TFX_SAMPLERS; i++) + dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + 
if ((m_tfx_sampler_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(dev, m_tfx_sampler_ds_layout, "TFX sampler descriptor layout"); + dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + dslb.AddBinding(2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + if ((m_tfx_rt_texture_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(dev, m_tfx_rt_texture_ds_layout, "TFX RT texture descriptor layout"); + + plb.AddDescriptorSet(m_tfx_ubo_ds_layout); + plb.AddDescriptorSet(m_tfx_sampler_ds_layout); + plb.AddDescriptorSet(m_tfx_rt_texture_ds_layout); + if ((m_tfx_pipeline_layout = plb.Create(dev)) == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(dev, m_tfx_pipeline_layout, "TFX pipeline layout"); + return true; +} + +bool GSDeviceVK::CreateRenderPasses() +{ +#define GET(dest, rt, depth, fbl, opa, opb, opc) \ + do \ + { \ + dest = g_vulkan_context->GetRenderPass( \ + (rt), (depth), ((rt) != VK_FORMAT_UNDEFINED) ? (opa) : VK_ATTACHMENT_LOAD_OP_DONT_CARE, /* color load */ \ + ((rt) != VK_FORMAT_UNDEFINED) ? VK_ATTACHMENT_STORE_OP_STORE : \ + VK_ATTACHMENT_STORE_OP_DONT_CARE, /* color store */ \ + ((depth) != VK_FORMAT_UNDEFINED) ? (opb) : VK_ATTACHMENT_LOAD_OP_DONT_CARE, /* depth load */ \ + ((depth) != VK_FORMAT_UNDEFINED) ? VK_ATTACHMENT_STORE_OP_STORE : \ + VK_ATTACHMENT_STORE_OP_DONT_CARE, /* depth store */ \ + ((depth) != VK_FORMAT_UNDEFINED) ? 
(opc) : VK_ATTACHMENT_LOAD_OP_DONT_CARE, /* stencil load */ \
			VK_ATTACHMENT_STORE_OP_DONT_CARE, /* stencil store */ \
			(fbl) /* feedback loop */ \
		); \
		if (dest == VK_NULL_HANDLE) \
			return false; \
	} while (0)

	const VkFormat rt_format = GSTextureVK::LookupNativeFormat(GSTexture::Format::Color);
	const VkFormat hdr_rt_format = GSTextureVK::LookupNativeFormat(GSTexture::Format::FloatColor);
	const VkFormat depth_format = GSTextureVK::LookupNativeFormat(GSTexture::Format::DepthStencil);

	// Pre-create every combination of TFX render pass:
	// [color?][depth?][hdr?][DATE mode][feedback loop?][color load op][depth load op].
	for (u32 rt = 0; rt < 2; rt++)
	{
		for (u32 ds = 0; ds < 2; ds++)
		{
			for (u32 hdr = 0; hdr < 2; hdr++)
			{
				for (u32 date = DATE_RENDER_PASS_NONE; date <= DATE_RENDER_PASS_STENCIL_ONE; date++)
				{
					for (u32 fbl = 0; fbl < 2; fbl++)
					{
						for (u32 opa = VK_ATTACHMENT_LOAD_OP_LOAD; opa <= VK_ATTACHMENT_LOAD_OP_DONT_CARE; opa++)
						{
							for (u32 opb = VK_ATTACHMENT_LOAD_OP_LOAD; opb <= VK_ATTACHMENT_LOAD_OP_DONT_CARE; opb++)
							{
								const VkFormat rp_rt_format =
									(rt != 0) ? ((hdr != 0) ? hdr_rt_format : rt_format) : VK_FORMAT_UNDEFINED;
								const VkFormat rp_depth_format = (ds != 0) ? depth_format : VK_FORMAT_UNDEFINED;
								// Stencil is only cleared (STENCIL_ONE) or loaded (other DATE
								// modes) when a DATE pass actually uses it.
								const VkAttachmentLoadOp opc =
									((date == DATE_RENDER_PASS_NONE) ?
											VK_ATTACHMENT_LOAD_OP_DONT_CARE :
											(date == DATE_RENDER_PASS_STENCIL_ONE ? VK_ATTACHMENT_LOAD_OP_CLEAR :
																					VK_ATTACHMENT_LOAD_OP_LOAD));
								// NOTE(review): the static_cast calls below appear to have lost their
								// template arguments (presumably <VkAttachmentLoadOp>) in transit —
								// confirm against the original file.
								GET(m_tfx_render_pass[rt][ds][hdr][date][fbl][opa][opb], rp_rt_format, rp_depth_format,
									(fbl != 0), static_cast(opa),
									static_cast(opb), static_cast(opc));
							}
						}
					}
				}
			}
		}
	}

	// Utility (convert/blit) render passes, one per color/depth load op.
	GET(m_utility_color_render_pass_load, rt_format, VK_FORMAT_UNDEFINED, false, VK_ATTACHMENT_LOAD_OP_LOAD,
		VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE);
	GET(m_utility_color_render_pass_clear, rt_format, VK_FORMAT_UNDEFINED, false, VK_ATTACHMENT_LOAD_OP_CLEAR,
		VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE);
	GET(m_utility_color_render_pass_discard, rt_format, VK_FORMAT_UNDEFINED, false, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
		VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE);
	GET(m_utility_depth_render_pass_load, VK_FORMAT_UNDEFINED, depth_format, false, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
		VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_DONT_CARE);
	GET(m_utility_depth_render_pass_clear, VK_FORMAT_UNDEFINED, depth_format, false, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
		VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_LOAD_OP_DONT_CARE);
	GET(m_utility_depth_render_pass_discard, VK_FORMAT_UNDEFINED, depth_format, false, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
		VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE);

	// Depth-only pass that loads depth/stencil data and clears stencil; used to
	// prime the stencil buffer for DATE draws.
	m_date_setup_render_pass = g_vulkan_context->GetRenderPass(VK_FORMAT_UNDEFINED, depth_format,
		VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_LOAD,
		VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE);
	if (m_date_setup_render_pass == VK_NULL_HANDLE)
		return false;

#undef GET

	return true;
}

bool GSDeviceVK::CompileConvertPipelines()
{
	// we may not have a swap chain if running in headless mode.
+ Vulkan::SwapChain* swapchain = static_cast(m_display->GetRenderSurface()); + if (swapchain) + { + m_swap_chain_render_pass = + g_vulkan_context->GetRenderPass(swapchain->GetSurfaceFormat().format, VK_FORMAT_UNDEFINED); + if (!m_swap_chain_render_pass) + return false; + } + + std::optional shader = Host::ReadResourceFileToString("shaders/vulkan/convert.glsl"); + if (!shader) + { + Host::ReportErrorAsync("GS", "Failed to read shaders/vulkan/convert.glsl."); + return false; + } + + VkShaderModule vs = GetUtilityVertexShader(*shader); + if (vs == VK_NULL_HANDLE) + return false; + ScopedGuard vs_guard([&vs]() { Vulkan::Util::SafeDestroyShaderModule(vs); }); + + Vulkan::GraphicsPipelineBuilder gpb; + AddUtilityVertexAttributes(gpb); + gpb.SetPipelineLayout(m_utility_pipeline_layout); + gpb.SetDynamicViewportAndScissorState(); + gpb.AddDynamicState(VK_DYNAMIC_STATE_BLEND_CONSTANTS); + gpb.SetNoCullRasterizationState(); + gpb.SetNoBlendingState(); + gpb.SetVertexShader(vs); + + for (ShaderConvert i = ShaderConvert::COPY; static_cast(i) < static_cast(ShaderConvert::Count); + i = static_cast(static_cast(i) + 1)) + { + const bool depth = IsDepthConvertShader(i); + const int index = static_cast(i); + + VkRenderPass rp; + switch (i) + { + case ShaderConvert::RGBA8_TO_16_BITS: + case ShaderConvert::FLOAT32_TO_16_BITS: + { + rp = g_vulkan_context->GetRenderPass(GSTextureVK::LookupNativeFormat(GSTexture::Format::UInt16), + VK_FORMAT_UNDEFINED, VK_ATTACHMENT_LOAD_OP_DONT_CARE); + } + break; + case ShaderConvert::FLOAT32_TO_32_BITS: + { + rp = g_vulkan_context->GetRenderPass(GSTextureVK::LookupNativeFormat(GSTexture::Format::UInt32), + VK_FORMAT_UNDEFINED, VK_ATTACHMENT_LOAD_OP_DONT_CARE); + } + break; + case ShaderConvert::DATM_0: + case ShaderConvert::DATM_1: + { + rp = m_date_setup_render_pass; + } + break; + default: + { + rp = g_vulkan_context->GetRenderPass( + GSTextureVK::LookupNativeFormat(depth ? 
GSTexture::Format::Invalid : GSTexture::Format::Color), + GSTextureVK::LookupNativeFormat( + depth ? GSTexture::Format::DepthStencil : GSTexture::Format::Invalid), + VK_ATTACHMENT_LOAD_OP_DONT_CARE); + } + break; + } + if (!rp) + return false; + + gpb.SetRenderPass(rp, 0); + + if (IsDATMConvertShader(i)) + { + const VkStencilOpState sos = { + VK_STENCIL_OP_KEEP, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS, 1u, 1u, 1u}; + gpb.SetDepthState(false, false, VK_COMPARE_OP_ALWAYS); + gpb.SetStencilState(true, sos, sos); + } + else + { + gpb.SetDepthState(depth, depth, VK_COMPARE_OP_ALWAYS); + gpb.SetNoStencilState(); + } + + VkShaderModule ps = GetUtilityFragmentShader(*shader, shaderName(i)); + if (ps == VK_NULL_HANDLE) + return false; + + ScopedGuard ps_guard([&ps]() { Vulkan::Util::SafeDestroyShaderModule(ps); }); + gpb.SetFragmentShader(ps); + + m_convert[index] = + gpb.Create(g_vulkan_context->GetDevice(), g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_convert[index]) + return false; + + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_convert[index], "Convert pipeline %d", i); + + if (swapchain && IsPresentConvertShader(i)) + { + // compile a present variant too + gpb.SetRenderPass(m_swap_chain_render_pass, 0); + m_present[index] = + gpb.Create(g_vulkan_context->GetDevice(), g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_present[index]) + return false; + + Vulkan::Util::SetObjectName( + g_vulkan_context->GetDevice(), m_present[index], "Convert pipeline %d (Present)", i); + } + + if (i == ShaderConvert::COPY) + { + // compile the variant for setting up hdr rendering + for (u32 ds = 0; ds < 2; ds++) + { + for (u32 fbl = 0; fbl < 2; fbl++) + { + pxAssert(!m_hdr_setup_pipelines[ds][fbl]); + + gpb.SetRenderPass(GetTFXRenderPass(true, ds != 0, true, DATE_RENDER_PASS_NONE, fbl != 0, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE), + 0); + m_hdr_setup_pipelines[ds][fbl] = + 
gpb.Create(g_vulkan_context->GetDevice(), g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_hdr_setup_pipelines[ds][fbl]) + return false; + + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_hdr_setup_pipelines[ds][fbl], + "HDR setup/copy pipeline (ds=%u, fbl=%u)", i, ds, fbl); + } + } + + // compile color copy pipelines + gpb.SetRenderPass(m_utility_color_render_pass_discard, 0); + for (u32 i = 0; i < 16; i++) + { + pxAssert(!m_color_copy[i]); + gpb.ClearBlendAttachments(); + gpb.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, + VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, static_cast(i)); + m_color_copy[i] = + gpb.Create(g_vulkan_context->GetDevice(), g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_color_copy[i]) + return false; + + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_color_copy[i], + "Color copy pipeline (r=%u, g=%u, b=%u, a=%u)", i & 1u, (i >> 1) & 1u, (i >> 2) & 1u, + (i >> 3) & 1u); + } + } + else if (i == ShaderConvert::MOD_256) + { + for (u32 ds = 0; ds < 2; ds++) + { + for (u32 fbl = 0; fbl < 2; fbl++) + { + pxAssert(!m_hdr_finish_pipelines[ds][fbl]); + + gpb.SetRenderPass(GetTFXRenderPass(true, ds != 0, false, DATE_RENDER_PASS_NONE, fbl != 0, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE), + 0); + m_hdr_finish_pipelines[ds][fbl] = + gpb.Create(g_vulkan_context->GetDevice(), g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_hdr_finish_pipelines[ds][fbl]) + return false; + + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_hdr_setup_pipelines[ds][fbl], + "HDR finish/copy pipeline (ds=%u, fbl=%u)", i, ds, fbl); + } + } + } + } + + // date image setup + for (u32 ds = 0; ds < 2; ds++) + { + for (u32 clear = 0; clear < 2; clear++) + { + m_date_image_setup_render_passes[ds][clear] = + g_vulkan_context->GetRenderPass(GSTextureVK::LookupNativeFormat(GSTexture::Format::Int32), + ds ? 
GSTextureVK::LookupNativeFormat(GSTexture::Format::DepthStencil) : VK_FORMAT_UNDEFINED, + VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE, + ds ? (clear ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD) : + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + ds ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE); + } + } + + for (u32 datm = 0; datm < 2; datm++) + { + VkShaderModule ps = + GetUtilityFragmentShader(*shader, datm ? "ps_stencil_image_init_1" : "ps_stencil_image_init_0"); + if (ps == VK_NULL_HANDLE) + return false; + + ScopedGuard ps_guard([&ps]() { Vulkan::Util::SafeDestroyShaderModule(ps); }); + gpb.SetPipelineLayout(m_utility_pipeline_layout); + gpb.SetFragmentShader(ps); + gpb.SetNoDepthTestState(); + gpb.SetNoStencilState(); + gpb.ClearBlendAttachments(); + gpb.SetBlendAttachment(0, true, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_MIN, + VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, VK_COLOR_COMPONENT_R_BIT); + + for (u32 ds = 0; ds < 2; ds++) + { + gpb.SetRenderPass(m_date_image_setup_render_passes[ds][0], 0); + m_date_image_setup_pipelines[ds][datm] = + gpb.Create(g_vulkan_context->GetDevice(), g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_date_image_setup_pipelines[ds][datm]) + return false; + + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_date_image_setup_pipelines[ds][datm], + "DATE image clear pipeline (ds=%u, datm=%u)", ds, datm); + } + } + + return true; +} + +bool GSDeviceVK::CompileInterlacePipelines() +{ + std::optional shader = Host::ReadResourceFileToString("shaders/vulkan/interlace.glsl"); + if (!shader) + { + Host::ReportErrorAsync("GS", "Failed to read shaders/vulkan/interlace.glsl."); + return false; + } + + VkRenderPass rp = g_vulkan_context->GetRenderPass( + GSTextureVK::LookupNativeFormat(GSTexture::Format::Color), VK_FORMAT_UNDEFINED, VK_ATTACHMENT_LOAD_OP_LOAD); + if (!rp) + return false; + + VkShaderModule vs = GetUtilityVertexShader(*shader); + if 
(vs == VK_NULL_HANDLE) + return false; + ScopedGuard vs_guard([&vs]() { Vulkan::Util::SafeDestroyShaderModule(vs); }); + + Vulkan::GraphicsPipelineBuilder gpb; + AddUtilityVertexAttributes(gpb); + gpb.SetPipelineLayout(m_utility_pipeline_layout); + gpb.SetDynamicViewportAndScissorState(); + gpb.AddDynamicState(VK_DYNAMIC_STATE_BLEND_CONSTANTS); + gpb.SetNoCullRasterizationState(); + gpb.SetNoDepthTestState(); + gpb.SetNoBlendingState(); + gpb.SetRenderPass(rp, 0); + gpb.SetVertexShader(vs); + + for (int i = 0; i < static_cast(m_interlace.size()); i++) + { + VkShaderModule ps = GetUtilityFragmentShader(*shader, StringUtil::StdStringFromFormat("ps_main%d", i).c_str()); + if (ps == VK_NULL_HANDLE) + return false; + + gpb.SetFragmentShader(ps); + + m_interlace[i] = + gpb.Create(g_vulkan_context->GetDevice(), g_vulkan_shader_cache->GetPipelineCache(true), false); + Vulkan::Util::SafeDestroyShaderModule(ps); + if (!m_interlace[i]) + return false; + + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_convert[i], "Interlace pipeline %d", i); + } + + return true; +} + +bool GSDeviceVK::CompileMergePipelines() +{ + std::optional shader = Host::ReadResourceFileToString("shaders/vulkan/merge.glsl"); + if (!shader) + { + Host::ReportErrorAsync("GS", "Failed to read shaders/vulkan/merge.glsl."); + return false; + } + + VkRenderPass rp = g_vulkan_context->GetRenderPass( + GSTextureVK::LookupNativeFormat(GSTexture::Format::Color), VK_FORMAT_UNDEFINED, VK_ATTACHMENT_LOAD_OP_LOAD); + if (!rp) + return false; + + VkShaderModule vs = GetUtilityVertexShader(*shader); + if (vs == VK_NULL_HANDLE) + return false; + ScopedGuard vs_guard([&vs]() { Vulkan::Util::SafeDestroyShaderModule(vs); }); + + Vulkan::GraphicsPipelineBuilder gpb; + AddUtilityVertexAttributes(gpb); + gpb.SetPipelineLayout(m_utility_pipeline_layout); + gpb.SetDynamicViewportAndScissorState(); + gpb.AddDynamicState(VK_DYNAMIC_STATE_BLEND_CONSTANTS); + gpb.SetNoCullRasterizationState(); + 
gpb.SetNoDepthTestState(); + gpb.SetRenderPass(rp, 0); + gpb.SetVertexShader(vs); + + for (int i = 0; i < static_cast(m_merge.size()); i++) + { + VkShaderModule ps = GetUtilityFragmentShader(*shader, StringUtil::StdStringFromFormat("ps_main%d", i).c_str()); + if (ps == VK_NULL_HANDLE) + return false; + + gpb.SetFragmentShader(ps); + gpb.SetBlendAttachment(0, true, VK_BLEND_FACTOR_SRC_ALPHA, VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD); + + m_merge[i] = gpb.Create(g_vulkan_context->GetDevice(), g_vulkan_shader_cache->GetPipelineCache(true), false); + Vulkan::Util::SafeDestroyShaderModule(ps); + if (!m_merge[i]) + return false; + + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_convert[i], "Merge pipeline %d", i); + } + + return true; +} + +bool GSDeviceVK::CheckStagingBufferSize(u32 required_size) +{ + if (m_readback_staging_buffer_size >= required_size) + return true; + + DestroyStagingBuffer(); + + const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0u, required_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_SHARING_MODE_EXCLUSIVE, 0u, nullptr}; + + VmaAllocationCreateInfo aci = {}; + aci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + + VmaAllocationInfo ai = {}; + VkResult res = vmaCreateBuffer( + g_vulkan_context->GetAllocator(), &bci, &aci, &m_readback_staging_buffer, &m_readback_staging_allocation, &ai); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: "); + return false; + } + + m_readback_staging_buffer_map = ai.pMappedData; + return true; +} + +void GSDeviceVK::DestroyStagingBuffer() +{ + // unmapped as part of the buffer destroy + m_readback_staging_buffer_map = nullptr; + m_readback_staging_buffer_size = 0; + + if (m_readback_staging_buffer != VK_NULL_HANDLE) + { + vmaDestroyBuffer(g_vulkan_context->GetAllocator(), m_readback_staging_buffer, m_readback_staging_allocation); + 
		m_readback_staging_buffer = VK_NULL_HANDLE;
		m_readback_staging_allocation = VK_NULL_HANDLE;
		m_readback_staging_buffer_size = 0;
	}
}

// Tears down every pipeline, shader module, sampler, layout and buffer owned
// by this device. Waits for the GPU (ExecuteCommandBuffer(true)) first so no
// destroyed object is still in flight.
void GSDeviceVK::DestroyResources()
{
	g_vulkan_context->ExecuteCommandBuffer(true);
	if (m_tfx_descriptor_sets[0] != VK_NULL_HANDLE)
		g_vulkan_context->FreeGlobalDescriptorSet(m_tfx_descriptor_sets[0]);

	for (auto& it : m_tfx_pipelines)
		Vulkan::Util::SafeDestroyPipeline(it.second);
	for (auto& it : m_tfx_fragment_shaders)
		Vulkan::Util::SafeDestroyShaderModule(it.second);
	for (auto& it : m_tfx_geometry_shaders)
		Vulkan::Util::SafeDestroyShaderModule(it.second);
	for (auto& it : m_tfx_vertex_shaders)
		Vulkan::Util::SafeDestroyShaderModule(it.second);
	for (VkPipeline& it : m_interlace)
		Vulkan::Util::SafeDestroyPipeline(it);
	for (VkPipeline& it : m_merge)
		Vulkan::Util::SafeDestroyPipeline(it);
	for (VkPipeline& it : m_color_copy)
		Vulkan::Util::SafeDestroyPipeline(it);
	for (VkPipeline& it : m_present)
		Vulkan::Util::SafeDestroyPipeline(it);
	for (VkPipeline& it : m_convert)
		Vulkan::Util::SafeDestroyPipeline(it);
	for (u32 ds = 0; ds < 2; ds++)
	{
		for (u32 fbl = 0; fbl < 2; fbl++)
		{
			Vulkan::Util::SafeDestroyPipeline(m_hdr_setup_pipelines[ds][fbl]);
			Vulkan::Util::SafeDestroyPipeline(m_hdr_finish_pipelines[ds][fbl]);
		}
	}
	for (u32 ds = 0; ds < 2; ds++)
	{
		for (u32 datm = 0; datm < 2; datm++)
		{
			Vulkan::Util::SafeDestroyPipeline(m_date_image_setup_pipelines[ds][datm]);
		}
	}
	for (auto& it : m_samplers)
		Vulkan::Util::SafeDestroySampler(it.second);

	// m_point_sampler/m_linear_sampler live in m_samplers (destroyed above), so
	// only the cached handles are reset here.
	m_linear_sampler = VK_NULL_HANDLE;
	m_point_sampler = VK_NULL_HANDLE;

	// Render pass handles are obtained from g_vulkan_context's cache; presumably
	// the context owns them, so only the handles are cleared — TODO confirm.
	m_utility_color_render_pass_load = VK_NULL_HANDLE;
	m_utility_color_render_pass_clear = VK_NULL_HANDLE;
	m_utility_color_render_pass_discard = VK_NULL_HANDLE;
	m_utility_depth_render_pass_load = VK_NULL_HANDLE;
	m_utility_depth_render_pass_clear = VK_NULL_HANDLE;
	m_utility_depth_render_pass_discard = VK_NULL_HANDLE;
	m_date_setup_render_pass = VK_NULL_HANDLE;
	m_swap_chain_render_pass = VK_NULL_HANDLE;

	DestroyStagingBuffer();

	m_fragment_uniform_stream_buffer.Destroy(false);
	m_vertex_uniform_stream_buffer.Destroy(false);
	m_index_stream_buffer.Destroy(false);
	m_vertex_stream_buffer.Destroy(false);

	Vulkan::Util::SafeDestroyPipelineLayout(m_tfx_pipeline_layout);
	Vulkan::Util::SafeDestroyDescriptorSetLayout(m_tfx_rt_texture_ds_layout);
	Vulkan::Util::SafeDestroyDescriptorSetLayout(m_tfx_sampler_ds_layout);
	Vulkan::Util::SafeDestroyDescriptorSetLayout(m_tfx_ubo_ds_layout);
	Vulkan::Util::SafeDestroyPipelineLayout(m_utility_pipeline_layout);
	Vulkan::Util::SafeDestroyDescriptorSetLayout(m_utility_ds_layout);

	m_null_texture.Destroy(false);
}

// Returns (compiling and caching on first use) the vertex shader module for
// the given VS selector. Note: a VK_NULL_HANDLE result is cached too, so a
// failing selector is not recompiled on every lookup.
VkShaderModule GSDeviceVK::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
{
	const auto it = m_tfx_vertex_shaders.find(sel.key);
	if (it != m_tfx_vertex_shaders.end())
		return it->second;

	// Build the full GLSL source: header + stage macro + VS_* defines + tfx.glsl.
	std::stringstream ss;
	AddShaderHeader(ss);
	AddShaderStageMacro(ss, true, false, false);
	AddMacro(ss, "VS_TME", sel.tme);
	AddMacro(ss, "VS_FST", sel.fst);
	AddMacro(ss, "VS_IIP", sel.iip);
	AddMacro(ss, "VS_POINT_SIZE", sel.point_size);
	if (sel.point_size)
		AddMacro(ss, "VS_POINT_SIZE_VALUE", m_upscale_multiplier);
	ss << m_tfx_source;

	VkShaderModule mod = g_vulkan_shader_cache->GetVertexShader(ss.str());
	if (mod)
		Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), mod, "TFX Vertex %08X", sel.key);

	m_tfx_vertex_shaders.emplace(sel.key, mod);
	return mod;
}

// Returns (compiling and caching on first use) the geometry shader module for
// the given GS selector; failures are cached as VK_NULL_HANDLE.
VkShaderModule GSDeviceVK::GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel)
{
	const auto it = m_tfx_geometry_shaders.find(sel.key);
	if (it != m_tfx_geometry_shaders.end())
		return it->second;

	std::stringstream ss;
	AddShaderHeader(ss);
	AddShaderStageMacro(ss, false, true, false);
	AddMacro(ss, "GS_IIP", sel.iip);
	// NOTE(review): the static_cast below appears to have lost its template
	// argument (likely an integer type for the topology) — confirm against the
	// original file.
	AddMacro(ss, "GS_PRIM", static_cast(sel.topology));
	AddMacro(ss, "GS_EXPAND", sel.expand);
	ss << m_tfx_source;
VkShaderModule mod = g_vulkan_shader_cache->GetGeometryShader(ss.str()); + if (mod) + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), mod, "TFX Geometry %08X", sel.key); + + m_tfx_geometry_shaders.emplace(sel.key, mod); + return mod; +} + +VkShaderModule GSDeviceVK::GetTFXFragmentShader(GSHWDrawConfig::PSSelector sel) +{ + const auto it = m_tfx_fragment_shaders.find(sel.key); + if (it != m_tfx_fragment_shaders.end()) + return it->second; + + std::stringstream ss; + AddShaderHeader(ss); + AddShaderStageMacro(ss, false, false, true); + AddMacro(ss, "PS_FST", sel.fst); + AddMacro(ss, "PS_WMS", sel.wms); + AddMacro(ss, "PS_WMT", sel.wmt); + AddMacro(ss, "PS_AEM_FMT", sel.aem_fmt); + AddMacro(ss, "PS_PAL_FMT", sel.pal_fmt); + AddMacro(ss, "PS_DFMT", sel.dfmt); + AddMacro(ss, "PS_DEPTH_FMT", sel.depth_fmt); + AddMacro(ss, "PS_CHANNEL_FETCH", sel.channel); + AddMacro(ss, "PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle); + AddMacro(ss, "PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle); + AddMacro(ss, "PS_TEX_IS_FB", sel.tex_is_fb); + AddMacro(ss, "PS_INVALID_TEX0", sel.invalid_tex0); + AddMacro(ss, "PS_AEM", sel.aem); + AddMacro(ss, "PS_TFX", sel.tfx); + AddMacro(ss, "PS_TCC", sel.tcc); + AddMacro(ss, "PS_ATST", sel.atst); + AddMacro(ss, "PS_FOG", sel.fog); + AddMacro(ss, "PS_CLR1", sel.clr1); + AddMacro(ss, "PS_FBA", sel.fba); + AddMacro(ss, "PS_LTF", sel.ltf); + AddMacro(ss, "PS_AUTOMATIC_LOD", sel.automatic_lod); + AddMacro(ss, "PS_MANUAL_LOD", sel.manual_lod); + AddMacro(ss, "PS_COLCLIP", sel.colclip); + AddMacro(ss, "PS_DATE", sel.date); + AddMacro(ss, "PS_TCOFFSETHACK", sel.tcoffsethack); + AddMacro(ss, "PS_POINT_SAMPLER", sel.point_sampler); + AddMacro(ss, "PS_BLEND_A", sel.blend_a); + AddMacro(ss, "PS_BLEND_B", sel.blend_b); + AddMacro(ss, "PS_BLEND_C", sel.blend_c); + AddMacro(ss, "PS_BLEND_D", sel.blend_d); + AddMacro(ss, "PS_ALPHA_CLAMP", sel.alpha_clamp); + AddMacro(ss, "PS_IIP", sel.iip); + AddMacro(ss, "PS_SHUFFLE", sel.shuffle); + AddMacro(ss, 
"PS_READ_BA", sel.read_ba); + AddMacro(ss, "PS_WRITE_RG", sel.write_rg); + AddMacro(ss, "PS_FBMASK", sel.fbmask); + AddMacro(ss, "PS_HDR", sel.hdr); + AddMacro(ss, "PS_DITHER", sel.dither); + AddMacro(ss, "PS_ZCLAMP", sel.zclamp); + AddMacro(ss, "PS_PABE", sel.pabe); + AddMacro(ss, "PS_SCANMSK", sel.scanmsk); + AddMacro(ss, "PS_SCALE_FACTOR", m_upscale_multiplier); + AddMacro(ss, "PS_TEX_IS_FB", sel.tex_is_fb); + ss << m_tfx_source; + + VkShaderModule mod = g_vulkan_shader_cache->GetFragmentShader(ss.str()); + if (mod) + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), mod, "TFX Fragment %" PRIX64, sel.key); + + m_tfx_fragment_shaders.emplace(sel.key, mod); + return mod; +} + +VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p) +{ + static constexpr std::array topology_lookup = {{ + VK_PRIMITIVE_TOPOLOGY_POINT_LIST, // Point + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, // Line + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, // Triangle + }}; + + VkShaderModule vs = GetTFXVertexShader(p.vs); + VkShaderModule gs = p.gs.expand ? GetTFXGeometryShader(p.gs) : VK_NULL_HANDLE; + VkShaderModule fs = GetTFXFragmentShader(p.ps); + if (vs == VK_NULL_HANDLE || (p.gs.expand && gs == VK_NULL_HANDLE) || fs == VK_NULL_HANDLE) + return VK_NULL_HANDLE; + + Vulkan::GraphicsPipelineBuilder gpb; + + // Common state + gpb.SetPipelineLayout(m_tfx_pipeline_layout); + if (p.ps.date >= 10) + { + // DATE image prepass + gpb.SetRenderPass(m_date_image_setup_render_passes[p.ds][0], 0); + } + else + { + gpb.SetRenderPass( + GetTFXRenderPass(p.rt, p.ds, p.ps.hdr, p.dss.date ? DATE_RENDER_PASS_STENCIL : DATE_RENDER_PASS_NONE, + p.feedback_loop, p.rt ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE, + p.ds ? 
			VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE),
			0);
	}
	gpb.SetPrimitiveTopology(topology_lookup[p.topology]);
	gpb.SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE);
	if (p.line_width)
		gpb.SetLineWidth(static_cast(m_upscale_multiplier));
	gpb.SetDynamicViewportAndScissorState();
	gpb.AddDynamicState(VK_DYNAMIC_STATE_BLEND_CONSTANTS);

	// Shaders
	gpb.SetVertexShader(vs);
	if (gs != VK_NULL_HANDLE)
		gpb.SetGeometryShader(gs);
	gpb.SetFragmentShader(fs);

	// IA — attribute layout must match the GSVertex structure byte-for-byte.
	gpb.AddVertexBuffer(0, sizeof(GSVertex));
	gpb.AddVertexAttribute(0, 0, VK_FORMAT_R32G32_SFLOAT, 0); // ST
	gpb.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UINT, 8); // RGBA
	gpb.AddVertexAttribute(2, 0, VK_FORMAT_R32_SFLOAT, 12); // Q
	gpb.AddVertexAttribute(3, 0, VK_FORMAT_R16G16_UINT, 16); // XY
	gpb.AddVertexAttribute(4, 0, VK_FORMAT_R32_UINT, 20); // Z
	gpb.AddVertexAttribute(5, 0, VK_FORMAT_R16G16_UINT, 24); // UV
	gpb.AddVertexAttribute(6, 0, VK_FORMAT_R8G8B8A8_UNORM, 28); // FOG

	// DepthStencil — indexed by the GS ZTST test mode.
	static const VkCompareOp ztst[] = {
		VK_COMPARE_OP_NEVER, VK_COMPARE_OP_ALWAYS, VK_COMPARE_OP_GREATER_OR_EQUAL, VK_COMPARE_OP_GREATER};
	gpb.SetDepthState((p.dss.ztst != ZTST_ALWAYS || p.dss.zwe), p.dss.zwe, ztst[p.dss.ztst]);
	if (p.dss.date)
	{
		// Stencil-based DATE: only fragments where stencil == 1 pass; date_one
		// zeroes the stencil on pass so each pixel is written at most once.
		const VkStencilOpState sos{VK_STENCIL_OP_KEEP, p.dss.date_one ? VK_STENCIL_OP_ZERO : VK_STENCIL_OP_KEEP,
			VK_STENCIL_OP_KEEP, VK_COMPARE_OP_EQUAL, 1u, 1u, 1u};
		gpb.SetStencilState(true, sos, sos);
	}

	// Blending
	if (p.ps.date >= 10)
	{
		// image DATE prepass
		gpb.SetBlendAttachment(0, true, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_MIN, VK_BLEND_FACTOR_ONE,
			VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, VK_COLOR_COMPONENT_R_BIT);
	}
	else if (p.bs.index > 0)
	{
		const HWBlend blend = GetBlend(p.bs.index);
		// NOTE(review): the static_cast calls below appear to have lost their
		// template arguments (presumably <VkBlendFactor>/<VkBlendOp>) — confirm
		// against the original file.
		gpb.SetBlendAttachment(0, true,
			(p.bs.is_accumulation || p.bs.is_mixed_hw_sw) ? VK_BLEND_FACTOR_ONE : static_cast(blend.src),
			p.bs.is_accumulation ? VK_BLEND_FACTOR_ONE : static_cast(blend.dst),
			static_cast(blend.op), VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, p.cms.wrgba);
	}
	else
	{
		gpb.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
			VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, p.cms.wrgba);
	}

	VkPipeline pipeline = gpb.Create(g_vulkan_context->GetDevice(), g_vulkan_shader_cache->GetPipelineCache(true));
	if (pipeline)
	{
		Vulkan::Util::SetObjectName(
			g_vulkan_context->GetDevice(), pipeline, "TFX Pipeline %08X/%08X/%" PRIX64, p.vs.key, p.gs.key, p.ps.key);
	}

	return pipeline;
}

// Cached lookup wrapper around CreateTFXPipeline(). Failures (VK_NULL_HANDLE)
// are cached as well, so a broken selector is not recompiled every draw.
VkPipeline GSDeviceVK::GetTFXPipeline(const PipelineSelector& p)
{
	const auto it = m_tfx_pipelines.find(p);
	if (it != m_tfx_pipelines.end())
		return it->second;

	VkPipeline pipeline = CreateTFXPipeline(p);
	m_tfx_pipelines.emplace(p, pipeline);
	return pipeline;
}

// Looks up/creates the pipeline for the selector, records it as current, then
// applies any pending TFX state. Returns false if the pipeline is unavailable.
bool GSDeviceVK::BindDrawPipeline(const PipelineSelector& p)
{
	VkPipeline pipeline = GetTFXPipeline(p);
	if (pipeline == VK_NULL_HANDLE)
		return false;

	SetPipeline(pipeline);

	return ApplyTFXState();
}

// Establishes the initial cached state after device creation: default stream
// buffers, null texture views for every slot, and the point/linear samplers.
void GSDeviceVK::InitializeState()
{
	m_vertex_buffer = m_vertex_stream_buffer.GetBuffer();
	m_vertex_buffer_offset = 0;
	m_index_buffer = m_index_stream_buffer.GetBuffer();
	m_index_buffer_offset = 0;
	m_index_type = VK_INDEX_TYPE_UINT32;
	m_current_framebuffer = VK_NULL_HANDLE;
	m_current_render_pass = VK_NULL_HANDLE;

	for (u32 i = 0; i < NUM_TFX_TEXTURES; i++)
		m_tfx_textures[i] = m_null_texture.GetView();

	m_utility_texture = m_null_texture.GetView();

	m_point_sampler = GetSampler(GSHWDrawConfig::SamplerSelector::Point());
	if (m_point_sampler)
		Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_point_sampler, "Point sampler");
	m_linear_sampler = GetSampler(GSHWDrawConfig::SamplerSelector::Linear());
	if (m_linear_sampler)
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_point_sampler, "Linear sampler"); + + for (u32 i = 0; i < NUM_TFX_SAMPLERS; i++) + { + m_tfx_sampler_sel[i] = GSHWDrawConfig::SamplerSelector::Point().key; + m_tfx_samplers[i] = m_point_sampler; + } + + InvalidateCachedState(); +} + +bool GSDeviceVK::CreatePersistentDescriptorSets() +{ + const VkDevice dev = g_vulkan_context->GetDevice(); + Vulkan::DescriptorSetUpdateBuilder dsub; + + // Allocate UBO descriptor sets for TFX. + m_tfx_descriptor_sets[0] = g_vulkan_context->AllocatePersistentDescriptorSet(m_tfx_ubo_ds_layout); + if (m_tfx_descriptor_sets[0] == VK_NULL_HANDLE) + return false; + dsub.AddBufferDescriptorWrite(m_tfx_descriptor_sets[0], 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + m_vertex_uniform_stream_buffer.GetBuffer(), 0, sizeof(GSHWDrawConfig::VSConstantBuffer)); + dsub.AddBufferDescriptorWrite(m_tfx_descriptor_sets[0], 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + m_fragment_uniform_stream_buffer.GetBuffer(), 0, sizeof(GSHWDrawConfig::PSConstantBuffer)); + dsub.Update(dev); + Vulkan::Util::SetObjectName(dev, m_tfx_descriptor_sets[0], "Persistent TFX UBO set"); + return true; +} + +void GSDeviceVK::ExecuteCommandBuffer(bool wait_for_completion) +{ + EndRenderPass(); + g_vulkan_context->ExecuteCommandBuffer(wait_for_completion); + InvalidateCachedState(); +} + +void GSDeviceVK::ExecuteCommandBuffer(bool wait_for_completion, const char* reason, ...) 
+{ + std::va_list ap; + va_start(ap, reason); + const std::string reason_str(StringUtil::StdStringFromFormatV(reason, ap)); + va_end(ap); + + Console.Warning("Vulkan: Executing command buffer due to '%s'", reason_str.c_str()); + ExecuteCommandBuffer(wait_for_completion); +} + +void GSDeviceVK::ExecuteCommandBufferAndRestartRenderPass(const char* reason) +{ + Console.Warning("Vulkan: Executing command buffer due to '%s'", reason); + + const VkRenderPass render_pass = m_current_render_pass; + const GSVector4i render_pass_area(m_current_render_pass_area); + EndRenderPass(); + g_vulkan_context->ExecuteCommandBuffer(false); + InvalidateCachedState(); + + if (render_pass != VK_NULL_HANDLE) + { + // rebind framebuffer + ApplyBaseState(m_dirty_flags, g_vulkan_context->GetCurrentCommandBuffer()); + m_dirty_flags &= ~DIRTY_BASE_STATE; + + // restart render pass + BeginRenderPass(render_pass, render_pass_area); + } +} + +void GSDeviceVK::InvalidateCachedState() +{ + m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS_DS | DIRTY_FLAG_TFX_RT_TEXTURE_DS | DIRTY_FLAG_TFX_DYNAMIC_OFFSETS | + DIRTY_FLAG_UTILITY_TEXTURE | DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_VERTEX_BUFFER | + DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_PIPELINE | + DIRTY_FLAG_VS_CONSTANT_BUFFER | DIRTY_FLAG_PS_CONSTANT_BUFFER; + if (m_vertex_buffer != VK_NULL_HANDLE) + m_dirty_flags |= DIRTY_FLAG_VERTEX_BUFFER; + if (m_index_buffer != VK_NULL_HANDLE) + m_dirty_flags |= DIRTY_FLAG_INDEX_BUFFER; + m_current_pipeline_layout = PipelineLayout::Undefined; + m_tfx_descriptor_sets[1] = VK_NULL_HANDLE; + m_tfx_descriptor_sets[2] = VK_NULL_HANDLE; + m_utility_descriptor_set = VK_NULL_HANDLE; +} + +void GSDeviceVK::SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset) +{ + if (m_vertex_buffer == buffer && m_vertex_buffer_offset == offset) + return; + + m_vertex_buffer = buffer; + m_vertex_buffer_offset = offset; + m_dirty_flags |= DIRTY_FLAG_VERTEX_BUFFER; +} + +void 
GSDeviceVK::SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type)
{
	if (m_index_buffer == buffer && m_index_buffer_offset == offset && m_index_type == type)
		return;

	m_index_buffer = buffer;
	m_index_buffer_offset = offset;
	m_index_type = type;
	m_dirty_flags |= DIRTY_FLAG_INDEX_BUFFER;
}

// Lazily records the blend constant; applied when the dirty flag is consumed.
void GSDeviceVK::SetBlendConstants(u8 color)
{
	if (m_blend_constant_color == color)
		return;

	m_blend_constant_color = color;
	m_dirty_flags |= DIRTY_FLAG_BLEND_CONSTANTS;
}

// Binds a texture view to TFX slot i. For slots 0-2, the texture is committed
// and transitioned to shader-read layout, ending the active render pass first
// if a transition is needed. A null sr binds the null texture view instead.
void GSDeviceVK::PSSetShaderResource(int i, GSTexture* sr)
{
	VkImageView view;
	if (sr)
	{
		// NOTE(review): the static_cast below appears to have lost its template
		// argument (presumably <GSTextureVK*>) — confirm against the original
		// file.
		GSTextureVK* vkTex = static_cast(sr);
		if (i < 3)
		{
			if (vkTex->GetTexture().GetLayout() != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && InRenderPass())
			{
				// Console.Warning("Ending render pass due to resource transition");
				EndRenderPass();
			}

			vkTex->CommitClear();
			vkTex->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
		}
		vkTex->last_frame_used = m_frame;
		view = vkTex->GetView();
	}
	else
	{
		view = m_null_texture.GetView();
	}

	if (m_tfx_textures[i] == view)
		return;

	m_tfx_textures[i] = view;

	// Slots 0/1 live in the sampler descriptor set; higher slots in the RT set.
	m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_SAMPLERS_DS : DIRTY_FLAG_TFX_RT_TEXTURE_DS;
}

// Updates the sampler for a TFX slot from the selector, cached by key.
void GSDeviceVK::PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel)
{
	if (m_tfx_sampler_sel[index] == sel.key)
		return;

	m_tfx_sampler_sel[index] = sel.key;
	m_tfx_samplers[index] = GetSampler(sel);
	m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS_DS;
}

// Binds the texture/sampler pair used by the utility (convert) pipelines,
// transitioning the texture to shader-read layout first.
void GSDeviceVK::SetUtilityTexture(GSTexture* tex, VkSampler sampler)
{
	VkImageView view;
	if (tex)
	{
		// NOTE(review): the static_cast below appears to have lost its template
		// argument (presumably <GSTextureVK*>) — confirm against the original
		// file.
		GSTextureVK* vkTex = static_cast(tex);
		vkTex->last_frame_used = m_frame;
		vkTex->CommitClear();
		vkTex->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
		view = vkTex->GetView();
	}
	else
	{
		view = m_null_texture.GetView();
	}

	if (m_utility_texture == view && m_utility_sampler == sampler)
		return;

	m_utility_texture = view;
	m_utility_sampler = sampler;
	m_dirty_flags |= DIRTY_FLAG_UTILITY_TEXTURE;
}

// Immediately records a push-constant update for the utility pipeline layout
// (shared vertex+fragment range starting at offset 0).
void GSDeviceVK::SetUtilityPushConstants(const void* data, u32 size)
{
	vkCmdPushConstants(g_vulkan_context->GetCurrentCommandBuffer(), m_utility_pipeline_layout,
		VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, size, data);
}

// Removes every cached binding of the texture before it is destroyed, and
// ends the current render pass if it is the bound render/depth target.
void GSDeviceVK::UnbindTexture(GSTextureVK* tex)
{
	const VkImageView view = tex->GetView();
	for (u32 i = 0; i < NUM_TFX_TEXTURES; i++)
	{
		if (m_tfx_textures[i] == view)
		{
			m_tfx_textures[i] = m_null_texture.GetView();
			m_dirty_flags |= (i < 2) ?
DIRTY_FLAG_TFX_SAMPLERS_DS : DIRTY_FLAG_TFX_RT_TEXTURE_DS; + } + } + if (m_utility_texture == view) + { + m_utility_texture = m_null_texture.GetView(); + m_dirty_flags |= DIRTY_FLAG_UTILITY_TEXTURE; + } + if (m_current_render_target == tex || m_current_depth_target == tex) + { + EndRenderPass(); + m_current_framebuffer = VK_NULL_HANDLE; + m_current_render_target = nullptr; + m_current_depth_target = nullptr; + } +} + +bool GSDeviceVK::InRenderPass() { return m_current_render_pass != VK_NULL_HANDLE; } + +void GSDeviceVK::BeginRenderPass(VkRenderPass rp, const GSVector4i& rect) +{ + if (m_current_render_pass != VK_NULL_HANDLE) + EndRenderPass(); + + m_current_render_pass = rp; + m_current_render_pass_area = rect; + + const VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, m_current_render_pass, + m_current_framebuffer, {{rect.x, rect.y}, {static_cast(rect.width()), static_cast(rect.height())}}, 0, + nullptr}; + + vkCmdBeginRenderPass(g_vulkan_context->GetCurrentCommandBuffer(), &begin_info, VK_SUBPASS_CONTENTS_INLINE); +} + +void GSDeviceVK::BeginClearRenderPass(VkRenderPass rp, const GSVector4i& rect, const VkClearValue* cv, u32 cv_count) +{ + if (m_current_render_pass != VK_NULL_HANDLE) + EndRenderPass(); + + m_current_render_pass = rp; + m_current_render_pass_area = rect; + + const VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, m_current_render_pass, + m_current_framebuffer, {{rect.x, rect.y}, {static_cast(rect.width()), static_cast(rect.height())}}, + cv_count, cv}; + + vkCmdBeginRenderPass(g_vulkan_context->GetCurrentCommandBuffer(), &begin_info, VK_SUBPASS_CONTENTS_INLINE); +} + +void GSDeviceVK::BeginClearRenderPass(VkRenderPass rp, const GSVector4i& rect, const GSVector4& clear_color) +{ + alignas(16) VkClearValue cv; + GSVector4::store((void*)cv.color.float32, clear_color); + BeginClearRenderPass(rp, rect, &cv, 1); +} + +void GSDeviceVK::BeginClearRenderPass(VkRenderPass rp, const 
GSVector4i& rect, float depth, u8 stencil) +{ + VkClearValue cv; + cv.depthStencil.depth = depth; + cv.depthStencil.stencil = stencil; + BeginClearRenderPass(rp, rect, &cv, 1); +} + +bool GSDeviceVK::CheckRenderPassArea(const GSVector4i& rect) +{ + if (!InRenderPass()) + return false; + + // TODO: Is there a way to do this with GSVector? + if (rect.left < m_current_render_pass_area.left || rect.top < m_current_render_pass_area.top || + rect.right > m_current_render_pass_area.right || rect.bottom > m_current_render_pass_area.bottom) + { +#ifdef _DEBUG + Console.Error("RP check failed: {%d,%d %dx%d} vs {%d,%d %dx%d}", rect.left, rect.top, rect.width(), + rect.height(), m_current_render_pass_area.left, m_current_render_pass_area.top, + m_current_render_pass_area.width(), m_current_render_pass_area.height()); +#endif + return false; + } + + return true; +} + +void GSDeviceVK::EndRenderPass() +{ + if (m_current_render_pass == VK_NULL_HANDLE) + return; + + vkCmdEndRenderPass(g_vulkan_context->GetCurrentCommandBuffer()); + + m_current_render_pass = VK_NULL_HANDLE; +} + +void GSDeviceVK::SetViewport(const VkViewport& viewport) +{ + if (std::memcmp(&viewport, &m_viewport, sizeof(VkViewport)) == 0) + return; + + std::memcpy(&m_viewport, &viewport, sizeof(VkViewport)); + m_dirty_flags |= DIRTY_FLAG_VIEWPORT; +} + +void GSDeviceVK::SetScissor(const GSVector4i& scissor) +{ + if (m_scissor.eq(scissor)) + return; + + m_scissor = scissor; + m_dirty_flags |= DIRTY_FLAG_SCISSOR; +} + +void GSDeviceVK::SetPipeline(VkPipeline pipeline) +{ + if (m_current_pipeline == pipeline) + return; + + m_current_pipeline = pipeline; + m_dirty_flags |= DIRTY_FLAG_PIPELINE; +} + +__ri void GSDeviceVK::ApplyBaseState(u32 flags, VkCommandBuffer cmdbuf) +{ + if (flags & DIRTY_FLAG_VERTEX_BUFFER) + vkCmdBindVertexBuffers(cmdbuf, 0, 1, &m_vertex_buffer, &m_vertex_buffer_offset); + + if (flags & DIRTY_FLAG_INDEX_BUFFER) + vkCmdBindIndexBuffer(cmdbuf, m_index_buffer, m_index_buffer_offset, m_index_type); + 
+ if (flags & DIRTY_FLAG_PIPELINE) + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline); + + if (flags & DIRTY_FLAG_VIEWPORT) + vkCmdSetViewport(cmdbuf, 0, 1, &m_viewport); + + if (flags & DIRTY_FLAG_SCISSOR) + { + const VkRect2D vscissor{ + {m_scissor.x, m_scissor.y}, {static_cast(m_scissor.width()), static_cast(m_scissor.height())}}; + vkCmdSetScissor(cmdbuf, 0, 1, &vscissor); + } + + if (flags & DIRTY_FLAG_BLEND_CONSTANTS) + { + const GSVector4 col(static_cast(m_blend_constant_color) / 128.0f); + vkCmdSetBlendConstants(cmdbuf, col.v); + } +} + +bool GSDeviceVK::ApplyTFXState(bool already_execed) +{ + if (m_current_pipeline_layout == PipelineLayout::TFX && m_dirty_flags == 0) + return true; + + const VkDevice dev = g_vulkan_context->GetDevice(); + const VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + u32 flags = m_dirty_flags; + m_dirty_flags &= ~DIRTY_TFX_STATE | DIRTY_CONSTANT_BUFFER_STATE; + + // do cbuffer first, because it's the most likely to cause an exec + if (flags & DIRTY_FLAG_VS_CONSTANT_BUFFER) + { + if (!m_vertex_uniform_stream_buffer.ReserveMemory( + sizeof(m_vs_cb_cache), g_vulkan_context->GetUniformBufferAlignment())) + { + if (already_execed) + { + Console.Error("Failed to reserve vertex uniform space"); + return false; + } + + ExecuteCommandBufferAndRestartRenderPass("Ran out of vertex uniform space"); + return ApplyTFXState(true); + } + + std::memcpy(m_vertex_uniform_stream_buffer.GetCurrentHostPointer(), &m_vs_cb_cache, sizeof(m_vs_cb_cache)); + m_tfx_dynamic_offsets[0] = m_vertex_uniform_stream_buffer.GetCurrentOffset(); + m_vertex_uniform_stream_buffer.CommitMemory(sizeof(m_vs_cb_cache)); + } + + if (flags & DIRTY_FLAG_PS_CONSTANT_BUFFER) + { + if (!m_fragment_uniform_stream_buffer.ReserveMemory( + sizeof(m_ps_cb_cache), g_vulkan_context->GetUniformBufferAlignment())) + { + if (already_execed) + { + Console.Error("Failed to reserve pixel uniform space"); + return false; + } + + 
ExecuteCommandBufferAndRestartRenderPass("Ran out of pixel uniform space"); + return ApplyTFXState(true); + } + + std::memcpy(m_fragment_uniform_stream_buffer.GetCurrentHostPointer(), &m_ps_cb_cache, sizeof(m_ps_cb_cache)); + m_tfx_dynamic_offsets[1] = m_fragment_uniform_stream_buffer.GetCurrentOffset(); + m_fragment_uniform_stream_buffer.CommitMemory(sizeof(m_ps_cb_cache)); + } + + Vulkan::DescriptorSetUpdateBuilder dsub; + + u32 dirty_descriptor_set_start = NUM_TFX_DESCRIPTOR_SETS; + u32 dirty_descriptor_set_end = 0; + + if (flags & DIRTY_FLAG_TFX_DYNAMIC_OFFSETS) + { + dirty_descriptor_set_start = 0; + } + + if ((flags & DIRTY_FLAG_TFX_SAMPLERS_DS) || m_tfx_descriptor_sets[1] == VK_NULL_HANDLE) + { + VkDescriptorSet ds = g_vulkan_context->AllocateDescriptorSet(m_tfx_sampler_ds_layout); + if (ds == VK_NULL_HANDLE) + { + if (already_execed) + { + Console.Error("Failed to allocate TFX texture descriptors"); + return false; + } + + ExecuteCommandBufferAndRestartRenderPass("Ran out of TFX texture descriptors"); + return ApplyTFXState(true); + } + + dsub.AddCombinedImageSamplerDescriptorWrites( + ds, 0, m_tfx_textures.data(), m_tfx_samplers.data(), NUM_TFX_SAMPLERS); + dsub.Update(dev); + + m_tfx_descriptor_sets[1] = ds; + dirty_descriptor_set_start = std::min(dirty_descriptor_set_start, 1u); + dirty_descriptor_set_end = 1u; + } + + if ((flags & DIRTY_FLAG_TFX_RT_TEXTURE_DS) || m_tfx_descriptor_sets[2] == VK_NULL_HANDLE) + { + VkDescriptorSet ds = g_vulkan_context->AllocateDescriptorSet(m_tfx_rt_texture_ds_layout); + if (ds == VK_NULL_HANDLE) + { + if (already_execed) + { + Console.Error("Failed to allocate TFX sampler descriptors"); + return false; + } + + ExecuteCommandBufferAndRestartRenderPass("Ran out of TFX sampler descriptors"); + return ApplyTFXState(true); + } + + dsub.AddImageDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_SAMPLERS]); + dsub.AddInputAttachmentDescriptorWrite(ds, 1, m_tfx_textures[NUM_TFX_SAMPLERS + 1]); + dsub.AddImageDescriptorWrite(ds, 2, 
m_tfx_textures[NUM_TFX_SAMPLERS + 2]); + dsub.Update(dev); + + m_tfx_descriptor_sets[2] = ds; + dirty_descriptor_set_start = std::min(dirty_descriptor_set_start, 2u); + dirty_descriptor_set_end = 2u; + } + + if (m_current_pipeline_layout != PipelineLayout::TFX) + { + m_current_pipeline_layout = PipelineLayout::TFX; + + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_tfx_pipeline_layout, 0, + NUM_TFX_DESCRIPTOR_SETS, m_tfx_descriptor_sets.data(), NUM_TFX_DYNAMIC_OFFSETS, + m_tfx_dynamic_offsets.data()); + } + else if (dirty_descriptor_set_start <= dirty_descriptor_set_end) + { + u32 dynamic_count; + const u32* dynamic_offsets; + if (dirty_descriptor_set_start == 0) + { + dynamic_count = NUM_TFX_DYNAMIC_OFFSETS; + dynamic_offsets = m_tfx_dynamic_offsets.data(); + } + else + { + dynamic_count = 0; + dynamic_offsets = nullptr; + } + + const u32 count = dirty_descriptor_set_end - dirty_descriptor_set_start + 1; + + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_tfx_pipeline_layout, + dirty_descriptor_set_start, count, &m_tfx_descriptor_sets[dirty_descriptor_set_start], dynamic_count, + dynamic_offsets); + } + + + ApplyBaseState(flags, cmdbuf); + return true; +} + +bool GSDeviceVK::ApplyUtilityState(bool already_execed) +{ + if (m_current_pipeline_layout == PipelineLayout::Utility && m_dirty_flags == 0) + return true; + + const VkDevice dev = g_vulkan_context->GetDevice(); + const VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + u32 flags = m_dirty_flags; + m_dirty_flags &= ~DIRTY_UTILITY_STATE; + + bool rebind = (m_current_pipeline_layout != PipelineLayout::Utility); + + if ((flags & DIRTY_FLAG_UTILITY_TEXTURE) || m_utility_descriptor_set == VK_NULL_HANDLE) + { + m_utility_descriptor_set = g_vulkan_context->AllocateDescriptorSet(m_utility_ds_layout); + if (m_utility_descriptor_set == VK_NULL_HANDLE) + { + if (already_execed) + { + Console.Error("Failed to allocate utility descriptors"); + return false; + } + 
+ ExecuteCommandBufferAndRestartRenderPass("Ran out of utility descriptors"); + return ApplyTFXState(true); + } + + Vulkan::DescriptorSetUpdateBuilder dsub; + dsub.AddCombinedImageSamplerDescriptorWrite(m_utility_descriptor_set, 0, m_utility_texture, m_utility_sampler); + dsub.Update(dev); + rebind = true; + } + + if (rebind) + { + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_utility_pipeline_layout, 0, 1, + &m_utility_descriptor_set, 0, nullptr); + } + + m_current_pipeline_layout = PipelineLayout::Utility; + + ApplyBaseState(flags, cmdbuf); + return true; +} + +void GSDeviceVK::SetVSConstantBuffer(const GSHWDrawConfig::VSConstantBuffer& cb) +{ + if (m_vs_cb_cache.Update(cb)) + m_dirty_flags |= DIRTY_FLAG_VS_CONSTANT_BUFFER; +} + +void GSDeviceVK::SetPSConstantBuffer(const GSHWDrawConfig::PSConstantBuffer& cb) +{ + if (m_ps_cb_cache.Update(cb)) + m_dirty_flags |= DIRTY_FLAG_PS_CONSTANT_BUFFER; +} + +static void ImageBarrier(GSTextureVK* tex, VkAccessFlags src_mask, VkAccessFlags dst_mask, VkImageLayout src_layout, + VkImageLayout dst_layout, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, bool pixel_local) +{ + const VkImageMemoryBarrier barrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, nullptr, src_mask, dst_mask, + src_layout, dst_layout, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, tex->GetTexture().GetImage(), + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}}; + + vkCmdPipelineBarrier(g_vulkan_context->GetCurrentCommandBuffer(), src_stage, dst_stage, + pixel_local ? 
VK_DEPENDENCY_BY_REGION_BIT : 0, 0, nullptr, 0, nullptr, 1, &barrier); +} + +static void ColorBufferBarrier(GSTexture* rt) +{ + const VkImageMemoryBarrier barrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, nullptr, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + static_cast(rt)->GetTexture().GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}}; + + vkCmdPipelineBarrier(g_vulkan_context->GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); +} + +void GSDeviceVK::SetupDATE(GSTexture* rt, GSTexture* ds, bool datm, const GSVector4i& bbox) +{ + GL_PUSH("SetupDATE {%d,%d} %dx%d", bbox.left, bbox.top, bbox.width(), bbox.height()); + + const GSVector2i size(ds->GetSize()); + const GSVector4 src = GSVector4(bbox) / GSVector4(size).xyxy(); + const GSVector4 dst = src * 2.0f - 1.0f; + const GSVertexPT1 vertices[] = { + {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, + {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, + {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, + {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, + }; + + // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows + EndRenderPass(); + SetUtilityTexture(rt, m_point_sampler); + OMSetRenderTargets(nullptr, ds, bbox, false); + IASetVertexBuffer(vertices, sizeof(vertices[0]), 4); + SetPipeline(m_convert[static_cast(datm ? 
ShaderConvert::DATM_1 : ShaderConvert::DATM_0)]); + BeginClearRenderPass(m_date_setup_render_pass, bbox, 0.0f, 0); + if (ApplyUtilityState()) + DrawPrimitive(); + + EndRenderPass(); +} + +GSTextureVK* GSDeviceVK::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config, PipelineSelector& pipe) +{ + // How this is done: + // - can't put a barrier for the image in the middle of the normal render pass, so that's out + // - so, instead of just filling the int texture with INT_MAX, we sample the RT and use -1 for failing values + // - then, instead of sampling the RT with DATE=1/2, we just do a min() without it, the -1 gets preserved + // - then, the DATE=3 draw is done as normal + GL_INS("Setup DATE Primitive ID Image for {%d,%d}-{%d,%d}", config.drawarea.left, config.drawarea.top, + config.drawarea.right, config.drawarea.bottom); + + const GSVector2i rtsize(config.rt->GetSize()); + GSTextureVK* image = + static_cast(CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Int32, false)); + if (!image) + return nullptr; + + EndRenderPass(); + + // setup the fill quad to prefill with existing alpha values + SetUtilityTexture(config.rt, m_point_sampler); + OMSetRenderTargets(image, config.ds, config.drawarea, false); + + // if the depth target has been cleared, we need to preserve that clear + const VkAttachmentLoadOp ds_load_op = GetLoadOpForTexture(static_cast(config.ds)); + + VkClearValue cv[2] = {}; + cv[0].color.float32[0] = static_cast(std::numeric_limits::max()); + if (ds_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + { + cv[1].depthStencil.depth = static_cast(config.ds)->GetClearDepth(); + cv[1].depthStencil.stencil = 1; + BeginClearRenderPass(m_date_image_setup_render_passes[pipe.ds][1], GSVector4i(0, 0, rtsize.x, rtsize.y), cv, 2); + } + else + { + BeginClearRenderPass(m_date_image_setup_render_passes[pipe.ds][0], config.drawarea, cv, 1); + } + + // draw the quad to prefill the image + const GSVector4 src = GSVector4(config.drawarea) / GSVector4(rtsize).xyxy(); + const 
GSVector4 dst = src * 2.0f - 1.0f; + const GSVertexPT1 vertices[] = { + {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, + {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, + {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, + {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, + }; + const VkPipeline pipeline = m_date_image_setup_pipelines[pipe.ds][config.datm]; + SetPipeline(pipeline); + IASetVertexBuffer(vertices, sizeof(vertices[0]), std::size(vertices)); + if (ApplyUtilityState()) + DrawPrimitive(); + + // image is now filled with either -1 or INT_MAX, so now we can do the prepass + IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts); + IASetIndexBuffer(config.indices, config.nindices); + + // cut down the configuration for the prepass, we don't need blending or any feedback loop + PipelineSelector init_pipe(m_pipeline_selector); + init_pipe.dss.zwe = false; + init_pipe.cms.wrgba = 0; + init_pipe.bs = {}; + init_pipe.ps.blend_a = init_pipe.ps.blend_b = init_pipe.ps.blend_c = init_pipe.ps.blend_d = false; + init_pipe.feedback_loop = false; + init_pipe.rt = true; + init_pipe.ps.date += 10; + if (BindDrawPipeline(init_pipe)) + DrawIndexedPrimitive(); + + // image is initialized/prepass is done, so finish up and get ready to do the "real" draw + EndRenderPass(); + + // .. 
by setting it to DATE=3 + pipe.ps.date = 3; + config.alpha_second_pass.ps.date = 3; + + // and bind the image to the primitive sampler + image->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + PSSetShaderResource(4, image); + return image; +} + +void GSDeviceVK::RenderHW(GSHWDrawConfig& config) +{ + // Destination Alpha Setup + DATE_RENDER_PASS DATE_rp = DATE_RENDER_PASS_NONE; + switch (config.destination_alpha) + { + case GSHWDrawConfig::DestinationAlphaMode::Off: // No setup + case GSHWDrawConfig::DestinationAlphaMode::Full: // No setup + case GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking: // Setup is done below + break; + case GSHWDrawConfig::DestinationAlphaMode::StencilOne: // setup is done below + { + // we only need to do the setup here if we don't have barriers, in which case do full DATE. + if (!m_features.texture_barrier) + { + SetupDATE(config.rt, config.ds, config.datm, config.drawarea); + DATE_rp = DATE_RENDER_PASS_STENCIL; + } + else + { + DATE_rp = DATE_RENDER_PASS_STENCIL_ONE; + } + } + break; + + case GSHWDrawConfig::DestinationAlphaMode::Stencil: + SetupDATE(config.rt, config.ds, config.datm, config.drawarea); + break; + } + + // stream buffer in first, in case we need to exec + SetVSConstantBuffer(config.cb_vs); + SetPSConstantBuffer(config.cb_ps); + + // figure out the pipeline + UpdateHWPipelineSelector(config); + + // bind textures before checking the render pass, in case we need to transition them + PipelineSelector& pipe = m_pipeline_selector; + if (config.tex) + { + PSSetShaderResource(0, config.tex); + PSSetShaderResource(1, config.pal); + PSSetSampler(0, config.sampler); + } + if (config.raw_tex) + PSSetShaderResource(2, config.raw_tex); + if (config.blend.is_constant) + SetBlendConstants(config.blend.factor); + + // Primitive ID tracking DATE setup. 
+ GSTextureVK* date_image = nullptr; + if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking) + { + pxAssert(pipe.feedback_loop); + date_image = SetupPrimitiveTrackingDATE(config, pipe); + if (!date_image) + { + Console.WriteLn("Failed to allocate DATE image, aborting draw."); + return; + } + } + + // Align the render area to 128x128, hopefully avoiding render pass restarts for small render area changes (e.g. Ratchet and Clank). + const int render_area_alignment = 128 * m_upscale_multiplier; + const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize()); + const GSVector4i render_area( + config.ps.hdr ? config.drawarea : + GSVector4i(Common::AlignDownPow2(config.scissor.left, render_area_alignment), + Common::AlignDownPow2(config.scissor.top, render_area_alignment), + std::min(Common::AlignUpPow2(config.scissor.right, render_area_alignment), rtsize.x), + std::min(Common::AlignUpPow2(config.scissor.bottom, render_area_alignment), rtsize.y))); + + GSTextureVK* draw_rt = static_cast(config.rt); + GSTextureVK* draw_ds = static_cast(config.ds); + GSTextureVK* hdr_rt = nullptr; + + // Switch to hdr target for colclip rendering + if (pipe.ps.hdr) + { + EndRenderPass(); + + GL_PUSH_("HDR Render Target Setup"); + hdr_rt = static_cast(CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::FloatColor)); + if (!hdr_rt) + { + Console.WriteLn("Failed to allocate HDR render target, aborting draw."); + if (date_image) + Recycle(date_image); + return; + } + + // propagate clear value through if the hdr render is the first + if (draw_rt->GetState() == GSTexture::State::Cleared) + { + hdr_rt->SetClearColor(draw_rt->GetClearColor()); + } + else + { + hdr_rt->SetState(GSTexture::State::Invalidated); + draw_rt->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + draw_rt = hdr_rt; + } + + const bool render_area_okay = + (!hdr_rt && DATE_rp != DATE_RENDER_PASS_STENCIL_ONE && CheckRenderPassArea(render_area)); + const 
bool same_framebuffer = + (InRenderPass() && m_current_render_target == draw_rt && m_current_depth_target == draw_ds); + + // Prefer keeping feedback loop enabled, that way we're not constantly restarting render passes + pipe.feedback_loop |= render_area_okay && same_framebuffer && CurrentFramebufferHasFeedbackLoop(); + OMSetRenderTargets(draw_rt, draw_ds, config.scissor, pipe.feedback_loop); + if (pipe.feedback_loop) + PSSetShaderResource(3, draw_rt); + + // Begin render pass if new target or out of the area. + if (!render_area_okay || !InRenderPass()) + { + const VkAttachmentLoadOp rt_op = GetLoadOpForTexture(draw_rt); + const VkAttachmentLoadOp ds_op = GetLoadOpForTexture(draw_ds); + const VkRenderPass rp = + GetTFXRenderPass(pipe.rt, pipe.ds, pipe.ps.hdr, DATE_rp, pipe.feedback_loop, rt_op, ds_op); + const bool is_clearing_rt = (rt_op == VK_ATTACHMENT_LOAD_OP_CLEAR || ds_op == VK_ATTACHMENT_LOAD_OP_CLEAR); + + if (is_clearing_rt || DATE_rp == DATE_RENDER_PASS_STENCIL_ONE) + { + // when we're clearing, we set the draw area to the whole fb, otherwise part of it will be undefined + alignas(16) VkClearValue cvs[2]; + u32 cv_count = 0; + if (draw_rt) + GSVector4::store(&cvs[cv_count++].color, draw_rt->GetClearColor()); + + // the only time the stencil value is used here is DATE_one, so setting it to 1 is fine (not used otherwise) + if (draw_ds) + cvs[cv_count++].depthStencil = {draw_ds->GetClearDepth(), 1}; + + BeginClearRenderPass( + rp, is_clearing_rt ? 
GSVector4i(0, 0, rtsize.x, rtsize.y) : render_area, cvs, cv_count); + } + else + { + BeginRenderPass(rp, render_area); + } + } + + // rt -> hdr blit if enabled + if (hdr_rt && config.rt->GetState() == GSTexture::State::Dirty) + { + SetUtilityTexture(static_cast(config.rt), m_point_sampler); + SetPipeline(m_hdr_setup_pipelines[pipe.ds][pipe.feedback_loop]); + + const GSVector4 sRect(GSVector4(render_area) / GSVector4(rtsize.x, rtsize.y).xyxy()); + DrawStretchRect(sRect, GSVector4(render_area), rtsize); + g_perfmon.Put(GSPerfMon::TextureCopies); + + GL_POP(); + } + + // VB/IB upload, if we did DATE setup and it's not HDR this has already been done + if (!date_image || hdr_rt) + { + IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts); + IASetIndexBuffer(config.indices, config.nindices); + } + + // now we can do the actual draw + if (BindDrawPipeline(pipe)) + SendHWDraw(config); + + // and the alpha pass + if (config.alpha_second_pass.enable) + { + // cbuffer will definitely be dirty if aref changes, no need to check it + if (config.cb_ps.FogColor_AREF.a != config.alpha_second_pass.ps_aref) + { + config.cb_ps.FogColor_AREF.a = config.alpha_second_pass.ps_aref; + SetPSConstantBuffer(config.cb_ps); + } + + pipe.ps = config.alpha_second_pass.ps; + pipe.cms = config.alpha_second_pass.colormask; + pipe.dss = config.alpha_second_pass.depth; + if (BindDrawPipeline(pipe)) + SendHWDraw(config); + } + + if (date_image) + Recycle(date_image); + + EndScene(); + + // now blit the hdr texture back to the original target + if (hdr_rt) + { + GL_INS("Blit HDR back to RT"); + + EndRenderPass(); + hdr_rt->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + OMSetRenderTargets(config.rt, draw_ds, config.scissor, pipe.feedback_loop); + BeginRenderPass( + GetTFXRenderPass(pipe.rt, pipe.ds, false, DATE_rp, pipe.feedback_loop, VK_ATTACHMENT_LOAD_OP_DONT_CARE, + pipe.ds ? 
VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE), + render_area); + + const GSVector4 sRect(GSVector4(render_area) / GSVector4(rtsize.x, rtsize.y).xyxy()); + SetPipeline(m_hdr_finish_pipelines[pipe.ds][pipe.feedback_loop]); + SetUtilityTexture(hdr_rt, m_point_sampler); + DrawStretchRect(sRect, GSVector4(render_area), rtsize); + g_perfmon.Put(GSPerfMon::TextureCopies); + + Recycle(hdr_rt); + } +} + +void GSDeviceVK::UpdateHWPipelineSelector(GSHWDrawConfig& config) +{ + m_pipeline_selector.vs.key = config.vs.key; + m_pipeline_selector.gs.key = config.gs.key; + m_pipeline_selector.ps.key = config.ps.key; + m_pipeline_selector.dss.key = config.depth.key; + m_pipeline_selector.bs.key = config.blend.key; + m_pipeline_selector.bs.factor = 0; // don't dupe states with different alpha values + m_pipeline_selector.cms.key = config.colormask.key; + m_pipeline_selector.topology = static_cast(config.topology); + m_pipeline_selector.rt = config.rt != nullptr; + m_pipeline_selector.ds = config.ds != nullptr; + m_pipeline_selector.line_width = config.line_expand; + m_pipeline_selector.feedback_loop = + config.ps.IsFeedbackLoop() || config.require_one_barrier || config.require_full_barrier || + config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking; + + // enable point size in the vertex shader if we're rendering points regardless of upscaling. 
+ m_pipeline_selector.vs.point_size |= (config.topology == GSHWDrawConfig::Topology::Point); +} + +void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config) +{ + if (config.drawlist) + { + GL_PUSH("Split the draw (SPRITE)"); + + for (u32 count = 0, p = 0, n = 0; n < static_cast(config.drawlist->size()); p += count, ++n) + { + count = (*config.drawlist)[n] * config.indices_per_prim; + ColorBufferBarrier(config.rt); + DrawIndexedPrimitive(p, count); + } + } + else if (config.require_full_barrier) + { + GL_PUSH("Split single draw in %d draw", config.nindices / config.indices_per_prim); + + for (u32 p = 0; p < config.nindices; p += config.indices_per_prim) + { + ColorBufferBarrier(config.rt); + DrawIndexedPrimitive(p, config.indices_per_prim); + } + } + else if (config.require_one_barrier) + { + ColorBufferBarrier(config.rt); + DrawIndexedPrimitive(); + } + else + { + // Don't need any barrier + DrawIndexedPrimitive(); + } +} diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h new file mode 100644 index 0000000000..e85b1be1c2 --- /dev/null +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -0,0 +1,387 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
+ */ + +#pragma once + +#include "GSTextureVK.h" +#include "GS/GSVector.h" +#include "GS/Renderers/Common/GSDevice.h" +#include "common/Vulkan/StreamBuffer.h" +#include "common/HashCombine.h" +#include "vk_mem_alloc.h" +#include +#include + +class GSDeviceVK final : public GSDevice +{ +public: + struct PipelineSelector + { + GSHWDrawConfig::VSSelector vs; + GSHWDrawConfig::GSSelector gs; + GSHWDrawConfig::PSSelector ps; + GSHWDrawConfig::DepthStencilSelector dss; + GSHWDrawConfig::BlendState bs; + GSHWDrawConfig::ColorMaskSelector cms; + + union + { + struct + { + u32 topology : 2; + u32 rt : 1; + u32 ds : 1; + u32 line_width : 1; + u32 feedback_loop : 1; + }; + + u32 key; + }; + + __fi bool operator==(const PipelineSelector& p) const + { + return vs.key == p.vs.key && gs.key == p.gs.key && ps.key == p.ps.key && dss.key == p.dss.key && + bs.key == p.bs.key && cms.key == p.cms.key && key == p.key; + } + __fi bool operator!=(const PipelineSelector& p) const + { + return vs.key != p.vs.key || gs.key != p.gs.key || ps.key != p.ps.key || dss.key != p.dss.key || + bs.key != p.bs.key || cms.key != p.cms.key || key != p.key; + } + + PipelineSelector() + : key(0) + { + } + }; + + struct PipelineSelectorHash + { + std::size_t operator()(const PipelineSelector& e) const noexcept + { + std::size_t hash = 0; + HashCombine(hash, e.vs.key, e.gs.key, e.ps.key, e.dss.key, e.cms.key, e.bs.key, e.key); + return hash; + } + }; + + enum : u32 + { + NUM_TFX_DESCRIPTOR_SETS = 3, + NUM_TFX_DYNAMIC_OFFSETS = 2, + NUM_TFX_SAMPLERS = 2, + NUM_TFX_RT_TEXTURES = 3, + NUM_TFX_TEXTURES = NUM_TFX_SAMPLERS + NUM_TFX_RT_TEXTURES, + NUM_CONVERT_TEXTURES = 1, + NUM_CONVERT_SAMPLERS = 1, + CONVERT_PUSH_CONSTANTS_SIZE = 32, + + VERTEX_BUFFER_SIZE = 32 * 1024 * 1024, + INDEX_BUFFER_SIZE = 16 * 1024 * 1024, + VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, + FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, + }; + enum DATE_RENDER_PASS : u32 + { + DATE_RENDER_PASS_NONE = 0, + DATE_RENDER_PASS_STENCIL = 1, + 
DATE_RENDER_PASS_STENCIL_ONE = 2, + }; + +private: + u32 m_upscale_multiplier = 1; + int m_mipmap = 0; + + VkDescriptorSetLayout m_utility_ds_layout = VK_NULL_HANDLE; + VkPipelineLayout m_utility_pipeline_layout = VK_NULL_HANDLE; + + VkDescriptorSetLayout m_tfx_ubo_ds_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout m_tfx_sampler_ds_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout m_tfx_rt_texture_ds_layout = VK_NULL_HANDLE; + VkPipelineLayout m_tfx_pipeline_layout = VK_NULL_HANDLE; + + Vulkan::StreamBuffer m_vertex_stream_buffer; + Vulkan::StreamBuffer m_index_stream_buffer; + Vulkan::StreamBuffer m_vertex_uniform_stream_buffer; + Vulkan::StreamBuffer m_fragment_uniform_stream_buffer; + + VmaAllocation m_readback_staging_allocation = VK_NULL_HANDLE; + VkBuffer m_readback_staging_buffer = VK_NULL_HANDLE; + void* m_readback_staging_buffer_map = nullptr; + u32 m_readback_staging_buffer_size = 0; + + VkSampler m_point_sampler = VK_NULL_HANDLE; + VkSampler m_linear_sampler = VK_NULL_HANDLE; + + std::unordered_map m_samplers; + + std::array(ShaderConvert::Count)> m_convert{}; + std::array(ShaderConvert::Count)> m_present{}; + std::array m_color_copy{}; + std::array m_merge{}; + std::array m_interlace{}; + VkPipeline m_hdr_setup_pipelines[2][2] = {}; // [depth][feedback_loop] + VkPipeline m_hdr_finish_pipelines[2][2] = {}; // [depth][feedback_loop] + VkRenderPass m_date_image_setup_render_passes[2][2] = {}; // [depth][clear] + VkPipeline m_date_image_setup_pipelines[2][2] = {}; // [depth][datm] + + std::unordered_map m_tfx_vertex_shaders; + std::unordered_map m_tfx_geometry_shaders; + std::unordered_map m_tfx_fragment_shaders; + std::unordered_map m_tfx_pipelines; + + VkRenderPass m_utility_color_render_pass_load = VK_NULL_HANDLE; + VkRenderPass m_utility_color_render_pass_clear = VK_NULL_HANDLE; + VkRenderPass m_utility_color_render_pass_discard = VK_NULL_HANDLE; + VkRenderPass m_utility_depth_render_pass_load = VK_NULL_HANDLE; + VkRenderPass 
m_utility_depth_render_pass_clear = VK_NULL_HANDLE; + VkRenderPass m_utility_depth_render_pass_discard = VK_NULL_HANDLE; + VkRenderPass m_date_setup_render_pass = VK_NULL_HANDLE; + VkRenderPass m_swap_chain_render_pass = VK_NULL_HANDLE; + + VkRenderPass m_tfx_render_pass[2][2][2][3][2][3][3] = {}; // [rt][ds][hdr][date][fbl][rt_op][ds_op] + + GSHWDrawConfig::VSConstantBuffer m_vs_cb_cache; + GSHWDrawConfig::PSConstantBuffer m_ps_cb_cache; + + std::string m_tfx_source; + + GSTexture* CreateSurface(GSTexture::Type type, int w, int h, GSTexture::Format format) override; + + void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, + const GSRegEXTBUF& EXTBUF, const GSVector4& c) final; + void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0) final; + + u16 ConvertBlendEnum(u16 generic) final; + + VkSampler GetSampler(GSHWDrawConfig::SamplerSelector ss); + + VkShaderModule GetTFXVertexShader(GSHWDrawConfig::VSSelector sel); + VkShaderModule GetTFXGeometryShader(GSHWDrawConfig::GSSelector sel); + VkShaderModule GetTFXFragmentShader(GSHWDrawConfig::PSSelector sel); + VkPipeline CreateTFXPipeline(const PipelineSelector& p); + VkPipeline GetTFXPipeline(const PipelineSelector& p); + + VkShaderModule GetUtilityVertexShader(const std::string& source, const char* replace_main); + VkShaderModule GetUtilityFragmentShader(const std::string& source, const char* replace_main); + + bool CheckFeatures(); + bool CreateNullTexture(); + bool CreateBuffers(); + bool CreatePipelineLayouts(); + bool CreateRenderPasses(); + + bool CompileConvertPipelines(); + bool CompileInterlacePipelines(); + bool CompileMergePipelines(); + + bool CheckStagingBufferSize(u32 required_size); + void DestroyStagingBuffer(); + + void DestroyResources(); + +public: + GSDeviceVK(); + ~GSDeviceVK() override; + + __fi static GSDeviceVK* GetInstance() { return static_cast(g_gs_device.get()); } + + __fi VkRenderPass 
GetTFXRenderPass(bool rt, bool ds, bool hdr, DATE_RENDER_PASS date, bool fbl, + VkAttachmentLoadOp rt_op, VkAttachmentLoadOp ds_op) const + { + return m_tfx_render_pass[rt][ds][hdr][date][fbl][rt_op][ds_op]; + } + __fi VkSampler GetPointSampler() const { return m_point_sampler; } + __fi VkSampler GetLinearSampler() const { return m_linear_sampler; } + + bool Create(HostDisplay* display) override; + void Destroy() override; + + void ResetAPIState() override; + void RestoreAPIState() override; + + void PushDebugGroup(const char* fmt, ...) override; + void PopDebugGroup() override; + void InsertDebugMessage(DebugMessageCategory category, const char* fmt, ...) override; + + void DrawPrimitive(); + void DrawIndexedPrimitive(); + void DrawIndexedPrimitive(int offset, int count); + + void ClearRenderTarget(GSTexture* t, const GSVector4& c) override; + void ClearRenderTarget(GSTexture* t, u32 c) override; + void InvalidateRenderTarget(GSTexture* t) override; + void ClearDepth(GSTexture* t) override; + void ClearStencil(GSTexture* t, u8 c) override; + + bool DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) override; + void DownloadTextureComplete() override; + + GSTexture* DrawForReadback(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0); + bool ReadbackTexture(GSTexture* src, const GSVector4i& rect, u32 level, GSTexture::GSMap* dst); + + void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) override; + + void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, + ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override; + void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, + bool green, bool blue, bool alpha) override; + + void BeginRenderPassForStretchRect(GSTextureVK* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc); + void DoStretchRect(GSTextureVK* sTex, const 
GSVector4& sRect, GSTextureVK* dTex, const GSVector4& dRect, + VkPipeline pipeline, bool linear); + void DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds); + + void BlitRect(GSTexture* sTex, const GSVector4i& sRect, u32 sLevel, GSTexture* dTex, const GSVector4i& dRect, + u32 dLevel, bool linear); + + void SetupDATE(GSTexture* rt, GSTexture* ds, bool datm, const GSVector4i& bbox); + GSTextureVK* SetupPrimitiveTrackingDATE(GSHWDrawConfig& config, PipelineSelector& pipe); + + void IASetVertexBuffer(const void* vertex, size_t stride, size_t count); + bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count); + void IAUnmapVertexBuffer(); + void IASetIndexBuffer(const void* index, size_t count); + + void PSSetShaderResource(int i, GSTexture* sr); + void PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel); + + void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, bool feedback_loop); + + void SetVSConstantBuffer(const GSHWDrawConfig::VSConstantBuffer& cb); + void SetPSConstantBuffer(const GSHWDrawConfig::PSConstantBuffer& cb); + bool BindDrawPipeline(const PipelineSelector& p); + + void RenderHW(GSHWDrawConfig& config) override; + void UpdateHWPipelineSelector(GSHWDrawConfig& config); + void SendHWDraw(const GSHWDrawConfig& config); + + ////////////////////////////////////////////////////////////////////////// + // Vulkan State + ////////////////////////////////////////////////////////////////////////// + +public: + __fi bool CurrentFramebufferHasFeedbackLoop() const { return m_current_framebuffer_has_feedback_loop; } + __fi VkFramebuffer GetCurrentFramebuffer() const { return m_current_framebuffer; } + + /// Ends any render pass, executes the command buffer, and invalidates cached state. 
+ void ExecuteCommandBuffer(bool wait_for_completion); + void ExecuteCommandBuffer(bool wait_for_completion, const char* reason, ...); + void ExecuteCommandBufferAndRestartRenderPass(const char* reason); + + /// Set dirty flags on everything to force re-bind at next draw time. + void InvalidateCachedState(); + + /// Binds all dirty state to the command buffer. + bool ApplyUtilityState(bool already_execed = false); + bool ApplyTFXState(bool already_execed = false); + + void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset); + void SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type); + void SetBlendConstants(u8 color); + + void SetUtilityTexture(GSTexture* tex, VkSampler sampler); + void SetUtilityPushConstants(const void* data, u32 size); + void UnbindTexture(GSTextureVK* tex); + + // Ends a render pass if we're currently in one. + // When Bind() is next called, the pass will be restarted. + // Calling this function is allowed even if a pass has not begun. + bool InRenderPass(); + void BeginRenderPass(VkRenderPass rp, const GSVector4i& rect); + void BeginClearRenderPass(VkRenderPass rp, const GSVector4i& rect, const VkClearValue* cv, u32 cv_count); + void BeginClearRenderPass(VkRenderPass rp, const GSVector4i& rect, const GSVector4& clear_color); + void BeginClearRenderPass(VkRenderPass rp, const GSVector4i& rect, float depth, u8 stencil); + bool CheckRenderPassArea(const GSVector4i& rect); + void EndRenderPass(); + + void SetViewport(const VkViewport& viewport); + void SetScissor(const GSVector4i& scissor); + void SetPipeline(VkPipeline pipeline); + +private: + enum DIRTY_FLAG : u32 + { + DIRTY_FLAG_TFX_SAMPLERS_DS = (1 << 0), + DIRTY_FLAG_TFX_RT_TEXTURE_DS = (1 << 1), + DIRTY_FLAG_TFX_DYNAMIC_OFFSETS = (1 << 2), + DIRTY_FLAG_UTILITY_TEXTURE = (1 << 3), + DIRTY_FLAG_BLEND_CONSTANTS = (1 << 4), + DIRTY_FLAG_VERTEX_BUFFER = (1 << 5), + DIRTY_FLAG_INDEX_BUFFER = (1 << 6), + DIRTY_FLAG_VIEWPORT = (1 << 7), + DIRTY_FLAG_SCISSOR = (1 << 8), + 
DIRTY_FLAG_PIPELINE = (1 << 9), + DIRTY_FLAG_VS_CONSTANT_BUFFER = (1 << 10), + DIRTY_FLAG_PS_CONSTANT_BUFFER = (1 << 11), + + DIRTY_BASE_STATE = DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE | + DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_BLEND_CONSTANTS, + DIRTY_TFX_STATE = DIRTY_BASE_STATE | DIRTY_FLAG_TFX_SAMPLERS_DS | DIRTY_FLAG_TFX_RT_TEXTURE_DS, + DIRTY_UTILITY_STATE = DIRTY_BASE_STATE | DIRTY_FLAG_UTILITY_TEXTURE, + DIRTY_CONSTANT_BUFFER_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER | DIRTY_FLAG_PS_CONSTANT_BUFFER, + }; + + enum class PipelineLayout + { + Undefined, + TFX, + Utility + }; + + void InitializeState(); + bool CreatePersistentDescriptorSets(); + + void ApplyBaseState(u32 flags, VkCommandBuffer cmdbuf); + + // Which bindings/state has to be updated before the next draw. + u32 m_dirty_flags = 0; + bool m_current_framebuffer_has_feedback_loop = false; + + // input assembly + VkBuffer m_vertex_buffer = VK_NULL_HANDLE; + VkDeviceSize m_vertex_buffer_offset = 0; + VkBuffer m_index_buffer = VK_NULL_HANDLE; + VkDeviceSize m_index_buffer_offset = 0; + VkIndexType m_index_type = VK_INDEX_TYPE_UINT16; + + GSTextureVK* m_current_render_target = nullptr; + GSTextureVK* m_current_depth_target = nullptr; + VkFramebuffer m_current_framebuffer = VK_NULL_HANDLE; + VkRenderPass m_current_render_pass = VK_NULL_HANDLE; + GSVector4i m_current_render_pass_area = GSVector4i::zero(); + + VkViewport m_viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + GSVector4i m_scissor = GSVector4i::zero(); + u8 m_blend_constant_color = 0; + + std::array m_tfx_textures{}; + std::array m_tfx_samplers{}; + std::array m_tfx_sampler_sel{}; + std::array m_tfx_descriptor_sets{}; + std::array m_tfx_dynamic_offsets{}; + + VkImageView m_utility_texture = VK_NULL_HANDLE; + VkSampler m_utility_sampler = VK_NULL_HANDLE; + VkDescriptorSet m_utility_descriptor_set = VK_NULL_HANDLE; + + PipelineLayout m_current_pipeline_layout = PipelineLayout::Undefined; + VkPipeline 
m_current_pipeline = VK_NULL_HANDLE; + + Vulkan::Texture m_null_texture; + + // current pipeline selector - we save this in the struct to avoid re-zeroing it every draw + PipelineSelector m_pipeline_selector = {}; +}; diff --git a/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp new file mode 100644 index 0000000000..fd4897ed86 --- /dev/null +++ b/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp @@ -0,0 +1,367 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
+ */ + +#include "PrecompiledHeader.h" +#include "GSDeviceVK.h" +#include "GSTextureVK.h" +#include "common/Assertions.h" +#include "common/Vulkan/Builders.h" +#include "common/Vulkan/Context.h" +#include "common/Vulkan/Util.h" +#include "GS/GSPerfMon.h" +#include "GS/GSGL.h" + +GSTextureVK::GSTextureVK(Type type, Format format, Vulkan::Texture texture) + : m_texture(std::move(texture)) +{ + m_type = type; + m_format = format; + m_size.x = m_texture.GetWidth(); + m_size.y = m_texture.GetHeight(); + m_mipmap_levels = m_texture.GetLevels(); +} + +GSTextureVK::~GSTextureVK() +{ + GSDeviceVK::GetInstance()->UnbindTexture(this); + + if (m_type == Type::RenderTarget || m_type == Type::DepthStencil) + { + for (const auto& [other_tex, fb, feedback] : m_framebuffers) + { + if (other_tex) + { + for (auto other_it = other_tex->m_framebuffers.begin(); other_it != other_tex->m_framebuffers.end(); + ++other_it) + { + if (std::get<0>(*other_it) == this) + { + other_tex->m_framebuffers.erase(other_it); + break; + } + } + } + + g_vulkan_context->DeferFramebufferDestruction(fb); + } + } +} + +std::unique_ptr GSTextureVK::Create(Type type, u32 width, u32 height, u32 levels, Format format) +{ + switch (type) + { + case Type::Texture: + { + VkImageUsageFlags usage = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + const VkComponentMapping* swizzle = nullptr; + if (format == Format::UNorm8) + { + // for r8 textures, swizzle it across all 4 components. the shaders depend on it being in alpha.. why? 
+ static constexpr VkComponentMapping r8_swizzle = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R}; + swizzle = &r8_swizzle; + } + + Vulkan::Texture texture; + if (!texture.Create(width, height, levels, 1, LookupNativeFormat(format), VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, usage, swizzle)) + { + return {}; + } + + Vulkan::Util::SetObjectName( + g_vulkan_context->GetDevice(), texture.GetImage(), "%ux%u texture", width, height); + return std::make_unique(type, format, std::move(texture)); + } + + case Type::RenderTarget: + { + pxAssert(levels == 1); + + Vulkan::Texture texture; + if (!texture.Create(width, height, levels, 1, LookupNativeFormat(format), VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + { + return {}; + } + + Vulkan::Util::SetObjectName( + g_vulkan_context->GetDevice(), texture.GetImage(), "%ux%u render target", width, height); + return std::make_unique(type, format, std::move(texture)); + } + + case Type::DepthStencil: + { + pxAssert(levels == 1); + + Vulkan::Texture texture; + if (!texture.Create(width, height, levels, 1, LookupNativeFormat(format), VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT)) + { + return {}; + } + + Vulkan::Util::SetObjectName( + g_vulkan_context->GetDevice(), texture.GetImage(), "%ux%u depth stencil", width, height); + return std::make_unique(type, format, std::move(texture)); + } + + default: + return {}; + } +} + +VkFormat GSTextureVK::LookupNativeFormat(Format format) +{ + static constexpr std::array(GSTexture::Format::Int32) + 1> s_format_mapping = {{ + 
VK_FORMAT_UNDEFINED, // Invalid + VK_FORMAT_R8G8B8A8_UNORM, // Color + VK_FORMAT_R32G32B32A32_SFLOAT, // FloatColor + VK_FORMAT_D32_SFLOAT_S8_UINT, // DepthStencil + VK_FORMAT_R8_UNORM, // UNorm8 + VK_FORMAT_R16_UINT, // UInt16 + VK_FORMAT_R32_UINT, // UInt32 + VK_FORMAT_R32_SFLOAT, // Int32 + }}; + + + return s_format_mapping[static_cast(format)]; +} + +void* GSTextureVK::GetNativeHandle() const { return const_cast(&m_texture); } + +VkCommandBuffer GSTextureVK::GetCommandBufferForUpdate() +{ + const u32 frame = GSDeviceVK::GetInstance()->GetFrameNumber(); + if (m_type != Type::Texture || frame == last_frame_used) + { + // Console.WriteLn("Texture update within frame, can't use do beforehand"); + GSDeviceVK::GetInstance()->EndRenderPass(); + return g_vulkan_context->GetCurrentCommandBuffer(); + } + + return g_vulkan_context->GetCurrentInitCommandBuffer(); +} + +bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int layer) +{ + if (m_type != Type::Texture || static_cast(layer) >= m_texture.GetLevels()) + return false; + + g_perfmon.Put(GSPerfMon::TextureUploads, 1); + + const u32 width = r.width(); + const u32 height = r.height(); + const u32 row_length = static_cast(pitch) / Vulkan::Util::GetTexelSize(m_texture.GetFormat()); + const u32 required_size = static_cast(pitch) * height; + Vulkan::StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer(); + if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity())) + { + GSDeviceVK::GetInstance()->ExecuteCommandBuffer( + false, "While waiting for %u bytes in texture upload buffer", required_size); + if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity())) + pxFailRel("Failed to reserve texture upload memory"); + } + + const u32 buffer_offset = buffer.GetCurrentOffset(); + std::memcpy(buffer.GetCurrentHostPointer(), data, required_size); + buffer.CommitMemory(required_size); + + const VkCommandBuffer cmdbuf = 
GetCommandBufferForUpdate(); + GL_PUSH("GSTextureVK::Update({%d,%d} %dx%d Lvl:%u", r.x, r.y, r.width(), r.height(), layer); + + // first time the texture is used? don't leave it undefined + if (m_texture.GetLayout() == VK_IMAGE_LAYOUT_UNDEFINED) + m_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + m_texture.UpdateFromBuffer( + cmdbuf, layer, 0, r.x, r.y, width, height, row_length, buffer.GetBuffer(), buffer_offset); + m_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + m_needs_mipmaps_generated |= (layer == 0); + return true; +} + +bool GSTextureVK::Map(GSMap& m, const GSVector4i* r, int layer) +{ + if (m_type == Type::Texture && static_cast(layer) < m_texture.GetLevels()) + { + // map for writing + m_map_area = r ? *r : GSVector4i(0, 0, m_texture.GetWidth(), m_texture.GetHeight()); + m_map_level = layer; + + m.pitch = m_map_area.width() * Vulkan::Util::GetTexelSize(m_texture.GetFormat()); + + const u32 required_size = m.pitch * m_map_area.height(); + Vulkan::StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer(); + if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity())) + { + GSDeviceVK::GetInstance()->ExecuteCommandBuffer( + false, "While waiting for %u bytes in texture upload buffer", required_size); + if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity())) + pxFailRel("Failed to reserve texture upload memory"); + } + + m.bits = static_cast(buffer.GetCurrentHostPointer()); + return true; + } + else + { + // not available + return false; + } +} + +void GSTextureVK::Unmap() +{ + if (m_type == Type::Texture) + { + pxAssert(m_map_level < m_texture.GetLevels()); + g_perfmon.Put(GSPerfMon::TextureUploads, 1); + + // TODO: non-tightly-packed formats + const u32 width = static_cast(m_map_area.width()); + const u32 height = static_cast(m_map_area.height()); + const u32 required_size = width * height * 
Vulkan::Util::GetTexelSize(m_texture.GetFormat()); + Vulkan::StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer(); + const u32 buffer_offset = buffer.GetCurrentOffset(); + buffer.CommitMemory(required_size); + + const VkCommandBuffer cmdbuf = GetCommandBufferForUpdate(); + GL_PUSH("GSTextureVK::Update({%d,%d} %dx%d Lvl:%u", m_map_area.x, m_map_area.y, m_map_area.width(), + m_map_area.height(), m_map_level); + + // first time the texture is used? don't leave it undefined + if (m_texture.GetLayout() == VK_IMAGE_LAYOUT_UNDEFINED) + m_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + m_texture.UpdateFromBuffer(cmdbuf, m_map_level, 0, m_map_area.x, m_map_area.y, width, height, width, + buffer.GetBuffer(), buffer_offset); + m_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + m_needs_mipmaps_generated |= (m_map_level == 0); + } +} + +void GSTextureVK::GenerateMipmap() +{ + const VkCommandBuffer cmdbuf = GetCommandBufferForUpdate(); + + for (int dst_level = 1; dst_level < m_mipmap_levels; dst_level++) + { + const int src_level = dst_level - 1; + const int src_width = std::max(m_size.x >> src_level, 1); + const int src_height = std::max(m_size.y >> src_level, 1); + const int dst_width = std::max(m_size.x >> dst_level, 1); + const int dst_height = std::max(m_size.y >> dst_level, 1); + + m_texture.TransitionSubresourcesToLayout( + cmdbuf, src_level, 1, 0, 1, m_texture.GetLayout(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_texture.TransitionSubresourcesToLayout( + cmdbuf, dst_level, 1, 0, 1, m_texture.GetLayout(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + const VkImageBlit blit = { + {VK_IMAGE_ASPECT_COLOR_BIT, static_cast(src_level), 0u, 1u}, // srcSubresource + {{0, 0, 0}, {src_width, src_height, 1}}, // srcOffsets + {VK_IMAGE_ASPECT_COLOR_BIT, static_cast(dst_level), 0u, 1u}, // dstSubresource + {{0, 0, 0}, {dst_width, dst_height, 1}} // dstOffsets + }; + + vkCmdBlitImage(cmdbuf, 
m_texture.GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_texture.GetImage(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_LINEAR); + + m_texture.TransitionSubresourcesToLayout( + cmdbuf, src_level, 1, 0, 1, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_texture.GetLayout()); + m_texture.TransitionSubresourcesToLayout( + cmdbuf, dst_level, 1, 0, 1, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, m_texture.GetLayout()); + } +} + +void GSTextureVK::TransitionToLayout(VkImageLayout layout) +{ + m_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), layout); +} + +void GSTextureVK::CommitClear() +{ + if (m_state != GSTexture::State::Cleared) + return; + + GSDeviceVK::GetInstance()->EndRenderPass(); + + TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + if (IsDepthStencil()) + { + const VkClearDepthStencilValue cv = { m_clear_value.depth }; + const VkImageSubresourceRange srr = { VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u }; + vkCmdClearDepthStencilImage(g_vulkan_context->GetCurrentCommandBuffer(), m_texture.GetImage(), m_texture.GetLayout(), &cv, 1, &srr); + } + else + { + alignas(16) VkClearColorValue cv; + GSVector4::store(cv.float32, GetClearColor()); + const VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}; + vkCmdClearColorImage(g_vulkan_context->GetCurrentCommandBuffer(), m_texture.GetImage(), m_texture.GetLayout(), &cv, 1, &srr); + } + + SetState(GSTexture::State::Dirty); +} + +VkFramebuffer GSTextureVK::GetFramebuffer(bool feedback_loop) { return GetLinkedFramebuffer(nullptr, feedback_loop); } + +VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop) +{ + pxAssertRel(m_type != Type::Texture, "Texture is a render target"); + + for (const auto& [other_tex, fb, other_feedback_loop] : m_framebuffers) + { + if (other_tex == depth_texture && other_feedback_loop == feedback_loop) + return fb; + } + + VkRenderPass rp = g_vulkan_context->GetRenderPass( + (m_type != 
GSTexture::Type::DepthStencil) ? GetNativeFormat() : VK_FORMAT_UNDEFINED, + (m_type != GSTexture::Type::DepthStencil) ? + (depth_texture ? depth_texture->GetNativeFormat() : VK_FORMAT_UNDEFINED) : + GetNativeFormat(), + VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_LOAD, + VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, feedback_loop); + if (!rp) + return VK_NULL_HANDLE; + + Vulkan::FramebufferBuilder fbb; + fbb.AddAttachment(m_texture.GetView()); + if (depth_texture) + fbb.AddAttachment(depth_texture->m_texture.GetView()); + fbb.SetSize(m_texture.GetWidth(), m_texture.GetHeight(), m_texture.GetLayers()); + fbb.SetRenderPass(rp); + + VkFramebuffer fb = fbb.Create(g_vulkan_context->GetDevice()); + if (!fb) + return VK_NULL_HANDLE; + + m_framebuffers.emplace_back(depth_texture, fb, feedback_loop); + if (depth_texture) + depth_texture->m_framebuffers.emplace_back(this, fb, feedback_loop); + return fb; +} diff --git a/pcsx2/GS/Renderers/Vulkan/GSTextureVK.h b/pcsx2/GS/Renderers/Vulkan/GSTextureVK.h new file mode 100644 index 0000000000..85b63fe281 --- /dev/null +++ b/pcsx2/GS/Renderers/Vulkan/GSTextureVK.h @@ -0,0 +1,85 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
+ */ + +#pragma once + +#include "GS.h" +#include "GS/Renderers/Common/GSTexture.h" +#include "common/Vulkan/Texture.h" + +class GSTextureVK final : public GSTexture +{ +public: + union alignas(16) ClearValue + { + float color[4]; + float depth; + }; + +public: + GSTextureVK(Type type, Format format, Vulkan::Texture texture); + ~GSTextureVK() override; + + static std::unique_ptr Create(Type type, u32 width, u32 height, u32 levels, Format format); + static VkFormat LookupNativeFormat(Format format); + + __fi Vulkan::Texture& GetTexture() { return m_texture; } + __fi VkFormat GetNativeFormat() const { return m_texture.GetFormat(); } + __fi VkImage GetImage() const { return m_texture.GetImage(); } + __fi VkImageView GetView() const { return m_texture.GetView(); } + __fi VkImageLayout GetLayout() const { return m_texture.GetLayout(); } + __fi GSVector4 GetClearColor() const { return GSVector4::load(m_clear_value.color); } + __fi float GetClearDepth() const { return m_clear_value.depth; } + + void* GetNativeHandle() const override; + + bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) override; + bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) override; + void Unmap() override; + void GenerateMipmap() override; + + void TransitionToLayout(VkImageLayout layout); + void CommitClear(); + + /// Framebuffers are lazily allocated. 
+ VkFramebuffer GetFramebuffer(bool feedback_loop); + + VkFramebuffer GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop); + + __fi void SetClearColor(const GSVector4& color) + { + m_state = State::Cleared; + GSVector4::store(m_clear_value.color, color); + } + __fi void SetClearDepth(float depth) + { + m_state = State::Cleared; + m_clear_value.depth = depth; + } + +private: + VkCommandBuffer GetCommandBufferForUpdate(); + + Vulkan::Texture m_texture; + + ClearValue m_clear_value = {}; + + GSVector4i m_map_area = GSVector4i::zero(); + u32 m_map_level = UINT32_MAX; + + // linked framebuffer is combined with depth texture + // list of color textures this depth texture is linked to or vice versa + std::vector> m_framebuffers; +}; diff --git a/pcsx2/GS/Window/GSwxDialog.cpp b/pcsx2/GS/Window/GSwxDialog.cpp index 5a8b1293ca..8c0f6970c6 100644 --- a/pcsx2/GS/Window/GSwxDialog.cpp +++ b/pcsx2/GS/Window/GSwxDialog.cpp @@ -23,6 +23,10 @@ #include "Frontend/D3D11HostDisplay.h" #endif +#ifdef ENABLE_VULKAN +#include "Frontend/VulkanHostDisplay.h" +#endif + using namespace GSSettingsDialog; namespace @@ -288,7 +292,7 @@ RendererTab::RendererTab(wxWindow* parent) m_ui.addComboBoxAndLabel(hw_choice_grid, "Anisotropic Filtering:", "MaxAnisotropy", &theApp.m_gs_max_anisotropy, IDC_AFCOMBO, aniso_prereq); m_ui.addComboBoxAndLabel(hw_choice_grid, "Dithering (PgDn):", "dithering_ps2", &theApp.m_gs_dithering, IDC_DITHERING, hw_prereq); - m_ui.addComboBoxAndLabel(hw_choice_grid, "Mipmapping (Insert):", "mipmap_hw", &theApp.m_gs_hw_mipmapping, IDC_MIPMAP_HW, hw_prereq); + m_ui.addComboBoxAndLabel(hw_choice_grid, "Mipmapping:", "mipmap_hw", &theApp.m_gs_hw_mipmapping, IDC_MIPMAP_HW, hw_prereq); m_ui.addComboBoxAndLabel(hw_choice_grid, "CRC Hack Level:", "crc_hack_level", &theApp.m_gs_crc_level, IDC_CRC_LEVEL, hw_prereq); m_blend_mode = m_ui.addComboBoxAndLabel(hw_choice_grid, "Blending Accuracy:", "accurate_blending_unit", &theApp.m_gs_acc_blend_level, 
IDC_ACCURATE_BLEND_UNIT, hw_prereq); @@ -349,7 +353,7 @@ HacksTab::HacksTab(wxWindow* parent) auto hw_prereq = [this]{ return m_is_hardware; }; auto* hacks_check_box = m_ui.addCheckBox(tab_box.inner, "Enable HW Hacks", "UserHacks", -1, hw_prereq); auto hacks_prereq = [this, hacks_check_box]{ return m_is_hardware && hacks_check_box->GetValue(); }; - auto gl_hacks_prereq = [this, hacks_check_box]{ return m_is_ogl_hw && hacks_check_box->GetValue(); }; + auto gl_or_vk_hacks_prereq = [this, hacks_check_box]{ return (m_is_ogl_hw || m_is_vk_hw) && hacks_check_box->GetValue(); }; auto upscale_hacks_prereq = [this, hacks_check_box]{ return !m_is_native_res && hacks_check_box->GetValue(); }; PaddedBoxSizer rend_hacks_box (wxVERTICAL, this, "Renderer Hacks"); @@ -379,7 +383,7 @@ HacksTab::HacksTab(wxWindow* parent) // Renderer Hacks: m_ui.addComboBoxAndLabel(rend_hack_choice_grid, "Half Screen Fix:", "UserHacks_Half_Bottom_Override", &theApp.m_gs_generic_list, IDC_HALF_SCREEN_TS, hacks_prereq); - m_ui.addComboBoxAndLabel(rend_hack_choice_grid, "Trilinear Filtering:", "UserHacks_TriFilter", &theApp.m_gs_trifilter, IDC_TRI_FILTER, gl_hacks_prereq); + m_ui.addComboBoxAndLabel(rend_hack_choice_grid, "Trilinear Filtering:", "UserHacks_TriFilter", &theApp.m_gs_trifilter, IDC_TRI_FILTER, gl_or_vk_hacks_prereq); // Skipdraw Range add_label(this, rend_hack_choice_grid, "Skipdraw Range:", IDC_SKIPDRAWHACK); @@ -470,10 +474,12 @@ PostTab::PostTab(wxWindow* parent) PaddedBoxSizer tab_box(wxVERTICAL); PaddedBoxSizer shader_box(wxVERTICAL, this, "Custom Shader"); - m_ui.addCheckBox(shader_box.inner, "Texture Filtering of Display", "linear_present", IDC_LINEAR_PRESENT); - m_ui.addCheckBox(shader_box.inner, "FXAA Shader (PgUp)", "fxaa", IDC_FXAA); + auto not_vk_prereq = [this] { return !m_is_vk_hw; }; - CheckboxPrereq shade_boost_check(m_ui.addCheckBox(shader_box.inner, "Enable Shade Boost", "ShadeBoost", IDC_SHADEBOOST)); + m_ui.addCheckBox(shader_box.inner, "Texture Filtering of Display", 
"linear_present", IDC_LINEAR_PRESENT); + m_ui.addCheckBox(shader_box.inner, "FXAA Shader (PgUp)", "fxaa", IDC_FXAA, not_vk_prereq); + + CheckboxPrereq shade_boost_check(m_ui.addCheckBox(shader_box.inner, "Enable Shade Boost", "ShadeBoost", IDC_SHADEBOOST, not_vk_prereq)); PaddedBoxSizer shade_boost_box(wxVERTICAL, this, "Shade Boost"); auto* shader_boost_grid = new wxFlexGridSizer(2, space, space); @@ -486,7 +492,7 @@ PostTab::PostTab(wxWindow* parent) shade_boost_box->Add(shader_boost_grid, wxSizerFlags().Expand()); shader_box->Add(shade_boost_box.outer, wxSizerFlags().Expand()); - CheckboxPrereq ext_shader_check(m_ui.addCheckBox(shader_box.inner, "Enable External Shader", "shaderfx", IDC_SHADER_FX)); + CheckboxPrereq ext_shader_check(m_ui.addCheckBox(shader_box.inner, "Enable External Shader", "shaderfx", IDC_SHADER_FX, not_vk_prereq)); PaddedBoxSizer ext_shader_box(wxVERTICAL, this, "External Shader (Home)"); auto* ext_shader_grid = new wxFlexGridSizer(2, space, space); @@ -687,6 +693,11 @@ void Dialog::RendererChange() case GSRendererType::DX11: list = D3D11HostDisplay::StaticGetAdapterAndModeList(); break; +#endif +#ifdef ENABLE_VULKAN + case GSRendererType::VK: + list = VulkanHostDisplay::StaticGetAdapterAndModeList(nullptr); + break; #endif default: break; @@ -763,13 +774,15 @@ void Dialog::Update() else { // cross-tab dependencies yay - const bool is_hw = renderer == GSRendererType::OGL || renderer == GSRendererType::DX11; + const bool is_hw = renderer == GSRendererType::OGL || renderer == GSRendererType::DX11 || renderer == GSRendererType::VK; const bool is_upscale = m_renderer_panel->m_internal_resolution->GetSelection() != 0; m_hacks_panel->m_is_native_res = !is_hw || !is_upscale; m_hacks_panel->m_is_hardware = is_hw; m_hacks_panel->m_is_ogl_hw = renderer == GSRendererType::OGL; + m_hacks_panel->m_is_vk_hw = renderer == GSRendererType::VK; m_renderer_panel->m_is_hardware = is_hw; m_renderer_panel->m_is_native_res = !is_hw || !is_upscale; + 
m_post_panel->m_is_vk_hw = renderer == GSRendererType::VK; m_debug_panel->m_is_ogl_hw = renderer == GSRendererType::OGL; m_ui.Update(); diff --git a/pcsx2/GS/Window/GSwxDialog.h b/pcsx2/GS/Window/GSwxDialog.h index d4f4905241..d4056a0468 100644 --- a/pcsx2/GS/Window/GSwxDialog.h +++ b/pcsx2/GS/Window/GSwxDialog.h @@ -127,6 +127,7 @@ namespace GSSettingsDialog bool m_is_hardware = false; bool m_is_native_res = false; bool m_is_ogl_hw = false; + bool m_is_vk_hw = false; HacksTab(wxWindow* parent); void Load() { m_ui.Load(); } @@ -161,6 +162,7 @@ namespace GSSettingsDialog { public: GSUIElementHolder m_ui; + bool m_is_vk_hw = false; PostTab(wxWindow* parent); void Load() { m_ui.Load(); } diff --git a/pcsx2/HostDisplay.cpp b/pcsx2/HostDisplay.cpp index 474afbeddf..20de7a6fef 100644 --- a/pcsx2/HostDisplay.cpp +++ b/pcsx2/HostDisplay.cpp @@ -119,12 +119,12 @@ std::unique_ptr HostDisplay::CreateDisplayForAPI(RenderAPI api) switch (api) { #ifdef _WIN32 - case HostDisplay::RenderAPI::D3D11: + case RenderAPI::D3D11: return std::make_unique(); #endif - case HostDisplay::RenderAPI::OpenGL: - case HostDisplay::RenderAPI::OpenGLES: + case RenderAPI::OpenGL: + case RenderAPI::OpenGLES: return std::make_unique(); #ifdef ENABLE_VULKAN diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index c387638ab6..008e7b421a 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -266,6 +266,7 @@ const char* Pcsx2Config::GSOptions::GetRendererName(GSRendererType type) case GSRendererType::Auto: return "Auto"; case GSRendererType::DX11: return "Direct3D 11"; case GSRendererType::OGL: return "OpenGL"; + case GSRendererType::VK: return "Vulkan"; case GSRendererType::SW: return "Software"; case GSRendererType::Null: return "Null"; default: return ""; @@ -580,7 +581,7 @@ void Pcsx2Config::GSOptions::MaskUserHacks() bool Pcsx2Config::GSOptions::UseHardwareRenderer() const { - return (Renderer == GSRendererType::DX11 || Renderer == GSRendererType::OGL); + return (Renderer == 
GSRendererType::DX11 || Renderer == GSRendererType::OGL || Renderer == GSRendererType::VK); } VsyncMode Pcsx2Config::GetEffectiveVsyncMode() const diff --git a/pcsx2/gui/Dialogs/GSDumpDialog.cpp b/pcsx2/gui/Dialogs/GSDumpDialog.cpp index bfb6b318d2..93ee5095cf 100644 --- a/pcsx2/gui/Dialogs/GSDumpDialog.cpp +++ b/pcsx2/gui/Dialogs/GSDumpDialog.cpp @@ -87,6 +87,9 @@ Dialogs::GSDumpDialog::GSDumpDialog(wxWindow* parent) rdoverrides.Add("None"); rdoverrides.Add(Pcsx2Config::GSOptions::GetRendererName(GSRendererType::SW)); rdoverrides.Add(Pcsx2Config::GSOptions::GetRendererName(GSRendererType::OGL)); +#ifdef ENABLE_VULKAN + rdoverrides.Add(Pcsx2Config::GSOptions::GetRendererName(GSRendererType::VK)); +#endif #if defined(_WIN32) rdoverrides.Add(Pcsx2Config::GSOptions::GetRendererName(GSRendererType::DX11)); #endif @@ -719,10 +722,18 @@ void Dialogs::GSDumpDialog::GSThread::ExecuteTaskInThread() case 2: renderer = GSRendererType::OGL; break; - // D3D11 +#ifdef ENABLE_VULKAN + // Vulkan case 3: + renderer = GSRendererType::VK; + break; +#endif +#ifdef _WIN32 + // D3D11 + case 4: // WIN32 implies WITH_VULKAN so this is okay renderer = GSRendererType::DX11; break; +#endif default: break; } diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index ea24f7be9e..7d04e142dc 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -313,6 +313,8 @@ + + @@ -755,6 +757,8 @@ + + @@ -1179,4 +1183,4 @@ - + \ No newline at end of file diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index f5549f5749..c9590d4abc 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -280,6 +280,9 @@ {65f21394-287a-471b-a0c1-d8f0d5d95a81} + + {98829aa9-bb81-4564-bd6e-128719c4faa0} + @@ -1670,6 +1673,12 @@ Host + + System\Ps2\GS\Renderers\Vulkan + + + System\Ps2\GS\Renderers\Vulkan + @@ -2788,6 +2797,13 @@ Host + + System\Ps2\GS\Renderers\Vulkan + + + System\Ps2\GS\Renderers\Vulkan + +