forked from ShuriZma/suyu
1
0
Fork 0

Presentation: Only use FP16 in scaling shaders on supported devices in Vulkan

This commit is contained in:
Marshall Mohror 2021-10-22 23:09:29 -05:00 committed by Fernando Sahmkow
parent a39e867c73
commit dcc5b4f6b0
15 changed files with 199 additions and 115 deletions

View File

@ -25,6 +25,9 @@ if (ARCHITECTURE_x86_64)
add_subdirectory(dynarmic) add_subdirectory(dynarmic)
endif() endif()
add_library(ffx-fsr INTERFACE)
target_include_directories(ffx-fsr INTERFACE FidelityFX-FSR/ffx-fsr)
# getopt # getopt
if (MSVC) if (MSVC)
add_subdirectory(getopt) add_subdirectory(getopt)

View File

@ -237,6 +237,7 @@ target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
target_link_libraries(video_core PRIVATE ffx-fsr)
add_dependencies(video_core host_shaders) add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)

View File

@ -18,16 +18,20 @@ set(SHADER_FILES
opengl_copy_bc4.comp opengl_copy_bc4.comp
opengl_present.frag opengl_present.frag
opengl_present.vert opengl_present.vert
opengl_present_scaleforce.frag
pitch_unswizzle.comp pitch_unswizzle.comp
present_scaleforce.frag
present_bicubic.frag present_bicubic.frag
present_gaussian.frag present_gaussian.frag
vulkan_blit_color_float.frag vulkan_blit_color_float.frag
vulkan_blit_depth_stencil.frag vulkan_blit_depth_stencil.frag
vulkan_fidelityfx_fsr_easu.comp vulkan_fidelityfx_fsr_easu_fp16.comp
vulkan_fidelityfx_fsr_rcas.comp vulkan_fidelityfx_fsr_easu_fp32.comp
vulkan_fidelityfx_fsr_rcas_fp16.comp
vulkan_fidelityfx_fsr_rcas_fp32.comp
vulkan_present.frag vulkan_present.frag
vulkan_present.vert vulkan_present.vert
vulkan_present_scaleforce_fp16.frag
vulkan_present_scaleforce_fp32.frag
vulkan_quad_indexed.comp vulkan_quad_indexed.comp
vulkan_uint8.comp vulkan_uint8.comp
) )

View File

@ -28,80 +28,82 @@
// THE SOFTWARE. // THE SOFTWARE.
layout( push_constant ) uniform constants { layout( push_constant ) uniform constants {
u32vec2 input_size; uvec4 Const0;
uvec4 Const1;
uvec4 Const2;
uvec4 Const3;
}; };
uvec4 Const0; layout(set=0,binding=0) uniform sampler2D InputTexture;
uvec4 Const1; layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
uvec4 Const2;
uvec4 Const3;
#define A_GPU 1 #define A_GPU 1
#define A_GLSL 1 #define A_GLSL 1
#define A_HALF
#include "ffx_a.h" #ifndef YUZU_USE_FP16
#include "ffx_a.h"
f16vec4 LinearToSRGB(f16vec4 linear) { #if USE_EASU
bvec4 selector = greaterThan(linear, f16vec4(0.00313066844250063)); #define FSR_EASU_F 1
f16vec4 low = linear * float16_t(12.92); AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
f16vec4 high = float16_t(1.055) * pow(linear, f16vec4(1 / 2.4)) - float16_t(0.055); AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
return mix(low, high, selector); AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
} #endif
#if USE_RCAS
#define FSR_RCAS_F 1
AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
#endif
#else
#define A_HALF
#include "ffx_a.h"
f16vec4 SRGBToLinear(f16vec4 srgb) { #if USE_EASU
bvec4 selector = greaterThan(srgb, f16vec4(0.0404482362771082)); #define FSR_EASU_H 1
f16vec4 low = srgb * float16_t(1.0 / 12.92); AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
f16vec4 high = pow((srgb + float16_t(0.055)) * float16_t(1.0 / 1.055), f16vec4(2.4)); AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
return mix(low, high, selector); AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
} #endif
#if USE_RCAS
#if USE_EASU #define FSR_RCAS_H 1
#define FSR_EASU_H 1 AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
f16vec4 FsrEasuRH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 0)); return res; } void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){}
f16vec4 FsrEasuGH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 1)); return res; } #endif
f16vec4 FsrEasuBH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 2)); return res; }
#endif
#if USE_RCAS
#define FSR_RCAS_H 1
f16vec4 FsrRcasLoadH(ASW2 p) { return f16vec4(texelFetch(InputTexture, ASU2(p), 0)); }
void FsrRcasInputH(inout float16_t r, inout float16_t g, inout float16_t b) {}
#endif #endif
#include "ffx_fsr1.h" #include "ffx_fsr1.h"
void CurrFilter(u32vec2 pos) { void CurrFilter(AU2 pos) {
// For debugging
#if USE_BILINEAR #if USE_BILINEAR
vec2 pp = (vec2(pos) * vec2_AU2(Const0.xy) + vec2_AU2(Const0.zw)) * vec2_AU2(Const1.xy) + vec2(0.5, -0.5) * vec2_AU2(Const1.zw); AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
imageStore(OutputTexture, ivec2(pos), textureLod(InputTexture, pp, 0.0)); imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0));
#endif #endif
#if USE_EASU #if USE_EASU
f16vec3 c; #ifndef YUZU_USE_FP16
FsrEasuH(c, pos, Const0, Const1, Const2, Const3); AF3 c;
imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
#else
AH3 c;
FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
#endif
#endif #endif
#if USE_RCAS #if USE_RCAS
f16vec3 c; #ifndef YUZU_USE_FP16
FsrRcasH(c.r, c.g, c.b, pos, Const0); AF3 c;
imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); FsrRcasF(c.r, c.g, c.b, pos, Const0);
imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
#else
AH3 c;
FsrRcasH(c.r, c.g, c.b, pos, Const0);
imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
#endif
#endif #endif
} }
layout(local_size_x=64) in; layout(local_size_x=64) in;
void main() { void main() {
#if USE_EASU || USE_BILINEAR
vec2 ires = vec2(input_size);
vec2 tres = textureSize(InputTexture, 0);
vec2 ores = imageSize(OutputTexture);
FsrEasuCon(Const0, Const1, Const2, Const3, ires.x, ires.y, tres.x, tres.y, ores.x, ores.y);
#endif
#if USE_RCAS
FsrRcasCon(Const0, 0.25f);
#endif
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern. // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
CurrFilter(gxy); CurrFilter(gxy);

View File

@ -22,11 +22,29 @@
// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce // Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce
#version 460 //! #version 460
#extension GL_ARB_separate_shader_objects : enable
#ifdef YUZU_USE_FP16
#extension GL_AMD_gpu_shader_half_float : enable #extension GL_AMD_gpu_shader_half_float : enable
#extension GL_NV_gpu_shader5 : enable #extension GL_NV_gpu_shader5 : enable
#define lfloat float16_t
#define lvec2 f16vec2
#define lvec3 f16vec3
#define lvec4 f16vec4
#else
#define lfloat float
#define lvec2 vec2
#define lvec3 vec3
#define lvec4 vec4
#endif
#ifdef VULKAN #ifdef VULKAN
#define BINDING_COLOR_TEXTURE 1 #define BINDING_COLOR_TEXTURE 1
@ -45,25 +63,25 @@ layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
const bool ignore_alpha = true; const bool ignore_alpha = true;
float16_t ColorDist1(f16vec4 a, f16vec4 b) { lfloat ColorDist1(lvec4 a, lvec4 b) {
// https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion
const f16vec3 K = f16vec3(0.2627, 0.6780, 0.0593); const lvec3 K = lvec3(0.2627, 0.6780, 0.0593);
const float16_t scaleB = float16_t(0.5) / (float16_t(1.0) - K.b); const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b);
const float16_t scaleR = float16_t(0.5) / (float16_t(1.0) - K.r); const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r);
f16vec4 diff = a - b; lvec4 diff = a - b;
float16_t Y = dot(diff.rgb, K); lfloat Y = dot(diff.rgb, K);
float16_t Cb = scaleB * (diff.b - Y); lfloat Cb = scaleB * (diff.b - Y);
float16_t Cr = scaleR * (diff.r - Y); lfloat Cr = scaleR * (diff.r - Y);
f16vec3 YCbCr = f16vec3(Y, Cb, Cr); lvec3 YCbCr = lvec3(Y, Cb, Cr);
float16_t d = length(YCbCr); lfloat d = length(YCbCr);
if (ignore_alpha) { if (ignore_alpha) {
return d; return d;
} }
return sqrt(a.a * b.a * d * d + diff.a * diff.a); return sqrt(a.a * b.a * d * d + diff.a * diff.a);
} }
f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) { lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) {
return f16vec4( return lvec4(
ColorDist1(ref, A), ColorDist1(ref, A),
ColorDist1(ref, B), ColorDist1(ref, B),
ColorDist1(ref, C), ColorDist1(ref, C),
@ -72,36 +90,36 @@ f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) {
} }
vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { vec4 Scaleforce(sampler2D tex, vec2 tex_coord) {
f16vec4 bl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1)));
f16vec4 bc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, -1))); lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1)));
f16vec4 br = f16vec4(textureOffset(tex, tex_coord, ivec2(1, -1))); lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1)));
f16vec4 cl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0)));
f16vec4 cc = f16vec4(texture(tex, tex_coord)); lvec4 cc = lvec4(texture(tex, tex_coord));
f16vec4 cr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 0))); lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0)));
f16vec4 tl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1)));
f16vec4 tc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, 1))); lvec4 tc = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1)));
f16vec4 tr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 1))); lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1)));
f16vec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); lvec4 offset_tl = ColorDist(cc, tl, tc, tr, cr);
f16vec4 offset_br = ColorDist(cc, br, bc, bl, cl); lvec4 offset_br = ColorDist(cc, br, bc, bl, cl);
// Calculate how different cc is from the texels around it // Calculate how different cc is from the texels around it
const float16_t plus_weight = float16_t(1.5); const lfloat plus_weight = lfloat(1.5);
const float16_t cross_weight = float16_t(1.5); const lfloat cross_weight = lfloat(1.5);
float16_t total_dist = dot(offset_tl + offset_br, f16vec4(cross_weight, plus_weight, cross_weight, plus_weight)); lfloat total_dist = dot(offset_tl + offset_br, lvec4(cross_weight, plus_weight, cross_weight, plus_weight));
if (total_dist == float16_t(0.0)) { if (total_dist == lfloat(0.0)) {
return cc; return cc;
} else { } else {
// Add together all the distances with direction taken into account // Add together all the distances with direction taken into account
f16vec4 tmp = offset_tl - offset_br; lvec4 tmp = offset_tl - offset_br;
f16vec2 total_offset = tmp.wy * plus_weight + (tmp.zz + f16vec2(-tmp.x, tmp.x)) * cross_weight; lvec2 total_offset = tmp.wy * plus_weight + (tmp.zz + lvec2(-tmp.x, tmp.x)) * cross_weight;
// When the image has thin points, they tend to split apart. // When the image has thin points, they tend to split apart.
// This is because the texels all around are different and total_offset reaches into clear areas. // This is because the texels all around are different and total_offset reaches into clear areas.
// This works pretty well to keep the offset in bounds for these cases. // This works pretty well to keep the offset in bounds for these cases.
float16_t clamp_val = length(total_offset) / total_dist; lfloat clamp_val = length(total_offset) / total_dist;
f16vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) / f16vec2(textureSize(tex, 0)); vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0);
return texture(tex, tex_coord - final_offset); return texture(tex, tex_coord - final_offset);
} }
@ -109,4 +127,4 @@ vec4 Scaleforce(sampler2D tex, vec2 tex_coord) {
void main() { void main() {
frag_color = Scaleforce(input_texture, tex_coord); frag_color = Scaleforce(input_texture, tex_coord);
} }

View File

@ -5,9 +5,7 @@
#version 460 core #version 460 core
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
layout(set=0,binding=0) uniform sampler2D InputTexture; #define YUZU_USE_FP16
layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
#define USE_EASU 1 #define USE_EASU 1
#include "fidelityfx_fsr.comp" #include "fidelityfx_fsr.comp"

View File

@ -0,0 +1,10 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 460 core
#extension GL_GOOGLE_include_directive : enable
#define USE_EASU 1
#include "fidelityfx_fsr.comp"

View File

@ -5,9 +5,7 @@
#version 460 core #version 460 core
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
layout(set=0,binding=0) uniform sampler2D InputTexture; #define YUZU_USE_FP16
layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
#define USE_RCAS 1 #define USE_RCAS 1
#include "fidelityfx_fsr.comp" #include "fidelityfx_fsr.comp"

View File

@ -0,0 +1,10 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 460 core
#extension GL_GOOGLE_include_directive : enable
#define USE_RCAS 1
#include "fidelityfx_fsr.comp"

View File

@ -0,0 +1,7 @@
#version 460
#extension GL_GOOGLE_include_directive : enable
#define YUZU_USE_FP16
#include "opengl_present_scaleforce.frag"

View File

@ -0,0 +1,5 @@
#version 460
#extension GL_GOOGLE_include_directive : enable
#include "opengl_present_scaleforce.frag"

View File

@ -24,10 +24,10 @@
#include "video_core/host_shaders/fxaa_frag.h" #include "video_core/host_shaders/fxaa_frag.h"
#include "video_core/host_shaders/fxaa_vert.h" #include "video_core/host_shaders/fxaa_vert.h"
#include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/host_shaders/present_bicubic_frag.h" #include "video_core/host_shaders/present_bicubic_frag.h"
#include "video_core/host_shaders/present_gaussian_frag.h" #include "video_core/host_shaders/present_gaussian_frag.h"
#include "video_core/host_shaders/present_scaleforce_frag.h"
#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_util.h"
@ -266,7 +266,8 @@ void RendererOpenGL::InitOpenGLObjects() {
present_gaussian_fragment = present_gaussian_fragment =
CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER);
present_scaleforce_fragment = present_scaleforce_fragment =
CreateProgram(HostShaders::PRESENT_SCALEFORCE_FRAG, GL_FRAGMENT_SHADER); CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
GL_FRAGMENT_SHADER);
// Generate presentation sampler // Generate presentation sampler
present_sampler.Create(); present_sampler.Create();

View File

@ -21,8 +21,9 @@
#include "video_core/host_shaders/fxaa_vert_spv.h" #include "video_core/host_shaders/fxaa_vert_spv.h"
#include "video_core/host_shaders/present_bicubic_frag_spv.h" #include "video_core/host_shaders/present_bicubic_frag_spv.h"
#include "video_core/host_shaders/present_gaussian_frag_spv.h" #include "video_core/host_shaders/present_gaussian_frag_spv.h"
#include "video_core/host_shaders/present_scaleforce_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_frag_spv.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_vert_spv.h" #include "video_core/host_shaders/vulkan_present_vert_spv.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_blit_screen.h"
@ -328,7 +329,7 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT , cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier);
} }
}); });
@ -344,8 +345,12 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
crop_rect.bottom = framebuffer.height; crop_rect.bottom = framebuffer.height;
} }
crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
VkExtent2D fsr_input_size{
.width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
.height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
};
VkImageView fsr_image_view = VkImageView fsr_image_view =
fsr->Draw(scheduler, image_index, source_image_view, crop_rect); fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
UpdateDescriptorSet(image_index, fsr_image_view, true); UpdateDescriptorSet(image_index, fsr_image_view, true);
} else { } else {
const bool is_nn = const bool is_nn =
@ -500,7 +505,11 @@ void VKBlitScreen::CreateShaders() {
bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV);
gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV);
scaleforce_fragment_shader = BuildShader(device, PRESENT_SCALEFORCE_FRAG_SPV); if (device.IsFloat16Supported()) {
scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV);
} else {
scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV);
}
} }
void VKBlitScreen::CreateSemaphores() { void VKBlitScreen::CreateSemaphores() {

View File

@ -4,13 +4,19 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/div_ceil.h" #include "common/div_ceil.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h"
#include "video_core/renderer_vulkan/vk_fsr.h" #include "video_core/renderer_vulkan/vk_fsr.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_device.h"
#define A_CPU
#include <ffx_a.h>
#include <ffx_fsr1.h>
namespace Vulkan { namespace Vulkan {
FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
@ -29,11 +35,11 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image
} }
VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
const Common::Rectangle<int>& crop_rect) { VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
UpdateDescriptorSet(image_index, image_view); UpdateDescriptorSet(image_index, image_view);
scheduler.Record([this, image_index, crop_rect](vk::CommandBuffer cmdbuf) { scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier base_barrier{ const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr, .pNext = nullptr,
@ -54,13 +60,18 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im
}, },
}; };
// TODO: Support clear color
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
VkExtent2D{ std::array<AU1, 4 * 4> push_constants;
.width = static_cast<u32>(crop_rect.GetWidth()), FsrEasuConOffset(
.height = static_cast<u32>(crop_rect.GetHeight()), push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
}); push_constants.data() + 12,
static_cast<AF1>(crop_rect.GetWidth()), static_cast<AF1>(crop_rect.GetHeight()),
static_cast<AF1>(input_image_extent.width), static_cast<AF1>(input_image_extent.height),
static_cast<AF1>(output_size.width), static_cast<AF1>(output_size.height),
static_cast<AF1>(crop_rect.left), static_cast<AF1>(crop_rect.top));
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
{ {
VkImageMemoryBarrier fsr_write_barrier = base_barrier; VkImageMemoryBarrier fsr_write_barrier = base_barrier;
@ -77,7 +88,9 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im
Common::DivCeil(output_size.height, 16u), 1); Common::DivCeil(output_size.height, 16u), 1);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline);
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, output_size);
FsrRcasCon(push_constants.data(), 0.25f);
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
{ {
std::array<VkImageMemoryBarrier, 2> barriers; std::array<VkImageMemoryBarrier, 2> barriers;
@ -247,7 +260,7 @@ void FSR::CreatePipelineLayout() {
VkPushConstantRange push_const{ VkPushConstantRange push_const{
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.offset = 0, .offset = 0,
.size = sizeof(std::array<u32, 2>), .size = sizeof(std::array<u32, 4 * 4>),
}; };
VkPipelineLayoutCreateInfo ci{ VkPipelineLayoutCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
@ -344,8 +357,13 @@ void FSR::CreateSampler() {
} }
void FSR::CreateShaders() { void FSR::CreateShaders() {
easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_COMP_SPV); if (device.IsFloat16Supported()) {
rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_COMP_SPV); easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV);
rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV);
} else {
easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV);
rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV);
}
} }
void FSR::CreatePipeline() { void FSR::CreatePipeline() {

View File

@ -18,7 +18,7 @@ public:
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
VkExtent2D output_size); VkExtent2D output_size);
VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
const Common::Rectangle<int>& crop_rect); VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
private: private:
void CreateDescriptorPool(); void CreateDescriptorPool();