vk: Add basic support for FSR 1.0

- Only implemented for image upscaling.
- Disabled by default. Emulators cannot ensure upscalers are injected at the right rendering step.
- GUI integration not implemented.
This commit is contained in:
kd-11 2021-08-04 03:04:48 +03:00 committed by kd-11
parent 69b34693f0
commit d0a824996b
23 changed files with 6686 additions and 57 deletions

2656
3rdparty/GPUOpen/include/ffx_a.h vendored Normal file

File diff suppressed because it is too large Load Diff

1199
3rdparty/GPUOpen/include/ffx_fsr1.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -450,6 +450,7 @@ target_sources(rpcs3_emu PRIVATE
if(TARGET 3rdparty_vulkan)
target_sources(rpcs3_emu PRIVATE
RSX/VK/upscalers/fsr1/fsr_pass.cpp
RSX/VK/vkutils/barriers.cpp
RSX/VK/vkutils/buffer_object.cpp
RSX/VK/vkutils/chip_class.cpp

View File

@ -623,10 +623,10 @@ VKGSRender::~VKGSRender()
// Clear flush requests
m_flush_requests.clear_pending_flag();
//Texture cache
// Texture cache
m_texture_cache.destroy();
//Shaders
// Shaders
vk::destroy_pipe_compiler(); // Ensure no pending shaders being compiled
vk::finalize_compiler_context(); // Shut down the glslang compiler
m_prog_buffer->clear(); // Delete shader objects
@ -636,10 +636,13 @@ VKGSRender::~VKGSRender()
m_volatile_attribute_storage.reset();
m_vertex_layout_storage.reset();
//Global resources
// Upscaler (references some global resources)
m_upscaler.reset();
// Global resources
vk::destroy_global_resources();
//Heaps
// Heaps
m_attrib_ring_info.destroy();
m_fragment_env_ring_info.destroy();
m_vertex_env_ring_info.destroy();
@ -653,13 +656,13 @@ VKGSRender::~VKGSRender()
m_fragment_instructions_buffer.destroy();
m_raster_env_ring_info.destroy();
//Fallback bindables
// Fallback bindables
null_buffer.reset();
null_buffer_view.reset();
if (m_current_frame == &m_aux_frame_context)
{
//Return resources back to the owner
// Return resources back to the owner
m_current_frame = &frame_context_storage[m_current_queue_index];
m_current_frame->swap_storage(m_aux_frame_context);
m_current_frame->grab_resources(m_aux_frame_context);
@ -667,7 +670,7 @@ VKGSRender::~VKGSRender()
m_aux_frame_context.buffer_views_to_clean.clear();
//NOTE: aux_context uses descriptor pools borrowed from the main queues and any allocations will be automatically freed when pool is destroyed
// NOTE: aux_context uses descriptor pools borrowed from the main queues and any allocations will be automatically freed when pool is destroyed
for (auto &ctx : frame_context_storage)
{
vkDestroySemaphore((*m_device), ctx.present_wait_semaphore, nullptr);
@ -677,24 +680,24 @@ VKGSRender::~VKGSRender()
ctx.buffer_views_to_clean.clear();
}
//Textures
// Textures
m_rtts.destroy();
m_texture_cache.destroy();
m_stencil_mirror_sampler.reset();
//Overlay text handler
// Overlay text handler
m_text_writer.reset();
//Pipeline descriptors
vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr);
vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr);
//Queries
// Queries
m_occlusion_query_manager.reset();
m_cond_render_buffer.reset();
//Command buffer
// Command buffer
for (auto &cb : m_primary_cb_list)
cb.destroy();
@ -703,7 +706,7 @@ VKGSRender::~VKGSRender()
m_secondary_command_buffer.destroy();
m_secondary_command_buffer_pool.destroy();
//Device handles/contexts
// Device handles/contexts
m_swapchain->destroy();
m_instance.destroy();

View File

@ -2,6 +2,8 @@
#include "Emu/RSX/GSRender.h"
#include "Emu/Cell/timers.hpp"
#include "upscalers/upscaling.h"
#include "vkutils/descriptors.hpp"
#include "vkutils/data_heap.h"
#include "vkutils/instance.hpp"
@ -336,6 +338,7 @@ namespace vk
}
using namespace vk::vmm_allocation_pool_; // clang workaround.
using namespace vk::upscaling_flags_; // ditto
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
@ -380,6 +383,7 @@ private:
std::unique_ptr<vk::buffer_view> null_buffer_view;
std::unique_ptr<vk::text_writer> m_text_writer;
std::unique_ptr<vk::upscaler> m_upscaler;
std::unique_ptr<vk::buffer> m_cond_render_buffer;
u64 m_cond_render_sync_tag = 0;
@ -518,7 +522,7 @@ private:
void present(vk::frame_context_t *ctx);
void reinitialize_swapchain();
vk::image* get_present_source(vk::present_surface_info* info, const rsx::avconf& avconfig);
vk::viewable_image* get_present_source(vk::present_surface_info* info, const rsx::avconf& avconfig);
void begin_render_pass();
void close_render_pass();

View File

@ -1037,8 +1037,7 @@ namespace vk
for (auto& img : src)
{
// Only raw uploads can possibly have mismatched layout here
img->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
img->push_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
views.push_back(img->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector));
}
@ -1048,5 +1047,10 @@ namespace vk
}
overlay_pass::run(cmd, viewport, target, views, render_pass);
for (auto& img : src)
{
img->pop_layout(cmd);
}
}
}

View File

@ -4,6 +4,8 @@
#include "Emu/RSX/Overlays/overlays.h"
#include "Emu/Cell/Modules/cellVideoOut.h"
#include "upscalers/bilinear_pass.hpp"
#include "upscalers/fsr_pass.h"
#include "util/asm.hpp"
void VKGSRender::reinitialize_swapchain()
@ -34,6 +36,9 @@ void VKGSRender::reinitialize_swapchain()
frame_context_cleanup(&ctx, true);
}
// Discard the current upscaling pipeline if any
m_upscaler.reset();
// Drain all the queues
vkDeviceWaitIdle(*m_device);
@ -275,9 +280,9 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx, bool free_resou
vk::advance_completed_frame_counter();
}
vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const rsx::avconf& avconfig)
vk::viewable_image* VKGSRender::get_present_source(vk::present_surface_info* info, const rsx::avconf& avconfig)
{
vk::image* image_to_flip = nullptr;
vk::viewable_image* image_to_flip = nullptr;
// Check the surface store first
const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format);
@ -329,7 +334,8 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const
{
// Hack - this should be the first location to check for output
// The render might have been done offscreen or in software and a blit used to display
image_to_flip = surface->get_raw_texture();
image_to_flip = dynamic_cast<vk::viewable_image*>(surface->get_raw_texture());
ensure(image_to_flip);
}
if (!image_to_flip)
@ -461,7 +467,7 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
}
// Scan memory for required data. This is done early to optimize waiting for the driver image acquire below.
vk::image *image_to_flip = nullptr, *image_to_flip2 = nullptr;
vk::viewable_image *image_to_flip = nullptr, *image_to_flip2 = nullptr;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
vk::present_surface_info present_info;
@ -590,23 +596,63 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
if (!m_upscaler)
{
if (g_cfg.video.vk.fsr_upscaling)
{
m_upscaler = std::make_unique<vk::fsr_upscale_pass>();
}
else
{
m_upscaler = std::make_unique<vk::bilinear_upscale_pass>();
}
}
if (image_to_flip)
{
const bool use_full_rgb_range_output = g_cfg.video.full_rgb_range_output.get();
if (!use_full_rgb_range_output || !rsx::fcmp(avconfig.gamma, 1.f) || avconfig._3d) [[unlikely]]
{
calibration_src.push_back(dynamic_cast<vk::viewable_image*>(image_to_flip));
ensure(calibration_src.front());
if (image_to_flip) calibration_src.push_back(image_to_flip);
if (image_to_flip2) calibration_src.push_back(image_to_flip2);
if (image_to_flip2)
if (g_cfg.video.vk.fsr_upscaling && !avconfig._3d) // 3D will be implemented later
{
calibration_src.push_back(dynamic_cast<vk::viewable_image*>(image_to_flip2));
ensure(calibration_src.back());
}
}
// Run upscaling pass before the rest of the output effects pipeline
// This can be done with all upscalers but we already get bilinear upscaling for free if we just output the filters directly
VkImageBlit request = {};
request.srcSubresource = { image_to_flip->aspect(), 0, 0, 1 };
request.dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
request.srcOffsets[0] = { 0, 0, 0 };
request.srcOffsets[1] = { s32(buffer_width), s32(buffer_height), 1 };
request.dstOffsets[0] = { 0, 0, 0 };
request.dstOffsets[1] = { aspect_ratio.width, aspect_ratio.height, 1 };
if (calibration_src.empty()) [[likely]]
for (unsigned i = 0; i < calibration_src.size(); ++i)
{
const rsx::flags32_t mode = (i == 0) ? UPSCALE_LEFT_VIEW : UPSCALE_RIGHT_VIEW;
calibration_src[i] = m_upscaler->scale_output(*m_current_command_buffer, image_to_flip, VK_NULL_HANDLE, VK_IMAGE_LAYOUT_UNDEFINED, request, mode);
}
}
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, subresource_range);
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
single_target_pass = vk::get_renderpass(*m_device, key);
ensure(single_target_pass != VK_NULL_HANDLE);
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, VK_FALSE, single_target_pass, m_swapchain->get_surface_format(), target_image);
direct_fbo->add_ref();
vk::get_overlay_pass<vk::video_out_calibration_pass>()->run(
*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src,
avconfig.gamma, !use_full_rgb_range_output, avconfig._3d, single_target_pass);
direct_fbo->release();
}
else
{
// Do raw transfer here as there is no image object associated with textures owned by the driver (TODO)
const areai dst_rect = aspect_ratio;
@ -619,35 +665,13 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
rgn.dstOffsets[0] = { dst_rect.x1, dst_rect.y1, 0 };
rgn.dstOffsets[1] = { dst_rect.x2, dst_rect.y2, 1 };
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
if (target_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
{
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
vkCmdBlitImage(*m_current_command_buffer, image_to_flip->value, image_to_flip->current_layout, target_image, target_layout, 1, &rgn, VK_FILTER_LINEAR);
image_to_flip->pop_layout(*m_current_command_buffer);
}
else
{
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, subresource_range);
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
single_target_pass = vk::get_renderpass(*m_device, key);
ensure(single_target_pass != VK_NULL_HANDLE);
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, VK_FALSE, single_target_pass, m_swapchain->get_surface_format(), target_image);
direct_fbo->add_ref();
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
vk::get_overlay_pass<vk::video_out_calibration_pass>()->run(
*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src,
avconfig.gamma, !use_full_rgb_range_output, avconfig._3d, single_target_pass);
image_to_flip->pop_layout(*m_current_command_buffer);
direct_fbo->release();
m_upscaler->scale_output(*m_current_command_buffer, image_to_flip, target_image, target_layout, rgn, UPSCALE_AND_COMMIT | UPSCALE_DEFAULT_VIEW);
}
if (m_frame->screenshot_toggle)

View File

@ -1199,7 +1199,7 @@ namespace vk
baseclass::on_frame_end();
}
vk::image* texture_cache::upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch)
vk::viewable_image* texture_cache::upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch)
{
bool linear_format_supported = false;

View File

@ -497,7 +497,7 @@ namespace vk
void on_frame_end() override;
vk::image* upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch);
vk::viewable_image* upload_image_simple(vk::command_buffer& cmd, VkFormat format, u32 address, u32 width, u32 height, u32 pitch);
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, vk::surface_cache& m_rtts, vk::command_buffer& cmd);

View File

@ -0,0 +1,32 @@
#pragma once
#include "upscaling.h"
namespace vk
{
	// Upscaler that relies on the linear filter of vkCmdBlitImage.
	// It can only scale while committing to the present surface; a request
	// to scale the source alone hands the source back untouched.
	struct bilinear_upscale_pass : public upscaler
	{
		// cmd                    - Command buffer the blit is recorded into
		// src                    - Source input image
		// present_surface        - Present target. May be VK_NULL_HANDLE for some passes
		// present_surface_layout - Layout of the present target, or VK_IMAGE_LAYOUT_UNDEFINED if no present target is provided
		// request                - Scaling request information (blit region)
		// mode                   - Upscaling mode flags
		//
		// Returns nullptr once the image has been blitted to the present surface,
		// otherwise returns src unchanged.
		vk::viewable_image* scale_output(
			const vk::command_buffer& cmd,
			vk::viewable_image* src,
			VkImage present_surface,
			VkImageLayout present_surface_layout,
			const VkImageBlit& request,
			rsx::flags32_t mode
		) override
		{
			if (!(mode & UPSCALE_AND_COMMIT))
			{
				// Upscaling source only is unsupported
				return src;
			}

			// Committing requires a valid present target
			ensure(present_surface);

			// Temporarily move the source into a transfer-readable layout for the blit
			src->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			const VkImageLayout blit_src_layout = src->current_layout;
			vkCmdBlitImage(cmd, src->value, blit_src_layout, present_surface, present_surface_layout, 1, &request, VK_FILTER_LINEAR);

			// Restore whatever layout the source had before the blit
			src->pop_layout(cmd);

			return nullptr;
		}
	};
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,649 @@
// Preprocessed version of ffx_fsr1.h from FSR package
// For original source, see 3rdparty/GPUOpen/include/ffx_fsr1.h
// Modifications are required to meet MSVC's strict limitations on string length. No single string literal (concatenated or otherwise) can exceed 64K
// * Removed CPU section as we do not need it
// * Stripped all comment-only lines, the original source is still readable for this purpose
// * Stripped all unnecessary whitespace
R"--RPCS3--(
#if defined(A_GPU)&&defined(FSR_EASU_F)
AF4 FsrEasuRF(AF2 p);
AF4 FsrEasuGF(AF2 p);
AF4 FsrEasuBF(AF2 p);
void FsrEasuTapF(
inout AF3 aC, // Accumulated color, with negative lobe.
inout AF1 aW, // Accumulated weight.
AF2 off, // Pixel offset from resolve position to tap.
AF2 dir, // Gradient direction.
AF2 len, // Length.
AF1 lob, // Negative lobe strength.
AF1 clp, // Clipping point.
AF3 c){ // Tap color.
AF2 v;
v.x=(off.x*( dir.x))+(off.y*dir.y);
v.y=(off.x*(-dir.y))+(off.y*dir.x);
v*=len;
AF1 d2=v.x*v.x+v.y*v.y;
d2=min(d2,clp);
AF1 wB=AF1_(2.0/5.0)*d2+AF1_(-1.0);
AF1 wA=lob*d2+AF1_(-1.0);
wB*=wB;
wA*=wA;
wB=AF1_(25.0/16.0)*wB+AF1_(-(25.0/16.0-1.0));
AF1 w=wB*wA;
aC+=c*w;aW+=w;}
void FsrEasuSetF(
inout AF2 dir,
inout AF1 len,
AF2 pp,
AP1 biS,AP1 biT,AP1 biU,AP1 biV,
AF1 lA,AF1 lB,AF1 lC,AF1 lD,AF1 lE){
AF1 w = AF1_(0.0);
if(biS)w=(AF1_(1.0)-pp.x)*(AF1_(1.0)-pp.y);
if(biT)w= pp.x *(AF1_(1.0)-pp.y);
if(biU)w=(AF1_(1.0)-pp.x)* pp.y ;
if(biV)w= pp.x * pp.y ;
AF1 dc=lD-lC;
AF1 cb=lC-lB;
AF1 lenX=max(abs(dc),abs(cb));
lenX=APrxLoRcpF1(lenX);
AF1 dirX=lD-lB;
dir.x+=dirX*w;
lenX=ASatF1(abs(dirX)*lenX);
lenX*=lenX;
len+=lenX*w;
AF1 ec=lE-lC;
AF1 ca=lC-lA;
AF1 lenY=max(abs(ec),abs(ca));
lenY=APrxLoRcpF1(lenY);
AF1 dirY=lE-lA;
dir.y+=dirY*w;
lenY=ASatF1(abs(dirY)*lenY);
lenY*=lenY;
len+=lenY*w;}
void FsrEasuF(
out AF3 pix,
AU2 ip, // Integer pixel position in output.
AU4 con0, // Constants generated by FsrEasuCon().
AU4 con1,
AU4 con2,
AU4 con3){
AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw);
AF2 fp=floor(pp);
pp-=fp;
AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
AF2 p1=p0+AF2_AU2(con2.xy);
AF2 p2=p0+AF2_AU2(con2.zw);
AF2 p3=p0+AF2_AU2(con3.xy);
AF4 bczzR=FsrEasuRF(p0);
AF4 bczzG=FsrEasuGF(p0);
AF4 bczzB=FsrEasuBF(p0);
AF4 ijfeR=FsrEasuRF(p1);
AF4 ijfeG=FsrEasuGF(p1);
AF4 ijfeB=FsrEasuBF(p1);
AF4 klhgR=FsrEasuRF(p2);
AF4 klhgG=FsrEasuGF(p2);
AF4 klhgB=FsrEasuBF(p2);
AF4 zzonR=FsrEasuRF(p3);
AF4 zzonG=FsrEasuGF(p3);
AF4 zzonB=FsrEasuBF(p3);
AF4 bczzL=bczzB*AF4_(0.5)+(bczzR*AF4_(0.5)+bczzG);
AF4 ijfeL=ijfeB*AF4_(0.5)+(ijfeR*AF4_(0.5)+ijfeG);
AF4 klhgL=klhgB*AF4_(0.5)+(klhgR*AF4_(0.5)+klhgG);
AF4 zzonL=zzonB*AF4_(0.5)+(zzonR*AF4_(0.5)+zzonG);
AF1 bL=bczzL.x;
AF1 cL=bczzL.y;
AF1 iL=ijfeL.x;
AF1 jL=ijfeL.y;
AF1 fL=ijfeL.z;
AF1 eL=ijfeL.w;
AF1 kL=klhgL.x;
AF1 lL=klhgL.y;
AF1 hL=klhgL.z;
AF1 gL=klhgL.w;
AF1 oL=zzonL.z;
AF1 nL=zzonL.w;
AF2 dir=AF2_(0.0);
AF1 len=AF1_(0.0);
FsrEasuSetF(dir,len,pp,true, false,false,false,bL,eL,fL,gL,jL);
FsrEasuSetF(dir,len,pp,false,true ,false,false,cL,fL,gL,hL,kL);
FsrEasuSetF(dir,len,pp,false,false,true ,false,fL,iL,jL,kL,nL);
FsrEasuSetF(dir,len,pp,false,false,false,true ,gL,jL,kL,lL,oL);
AF2 dir2=dir*dir;
AF1 dirR=dir2.x+dir2.y;
AP1 zro=dirR<AF1_(1.0/32768.0);
dirR=APrxLoRsqF1(dirR);
dirR=zro?AF1_(1.0):dirR;
dir.x=zro?AF1_(1.0):dir.x;
dir*=AF2_(dirR);
len=len*AF1_(0.5);
len*=len;
AF1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpF1(max(abs(dir.x),abs(dir.y)));
AF2 len2=AF2(AF1_(1.0)+(stretch-AF1_(1.0))*len,AF1_(1.0)+AF1_(-0.5)*len);
AF1 lob=AF1_(0.5)+AF1_((1.0/4.0-0.04)-0.5)*len;
AF1 clp=APrxLoRcpF1(lob);
AF3 min4=min(AMin3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),AF3(ijfeR.y,ijfeG.y,ijfeB.y)),
AF3(klhgR.x,klhgG.x,klhgB.x));
AF3 max4=max(AMax3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),AF3(ijfeR.y,ijfeG.y,ijfeB.y)),
AF3(klhgR.x,klhgG.x,klhgB.x));
AF3 aC=AF3_(0.0);
AF1 aW=AF1_(0.0);
FsrEasuTapF(aC,aW,AF2( 0.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.x,bczzG.x,bczzB.x)); // b
FsrEasuTapF(aC,aW,AF2( 1.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.y,bczzG.y,bczzB.y)); // c
FsrEasuTapF(aC,aW,AF2(-1.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.x,ijfeG.x,ijfeB.x)); // i
FsrEasuTapF(aC,aW,AF2( 0.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.y,ijfeG.y,ijfeB.y)); // j
FsrEasuTapF(aC,aW,AF2( 0.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.z,ijfeG.z,ijfeB.z)); // f
FsrEasuTapF(aC,aW,AF2(-1.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.w,ijfeG.w,ijfeB.w)); // e
FsrEasuTapF(aC,aW,AF2( 1.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.x,klhgG.x,klhgB.x)); // k
FsrEasuTapF(aC,aW,AF2( 2.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.y,klhgG.y,klhgB.y)); // l
FsrEasuTapF(aC,aW,AF2( 2.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.z,klhgG.z,klhgB.z)); // h
FsrEasuTapF(aC,aW,AF2( 1.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.w,klhgG.w,klhgB.w)); // g
FsrEasuTapF(aC,aW,AF2( 1.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.z,zzonG.z,zzonB.z)); // o
FsrEasuTapF(aC,aW,AF2( 0.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.w,zzonG.w,zzonB.w)); // n
pix=min(max4,max(min4,aC*AF3_(ARcpF1(aW))));}
#endif
#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_EASU_H)
AH4 FsrEasuRH(AF2 p);
AH4 FsrEasuGH(AF2 p);
AH4 FsrEasuBH(AF2 p);
void FsrEasuTapH(
inout AH2 aCR,inout AH2 aCG,inout AH2 aCB,
inout AH2 aW,
AH2 offX,AH2 offY,
AH2 dir,
AH2 len,
AH1 lob,
AH1 clp,
AH2 cR,AH2 cG,AH2 cB){
AH2 vX,vY;
vX=offX* dir.xx +offY*dir.yy;
vY=offX*(-dir.yy)+offY*dir.xx;
vX*=len.x;vY*=len.y;
AH2 d2=vX*vX+vY*vY;
d2=min(d2,AH2_(clp));
AH2 wB=AH2_(2.0/5.0)*d2+AH2_(-1.0);
AH2 wA=AH2_(lob)*d2+AH2_(-1.0);
wB*=wB;
wA*=wA;
wB=AH2_(25.0/16.0)*wB+AH2_(-(25.0/16.0-1.0));
AH2 w=wB*wA;
aCR+=cR*w;aCG+=cG*w;aCB+=cB*w;aW+=w;}
void FsrEasuSetH(
inout AH2 dirPX,inout AH2 dirPY,
inout AH2 lenP,
AH2 pp,
AP1 biST,AP1 biUV,
AH2 lA,AH2 lB,AH2 lC,AH2 lD,AH2 lE){
AH2 w = AH2_(0.0);
if(biST)w=(AH2(1.0,0.0)+AH2(-pp.x,pp.x))*AH2_(AH1_(1.0)-pp.y);
if(biUV)w=(AH2(1.0,0.0)+AH2(-pp.x,pp.x))*AH2_( pp.y);
AH2 dc=lD-lC;
AH2 cb=lC-lB;
AH2 lenX=max(abs(dc),abs(cb));
lenX=ARcpH2(lenX);
AH2 dirX=lD-lB;
dirPX+=dirX*w;
lenX=ASatH2(abs(dirX)*lenX);
lenX*=lenX;
lenP+=lenX*w;
AH2 ec=lE-lC;
AH2 ca=lC-lA;
AH2 lenY=max(abs(ec),abs(ca));
lenY=ARcpH2(lenY);
AH2 dirY=lE-lA;
dirPY+=dirY*w;
lenY=ASatH2(abs(dirY)*lenY);
lenY*=lenY;
lenP+=lenY*w;}
void FsrEasuH(
out AH3 pix,
AU2 ip,
AU4 con0,
AU4 con1,
AU4 con2,
AU4 con3){
AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw);
AF2 fp=floor(pp);
pp-=fp;
AH2 ppp=AH2(pp);
AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
AF2 p1=p0+AF2_AU2(con2.xy);
AF2 p2=p0+AF2_AU2(con2.zw);
AF2 p3=p0+AF2_AU2(con3.xy);
AH4 bczzR=FsrEasuRH(p0);
AH4 bczzG=FsrEasuGH(p0);
AH4 bczzB=FsrEasuBH(p0);
AH4 ijfeR=FsrEasuRH(p1);
AH4 ijfeG=FsrEasuGH(p1);
AH4 ijfeB=FsrEasuBH(p1);
AH4 klhgR=FsrEasuRH(p2);
AH4 klhgG=FsrEasuGH(p2);
AH4 klhgB=FsrEasuBH(p2);
AH4 zzonR=FsrEasuRH(p3);
AH4 zzonG=FsrEasuGH(p3);
AH4 zzonB=FsrEasuBH(p3);
AH4 bczzL=bczzB*AH4_(0.5)+(bczzR*AH4_(0.5)+bczzG);
AH4 ijfeL=ijfeB*AH4_(0.5)+(ijfeR*AH4_(0.5)+ijfeG);
AH4 klhgL=klhgB*AH4_(0.5)+(klhgR*AH4_(0.5)+klhgG);
AH4 zzonL=zzonB*AH4_(0.5)+(zzonR*AH4_(0.5)+zzonG);
AH1 bL=bczzL.x;
AH1 cL=bczzL.y;
AH1 iL=ijfeL.x;
AH1 jL=ijfeL.y;
AH1 fL=ijfeL.z;
AH1 eL=ijfeL.w;
AH1 kL=klhgL.x;
AH1 lL=klhgL.y;
AH1 hL=klhgL.z;
AH1 gL=klhgL.w;
AH1 oL=zzonL.z;
AH1 nL=zzonL.w;
AH2 dirPX=AH2_(0.0);
AH2 dirPY=AH2_(0.0);
AH2 lenP=AH2_(0.0);
FsrEasuSetH(dirPX,dirPY,lenP,ppp,true, false,AH2(bL,cL),AH2(eL,fL),AH2(fL,gL),AH2(gL,hL),AH2(jL,kL));
FsrEasuSetH(dirPX,dirPY,lenP,ppp,false,true ,AH2(fL,gL),AH2(iL,jL),AH2(jL,kL),AH2(kL,lL),AH2(nL,oL));
AH2 dir=AH2(dirPX.r+dirPX.g,dirPY.r+dirPY.g);
AH1 len=lenP.r+lenP.g;
AH2 dir2=dir*dir;
AH1 dirR=dir2.x+dir2.y;
AP1 zro=dirR<AH1_(1.0/32768.0);
dirR=APrxLoRsqH1(dirR);
dirR=zro?AH1_(1.0):dirR;
dir.x=zro?AH1_(1.0):dir.x;
dir*=AH2_(dirR);
len=len*AH1_(0.5);
len*=len;
AH1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpH1(max(abs(dir.x),abs(dir.y)));
AH2 len2=AH2(AH1_(1.0)+(stretch-AH1_(1.0))*len,AH1_(1.0)+AH1_(-0.5)*len);
AH1 lob=AH1_(0.5)+AH1_((1.0/4.0-0.04)-0.5)*len;
AH1 clp=APrxLoRcpH1(lob);
AH2 bothR=max(max(AH2(-ijfeR.z,ijfeR.z),AH2(-klhgR.w,klhgR.w)),max(AH2(-ijfeR.y,ijfeR.y),AH2(-klhgR.x,klhgR.x)));
AH2 bothG=max(max(AH2(-ijfeG.z,ijfeG.z),AH2(-klhgG.w,klhgG.w)),max(AH2(-ijfeG.y,ijfeG.y),AH2(-klhgG.x,klhgG.x)));
AH2 bothB=max(max(AH2(-ijfeB.z,ijfeB.z),AH2(-klhgB.w,klhgB.w)),max(AH2(-ijfeB.y,ijfeB.y),AH2(-klhgB.x,klhgB.x)));
AH2 pR=AH2_(0.0);
AH2 pG=AH2_(0.0);
AH2 pB=AH2_(0.0);
AH2 pW=AH2_(0.0);
FsrEasuTapH(pR,pG,pB,pW,AH2( 0.0, 1.0)-ppp.xx,AH2(-1.0,-1.0)-ppp.yy,dir,len2,lob,clp,bczzR.xy,bczzG.xy,bczzB.xy);
FsrEasuTapH(pR,pG,pB,pW,AH2(-1.0, 0.0)-ppp.xx,AH2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,ijfeR.xy,ijfeG.xy,ijfeB.xy);
FsrEasuTapH(pR,pG,pB,pW,AH2( 0.0,-1.0)-ppp.xx,AH2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,ijfeR.zw,ijfeG.zw,ijfeB.zw);
FsrEasuTapH(pR,pG,pB,pW,AH2( 1.0, 2.0)-ppp.xx,AH2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,klhgR.xy,klhgG.xy,klhgB.xy);
FsrEasuTapH(pR,pG,pB,pW,AH2( 2.0, 1.0)-ppp.xx,AH2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,klhgR.zw,klhgG.zw,klhgB.zw);
FsrEasuTapH(pR,pG,pB,pW,AH2( 1.0, 0.0)-ppp.xx,AH2( 2.0, 2.0)-ppp.yy,dir,len2,lob,clp,zzonR.zw,zzonG.zw,zzonB.zw);
AH3 aC=AH3(pR.x+pR.y,pG.x+pG.y,pB.x+pB.y);
AH1 aW=pW.x+pW.y;
pix=min(AH3(bothR.y,bothG.y,bothB.y),max(-AH3(bothR.x,bothG.x,bothB.x),aC*AH3_(ARcpH1(aW))));}
#endif
#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
A_STATIC void FsrRcasCon(
outAU4 con,
AF1 sharpness){
sharpness=AExp2F1(-sharpness);
varAF2(hSharp)=initAF2(sharpness,sharpness);
con[0]=AU1_AF1(sharpness);
con[1]=AU1_AH2_AF2(hSharp);
con[2]=0;
con[3]=0;}
#if defined(A_GPU)&&defined(FSR_RCAS_F)
AF4 FsrRcasLoadF(ASU2 p);
void FsrRcasInputF(inout AF1 r,inout AF1 g,inout AF1 b);
void FsrRcasF(
out AF1 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy.
out AF1 pixG,
out AF1 pixB,
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
out AF1 pixA,
#endif
AU2 ip, // Integer pixel position in output.
AU4 con){ // Constant generated by RcasSetup().
ASU2 sp=ASU2(ip);
AF3 b=FsrRcasLoadF(sp+ASU2( 0,-1)).rgb;
AF3 d=FsrRcasLoadF(sp+ASU2(-1, 0)).rgb;
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
AF4 ee=FsrRcasLoadF(sp);
AF3 e=ee.rgb;pixA=ee.a;
#else
AF3 e=FsrRcasLoadF(sp).rgb;
#endif
)--RPCS3--"
R"--RPCS3--(
AF3 f=FsrRcasLoadF(sp+ASU2( 1, 0)).rgb;
AF3 h=FsrRcasLoadF(sp+ASU2( 0, 1)).rgb;
AF1 bR=b.r;
AF1 bG=b.g;
AF1 bB=b.b;
AF1 dR=d.r;
AF1 dG=d.g;
AF1 dB=d.b;
AF1 eR=e.r;
AF1 eG=e.g;
AF1 eB=e.b;
AF1 fR=f.r;
AF1 fG=f.g;
AF1 fB=f.b;
AF1 hR=h.r;
AF1 hG=h.g;
AF1 hB=h.b;
FsrRcasInputF(bR,bG,bB);
FsrRcasInputF(dR,dG,dB);
FsrRcasInputF(eR,eG,eB);
FsrRcasInputF(fR,fG,fB);
FsrRcasInputF(hR,hG,hB);
AF1 bL=bB*AF1_(0.5)+(bR*AF1_(0.5)+bG);
AF1 dL=dB*AF1_(0.5)+(dR*AF1_(0.5)+dG);
AF1 eL=eB*AF1_(0.5)+(eR*AF1_(0.5)+eG);
AF1 fL=fB*AF1_(0.5)+(fR*AF1_(0.5)+fG);
AF1 hL=hB*AF1_(0.5)+(hR*AF1_(0.5)+hG);
AF1 nz=AF1_(0.25)*bL+AF1_(0.25)*dL+AF1_(0.25)*fL+AF1_(0.25)*hL-eL;
nz=ASatF1(abs(nz)*APrxMedRcpF1(AMax3F1(AMax3F1(bL,dL,eL),fL,hL)-AMin3F1(AMin3F1(bL,dL,eL),fL,hL)));
nz=AF1_(-0.5)*nz+AF1_(1.0);
AF1 mn4R=min(AMin3F1(bR,dR,fR),hR);
AF1 mn4G=min(AMin3F1(bG,dG,fG),hG);
AF1 mn4B=min(AMin3F1(bB,dB,fB),hB);
AF1 mx4R=max(AMax3F1(bR,dR,fR),hR);
AF1 mx4G=max(AMax3F1(bG,dG,fG),hG);
AF1 mx4B=max(AMax3F1(bB,dB,fB),hB);
AF2 peakC=AF2(1.0,-1.0*4.0);
AF1 hitMinR=mn4R*ARcpF1(AF1_(4.0)*mx4R);
AF1 hitMinG=mn4G*ARcpF1(AF1_(4.0)*mx4G);
AF1 hitMinB=mn4B*ARcpF1(AF1_(4.0)*mx4B);
AF1 hitMaxR=(peakC.x-mx4R)*ARcpF1(AF1_(4.0)*mn4R+peakC.y);
AF1 hitMaxG=(peakC.x-mx4G)*ARcpF1(AF1_(4.0)*mn4G+peakC.y);
AF1 hitMaxB=(peakC.x-mx4B)*ARcpF1(AF1_(4.0)*mn4B+peakC.y);
AF1 lobeR=max(-hitMinR,hitMaxR);
AF1 lobeG=max(-hitMinG,hitMaxG);
AF1 lobeB=max(-hitMinB,hitMaxB);
AF1 lobe=max(AF1_(-FSR_RCAS_LIMIT),min(AMax3F1(lobeR,lobeG,lobeB),AF1_(0.0)))*AF1_AU1(con.x);
#ifdef FSR_RCAS_DENOISE
lobe*=nz;
#endif
AF1 rcpL=APrxMedRcpF1(AF1_(4.0)*lobe+AF1_(1.0));
pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
return;}
#endif
#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_H)
AH4 FsrRcasLoadH(ASW2 p);
void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b);
void FsrRcasH(
out AH1 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy.
out AH1 pixG,
out AH1 pixB,
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
out AH1 pixA,
#endif
AU2 ip, // Integer pixel position in output.
AU4 con){ // Constant generated by RcasSetup().
ASW2 sp=ASW2(ip);
AH3 b=FsrRcasLoadH(sp+ASW2( 0,-1)).rgb;
AH3 d=FsrRcasLoadH(sp+ASW2(-1, 0)).rgb;
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
AH4 ee=FsrRcasLoadH(sp);
AH3 e=ee.rgb;pixA=ee.a;
#else
AH3 e=FsrRcasLoadH(sp).rgb;
#endif
AH3 f=FsrRcasLoadH(sp+ASW2( 1, 0)).rgb;
AH3 h=FsrRcasLoadH(sp+ASW2( 0, 1)).rgb;
AH1 bR=b.r;
AH1 bG=b.g;
AH1 bB=b.b;
AH1 dR=d.r;
AH1 dG=d.g;
AH1 dB=d.b;
AH1 eR=e.r;
AH1 eG=e.g;
AH1 eB=e.b;
AH1 fR=f.r;
AH1 fG=f.g;
AH1 fB=f.b;
AH1 hR=h.r;
AH1 hG=h.g;
AH1 hB=h.b;
FsrRcasInputH(bR,bG,bB);
FsrRcasInputH(dR,dG,dB);
FsrRcasInputH(eR,eG,eB);
FsrRcasInputH(fR,fG,fB);
FsrRcasInputH(hR,hG,hB);
AH1 bL=bB*AH1_(0.5)+(bR*AH1_(0.5)+bG);
AH1 dL=dB*AH1_(0.5)+(dR*AH1_(0.5)+dG);
AH1 eL=eB*AH1_(0.5)+(eR*AH1_(0.5)+eG);
AH1 fL=fB*AH1_(0.5)+(fR*AH1_(0.5)+fG);
AH1 hL=hB*AH1_(0.5)+(hR*AH1_(0.5)+hG);
AH1 nz=AH1_(0.25)*bL+AH1_(0.25)*dL+AH1_(0.25)*fL+AH1_(0.25)*hL-eL;
nz=ASatH1(abs(nz)*APrxMedRcpH1(AMax3H1(AMax3H1(bL,dL,eL),fL,hL)-AMin3H1(AMin3H1(bL,dL,eL),fL,hL)));
nz=AH1_(-0.5)*nz+AH1_(1.0);
AH1 mn4R=min(AMin3H1(bR,dR,fR),hR);
AH1 mn4G=min(AMin3H1(bG,dG,fG),hG);
AH1 mn4B=min(AMin3H1(bB,dB,fB),hB);
AH1 mx4R=max(AMax3H1(bR,dR,fR),hR);
AH1 mx4G=max(AMax3H1(bG,dG,fG),hG);
AH1 mx4B=max(AMax3H1(bB,dB,fB),hB);
AH2 peakC=AH2(1.0,-1.0*4.0);
AH1 hitMinR=mn4R*ARcpH1(AH1_(4.0)*mx4R);
AH1 hitMinG=mn4G*ARcpH1(AH1_(4.0)*mx4G);
AH1 hitMinB=mn4B*ARcpH1(AH1_(4.0)*mx4B);
AH1 hitMaxR=(peakC.x-mx4R)*ARcpH1(AH1_(4.0)*mn4R+peakC.y);
AH1 hitMaxG=(peakC.x-mx4G)*ARcpH1(AH1_(4.0)*mn4G+peakC.y);
AH1 hitMaxB=(peakC.x-mx4B)*ARcpH1(AH1_(4.0)*mn4B+peakC.y);
AH1 lobeR=max(-hitMinR,hitMaxR);
AH1 lobeG=max(-hitMinG,hitMaxG);
AH1 lobeB=max(-hitMinB,hitMaxB);
AH1 lobe=max(AH1_(-FSR_RCAS_LIMIT),min(AMax3H1(lobeR,lobeG,lobeB),AH1_(0.0)))*AH2_AU1(con.y).x;
#ifdef FSR_RCAS_DENOISE
lobe*=nz;
#endif
AH1 rcpL=APrxMedRcpH1(AH1_(4.0)*lobe+AH1_(1.0));
pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
#endif
#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_HX2)
AH4 FsrRcasLoadHx2(ASW2 p);
void FsrRcasInputHx2(inout AH2 r,inout AH2 g,inout AH2 b);
void FsrRcasDepackHx2(out AH4 pix0,out AH4 pix1,AH2 pixR,AH2 pixG,AH2 pixB){
#ifdef A_HLSL
pix0.a=pix1.a=0.0;
#endif
pix0.rgb=AH3(pixR.x,pixG.x,pixB.x);
pix1.rgb=AH3(pixR.y,pixG.y,pixB.y);}
void FsrRcasHx2(
out AH2 pixR,
out AH2 pixG,
out AH2 pixB,
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
out AH2 pixA,
#endif
AU2 ip, // Integer pixel position in output.
AU4 con){ // Constant generated by RcasSetup().
ASW2 sp0=ASW2(ip);
AH3 b0=FsrRcasLoadHx2(sp0+ASW2( 0,-1)).rgb;
AH3 d0=FsrRcasLoadHx2(sp0+ASW2(-1, 0)).rgb;
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
AH4 ee0=FsrRcasLoadHx2(sp0);
AH3 e0=ee0.rgb;pixA.r=ee0.a;
#else
AH3 e0=FsrRcasLoadHx2(sp0).rgb;
#endif
AH3 f0=FsrRcasLoadHx2(sp0+ASW2( 1, 0)).rgb;
AH3 h0=FsrRcasLoadHx2(sp0+ASW2( 0, 1)).rgb;
ASW2 sp1=sp0+ASW2(8,0);
AH3 b1=FsrRcasLoadHx2(sp1+ASW2( 0,-1)).rgb;
AH3 d1=FsrRcasLoadHx2(sp1+ASW2(-1, 0)).rgb;
#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
AH4 ee1=FsrRcasLoadHx2(sp1);
AH3 e1=ee1.rgb;pixA.g=ee1.a;
#else
AH3 e1=FsrRcasLoadHx2(sp1).rgb;
#endif
AH3 f1=FsrRcasLoadHx2(sp1+ASW2( 1, 0)).rgb;
AH3 h1=FsrRcasLoadHx2(sp1+ASW2( 0, 1)).rgb;
AH2 bR=AH2(b0.r,b1.r);
AH2 bG=AH2(b0.g,b1.g);
AH2 bB=AH2(b0.b,b1.b);
AH2 dR=AH2(d0.r,d1.r);
AH2 dG=AH2(d0.g,d1.g);
AH2 dB=AH2(d0.b,d1.b);
AH2 eR=AH2(e0.r,e1.r);
AH2 eG=AH2(e0.g,e1.g);
AH2 eB=AH2(e0.b,e1.b);
AH2 fR=AH2(f0.r,f1.r);
AH2 fG=AH2(f0.g,f1.g);
AH2 fB=AH2(f0.b,f1.b);
AH2 hR=AH2(h0.r,h1.r);
AH2 hG=AH2(h0.g,h1.g);
AH2 hB=AH2(h0.b,h1.b);
FsrRcasInputHx2(bR,bG,bB);
FsrRcasInputHx2(dR,dG,dB);
FsrRcasInputHx2(eR,eG,eB);
FsrRcasInputHx2(fR,fG,fB);
FsrRcasInputHx2(hR,hG,hB);
AH2 bL=bB*AH2_(0.5)+(bR*AH2_(0.5)+bG);
AH2 dL=dB*AH2_(0.5)+(dR*AH2_(0.5)+dG);
AH2 eL=eB*AH2_(0.5)+(eR*AH2_(0.5)+eG);
AH2 fL=fB*AH2_(0.5)+(fR*AH2_(0.5)+fG);
AH2 hL=hB*AH2_(0.5)+(hR*AH2_(0.5)+hG);
AH2 nz=AH2_(0.25)*bL+AH2_(0.25)*dL+AH2_(0.25)*fL+AH2_(0.25)*hL-eL;
nz=ASatH2(abs(nz)*APrxMedRcpH2(AMax3H2(AMax3H2(bL,dL,eL),fL,hL)-AMin3H2(AMin3H2(bL,dL,eL),fL,hL)));
nz=AH2_(-0.5)*nz+AH2_(1.0);
AH2 mn4R=min(AMin3H2(bR,dR,fR),hR);
AH2 mn4G=min(AMin3H2(bG,dG,fG),hG);
AH2 mn4B=min(AMin3H2(bB,dB,fB),hB);
AH2 mx4R=max(AMax3H2(bR,dR,fR),hR);
AH2 mx4G=max(AMax3H2(bG,dG,fG),hG);
AH2 mx4B=max(AMax3H2(bB,dB,fB),hB);
AH2 peakC=AH2(1.0,-1.0*4.0);
AH2 hitMinR=mn4R*ARcpH2(AH2_(4.0)*mx4R);
AH2 hitMinG=mn4G*ARcpH2(AH2_(4.0)*mx4G);
AH2 hitMinB=mn4B*ARcpH2(AH2_(4.0)*mx4B);
AH2 hitMaxR=(peakC.x-mx4R)*ARcpH2(AH2_(4.0)*mn4R+peakC.y);
AH2 hitMaxG=(peakC.x-mx4G)*ARcpH2(AH2_(4.0)*mn4G+peakC.y);
AH2 hitMaxB=(peakC.x-mx4B)*ARcpH2(AH2_(4.0)*mn4B+peakC.y);
AH2 lobeR=max(-hitMinR,hitMaxR);
AH2 lobeG=max(-hitMinG,hitMaxG);
AH2 lobeB=max(-hitMinB,hitMaxB);
AH2 lobe=max(AH2_(-FSR_RCAS_LIMIT),min(AMax3H2(lobeR,lobeG,lobeB),AH2_(0.0)))*AH2_(AH2_AU1(con.y).x);
#ifdef FSR_RCAS_DENOISE
lobe*=nz;
#endif
AH2 rcpL=APrxMedRcpH2(AH2_(4.0)*lobe+AH2_(1.0));
pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
#endif
#if defined(A_GPU)
void FsrLfgaF(inout AF3 c,AF3 t,AF1 a){c+=(t*AF3_(a))*min(AF3_(1.0)-c,c);}
#endif
#if defined(A_GPU)&&defined(A_HALF)
void FsrLfgaH(inout AH3 c,AH3 t,AH1 a){c+=(t*AH3_(a))*min(AH3_(1.0)-c,c);}
void FsrLfgaHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 tR,AH2 tG,AH2 tB,AH1 a){
cR+=(tR*AH2_(a))*min(AH2_(1.0)-cR,cR);cG+=(tG*AH2_(a))*min(AH2_(1.0)-cG,cG);cB+=(tB*AH2_(a))*min(AH2_(1.0)-cB,cB);}
#endif
#if defined(A_GPU)
void FsrSrtmF(inout AF3 c){c*=AF3_(ARcpF1(AMax3F1(c.r,c.g,c.b)+AF1_(1.0)));}
void FsrSrtmInvF(inout AF3 c){c*=AF3_(ARcpF1(max(AF1_(1.0/32768.0),AF1_(1.0)-AMax3F1(c.r,c.g,c.b))));}
#endif
#if defined(A_GPU)&&defined(A_HALF)
void FsrSrtmH(inout AH3 c){c*=AH3_(ARcpH1(AMax3H1(c.r,c.g,c.b)+AH1_(1.0)));}
void FsrSrtmInvH(inout AH3 c){c*=AH3_(ARcpH1(max(AH1_(1.0/32768.0),AH1_(1.0)-AMax3H1(c.r,c.g,c.b))));}
void FsrSrtmHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){
AH2 rcp=ARcpH2(AMax3H2(cR,cG,cB)+AH2_(1.0));cR*=rcp;cG*=rcp;cB*=rcp;}
void FsrSrtmInvHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){
AH2 rcp=ARcpH2(max(AH2_(1.0/32768.0),AH2_(1.0)-AMax3H2(cR,cG,cB)));cR*=rcp;cG*=rcp;cB*=rcp;}
#endif
#if defined(A_GPU)
// Derives a dither value from the integer position p and the integer offset f.
// Computes (p.x+f)*a + p.y*b with a=(1+sqrt(5))/2 and b=1/3.69, then returns AFractF1 of the sum.
// NOTE(review): f is presumably a frame counter and AFractF1 presumably the fractional part - confirm against ffx_a.h
AF1 FsrTepdDitF(AU2 p,AU1 f){
AF1 x=AF1_(p.x+f);
AF1 y=AF1_(p.y);
AF1 a=AF1_((1.0+sqrt(5.0))/2.0);
AF1 b=AF1_(1.0/3.69);
x=x*a+(y*b);
return AFractF1(x);}
// Quantises c in place to steps of 1/255 in the square root domain, using dit to pick between two adjacent steps.
void FsrTepdC8F(inout AF3 c,AF1 dit){
// n is sqrt(c) rounded down to a multiple of 1/255
AF3 n=sqrt(c);
n=floor(n*AF3_(255.0))*AF3_(1.0/255.0);
// a and b are the squares of the lower step n and of the next step n+1/255
AF3 a=n*n;
AF3 b=n+AF3_(1.0/255.0);b=b*b;
// r=(c-b)/(a-b) using an approximate reciprocal: 1 when c equals a, 0 when c equals b
AF3 r=(c-b)*APrxMedRcpF3(a-b);
// Adds one step of 1/255 when AGtZeroF3 flags dit-r (presumably when dit>r - confirm), then saturates
c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/255.0));}
// Same as FsrTepdC8F but with steps of 1/1023.
void FsrTepdC10F(inout AF3 c,AF1 dit){
AF3 n=sqrt(c);
n=floor(n*AF3_(1023.0))*AF3_(1.0/1023.0);
AF3 a=n*n;
AF3 b=n+AF3_(1.0/1023.0);b=b*b;
AF3 r=(c-b)*APrxMedRcpF3(a-b);
c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/1023.0));}
#endif
#if defined(A_GPU)&&defined(A_HALF)
// Half precision result variant of the dither value generator.
// The position math is still done on AF1 values; only the returned value is converted to AH1.
AH1 FsrTepdDitH(AU2 p,AU1 f){
AF1 x=AF1_(p.x+f);
AF1 y=AF1_(p.y);
AF1 a=AF1_((1.0+sqrt(5.0))/2.0);
AF1 b=AF1_(1.0/3.69);
x=x*a+(y*b);
return AH1(AFractF1(x));}
// Half precision version of the 1/255 step quantiser: rounds sqrt(c) down to a step and uses dit to decide whether to add one more step.
void FsrTepdC8H(inout AH3 c,AH1 dit){
AH3 n=sqrt(c);
n=floor(n*AH3_(255.0))*AH3_(1.0/255.0);
AH3 a=n*n;
AH3 b=n+AH3_(1.0/255.0);b=b*b;
AH3 r=(c-b)*APrxMedRcpH3(a-b);
c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/255.0));}
// Same as FsrTepdC8H but with steps of 1/1023.
void FsrTepdC10H(inout AH3 c,AH1 dit){
AH3 n=sqrt(c);
n=floor(n*AH3_(1023.0))*AH3_(1.0/1023.0);
AH3 a=n*n;
AH3 b=n+AH3_(1.0/1023.0);b=b*b;
AH3 r=(c-b)*APrxMedRcpH3(a-b);
c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/1023.0));}
// Packed dither generator: returns two values, the first for position p and the second for the position 8 further along x.
AH2 FsrTepdDitHx2(AU2 p,AU1 f){
AF2 x;
x.x=AF1_(p.x+f);
// second lane uses x+8
x.y=x.x+AF1_(8.0);
AF1 y=AF1_(p.y);
AF1 a=AF1_((1.0+sqrt(5.0))/2.0);
AF1 b=AF1_(1.0/3.69);
x=x*AF2_(a)+AF2_(y*b);
return AH2(AFractF2(x));}
// Packed 1/255 step quantiser: runs the FsrTepdC8H steps on the R, G and B channel vectors separately, each with the per lane dither value dit.
void FsrTepdC8Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){
// n*: square root of each channel, rounded down to a multiple of 1/255
AH2 nR=sqrt(cR);
AH2 nG=sqrt(cG);
AH2 nB=sqrt(cB);
nR=floor(nR*AH2_(255.0))*AH2_(1.0/255.0);
nG=floor(nG*AH2_(255.0))*AH2_(1.0/255.0);
nB=floor(nB*AH2_(255.0))*AH2_(1.0/255.0);
// a*: square of the lower step, b*: square of the next step
AH2 aR=nR*nR;
AH2 aG=nG*nG;
AH2 aB=nB*nB;
AH2 bR=nR+AH2_(1.0/255.0);bR=bR*bR;
AH2 bG=nG+AH2_(1.0/255.0);bG=bG*bG;
AH2 bB=nB+AH2_(1.0/255.0);bB=bB*bB;
// r*: (c-b)/(a-b) using an approximate reciprocal
AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR);
AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG);
AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB);
// Conditionally add one step based on dit-r, then saturate
cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/255.0));
cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/255.0));
cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/255.0));}
// Same as FsrTepdC8Hx2 but with steps of 1/1023.
void FsrTepdC10Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){
AH2 nR=sqrt(cR);
AH2 nG=sqrt(cG);
AH2 nB=sqrt(cB);
nR=floor(nR*AH2_(1023.0))*AH2_(1.0/1023.0);
nG=floor(nG*AH2_(1023.0))*AH2_(1.0/1023.0);
nB=floor(nB*AH2_(1023.0))*AH2_(1.0/1023.0);
AH2 aR=nR*nR;
AH2 aG=nG*nG;
AH2 aB=nB*nB;
AH2 bR=nR+AH2_(1.0/1023.0);bR=bR*bR;
AH2 bG=nG+AH2_(1.0/1023.0);bG=bG*bG;
AH2 bB=nB+AH2_(1.0/1023.0);bB=bB*bB;
AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR);
AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG);
AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB);
cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/1023.0));
cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/1023.0));
cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/1023.0));}
#endif
)--RPCS3--"

View File

@ -0,0 +1,350 @@
#include "../../vkutils/barriers.h"
#include "../../VKHelpers.h"
#include "../../VKResourceManager.h"
#include "../fsr_pass.h"
// Pull in the CPU-side half of AMD's FSR headers (A_CPU selects it). This provides
// FsrEasuCon/FsrRcasCon, which the passes below use to fill their push constants.
// The vendored directory is "3rdparty" (all lowercase); the include path has to match
// that spelling exactly or the build fails on case-sensitive filesystems.
#define A_CPU 1
#include "3rdparty/GPUOpen/include/ffx_a.h"
#include "3rdparty/GPUOpen/include/ffx_fsr1.h"
#undef A_CPU
namespace vk
{
namespace FidelityFX
{
fsr_pass::fsr_pass(const std::string& config_definitions, u32 push_constants_size_)
{
	// The FSR configuration vectors travel as push constants. Zero the staging block up front
	// so that slots a pass never writes are not sent to the GPU with indeterminate contents.
	use_push_constants = true;
	push_constants_size = push_constants_size_;
	memset(m_constants_buf, 0, sizeof(m_constants_buf));

	// Shader template: AMD-provided sample source with minimal modification
	const char* ubershader_template =
	#include "fsr_ubershader.glsl"
	;

	// Texts substituted for the %...% placeholders inside the template
	const char* ffx_a_source =
	#include "fsr_ffx_a_flattened.inc"
	;

	const char* ffx_fsr1_source =
	#include "fsr_ffx_fsr1_flattened.inc"
	;

	// config_definitions supplies the per-pass #defines that select EASU or RCAS
	m_src = ubershader_template;
	m_src = fmt::replace_all(m_src, { {"%FFX_DEFINITIONS%", config_definitions }, {"%FFX_A_IMPORT%", ffx_a_source}, {"%FFX_FSR_IMPORT%", ffx_fsr1_source}});

	create();
}
std::vector<std::pair<VkDescriptorType, u8>> fsr_pass::get_descriptor_layout()
{
return
{
{ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1 },
{ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1 }
};
}
void fsr_pass::declare_inputs()
{
std::vector<vk::glsl::program_input> inputs =
{
{
::glsl::program_domain::glsl_compute_program,
vk::glsl::program_input_type::input_type_texture,
{}, {},
0,
"InputTexture"
},
{
::glsl::program_domain::glsl_compute_program,
vk::glsl::program_input_type::input_type_texture,
{}, {},
1,
"OutputTexture"
}
};
m_program->load_uniforms(inputs);
}
void fsr_pass::bind_resources()
{
	// The sampler is created the first time resources are bound and then reused
	if (!m_sampler)
	{
		constexpr auto address_mode = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
		constexpr auto filter_mode = VK_FILTER_LINEAR;

		const auto pdev = vk::get_current_renderer();
		m_sampler = std::make_unique<vk::sampler>(*pdev,
			address_mode, address_mode, address_mode,
			VK_FALSE, 0.f, 1.f, 0.f, 0.f, filter_mode, filter_mode, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
	}

	// Both images are bound in whatever layout they are currently tracked in
	const auto input_layout = m_input_image->image()->current_layout;
	const auto output_layout = m_output_image->image()->current_layout;

	m_program->bind_uniform({ m_sampler->value, m_input_image->value, input_layout }, "InputTexture", VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_descriptor_set);
	m_program->bind_uniform({ VK_NULL_HANDLE, m_output_image->value, output_layout }, "OutputTexture", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set);
}
void fsr_pass::run(const vk::command_buffer& cmd, vk::viewable_image* src, vk::viewable_image* dst, const size2u& input_size, const size2u& output_size)
{
	// Remember the request; configure() and bind_resources() read these members
	m_input_size = input_size;
	m_output_size = output_size;
	m_input_image = src->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector);
	m_output_image = dst->get_view(VK_REMAP_IDENTITY, rsx::default_remap_vector);

	// Let the concrete pass upload its push constants
	configure(cmd);

	// One workgroup per 16x16 tile of the output, rounded up so partial tiles are covered
	constexpr auto tile_size = 16;
	const auto groups_x = utils::aligned_div(output_size.width, tile_size);
	const auto groups_y = utils::aligned_div(output_size.height, tile_size);

	// Sanity check of the helper against the explicit round-up formula
	ensure(groups_x == (output_size.width + (tile_size - 1)) / tile_size);
	ensure(groups_y == (output_size.height + (tile_size - 1)) / tile_size);

	compute_task::run(cmd, groups_x, groups_y, 1);
}
// Upscaling pass: compiles the shared FSR shader with only the EASU path enabled.
// The push constant block is 80 bytes, i.e. the five uvec4 members the shader declares for EASU.
easu_pass::easu_pass()
	: fsr_pass(
		"#define SAMPLE_EASU 1\n"
		"#define SAMPLE_RCAS 0\n"
		"#define SAMPLE_BILINEAR 0\n"
		"#define SAMPLE_SLOW_FALLBACK 1",
		80 // 5*VEC4
	)
{}

// Fills the four EASU configuration vectors and records them as push constants.
void easu_pass::configure(const vk::command_buffer& cmd)
{
	const auto source = m_input_image->image();

	// Incoming viewport size to upscale (actual size)
	const auto viewport_w = static_cast<f32>(m_input_size.width);
	const auto viewport_h = static_cast<f32>(m_input_size.height);

	// Size of the raw image to upscale (in case viewport does not cover it all)
	const auto image_w = static_cast<f32>(source->width());
	const auto image_h = static_cast<f32>(source->height());

	// Size of output viewport (target size)
	const auto target_w = static_cast<f32>(m_output_size.width);
	const auto target_h = static_cast<f32>(m_output_size.height);

	// Each configuration vector occupies 4 consecutive u32 slots.
	// NOTE: Configuration vector 4 is unused as we do not support HDR natively
	FsrEasuCon(&m_constants_buf[0], &m_constants_buf[4], &m_constants_buf[8], &m_constants_buf[12],
		viewport_w, viewport_h,
		image_w, image_h,
		target_w, target_h);

	vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf);
}
// Sharpening pass: compiles the shared FSR shader with only the RCAS path enabled.
// The push constant block is 32 bytes, i.e. the two uvec4 members the shader declares for RCAS.
rcas_pass::rcas_pass()
	: fsr_pass(
		"#define SAMPLE_RCAS 1\n"
		"#define SAMPLE_EASU 0\n"
		"#define SAMPLE_BILINEAR 0\n"
		"#define SAMPLE_SLOW_FALLBACK 1",
		32 // 2*VEC4
	)
{}

// Converts the configured sharpening intensity into the RCAS constant and records it as push constants.
void rcas_pass::configure(const vk::command_buffer& cmd)
{
	// The setting is divided by 50 and subtracted from 2, so a larger intensity gives a smaller attenuation.
	// 0 is actually the sharpest with 2 being the chosen limit. Each progressive unit 'halves' the sharpening intensity.
	const auto scaled_intensity = g_cfg.video.vk.rcas_sharpening_intensity / 50.f;
	auto cas_attenuation = 2.f - scaled_intensity;

	FsrRcasCon(&m_constants_buf[0], cas_attenuation);

	vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf);
}
} // Namespace FidelityFX
void fsr_upscale_pass::dispose_images()
{
	// Releases one image slot. Slots holding a live Vulkan handle are handed to the
	// resource manager; objects without a handle (failed allocations) are simply dropped.
	const auto release = [](auto& slot)
	{
		if (!slot)
		{
			return;
		}

		if (slot->value)
		{
			vk::get_resource_manager()->dispose(slot);
		}
		else
		{
			slot.reset();
		}
	};

	release(m_output_left);
	release(m_output_right);
	release(m_intermediate_data);
}
void fsr_upscale_pass::initialize_image(u32 output_w, u32 output_h, rsx::flags32_t mode)
{
	// Always start from a clean slate; every previously held image is released first
	dispose_images();

	// Creates one output-sized image. Creation is allowed to fail, in which case the
	// returned object carries a null handle that the callers below check for.
	const auto create_target = [output_w, output_h](VkImageUsageFlags usage)
	{
		const auto pdev = vk::get_current_renderer();
		return std::make_unique<vk::viewable_image>(
			*pdev,                                    // Owner
			pdev->get_memory_mapping().device_local,  // Must be in device optimal memory
			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			VK_IMAGE_TYPE_2D,
			VK_FORMAT_B8G8R8A8_UNORM,                 // The only format guaranteed by spec
			output_w, output_h, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, // Dimensions (w, h, d, mips, layers, samples)
			VK_IMAGE_LAYOUT_UNDEFINED,
			VK_IMAGE_TILING_OPTIMAL,
			usage,
			VK_IMAGE_CREATE_ALLOW_NULL,               // Allow creation to fail if there is no memory
			VMM_ALLOCATION_POOL_SWAPCHAIN,
			RSX_FORMAT_CLASS_COLOR);
	};

	// Final outputs may additionally be used as a transfer source; the intermediate image may not
	constexpr VkImageUsageFlags output_usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
	constexpr VkImageUsageFlags intermediate_usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;

	// Stop allocating as soon as one image could not be created
	bool success = true;

	if (mode & UPSCALE_LEFT_VIEW)
	{
		m_output_left = create_target(output_usage);
		success = (m_output_left->value != VK_NULL_HANDLE);
	}

	if (success && (mode & UPSCALE_RIGHT_VIEW))
	{
		m_output_right = create_target(output_usage);
		success = (m_output_right->value != VK_NULL_HANDLE);
	}

	if (success)
	{
		m_intermediate_data = create_target(intermediate_usage);
		success = (m_intermediate_data->value != VK_NULL_HANDLE);
	}

	if (success)
	{
		return;
	}

	// Partial allocations are useless; drop everything so the caller sees no output image
	rsx_log.warning("FSR is enabled, but the system is out of memory. Will fall back to bilinear upscaling");
	dispose_images();
}
// Upscales src with FSR (EASU followed by RCAS) and optionally blits the result to the present surface.
// - The FSR path only runs when the request enlarges the image in both dimensions and the
//   working images could be allocated; otherwise src itself is used as the result.
// - With UPSCALE_AND_COMMIT set, the result is blitted to present_surface and nullptr is returned.
// - Without it, the image holding the result (the FSR output or src) is returned.
vk::viewable_image* fsr_upscale_pass::scale_output(
const vk::command_buffer& cmd,
vk::viewable_image* src,
VkImage present_surface,
VkImageLayout present_surface_layout,
const VkImageBlit& request,
rsx::flags32_t mode)
{
// Blit offsets may be given in either order (mirrored/flipped), so sizes are absolute differences
size2u input_size, output_size;
input_size.width = std::abs(request.srcOffsets[1].x - request.srcOffsets[0].x);
input_size.height = std::abs(request.srcOffsets[1].y - request.srcOffsets[0].y);
output_size.width = std::abs(request.dstOffsets[1].x - request.dstOffsets[0].x);
output_size.height = std::abs(request.dstOffsets[1].y - request.dstOffsets[0].y);
// Defaults describe a plain blit straight from src; the FSR path below overrides them
auto src_image = src;
auto target_image = present_surface;
auto target_image_layout = present_surface_layout;
auto output_request = request;
if (input_size.width < output_size.width && input_size.height < output_size.height)
{
// Cannot upscale both LEFT and RIGHT images at the same time.
// Default maps to LEFT for simplicity
ensure((mode & (UPSCALE_LEFT_VIEW | UPSCALE_RIGHT_VIEW)) != (UPSCALE_LEFT_VIEW | UPSCALE_RIGHT_VIEW));
auto& m_output_data = (mode & UPSCALE_LEFT_VIEW) ? m_output_left : m_output_right;
// (Re)allocate when there is no output for this view yet or the target size changed.
// NOTE(review): initialize_image() first disposes every image, including the other view's output,
// so alternating LEFT/RIGHT requests would reallocate on each call - confirm this is acceptable.
if (!m_output_data || m_output_data->width() != output_size.width || m_output_data->height() != output_size.height)
{
initialize_image(output_size.width, output_size.height, mode);
}
// Still null here means allocation failed; fall through to the plain path using src
if (m_output_data)
{
// Execute the pass here
auto cs_easu_task = vk::get_compute_task<vk::FidelityFX::easu_pass>();
auto cs_rcas_task = vk::get_compute_task<vk::FidelityFX::rcas_pass>();
// Prepare for EASU pass
src->push_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
if (m_intermediate_data->current_layout != VK_IMAGE_LAYOUT_GENERAL)
{
m_intermediate_data->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL);
}
else
{
// R/W CS-CS barrier in case of back-to-back upscales
// (earlier shader reads of the intermediate image must finish before EASU overwrites it)
vk::insert_image_memory_barrier(cmd,
m_intermediate_data->value,
m_intermediate_data->current_layout, m_intermediate_data->current_layout,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
}
// EASU
cs_easu_task->run(cmd, src, m_intermediate_data.get(), input_size, output_size);
// Prepare for RCAS pass
m_output_data->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL);
// R/W CS-CS barrier before RCAS
// (EASU writes to the intermediate image must be visible to the RCAS reads)
vk::insert_image_memory_barrier(cmd,
m_intermediate_data->value,
m_intermediate_data->current_layout, m_intermediate_data->current_layout,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_ACCESS_SHADER_READ_BIT,
{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
// RCAS
cs_rcas_task->run(cmd, m_intermediate_data.get(), m_output_data.get(), input_size, output_size);
// Cleanup
src->pop_layout(cmd);
// Swap input for FSR target
src_image = m_output_data.get();
// Update output parameters to match expected output
if (mode & UPSCALE_AND_COMMIT)
{
// Explicit CS-Transfer barrier
vk::insert_image_memory_barrier(cmd,
m_output_data->value,
m_output_data->current_layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_ACCESS_TRANSFER_READ_BIT,
{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
// The barrier above was issued by hand, so the tracked layout is updated by hand as well
m_output_data->current_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
// The FSR output is already at target size, so the blit source now covers the whole output image
output_request.srcOffsets[0].x = 0;
output_request.srcOffsets[1].x = output_size.width;
output_request.srcOffsets[0].y = 0;
output_request.srcOffsets[1].y = output_size.height;
// Preserve mirroring/flipping
if (request.srcOffsets[0].x > request.srcOffsets[1].x)
{
std::swap(output_request.srcOffsets[0].x, output_request.srcOffsets[1].x);
}
if (request.srcOffsets[0].y > request.srcOffsets[1].y)
{
std::swap(output_request.srcOffsets[0].y, output_request.srcOffsets[1].y);
}
}
}
}
if (mode & UPSCALE_AND_COMMIT)
{
// Commit: blit whichever image holds the result (FSR output or the untouched src) to the present target
src_image->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vkCmdBlitImage(cmd, src_image->value, src_image->current_layout, target_image, target_image_layout, 1, &output_request, VK_FILTER_LINEAR);
src_image->pop_layout(cmd);
return nullptr;
}
// No commit requested: hand the result image back to the caller
return src_image;
}
}

View File

@ -0,0 +1,108 @@
R"(
#version 450
#define A_GPU 1
#define A_GLSL 1
%FFX_DEFINITIONS%
// Push constant block holding the FSR configuration vectors.
// EASU builds declare five uvec4 (Const0..Const3 + Sample), RCAS builds declare two (Const0 + Sample).
#if defined(SAMPLE_EASU) || defined(SAMPLE_RCAS)
layout(push_constant) uniform const_buffer
{
uvec4 Const0;
#if SAMPLE_EASU
uvec4 Const1;
uvec4 Const2;
uvec4 Const3;
#endif
// Sample.x == 1 makes CurrFilter square the colour before storing it
uvec4 Sample;
};
#endif
%FFX_A_IMPORT%
// Source image (sampled) and destination image (written with imageStore)
layout(set=0,binding=0) uniform sampler2D InputTexture;
layout(set=0,binding=1,rgba8) uniform image2D OutputTexture;
// Callbacks that the imported FSR code expects the including shader to provide.
// Half precision variants are used when A_HALF is set, 32-bit float variants otherwise.
#if A_HALF
#if SAMPLE_EASU
#define FSR_EASU_H 1
// Gather the red / green / blue component of the four texels around p
AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
#endif
#if SAMPLE_RCAS
#define FSR_RCAS_H
// Fetch one texel of mip 0 at integer position p
AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
// Input transform hook; intentionally empty, the loaded colour is used as is
void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){}
#endif
#else
#if SAMPLE_EASU
#define FSR_EASU_F 1
// Gather the red / green / blue component of the four texels around p
AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
#endif
#if SAMPLE_RCAS
#define FSR_RCAS_F
// Fetch one texel of mip 0 at integer position p
AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
// Input transform hook; intentionally empty, the loaded colour is used as is
void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
#endif
#endif
#if defined(SAMPLE_EASU) || defined(SAMPLE_RCAS)
%FFX_FSR_IMPORT%
#endif
// Computes the output colour for the single pixel at pos and stores it to OutputTexture.
// Exactly one of the SAMPLE_* sections is expected to be enabled per compiled variant.
// SAMPLE_SLOW_FALLBACK selects the 32-bit float FSR entry points over the half precision ones.
void CurrFilter(AU2 pos)
{
#if SAMPLE_BILINEAR
// Plain bilinear fetch. This path reads Const1, which the push constant block only declares when SAMPLE_EASU is set
AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0));
#endif
#if SAMPLE_EASU
#if SAMPLE_SLOW_FALLBACK
AF3 c;
FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
// Optional squaring of the result, controlled by the Sample push constant
if( Sample.x == 1 )
c *= c;
// Alpha is always written as 1
imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
#else
AH3 c;
FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
if( Sample.x == 1 )
c *= c;
imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
#endif
#endif
#if SAMPLE_RCAS
#if SAMPLE_SLOW_FALLBACK
AF3 c;
FsrRcasF(c.r, c.g, c.b, pos, Const0);
// Optional squaring of the result, controlled by the Sample push constant
if( Sample.x == 1 )
c *= c;
// Alpha is always written as 1
imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
#else
AH3 c;
FsrRcasH(c.r, c.g, c.b, pos, Const0);
if( Sample.x == 1 )
c *= c;
imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
#endif
#endif
}
// 64 invocations per workgroup. Each workgroup covers a 16x16 pixel tile of the output
// (the workgroup id is scaled by 16) and every invocation filters four pixels of that tile.
layout(local_size_x=64) in;
void main()
{
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
// Visit the same local position in each 8x8 quadrant of the tile: (x,y), (x+8,y), (x+8,y+8), (x,y+8)
CurrFilter(gxy);
gxy.x += 8u;
CurrFilter(gxy);
gxy.y += 8u;
CurrFilter(gxy);
gxy.x -= 8u;
CurrFilter(gxy);
}
)"

View File

@ -0,0 +1,69 @@
#pragma once
#include "../vkutils/sampler.h"
#include "../VKCompute.h"
#include "upscaling.h"
namespace vk
{
namespace FidelityFX
{
// Common base of the FSR compute passes. Holds the state shared by both passes
// (sampler, current input/output views, sizes, push constant staging buffer) and
// leaves the pass-specific constant setup to configure().
class fsr_pass : public compute_task
{
protected:
// Sampler used for the input texture; created on first use by bind_resources()
std::unique_ptr<vk::sampler> m_sampler;
// Views of the images used by the current run(); set at the start of every run()
const vk::image_view* m_input_image = nullptr;
const vk::image_view* m_output_image = nullptr;
// Sizes passed to the current run()
size2u m_input_size;
size2u m_output_size;
// Staging storage for the push constants: 20 x u32 = 80 bytes
u32 m_constants_buf[20];
std::vector<std::pair<VkDescriptorType, u8>> get_descriptor_layout() override;
void declare_inputs() override;
void bind_resources() override;
// Called by run() before dispatch; implementations fill m_constants_buf and push it to cmd
virtual void configure(const vk::command_buffer& cmd) = 0;
public:
// config_definitions: preprocessor defines inserted into the shader source
// push_constants_size_: number of bytes of m_constants_buf to push
fsr_pass(const std::string& config_definitions, u32 push_constants_size_);
// Runs the pass reading from src and writing to dst
void run(const vk::command_buffer& cmd, vk::viewable_image* src, vk::viewable_image* dst, const size2u& input_size, const size2u& output_size);
};
// Upscaling (EASU) pass
class easu_pass : public fsr_pass
{
// Fills and pushes the EASU configuration vectors
void configure(const vk::command_buffer& cmd) override;
public:
easu_pass();
};
// Sharpening (RCAS) pass
class rcas_pass : public fsr_pass
{
// Fills and pushes the RCAS configuration vector
void configure(const vk::command_buffer& cmd) override;
public:
rcas_pass();
};
}
// Upscaler implementation that chains the EASU and RCAS passes.
// Owns the images the passes write to.
class fsr_upscale_pass : public upscaler
{
// Final FSR output for the left (default) and right views
std::unique_ptr<vk::viewable_image> m_output_left;
std::unique_ptr<vk::viewable_image> m_output_right;
// EASU result, consumed by RCAS
std::unique_ptr<vk::viewable_image> m_intermediate_data;
// Releases all three images
void dispose_images();
// Releases the current images, then creates the output for the view(s) selected by mode plus the intermediate image
void initialize_image(u32 output_w, u32 output_h, rsx::flags32_t mode);
public:
// Returns nullptr when mode contains UPSCALE_AND_COMMIT (the result was blitted to present_surface),
// otherwise the image holding the result
vk::viewable_image* scale_output(
const vk::command_buffer& cmd, // CB
vk::viewable_image* src, // Source input
VkImage present_surface, // Present target. May be VK_NULL_HANDLE for some passes
VkImageLayout present_surface_layout, // Present surface layout, or VK_IMAGE_LAYOUT_UNDEFINED if no present target is provided
const VkImageBlit& request, // Scaling request information
rsx::flags32_t mode // Mode
) override;
};
}

View File

@ -0,0 +1,36 @@
#pragma once
#include "util/types.hpp"
#include "../vkutils/commands.h"
#include "../vkutils/image.h"
namespace vk
{
// Bit flags accepted by upscaler::scale_output() in its mode argument
namespace upscaling_flags_
{
enum upscaling_flags
{
// Shares its value with UPSCALE_LEFT_VIEW: the default view is handled as the left view
UPSCALE_DEFAULT_VIEW = (1 << 0),
UPSCALE_LEFT_VIEW = (1 << 0),
UPSCALE_RIGHT_VIEW = (1 << 1),
// Also blit the result to the present surface
UPSCALE_AND_COMMIT = (1 << 2)
};
}
using namespace upscaling_flags_;
// Interface implemented by every output upscaler.
struct upscaler
{
	virtual ~upscaler() = default;

	// Scales the region of src described by request.
	// mode is a combination of upscaling_flags values.
	// NOTE(review): the FSR implementation returns nullptr after committing to the present
	// surface and the result image otherwise - presumably the contract for all implementations.
	virtual vk::viewable_image* scale_output(
		const vk::command_buffer& cmd,          // CB
		vk::viewable_image* src,                // Source input
		VkImage present_surface,                // Present target. May be VK_NULL_HANDLE for some passes
		VkImageLayout present_surface_layout,   // Present surface layout, or VK_IMAGE_LAYOUT_UNDEFINED if no present target is provided
		const VkImageBlit& request,             // Scaling request information
		rsx::flags32_t mode                     // Mode
	) = 0;
};
}

View File

@ -175,8 +175,10 @@ struct cfg_root : cfg::node
cfg::string adapter{ this, "Adapter" };
cfg::_bool force_fifo{ this, "Force FIFO present mode" };
cfg::_bool force_primitive_restart{ this, "Force primitive restart flag" };
cfg::_bool force_disable_exclusive_fullscreen_mode{this, "Force Disable Exclusive Fullscreen Mode"};
cfg::_bool force_disable_exclusive_fullscreen_mode{ this, "Force Disable Exclusive Fullscreen Mode" };
cfg::_bool asynchronous_texture_streaming{ this, "Asynchronous Texture Streaming 2", false };
cfg::_bool fsr_upscaling{ this, "Enable FidelityFX Super Resolution Upscaling", false, true };
cfg::uint<0, 100> rcas_sharpening_intensity{ this, "FidelityFX CAS Sharpening Intensity", 50, true };
cfg::_enum<vk_gpu_scheduler_mode> asynchronous_scheduler{ this, "Asynchronous Queue Scheduler", vk_gpu_scheduler_mode::device };
} vk{ this };

View File

@ -11,6 +11,9 @@
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\VK\upscalers\bilinear_pass.hpp" />
<ClInclude Include="Emu\RSX\VK\upscalers\fsr_pass.h" />
<ClInclude Include="Emu\RSX\VK\upscalers\upscaling.h" />
<ClInclude Include="Emu\RSX\VK\VKAsyncScheduler.h" />
<ClInclude Include="Emu\RSX\VK\VKCommandStream.h" />
<ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" />
@ -57,6 +60,7 @@
<ClInclude Include="Emu\RSX\VK\VulkanAPI.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="Emu\RSX\VK\upscalers\fsr1\fsr_pass.cpp" />
<ClCompile Include="Emu\RSX\VK\VKAsyncScheduler.cpp" />
<ClCompile Include="Emu\RSX\VK\VKCommandStream.cpp" />
<ClCompile Include="Emu\RSX\VK\VKCommonDecompiler.cpp" />
@ -102,6 +106,11 @@
<Project>{c4a10229-4712-4bd2-b63e-50d93c67a038}</Project>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ffx_a_flattened.inc" />
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ffx_fsr1_flattened.inc" />
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ubershader.glsl" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{3EE5F075-B546-42C4-B6A8-E3CCEF38B78D}</ProjectGuid>
<Keyword>Win32Proj</Keyword>

View File

@ -66,6 +66,9 @@
<ClCompile Include="Emu\RSX\VK\VKCompute.cpp" />
<ClCompile Include="Emu\RSX\VK\VKAsyncScheduler.cpp" />
<ClCompile Include="Emu\RSX\VK\VKRenderTargets.cpp" />
<ClCompile Include="Emu\RSX\VK\upscalers\fsr1\fsr_pass.cpp">
<Filter>upscalers\fsr1</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\VK\VKCommonDecompiler.h" />
@ -152,10 +155,36 @@
<Filter>vkutils</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\VKAsyncScheduler.h" />
<ClInclude Include="Emu\RSX\VK\upscalers\bilinear_pass.hpp">
<Filter>upscalers</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\upscalers\fsr_pass.h">
<Filter>upscalers</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\upscalers\upscaling.h">
<Filter>upscalers</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="vkutils">
<UniqueIdentifier>{2c6cb5a5-ed99-44fe-a0b6-7ba1949c8b29}</UniqueIdentifier>
</Filter>
<Filter Include="upscalers">
<UniqueIdentifier>{7294bfa7-a561-4032-8b96-256afbb7476d}</UniqueIdentifier>
</Filter>
<Filter Include="upscalers\fsr1">
<UniqueIdentifier>{8387b0fc-178f-4c9c-9cf2-03df99ce4df2}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ffx_a_flattened.inc">
<Filter>upscalers\fsr1</Filter>
</None>
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ffx_fsr1_flattened.inc">
<Filter>upscalers\fsr1</Filter>
</None>
<None Include="Emu\RSX\VK\upscalers\fsr1\fsr_ubershader.glsl">
<Filter>upscalers\fsr1</Filter>
</None>
</ItemGroup>
</Project>

View File

@ -71,6 +71,8 @@ enum class emu_settings_type
AnisotropicFilterOverride,
ResolutionScale,
MinimumScalableDimension,
FsrUpscalingEnable,
FsrSharpeningStrength,
ForceCPUBlitEmulation,
DisableOnDiskShaderCache,
DisableVulkanMemAllocator,
@ -240,6 +242,8 @@ inline static const QMap<emu_settings_type, cfg_location> settings_location =
// Vulkan
{ emu_settings_type::VulkanAsyncTextureUploads, { "Video", "Vulkan", "Asynchronous Texture Streaming 2"}},
{ emu_settings_type::VulkanAsyncSchedulerDriver, { "Video", "Vulkan", "Asynchronous Queue Scheduler"}},
{ emu_settings_type::FsrUpscalingEnable, { "Video", "Vulkan", "Enable FidelityFX Super Resolution Upscaling"}},
{ emu_settings_type::FsrSharpeningStrength, { "Video", "Vulkan", "FidelityFX CAS Sharpening Intensity"}},
// Performance Overlay
{ emu_settings_type::PerfOverlayEnabled, { "Video", "Performance Overlay", "Enabled" } },

View File

@ -469,6 +469,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
m_emu_settings->EnhanceCheckBox(ui->asyncTextureStreaming, emu_settings_type::VulkanAsyncTextureUploads);
SubscribeTooltip(ui->asyncTextureStreaming, tooltips.settings.async_texture_streaming);
m_emu_settings->EnhanceCheckBox(ui->fsrUpscalingEnable, emu_settings_type::FsrUpscalingEnable);
SubscribeTooltip(ui->fsrUpscalingEnable, tooltips.settings.fsr_upscaling);
// Radio buttons
SubscribeTooltip(ui->rb_legacy_recompiler, tooltips.settings.legacy_shader_recompiler);
@ -541,6 +544,29 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
ui->minimumScalableDimension->setValue(minimum_scalable_dimension_def);
});
const int fsr_sharpening_strength_def = stoi(m_emu_settings->GetSettingDefault(emu_settings_type::FsrSharpeningStrength));
auto fmt_fsr_sharpening_strength = [fsr_sharpening_strength_def](int value)
{
if (value == fsr_sharpening_strength_def)
{
return tr("%1% (Default)").arg(value);
}
return tr("%1%").arg(value);
};
m_emu_settings->EnhanceSlider(ui->fsrSharpeningStrength, emu_settings_type::FsrSharpeningStrength);
SubscribeTooltip(ui->fsrSharpeningStrength, tooltips.settings.fsr_rcas_strength);
SubscribeTooltip(ui->fsrSharpeningStrengthVal, tooltips.settings.fsr_rcas_strength);
SubscribeTooltip(ui->fsrSharpeningStrengthReset, tooltips.settings.fsr_rcas_strength);
ui->fsrSharpeningStrengthVal->setText(fmt_fsr_sharpening_strength(ui->fsrSharpeningStrength->value()));
connect(ui->fsrSharpeningStrength, &QSlider::valueChanged, [fmt_fsr_sharpening_strength, this](int value)
{
ui->fsrSharpeningStrengthVal->setText(fmt_fsr_sharpening_strength(value));
});
connect(ui->fsrSharpeningStrengthReset, &QAbstractButton::clicked, [fsr_sharpening_strength_def, this]()
{
ui->fsrSharpeningStrength->setValue(fsr_sharpening_strength_def);
});
// Remove renderers from the renderer Combobox if not supported
for (const auto& renderer : r_creator->renderers)
{
@ -666,8 +692,12 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
auto apply_renderer_specific_options = [=, this](const QString& text)
{
// Vulkan-only
ui->asyncTextureStreaming->setEnabled(text == r_creator->Vulkan.name);
ui->vulkansched->setEnabled(text == r_creator->Vulkan.name);
const bool is_vulkan = (text == r_creator->Vulkan.name);
ui->asyncTextureStreaming->setEnabled(is_vulkan);
ui->vulkansched->setEnabled(is_vulkan);
ui->fsrUpscalingEnable->setEnabled(is_vulkan);
ui->fsrSharpeningStrength->setEnabled(is_vulkan);
ui->fsrSharpeningStrengthReset->setEnabled(is_vulkan);
};
// Handle connects to disable specific checkboxes that depend on GUI state.

View File

@ -39,7 +39,7 @@
</sizepolicy>
</property>
<property name="currentIndex">
<number>0</number>
<number>1</number>
</property>
<widget class="QWidget" name="coreTab">
<attribute name="title">
@ -345,7 +345,7 @@
</attribute>
<layout class="QVBoxLayout" name="gpuTab_layout" stretch="0,1,0">
<item>
<layout class="QHBoxLayout" name="gpuTabLayout" stretch="1,1,1">
<layout class="QHBoxLayout" name="gpuTabLayout" stretch="1,0,1">
<item>
<layout class="QVBoxLayout" name="gpuTabLayoutLeft">
<item>
@ -634,6 +634,100 @@
</item>
</layout>
</item>
<item>
<widget class="QGroupBox" name="gb_Upscaling">
<property name="title">
<string>Upscaling</string>
</property>
<layout class="QVBoxLayout" name="gb_Upscaling_layout">
<item>
<widget class="QCheckBox" name="fsrUpscalingEnable">
<property name="text">
<string>Enable FSR Upscaling</string>
</property>
</widget>
</item>
<item>
<widget class="Line" name="line">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
<item>
<widget class="QLabel" name="fsrSharpeningStrengthLabel">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>RCAS Sharpening Strength</string>
</property>
<property name="alignment">
<set>Qt::AlignBottom|Qt::AlignLeading|Qt::AlignLeft</set>
</property>
<property name="margin">
<number>1</number>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" name="fsrSharpeningLayoutTop">
<item>
<widget class="QLabel" name="minSharpeningVal">
<property name="text">
<string>0</string>
</property>
</widget>
</item>
<item>
<widget class="QSlider" name="fsrSharpeningStrength">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
<item>
<widget class="QLabel" name="maxSharpeningVal">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>100</string>
</property>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="fsrSharpeningLayoutBottom" stretch="1,0">
<item>
<widget class="QLabel" name="fsrSharpeningStrengthVal">
<property name="text">
<string>0</string>
</property>
<property name="alignment">
<set>Qt::AlignCenter</set>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="fsrSharpeningStrengthReset">
<property name="text">
<string>Reset</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</item>
<item>
<spacer name="gpu_tab_layout_middle_spacer">
<property name="orientation">

View File

@ -167,6 +167,9 @@ public:
const QString async_texture_streaming = tr("Stream textures to GPU in parallel with 3D rendering.\nCan improve performance on more powerful GPUs that have spare headroom.\nOnly works with Vulkan renderer.");
const QString fsr_upscaling = tr("Enable FidelityFX Super Resolution upscaling filter to improve the look of upscaled images.\nIf the game is rendering at an internal resolution lower than your window resolution, FidelityFX will handle the upscale.\nCan cause visual artefacts.\nDoes not work with stereo 3D output for now");
const QString fsr_rcas_strength = tr("Control the sharpening strength applied by FidelityFX Super Resolution. Higher values will give sharper output but may introduce artefacts.");
// gui
const QString log_limit = tr("Sets the maximum amount of blocks that the log can display.\nThis usually equals the number of lines.\nSet 0 in order to remove the limit.");