// snes9x/vulkan/vulkan_shader_chain.cpp
//
// 600 lines
// 22 KiB
// C++

#include "vulkan_shader_chain.hpp"
#include "slang_helpers.hpp"
#include "stb_image.h"
#include "vulkan/vulkan_enums.hpp"
namespace Vulkan
{
// Creates an empty shader chain bound to `context_`.
//
// Only plain member state is initialised here; the constructor itself makes
// no allocator or device calls. Pipelines, textures and the vertex buffer are
// set up by load_shader_preset().
ShaderChain::ShaderChain(Context *context_)
{
    context = context_;

    // No vertex buffer yet; the destructor checks this handle before freeing.
    vertex_buffer_allocation = nullptr;
    vertex_buffer = nullptr;

    // Sizes stay at zero until update_and_propagate_sizes() receives real ones.
    viewport_height = 0;
    viewport_width = 0;
    original_height = 0;
    original_width = 0;

    // Default history depth; load_shader_preset() may raise it when a pass
    // samples an older original frame.
    original_history_size = 3;

    // Frame slot bookkeeping: rendering starts at slot 0, and feedback
    // samplers read from `last_frame_index`.
    current_frame_index = 0;
    last_frame_index = 2;
}
// Releases the vertex buffer (when a live device is still available) and
// drops every pipeline. The vertex buffer is the only resource freed by hand
// here.
ShaderChain::~ShaderChain()
{
    const bool device_available = context && context->device;

    if (device_available)
    {
        if (vertex_buffer)
        {
            context->allocator.destroyBuffer(vertex_buffer, vertex_buffer_allocation);
        }

        // Reset the handles whether or not a buffer existed.
        vertex_buffer_allocation = nullptr;
        vertex_buffer = nullptr;
    }

    pipelines.clear();
}
void ShaderChain::construct_buffer_objects()
{
for (size_t i = 0; i < pipelines.size(); i++)
{
auto &pipeline = *pipelines[i];
uint8_t *ubo_memory = nullptr;
if (pipeline.shader->ubo_size > 0)
ubo_memory = (uint8_t *)context->allocator.mapMemory(pipeline.uniform_buffer_allocation).value;
for (auto &uniform : pipeline.shader->uniforms)
{
void *location = 0;
const float MVP[16] = { 1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f };
std::string block;
switch (uniform.block)
{
case SlangShader::Uniform::UBO:
location = &ubo_memory[uniform.offset];
block = "uniform";
break;
case SlangShader::Uniform::PushConstant:
location = &pipeline.push_constants[uniform.offset];
block = "push constant";
break;
}
auto write_size = [&location](int width, int height) {
std::array<float, 4> size;
size[0] = (float)width;
size[1] = (float)height;
size[2] = 1.0f / size[0];
size[3] = 1.0f / size[1];
memcpy(location, size.data(), sizeof(float) * 4);
};
switch (uniform.type)
{
case SlangShader::Uniform::PassSize:
case SlangShader::Uniform::PassFeedbackSize: // TODO: Does this need to differ?
if (uniform.specifier == -1)
{
write_size(original_width, original_height);
}
else
{
write_size(pipelines[uniform.specifier]->destination_width,
pipelines[uniform.specifier]->destination_height);
}
break;
case SlangShader::Uniform::ViewportSize:
write_size(viewport_width, viewport_height);
break;
case SlangShader::Uniform::PreviousFrameSize:
if (original.size() > 1)
write_size(original[1]->image_width, original[1]->image_height);
else
write_size(original_width, original_height);
break;
case SlangShader::Uniform::LutSize:
if (uniform.specifier < (int)lookup_textures.size())
write_size(lookup_textures[uniform.specifier]->image_width, lookup_textures[uniform.specifier]->image_height);
else
write_size(1, 1);
break;
case SlangShader::Uniform::MVP:
memcpy(location, MVP, sizeof(float) * 16);
break;
case SlangShader::Uniform::Parameter:
if (uniform.specifier < (int)preset->parameters.size())
memcpy(location, &preset->parameters[uniform.specifier].val, sizeof(float));
break;
case SlangShader::Uniform::FrameCount:
memcpy(location, &frame_count, sizeof(uint32_t));
break;
case SlangShader::Uniform::FrameDirection:
const int32_t frame_direction = 1;
memcpy(location, &frame_direction, sizeof(int32_t));
break;
}
}
if (pipeline.shader->ubo_size > 0)
{
context->allocator.unmapMemory(pipeline.uniform_buffer_allocation);
context->allocator.flushAllocation(pipeline.uniform_buffer_allocation, 0, pipeline.shader->ubo_size);
}
}
}
// Records new original/viewport dimensions and recomputes every pass's source
// and destination size from them.
//
// Does nothing when there are no pipelines or when all four values already
// match the stored ones. Pass 0 takes the original size as its source; each
// later pass takes the previous pass's destination. The destination is the
// source (or viewport, or an absolute value, according to the shader's scale
// type) multiplied by the shader's scale factor. The final pass is always
// forced to the viewport size.
void ShaderChain::update_and_propagate_sizes(int original_width_new, int original_height_new, int viewport_width_new, int viewport_height_new)
{
    if (pipelines.empty())
        return;

    const bool sizes_unchanged = original_width == original_width_new &&
                                 original_height == original_height_new &&
                                 viewport_width == viewport_width_new &&
                                 viewport_height == viewport_height_new;
    if (sizes_unchanged)
        return;

    original_width = original_width_new;
    original_height = original_height_new;
    viewport_width = viewport_width_new;
    viewport_height = viewport_height_new;

    // Resolves one axis of a pass's output size according to its scale type.
    const auto resolve_axis = [](const auto &scale_type, auto scale, auto viewport_extent, auto source_extent, auto &destination_extent) {
        if (scale_type == "viewport")
            destination_extent = viewport_extent * scale;
        else if (scale_type == "absolute")
            destination_extent = scale;
        else
            destination_extent = source_extent * scale;
    };

    for (size_t index = 0; index < pipelines.size(); index++)
    {
        auto &pass = *pipelines[index];

        if (index == 0)
        {
            pass.source_width = original_width_new;
            pass.source_height = original_height_new;
        }
        else
        {
            auto &previous = *pipelines[index - 1];
            pass.source_width = previous.destination_width;
            pass.source_height = previous.destination_height;
        }

        resolve_axis(pass.shader->scale_type_x, pass.shader->scale_x, viewport_width, pass.source_width, pass.destination_width);
        resolve_axis(pass.shader->scale_type_y, pass.shader->scale_y, viewport_height, pass.source_height, pass.destination_height);

        // The last pass always covers exactly the viewport.
        if (index == pipelines.size() - 1)
        {
            pass.destination_width = viewport_width;
            pass.destination_height = viewport_height;
        }
    }
}
bool ShaderChain::load_shader_preset(std::string filename)
{
if (!ends_with(filename, ".slangp"))
printf("Warning: loading preset without .slangp extension\n");
preset = std::make_unique<SlangPreset>();
if (!preset->load_preset_file(filename))
{
printf("Couldn't load preset file: %s\n", filename.c_str());
return false;
}
if (!preset->introspect())
{
printf("Failed introspection process in preset: %s\n", filename.c_str());
return false;
}
pipelines.clear();
pipelines.resize(preset->passes.size());
int num_ubos = 0;
int num_samplers = 0;
for (size_t i = 0; i < preset->passes.size(); i++)
{
auto &p = preset->passes[i];
pipelines[i] = std::make_unique<SlangPipeline>();
pipelines[i]->init(context, &p);
bool lastpass = (i == preset->passes.size() - 1);
if (!pipelines[i]->generate_pipeline(lastpass))
{
printf("Couldn't create pipeline for shader: %s\n", p.filename.c_str());
return false;
}
for (auto &u : p.samplers)
if (u.type == SlangShader::Sampler::PreviousFrame)
if (u.specifier > (int)original_history_size)
original_history_size = u.specifier;
if (p.ubo_size)
num_ubos++;
if (p.samplers.size() > 0)
num_samplers += p.samplers.size();
}
std::array<vk::DescriptorPoolSize, 2> descriptor_pool_sizes;
descriptor_pool_sizes[0]
.setType(vk::DescriptorType::eUniformBuffer)
.setDescriptorCount(num_ubos * queue_size);
descriptor_pool_sizes[1]
.setType(vk::DescriptorType::eCombinedImageSampler)
.setDescriptorCount(num_samplers * queue_size);
auto descriptor_pool_create_info = vk::DescriptorPoolCreateInfo{}
.setPoolSizes(descriptor_pool_sizes)
.setMaxSets(pipelines.size() * queue_size)
.setFlags(vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet);
descriptor_pool = context->device.createDescriptorPoolUnique(descriptor_pool_create_info).value;
for (auto &p : pipelines)
p->generate_frame_resources(descriptor_pool.get());
load_lookup_textures();
float vertex_data[] = { -1.0f, -3.0f, 0.0f, 1.0f, /* texcoords */ 0.0, -1.0f,
3.0f, 1.0f, 0.0f, 1.0f, 2.0f, 1.0f,
-1.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f };
auto buffer_create_info = vk::BufferCreateInfo{}
.setSize(sizeof(vertex_data))
.setUsage(vk::BufferUsageFlagBits::eVertexBuffer);
auto allocation_create_info = vma::AllocationCreateInfo{}
.setFlags(vma::AllocationCreateFlagBits::eHostAccessSequentialWrite)
.setRequiredFlags(vk::MemoryPropertyFlagBits::eHostVisible);
std::tie(vertex_buffer, vertex_buffer_allocation) = context->allocator.createBuffer(buffer_create_info, allocation_create_info).value;
auto vertex_buffer_memory = context->allocator.mapMemory(vertex_buffer_allocation).value;
memcpy(vertex_buffer_memory, vertex_data, sizeof(vertex_data));
context->allocator.unmapMemory(vertex_buffer_allocation);
context->allocator.flushAllocation(vertex_buffer_allocation, 0, sizeof(vertex_data));
frame_count = 0;
current_frame_index = 0;
last_frame_index = 2;
context->wait_idle();
return true;
}
// Calls update_framebuffer() for frame slot `frame_num` on every pass that
// needs its own render target: all passes except the last, plus the last one
// when the preset reports that its output is used as feedback.
//
// The mipmap flag passed for pass i is the `mipmap_input` setting of pass
// i + 1 (the pass that consumes its output); the final pass gets false.
//
// Returns immediately when there are no pipelines, so that the
// `pipelines.size() - 1` arithmetic below can never wrap around.
void ShaderChain::update_framebuffers(vk::CommandBuffer cmd, int frame_num)
{
    if (pipelines.empty())
        return;

    const size_t last_pass = pipelines.size() - 1;
    const size_t pass_count = preset->last_pass_uses_feedback ? last_pass + 1 : last_pass;

    for (size_t i = 0; i < pass_count; i++)
    {
        bool mipmap = false;
        if (i < last_pass)
            mipmap = pipelines[i + 1]->shader->mipmap_input;

        pipelines[i]->update_framebuffer(cmd, frame_num, mipmap);
    }
}
// Rewrites the descriptor set of pass `pipe_num` for frame slot
// `swapchain_index`.
//
// The pass's uniform buffer (if it has one) is bound first, then one combined
// image sampler is written per sampler the shader declares:
//  * Lut           - a look-up texture with its own sampler.
//  * PassFeedback  - the referenced pass's image from `last_frame_index`,
//                    creating framebuffers and clearing the image first when
//                    they are not ready yet.
//  * Pass          - the original image (specifier -1) or the referenced
//                    pass's image from the current slot.
//  * PreviousFrame - an older original image, clamped to the oldest one held.
void ShaderChain::update_descriptor_set(vk::CommandBuffer cmd, int pipe_num, int swapchain_index)
{
    auto &target_pipeline = *pipelines[pipe_num];
    auto &target_frame = target_pipeline.frame[swapchain_index];

    if (target_pipeline.shader->ubo_size > 0)
    {
        auto ubo_info = vk::DescriptorBufferInfo{}
            .setBuffer(target_pipeline.uniform_buffer)
            .setOffset(0)
            .setRange(target_pipeline.shader->ubo_size);

        auto ubo_write = vk::WriteDescriptorSet{}
            .setDstSet(target_frame.descriptor_set.get())
            .setDstBinding(target_pipeline.shader->ubo_binding)
            .setDescriptorType(vk::DescriptorType::eUniformBuffer)
            .setBufferInfo(ubo_info);

        context->device.updateDescriptorSets(ubo_write, {});
    }

    // Shared across iterations; each branch below only replaces the view and
    // sampler.
    auto image_info = vk::DescriptorImageInfo{}
        .setImageLayout(vk::ImageLayout::eShaderReadOnlyOptimal);

    for (auto &sampler : target_pipeline.shader->samplers)
    {
        const auto specifier = sampler.specifier;

        if (sampler.type == SlangShader::Sampler::Lut)
        {
            auto &lut = *lookup_textures[specifier];
            image_info.setImageView(lut.image_view);
            image_info.setSampler(lut.sampler);
        }
        else if (sampler.type == SlangShader::Sampler::PassFeedback)
        {
            assert(specifier < static_cast<int>(pipelines.size()));
            assert(specifier >= 0);

            auto &source_pipeline = *pipelines[specifier];

            if (!source_pipeline.frame[last_frame_index].image.image)
                update_framebuffers(cmd, last_frame_index);

            auto &feedback_frame = source_pipeline.frame[last_frame_index];
            if (feedback_frame.image.current_layout == vk::ImageLayout::eUndefined)
                feedback_frame.image.clear(cmd);

            image_info.setImageView(feedback_frame.image.image_view);

            // Use the sampler of the pass that follows the referenced one;
            // the last pass has no successor, so it uses its own.
            const bool references_last_pass = (specifier == static_cast<int>(pipelines.size()) - 1);
            if (references_last_pass)
                image_info.setSampler(source_pipeline.sampler.get());
            else
                image_info.setSampler(pipelines[specifier + 1]->sampler.get());
        }
        else if (sampler.type == SlangShader::Sampler::Pass)
        {
            assert(specifier + 1 < static_cast<int>(pipelines.size()));

            image_info.setSampler(pipelines[specifier + 1]->sampler.get());

            if (specifier == -1)
                image_info.setImageView(original[0]->image_view);
            else
                image_info.setImageView(pipelines[specifier]->frame[swapchain_index].image.image_view);
        }
        else if (sampler.type == SlangShader::Sampler::PreviousFrame)
        {
            // Clamp to the oldest original that is actually available.
            int history_index = specifier;
            if (history_index >= static_cast<int>(original.size()))
                history_index = original.size() - 1;
            assert(history_index > -1);

            image_info.setSampler(pipelines[0]->sampler.get());
            image_info.setImageView(original[history_index]->image_view);
        }

        auto sampler_write = vk::WriteDescriptorSet{}
            .setDstSet(target_frame.descriptor_set.get())
            .setDstBinding(sampler.binding)
            .setDescriptorType(vk::DescriptorType::eCombinedImageSampler)
            .setImageInfo(image_info);

        context->device.updateDescriptorSets(sampler_write, {});
    }
}
// Renders one frame through the chain and, if that succeeded, swaps the
// swapchain. All arguments are forwarded unchanged to do_frame_without_swap().
//
// Returns the result of do_frame_without_swap(); no swap happens on failure.
bool ShaderChain::do_frame(uint8_t *data, int width, int height, int stride, vk::Format format, int viewport_x, int viewport_y, int viewport_width, int viewport_height)
{
    const bool rendered = do_frame_without_swap(data, width, height, stride, format,
                                                viewport_x, viewport_y, viewport_width, viewport_height);
    if (rendered)
        context->swapchain->swap();

    return rendered;
}
// Records one complete frame of the shader chain into the swapchain's command
// buffer without swapping (do_frame() adds the swap on top of this).
//
// data/width/height/stride/format describe the source image that is handed to
// upload_original(). viewport_x/y/width/height give the rectangle used for
// the final pass's viewport and scissor, and for the feedback blit. Note that
// the viewport_width and viewport_height parameters shadow the members of the
// same name inside this function.
//
// Returns false if swapchain->begin_frame() fails (nothing else is done in
// that case), true otherwise.
bool ShaderChain::do_frame_without_swap(uint8_t *data, int width, int height, int stride, vk::Format format, int viewport_x, int viewport_y, int viewport_width, int viewport_height)
{
if (!context->swapchain->begin_frame())
return false;
auto cmd = context->swapchain->get_cmd();
// Per-frame preparation: pass sizes, per-pass framebuffers for the current
// slot, the new source image, then the uniform data that depends on them.
update_and_propagate_sizes(width, height, viewport_width, viewport_height);
update_framebuffers(cmd, current_frame_index);
upload_original(cmd, data, width, height, stride, format);
construct_buffer_objects();
// Record the passes in order. Every pass except the last begins its own
// render pass on its per-frame framebuffer; the last pass is drawn inside the
// render pass begun by the swapchain.
for (size_t i = 0; i < pipelines.size(); i++)
{
auto &pipe = *pipelines[i];
auto &frame = pipe.frame[current_frame_index];
update_descriptor_set(cmd, i, current_frame_index);
// Clear colour: opaque black.
vk::ClearValue value{};
value.color = { 0.0f, 0.0f, 0.0f, 1.0f };
// Only used by the non-final branch below.
auto render_pass_begin_info = vk::RenderPassBeginInfo{}
.setRenderPass(pipe.render_pass.get())
.setFramebuffer(frame.image.framebuffer.get())
.setRenderArea(vk::Rect2D({}, vk::Extent2D(frame.image.image_width, frame.image.image_height)))
.setClearValues(value);
if (i == pipelines.size() - 1)
context->swapchain->begin_render_pass();
else
cmd.beginRenderPass(render_pass_begin_info, vk::SubpassContents::eInline);
cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipe.pipeline.get());
cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipe.pipeline_layout.get(), 0, frame.descriptor_set.get(), {});
cmd.bindVertexBuffers(0, vertex_buffer, { 0 });
// Push constants were filled in by construct_buffer_objects().
if (pipe.push_constants.size() > 0)
cmd.pushConstants(pipe.pipeline_layout.get(), vk::ShaderStageFlagBits::eAllGraphics, 0, pipe.push_constants.size(), pipe.push_constants.data());
// Intermediate passes cover their whole destination; the final pass is
// restricted to the caller-supplied viewport rectangle.
if (i < pipelines.size() - 1)
{
cmd.setViewport(0, vk::Viewport(0, 0, pipe.destination_width, pipe.destination_height, 0.0f, 1.0f));
cmd.setScissor(0, vk::Rect2D({}, vk::Extent2D(pipe.destination_width, pipe.destination_height)));
}
else
{
cmd.setViewport(0, vk::Viewport(viewport_x, viewport_y, viewport_width, viewport_height, 0.0f, 1.0f));
cmd.setScissor(0, vk::Rect2D(vk::Offset2D(viewport_x, viewport_y), vk::Extent2D(viewport_width, viewport_height)));
}
// Three vertices: the vertex buffer holds a single triangle.
cmd.draw(3, 1, 0, 0);
if (i < pipelines.size() - 1)
{
cmd.endRenderPass();
}
else
{
context->swapchain->end_render_pass();
}
// NOTE(review): barrier() is issued for every pass, including the last one;
// presumably it makes the pass output readable by later passes -- confirm
// against the image class.
frame.image.barrier(cmd);
if (i < pipelines.size() - 1)
frame.image.generate_mipmaps(cmd);
// The final pass was drawn to the swapchain image rather than to its own frame
// image. When the preset samples the final pass as feedback, copy the viewport
// region of the swapchain image into this pass's frame image.
if (preset->last_pass_uses_feedback && i == pipelines.size() - 1)
{
std::array<vk::ImageMemoryBarrier, 2> image_memory_barrier{};
// [0]: this pass's frame image -> blit destination. The old layout is given as
// eUndefined, so its previous contents are not preserved.
image_memory_barrier[0]
.setImage(frame.image.image)
.setOldLayout(vk::ImageLayout::eUndefined)
.setNewLayout(vk::ImageLayout::eTransferDstOptimal)
.setSrcAccessMask(vk::AccessFlagBits::eColorAttachmentWrite)
.setDstAccessMask(vk::AccessFlagBits::eTransferWrite)
.setSubresourceRange(vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));
// [1]: swapchain image -> blit source.
image_memory_barrier[1]
.setImage(context->swapchain->get_image())
.setOldLayout(vk::ImageLayout::ePresentSrcKHR)
.setNewLayout(vk::ImageLayout::eTransferSrcOptimal)
.setSrcAccessMask(vk::AccessFlagBits::eColorAttachmentWrite)
.setDstAccessMask(vk::AccessFlagBits::eTransferRead)
.setSubresourceRange(vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));
cmd.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eTransfer,
{}, {}, {}, image_memory_barrier);
// Source is the viewport rectangle on the swapchain image; destination starts
// at the origin of the frame image with the same extent.
auto image_blit = vk::ImageBlit{}
.setSrcOffsets({ vk::Offset3D(viewport_x, viewport_y, 0), vk::Offset3D(viewport_x + viewport_width, viewport_y + viewport_height, 1) })
.setDstOffsets({ vk::Offset3D(0, 0, 0), vk::Offset3D(viewport_width, viewport_height, 1) })
.setSrcSubresource(vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1))
.setDstSubresource(vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1));
cmd.blitImage(context->swapchain->get_image(), vk::ImageLayout::eTransferSrcOptimal, frame.image.image, vk::ImageLayout::eTransferDstOptimal, image_blit, vk::Filter::eNearest);
// After the blit: frame image becomes shader-readable, swapchain image goes
// back to the present layout.
image_memory_barrier[0]
.setOldLayout(vk::ImageLayout::eTransferDstOptimal)
.setNewLayout(vk::ImageLayout::eShaderReadOnlyOptimal)
.setSrcAccessMask(vk::AccessFlagBits::eTransferWrite)
.setDstAccessMask(vk::AccessFlagBits::eShaderRead)
.setSubresourceRange(vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));
// NOTE(review): the swapchain image was only read by the blit, yet the source
// access mask here is eTransferWrite; eTransferRead may have been intended --
// confirm.
image_memory_barrier[1]
.setOldLayout(vk::ImageLayout::eTransferSrcOptimal)
.setNewLayout(vk::ImageLayout::ePresentSrcKHR)
.setSrcAccessMask(vk::AccessFlagBits::eTransferWrite)
.setDstAccessMask(vk::AccessFlagBits::eMemoryRead)
.setSubresourceRange(vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));
cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eAllGraphics,
{}, {}, {}, image_memory_barrier);
// NOTE(review): the barrier above leaves the image in eShaderReadOnlyOptimal,
// but the tracked layout is recorded as eTransferDstOptimal -- confirm that
// this mismatch is intentional.
frame.image.current_layout = vk::ImageLayout::eTransferDstOptimal;
}
}
context->swapchain->end_frame_without_swap();
// Advance the frame slots: the slot just rendered becomes the one feedback
// samplers read from, and the next slot wraps around at queue_size.
last_frame_index = current_frame_index;
current_frame_index = (current_frame_index + 1) % queue_size;
frame_count++;
return true;
}
// Uploads a new source image and puts it at the front of the `original`
// history.
//
// Once the history holds more than `original_history_size` textures, the
// oldest one is taken off the back and reused (re-created only if its size or
// format differs from the incoming image). Otherwise a new texture is made.
// Wrap mode, filtering and mipmapping come from the first pass's shader.
//
// cmd: command buffer to record the upload into; when it is a null handle the
//      from_buffer() overload that takes no command buffer is used instead.
void ShaderChain::upload_original(vk::CommandBuffer cmd, uint8_t *data, int width, int height, int stride, vk::Format format)
{
    // (Re)creates `target` with the incoming dimensions and the first pass's
    // sampling settings.
    const auto allocate_storage = [&](Texture &target) {
        auto &first_pass_shader = *pipelines[0]->shader;
        target.create(width,
                      height,
                      format,
                      wrap_mode_from_string(first_pass_shader.wrap_mode),
                      first_pass_shader.filter_linear,
                      first_pass_shader.mipmap_input);
    };

    std::unique_ptr<Texture> incoming;

    const bool history_full = original.size() > original_history_size;
    if (!history_full)
    {
        incoming = std::make_unique<Texture>();
        incoming->init(context);
        allocate_storage(*incoming);
    }
    else
    {
        // Recycle the oldest entry.
        incoming = std::move(original.back());
        original.pop_back();

        const bool reusable = incoming->image_width == width &&
                              incoming->image_height == height &&
                              incoming->format == format;
        if (!reusable)
        {
            incoming->destroy();
            allocate_storage(*incoming);
        }
    }

    if (cmd)
        incoming->from_buffer(cmd, data, width, height, stride);
    else
        incoming->from_buffer(data, width, height, stride);

    original.push_front(std::move(incoming));
}
void ShaderChain::upload_original(uint8_t *data, int width, int height, int stride, vk::Format format)
{
upload_original(nullptr, data, width, height, stride, format);
}
bool ShaderChain::load_lookup_textures()
{
if (preset->textures.size() < 1)
return true;
lookup_textures.clear();
for (auto &l : preset->textures)
{
int width, height, channels;
stbi_uc *bytes = stbi_load(l.filename.c_str(), &width, &height, &channels, 4);
if (!bytes)
{
printf("Couldn't load look-up texture: %s\n", l.filename.c_str());
return false;
}
auto wrap_mode = wrap_mode_from_string(l.wrap_mode);
lookup_textures.push_back(std::make_unique<Texture>());
auto &t = lookup_textures.back();
t->init(context);
t->create(width, height, vk::Format::eR8G8B8A8Unorm, wrap_mode, l.linear, l.mipmap);
t->from_buffer(bytes, width, height);
t->discard_staging_buffer();
free(bytes);
}
return true;
}
} // namespace Vulkan