vk: Implement depth scaling using hardware blit/copy engines

- Removes the old depth scaling implementation that used an overlay pass.
  It was never going to work properly because per-pixel stencil writes are unavailable.
- TODO: Preserve stencil buffer during ARGB8->D32S8 shader conversion pass
kd-11 2018-06-04 19:57:16 +03:00 committed by kd-11
parent 3150619320
commit fc18e17ba6
11 changed files with 324 additions and 287 deletions
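
Background on the approach, before the per-file diffs: vkCmdBlitImage cannot scale most depth/stencil formats, so scaled depth transfers are routed through a cached color image of matching texel size: raw-copy the depth data in, blit within the color image, raw-copy the scaled result back out. Below is a condensed, hypothetical sketch of that round trip (not the commit's code; layouts, usage flags and the scratch image, e.g. R16_UNORM for D16_UNORM, are assumed to be prepared by the caller, and the depth-to-color aspect copy relies on the same out-of-spec driver tolerance the commit itself documents):

#include <vulkan/vulkan.h>
#include <cstdint>

// Sketch: scale a depth image with the transfer engine via a color scratch image.
// Assumed: all images created with TRANSFER_SRC|TRANSFER_DST usage and already
// transitioned (src -> TRANSFER_SRC_OPTIMAL, dst -> TRANSFER_DST_OPTIMAL,
// scratch -> GENERAL). Mismatched aspects in vkCmdCopyImage violate the spec
// but are tolerated by the drivers this commit targets.
void scale_depth_via_color_scratch(VkCommandBuffer cmd, VkImage src, VkImage dst,
                                   VkImage scratch, VkExtent2D src_size, VkExtent2D dst_size)
{
	// 1. Raw copy of the unscaled depth texels into the color scratch image
	VkImageCopy to_scratch = {};
	to_scratch.srcSubresource = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 0, 1 };
	to_scratch.dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
	to_scratch.extent = { src_size.width, src_size.height, 1 };
	vkCmdCopyImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
		scratch, VK_IMAGE_LAYOUT_GENERAL, 1, &to_scratch);

	// 2. Scaled blit within the scratch image; color aspects blit freely.
	//    The scaled copy lands below the unscaled one to avoid overlap.
	VkImageBlit stretch = {};
	stretch.srcSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
	stretch.dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
	stretch.srcOffsets[1] = { int32_t(src_size.width), int32_t(src_size.height), 1 };
	stretch.dstOffsets[0] = { 0, int32_t(src_size.height), 0 };
	stretch.dstOffsets[1] = { int32_t(dst_size.width), int32_t(src_size.height + dst_size.height), 1 };
	vkCmdBlitImage(cmd, scratch, VK_IMAGE_LAYOUT_GENERAL,
		scratch, VK_IMAGE_LAYOUT_GENERAL, 1, &stretch, VK_FILTER_NEAREST);

	// 3. Raw copy of the scaled region back into the destination depth image
	VkImageCopy from_scratch = {};
	from_scratch.srcSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 };
	from_scratch.srcOffset = { 0, int32_t(src_size.height), 0 };
	from_scratch.dstSubresource = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 0, 1 };
	from_scratch.extent = { dst_size.width, dst_size.height, 1 };
	vkCmdCopyImage(cmd, scratch, VK_IMAGE_LAYOUT_GENERAL,
		dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &from_scratch);
}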

View File

@ -1632,8 +1632,13 @@ void GLGSRender::synchronize_buffers()
bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
{
m_samplers_dirty.store(true);
return m_gl_texture_cache.blit(src, dst, interpolate, m_rtts);
if (m_gl_texture_cache.blit(src, dst, interpolate, m_rtts))
{
m_samplers_dirty.store(true);
return true;
}
return false;
}
void GLGSRender::notify_tile_unbound(u32 tile)

View File

@ -84,8 +84,29 @@ namespace gl
s32 old_fbo = 0;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo);
filter interp = linear_interpolation ? filter::linear : filter::nearest;
GLenum attachment = is_depth_copy ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0;
filter interp = (linear_interpolation && !is_depth_copy) ? filter::linear : filter::nearest;
GLenum attachment;
gl::buffers target;
if (is_depth_copy)
{
if (src->get_internal_format() == gl::texture::internal_format::depth16 ||
dst->get_internal_format() == gl::texture::internal_format::depth16)
{
attachment = GL_DEPTH_ATTACHMENT;
target = gl::buffers::depth;
}
else
{
attachment = GL_DEPTH_STENCIL_ATTACHMENT;
target = gl::buffers::depth_stencil;
}
}
else
{
attachment = GL_COLOR_ATTACHMENT0;
target = gl::buffers::color;
}
blit_src.bind();
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src_id, 0);
@ -99,7 +120,7 @@ namespace gl
if (scissor_test_enabled)
glDisable(GL_SCISSOR_TEST);
blit_src.blit(blit_dst, src_rect, dst_rect, is_depth_copy ? buffers::depth : buffers::color, interp);
blit_src.blit(blit_dst, src_rect, dst_rect, target, interp);
if (xfer_info.dst_is_typeless)
{

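For context on the GL hunk above: OpenGL can already scale depth and depth-stencil surfaces with glBlitFramebuffer, so only the attachment and buffer-target selection needed fixing. A minimal sketch of the call pattern, with framebuffer setup assumed and hypothetical parameter names:

// Sketch: scaled depth(-stencil) blit in GL; read/draw FBOs with the
// surfaces attached are assumed to be bound already.
void blit_depth(int sx0, int sy0, int sx1, int sy1,
                int dx0, int dy0, int dx1, int dy1, bool has_stencil)
{
	// Depth/stencil blits must use GL_NEAREST; GL_LINEAR is only valid for color
	const GLbitfield mask = has_stencil
		? (GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT)
		: GL_DEPTH_BUFFER_BIT;
	glBlitFramebuffer(sx0, sy0, sx1, sy1, dx0, dy0, dx1, dy1, mask, GL_NEAREST);
}
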
View File

@ -634,9 +634,6 @@ VKGSRender::VKGSRender() : GSRender()
m_depth_converter.reset(new vk::depth_convert_pass());
m_depth_converter->create(*m_device);
m_depth_scaler.reset(new vk::depth_scaling_pass());
m_depth_scaler->create(*m_device);
m_attachment_clear_pass.reset(new vk::attachment_clear_pass());
m_attachment_clear_pass->create(*m_device);
@ -764,10 +761,6 @@ VKGSRender::~VKGSRender()
m_depth_converter->destroy();
m_depth_converter.reset();
//Depth surface blitter
m_depth_scaler->destroy();
m_depth_scaler.reset();
//Attachment clear helper
m_attachment_clear_pass->destroy();
m_attachment_clear_pass.reset();
@ -1164,24 +1157,11 @@ void VKGSRender::end()
const auto dst_w = std::get<2>(region);
const auto dst_h = std::get<3>(region);
if (!is_depth || (src_w == dst_w && src_h == dst_h))
{
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
vk::copy_scaled_image(*m_current_command_buffer, surface->old_contents->value, surface->value,
surface->old_contents->current_layout, surface->current_layout, 0, 0, src_w, src_h,
0, 0, dst_w, dst_h, 1, aspect, true);
}
else
{
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, surface->info.format, 0);
auto render_pass = m_render_passes[rp];
vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_depth_scaler->run(*m_current_command_buffer, { 0, 0, (f32)src_w, (f32)src_h }, { 0, 0, (f32)dst_w, (f32)dst_h }, surface,
surface->old_contents, static_cast<vk::render_target*>(surface->old_contents)->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);
}
vk::copy_scaled_image(*m_current_command_buffer, surface->old_contents->value, surface->value,
surface->old_contents->current_layout, surface->current_layout, 0, 0, src_w, src_h,
0, 0, dst_w, dst_h, 1, aspect, true, VK_FILTER_LINEAR, surface->info.format, surface->old_contents->info.format);
}
};
@ -2048,7 +2028,6 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
m_attachment_clear_pass->free_resources();
m_depth_converter->free_resources();
m_depth_scaler->free_resources();
m_ui_renderer->free_resources();
ctx->buffer_views_to_clean.clear();
@ -3311,46 +3290,8 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
//Stop all parallel operations until this is finished
std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer);
m_current_command_buffer->begin();
if (result.succeeded)
if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer))
{
bool require_flush = false;
if (result.deferred)
{
//Requires manual scaling; depth/stencil surface
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, result.dst_image->info.format, 0);
auto render_pass = m_render_passes[rp];
auto old_src_layout = result.src_image->current_layout;
auto old_dst_layout = result.dst_image->current_layout;
vk::change_image_layout(*m_current_command_buffer, result.src_image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
vk::change_image_layout(*m_current_command_buffer, result.dst_image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
// TODO: Insets
const areaf src_area = { 0, 0, (f32)result.src_image->width(), (f32)result.src_image->height() };
const areaf dst_area = { 0, 0, (f32)result.dst_image->width(), (f32)result.dst_image->height() };
m_depth_scaler->run(*m_current_command_buffer, src_area, dst_area, result.dst_image, result.src_image,
result.src_view, render_pass, m_framebuffers_to_clean);
vk::change_image_layout(*m_current_command_buffer, result.src_image, old_src_layout);
vk::change_image_layout(*m_current_command_buffer, result.dst_image, old_dst_layout);
require_flush = true;
}
if (result.dst_image)
{
if (m_texture_cache.flush_if_cache_miss_likely(result.dst_image->info.format, result.real_dst_address, result.real_dst_size,
*m_current_command_buffer, m_swapchain->get_graphics_queue()))
require_flush = true;
}
if (require_flush)
flush_command_queue();
m_samplers_dirty.store(true);
return true;
}
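
The net effect of the VKGSRender changes above is that callers no longer branch between a hardware blit and a shader pass for depth surfaces; a single call now carries the filter and both formats, and the helper internally picks a raw copy, a blit, or the typeless round trip. Condensed from the hunk above (not verbatim):

// One unified call for color and depth resurfacing; the helper routes the
// transfer itself based on the aspect flags and the two formats.
vk::copy_scaled_image(*m_current_command_buffer,
	surface->old_contents->value, surface->value,
	surface->old_contents->current_layout, surface->current_layout,
	0, 0, src_w, src_h, 0, 0, dst_w, dst_h, 1,
	surface->attachment_aspect_flag, true /*compatible_formats*/,
	VK_FILTER_LINEAR, surface->info.format, surface->old_contents->info.format);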

View File

@ -261,7 +261,6 @@ private:
std::unique_ptr<vk::text_writer> m_text_writer;
std::unique_ptr<vk::depth_convert_pass> m_depth_converter;
std::unique_ptr<vk::depth_scaling_pass> m_depth_scaler;
std::unique_ptr<vk::ui_overlay_renderer> m_ui_renderer;
std::unique_ptr<vk::attachment_clear_pass> m_attachment_clear_pass;

View File

@ -6,10 +6,12 @@ namespace vk
{
context* g_current_vulkan_ctx = nullptr;
render_device g_current_renderer;
driver_vendor g_driver_vendor = driver_vendor::unknown;
std::shared_ptr<vk::mem_allocator_base> g_mem_allocator = nullptr;
std::unique_ptr<image> g_null_texture;
std::unique_ptr<image_view> g_null_image_view;
std::unordered_map<VkFormat, std::unique_ptr<image>> g_typeless_textures;
VkSampler g_null_sampler = nullptr;
@ -148,7 +150,7 @@ namespace vk
if (g_null_image_view)
return g_null_image_view->value;
g_null_texture.reset(new image(g_current_renderer, get_memory_mapping(g_current_renderer.gpu()).device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
g_null_texture.reset(new image(g_current_renderer, g_current_renderer.get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_IMAGE_TYPE_2D, VK_FORMAT_B8G8R8A8_UNORM, 4, 4, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0));
@ -167,6 +169,25 @@ namespace vk
return g_null_image_view->value;
}
vk::image* get_typeless_helper(VkFormat format)
{
auto create_texture = [&]()
{
return new vk::image(g_current_renderer, g_current_renderer.get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_IMAGE_TYPE_2D, format, 4096, 4096, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, 0);
};
auto &ptr = g_typeless_textures[format];
if (!ptr)
{
auto _img = create_texture();
ptr.reset(_img);
}
return ptr.get();
}
void acquire_global_submit_lock()
{
g_submit_mutex.lock();
@ -180,7 +201,9 @@ namespace vk
void destroy_global_resources()
{
g_null_texture.reset();
g_null_image_view .reset();
g_null_image_view.reset();
g_typeless_textures.clear();
if (g_null_sampler)
vkDestroySampler(g_current_renderer, g_null_sampler, nullptr);
@ -222,6 +245,7 @@ namespace vk
g_drv_disable_fence_reset = false;
g_num_processed_frames = 0;
g_num_total_frames = 0;
g_driver_vendor = driver_vendor::unknown;
const auto gpu_name = g_current_renderer.gpu().name();
@ -240,14 +264,33 @@ namespace vk
//Disable fence reset for proprietary driver and delete+initialize a new fence instead
if (gpu_name.find("Radeon") != std::string::npos)
{
g_driver_vendor = driver_vendor::AMD;
g_drv_disable_fence_reset = true;
}
//Nvidia cards are easily susceptible to NaN poisoning
if (gpu_name.find("NVIDIA") != std::string::npos || gpu_name.find("GeForce") != std::string::npos)
{
g_driver_vendor = driver_vendor::NVIDIA;
g_drv_sanitize_fp_values = true;
}
if (g_driver_vendor == driver_vendor::unknown)
{
if (gpu_name.find("RADV") != std::string::npos)
{
g_driver_vendor = driver_vendor::RADV;
}
else
{
LOG_WARNING(RSX, "Unknown driver vendor for device '%s'", gpu_name);
}
}
}
driver_vendor get_driver_vendor()
{
return g_driver_vendor;
}
bool emulate_primitive_restart(rsx::primitive_type type)

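The vendor probe above feeds the depth-blit workaround selection, since the typeless aspect trick behaves differently per driver. A hypothetical helper showing how the transfer code later in this commit keys off it:

// Sketch (assumed helper): pick the scratch-image aspect for a single-aspect
// transfer out of a packed depth-stencil image, per driver quirks.
VkImageAspectFlags pick_typeless_aspect(VkImageAspectFlags requested_aspect)
{
	switch (vk::get_driver_vendor())
	{
	case vk::driver_vendor::AMD:    // quirk: transfers stencil data correctly
	case vk::driver_vendor::NVIDIA: // quirk: transfers depth only, stencil ignored
		return requested_aspect;
	default:                        // RADV/unknown: the plain COLOR aspect works
		return VK_IMAGE_ASPECT_COLOR_BIT;
	}
}
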
View File

@ -66,6 +66,7 @@ namespace vk
struct image;
struct vk_data_heap;
class mem_allocator_base;
enum driver_vendor;
vk::context *get_current_thread_ctx();
void set_current_thread_ctx(const vk::context &ctx);
@ -80,6 +81,7 @@ namespace vk
bool emulate_primitive_restart(rsx::primitive_type type);
bool sanitize_fp_values();
bool fence_reset_disabled();
driver_vendor get_driver_vendor();
VkComponentMapping default_component_map();
VkComponentMapping apply_swizzle_remap(const std::array<VkComponentSwizzle, 4>& base_remap, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector);
@ -89,6 +91,7 @@ namespace vk
VkSampler null_sampler();
VkImageView null_image_view(vk::command_buffer&);
image* get_typeless_helper(VkFormat format);
//Sync helpers around vkQueueSubmit
void acquire_global_submit_lock();
@ -109,8 +112,14 @@ namespace vk
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range);
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout, VkImageSubresourceRange range);
void change_image_layout(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout);
void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect);
void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats);
void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout,
const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect,
VkImageAspectFlags src_transfer_mask = 0xFF, VkImageAspectFlags dst_transfer_mask = 0xFF);
void copy_scaled_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout,
u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height, u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height, u32 mipmaps,
VkImageAspectFlags aspect, bool compatible_formats, VkFilter filter = VK_FILTER_LINEAR, VkFormat src_format = VK_FORMAT_UNDEFINED, VkFormat dst_format = VK_FORMAT_UNDEFINED);
std::pair<VkFormat, VkComponentMapping> get_compatible_surface_format(rsx::surface_color_format color_format);
size_t get_render_pass_location(VkFormat color_surface_format, VkFormat depth_stencil_format, u8 color_surface_count);
@ -134,6 +143,14 @@ namespace vk
void die_with_error(const char* faulting_addr, VkResult error_code);
enum driver_vendor
{
unknown,
AMD,
NVIDIA,
RADV
};
struct memory_type_mapping
{
uint32_t host_visible_coherent;
@ -328,7 +345,7 @@ namespace vk
public:
using mem_handle_t = void *;
mem_allocator_base(VkDevice dev, VkPhysicalDevice pdev) : m_device(dev) {};
mem_allocator_base(VkDevice dev, VkPhysicalDevice /*pdev*/) : m_device(dev) {};
~mem_allocator_base() {};
virtual void destroy() = 0;
@ -386,7 +403,7 @@ namespace vk
vmaFreeMemory(m_allocator, static_cast<VmaAllocation>(mem_handle));
}
void *map(mem_handle_t mem_handle, u64 offset, u64 size) override
void *map(mem_handle_t mem_handle, u64 offset, u64 /*size*/) override
{
void *data = nullptr;
@ -432,7 +449,7 @@ namespace vk
void destroy() override {};
mem_handle_t alloc(u64 block_sz, u64 alignment, uint32_t memory_type_index) override
mem_handle_t alloc(u64 block_sz, u64 /*alignment*/, uint32_t memory_type_index) override
{
VkDeviceMemory memory;
VkMemoryAllocateInfo info = {};
@ -466,7 +483,7 @@ namespace vk
return (VkDeviceMemory)mem_handle;
}
u64 get_vk_device_memory_offset(mem_handle_t mem_handle)
u64 get_vk_device_memory_offset(mem_handle_t /*mem_handle*/)
{
return 0;
}
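
Worth noting in the header changes above: copy_image gained src/dst transfer masks so a single aspect of a packed depth-stencil image can be moved in isolation, which the VK_FORMAT_D32_SFLOAT_S8_UINT path later in the commit relies on. Example usage, with hypothetical local names:

// Copy only the stencil aspect between two D32_SFLOAT_S8_UINT images;
// cmd, src_image, dst_image, src_rect and dst_rect are assumed.
const VkImageAspectFlags ds_aspect =
	VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
vk::copy_image(cmd, src_image->value, dst_image->value,
	src_image->current_layout, dst_image->current_layout,
	src_rect, dst_rect, 1, ds_aspect, ds_aspect,
	VK_IMAGE_ASPECT_STENCIL_BIT,  // src_transfer_mask
	VK_IMAGE_ASPECT_STENCIL_BIT); // dst_transfer_mask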

View File

@ -76,22 +76,27 @@ namespace vk
//Reserve descriptor pools
m_descriptor_pool.create(*m_device, descriptor_pool_sizes, 2);
VkDescriptorSetLayoutBinding bindings[2] = {};
VkDescriptorSetLayoutBinding bindings[3] = {};
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[0].binding = 0;
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[1].descriptorCount = 1;
bindings[1].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[1].binding = 1;
bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[2].descriptorCount = 1;
bindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[2].binding = 2;
VkDescriptorSetLayoutCreateInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
infos.pBindings = bindings;
infos.bindingCount = 2;
infos.bindingCount = 3;
CHECK_RESULT(vkCreateDescriptorSetLayout(*m_device, &infos, nullptr, &m_descriptor_layout));
@ -116,8 +121,9 @@ namespace vk
virtual std::vector<vk::glsl::program_input> get_fragment_inputs()
{
std::vector<vk::glsl::program_input> fs_inputs;
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, 0, "fs0" });
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 1, "static_data" });
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 0, "static_data" });
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, 1, "fs0" });
fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, 2, "fs1" });
return fs_inputs;
}
@ -209,7 +215,7 @@ namespace vk
return result;
}
void load_program(vk::command_buffer cmd, VkRenderPass pass, VkImageView src)
void load_program(vk::command_buffer cmd, VkRenderPass pass, const std::vector<VkImageView>& src)
{
vk::glsl::program *program = nullptr;
auto found = m_program_cache.find(pass);
@ -233,14 +239,18 @@ namespace vk
{
m_sampler = std::make_unique<vk::sampler>(*m_device,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
}
update_uniforms(program);
VkDescriptorImageInfo info = { m_sampler->value, src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
program->bind_uniform(info, "fs0", m_descriptor_set);
program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 1, m_descriptor_set);
program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set);
for (int n = 0; n < src.size(); ++n)
{
VkDescriptorImageInfo info = { m_sampler->value, src[n], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };
program->bind_uniform(info, "fs" + std::to_string(n), m_descriptor_set);
}
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, program->pipeline);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layout, 0, 1, &m_descriptor_set, 0, nullptr);
@ -343,7 +353,7 @@ namespace vk
vkCmdSetScissor(cmd, 0, 1, &vs);
}
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* fbo, VkImageView src, VkRenderPass render_pass)
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* fbo, const std::vector<VkImageView>& src, VkRenderPass render_pass)
{
load_program(cmd, render_pass, src);
set_up_viewport(cmd, w, h);
@ -362,12 +372,18 @@ namespace vk
vkCmdEndRenderPass(cmd);
}
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, VkImageView src, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, const std::vector<VkImageView>& src, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
{
vk::framebuffer *fbo = get_framebuffer(target, render_pass, framebuffer_resources);
run(cmd, w, h, fbo, src, render_pass);
}
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, VkImageView src, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
{
std::vector<VkImageView> views = { src };
run(cmd, w, h, target, views, render_pass, framebuffer_resources);
}
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::image* target, vk::image_view* src, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
{
run(cmd, w, h, target, src->value, render_pass, framebuffer_resources);
@ -397,7 +413,8 @@ namespace vk
{
"#version 420\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(set=0, binding=0) uniform sampler2D fs0;\n"
"#extension GL_ARB_shader_stencil_export : enable\n\n"
"layout(set=0, binding=1) uniform sampler2D fs0;\n"
"layout(location=0) in vec2 tc0;\n"
"\n"
"void main()\n"
@ -409,6 +426,7 @@ namespace vk
renderpass_config.set_depth_mask(true);
renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS);
renderpass_config.enable_stencil_test(VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_COMPARE_OP_ALWAYS, 0xFF, 0xFF);
m_vertex_shader.id = 100002;
m_fragment_shader.id = 100003;
@ -438,7 +456,7 @@ namespace vk
"#version 450\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(location=0) in vec4 in_pos;\n"
"layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n"
"layout(std140, set=0, binding=0) uniform static_data{ vec4 regs[8]; };\n"
"layout(location=0) out vec2 tc0;\n"
"layout(location=1) out vec4 color;\n"
"layout(location=2) out vec4 parameters;\n"
@ -459,7 +477,7 @@ namespace vk
{
"#version 420\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(set=0, binding=0) uniform sampler2D fs0;\n"
"layout(set=0, binding=1) uniform sampler2D fs0;\n"
"layout(location=0) in vec2 tc0;\n"
"layout(location=1) in vec4 color;\n"
"layout(location=2) in vec4 parameters;\n"
@ -704,95 +722,13 @@ namespace vk
break;
}
overlay_pass::run(cmd, w, h, target, src, render_pass);
overlay_pass::run(cmd, w, h, target, { src }, render_pass);
}
ui.update();
}
};
struct depth_scaling_pass : public overlay_pass
{
areaf src_area;
areaf dst_area;
u16 src_width;
u16 src_height;
depth_scaling_pass()
{
vs_src =
{
"#version 450\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n"
"layout(location=0) out vec2 tc0;\n"
"\n"
"void main()\n"
"{\n"
" vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n"
" vec2 coords[] = {vec2(0., 0.), vec2(1., 0.), vec2(0., 1.), vec2(1., 1.)};\n"
" gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n"
" tc0 = coords[gl_VertexIndex % 4] * regs[0].xy + regs[0].zw;\n"
"}\n"
};
fs_src =
{
"#version 420\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(set=0, binding=0) uniform sampler2D fs0;\n"
"layout(location=0) in vec2 tc0;\n"
"\n"
"void main()\n"
"{\n"
" gl_FragDepth = texture(fs0, tc0).x;\n"
"}\n"
};
renderpass_config.set_depth_mask(true);
renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS);
m_vertex_shader.id = 100006;
m_fragment_shader.id = 100007;
}
void update_uniforms(vk::glsl::program* /*program*/) override
{
m_ubo_offset = (u32)m_ubo.alloc<256>(128);
auto dst = (f32*)m_ubo.map(m_ubo_offset, 128);
dst[0] = f32(src_area.x2 - src_area.x1) / src_width;
dst[1] = f32(src_area.y2 - src_area.y1) / src_height;
dst[2] = src_area.x1 / f32(src_area.x2 - src_area.x1);
dst[3] = src_area.y1 / f32(src_area.y2 - src_area.y1);
m_ubo.unmap();
}
void set_up_viewport(vk::command_buffer &cmd, u16 max_w, u16 max_h) override
{
VkRect2D region = { { s32(dst_area.x1), s32(dst_area.y1) },{ u32(dst_area.x2 - dst_area.x1), u32(dst_area.y2 - dst_area.y1) } };
vkCmdSetScissor(cmd, 0, 1, &region);
VkViewport vp{};
vp.x = dst_area.x1;
vp.y = dst_area.y1;
vp.width = f32(region.extent.width);
vp.height = f32(region.extent.height);
vp.minDepth = 0.f;
vp.maxDepth = 1.f;
vkCmdSetViewport(cmd, 0, 1, &vp);
}
void run(vk::command_buffer &cmd, const areaf& src_rect, const areaf& dst_rect, vk::image* target, vk::image* src, vk::image_view* src_view, VkRenderPass render_pass, std::list<std::unique_ptr<vk::framebuffer_holder>>& framebuffer_resources)
{
src_area = src_rect;
dst_area = dst_rect;
src_width = src->width();
src_height = src->height();
overlay_pass::run(cmd, target->width(), target->height(), target, src_view, render_pass, framebuffer_resources);
}
};
struct attachment_clear_pass : public overlay_pass
{
color4f clear_color = { 0.f, 0.f, 0.f, 0.f };
@ -805,7 +741,7 @@ namespace vk
{
"#version 450\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(std140, set=0, binding=1) uniform static_data{ vec4 regs[8]; };\n"
"layout(std140, set=0, binding=0) uniform static_data{ vec4 regs[8]; };\n"
"layout(location=0) out vec2 tc0;\n"
"layout(location=1) out vec4 color;\n"
"layout(location=2) out vec4 mask;\n"
@ -825,7 +761,7 @@ namespace vk
{
"#version 420\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"layout(set=0, binding=0) uniform sampler2D fs0;\n"
"layout(set=0, binding=1) uniform sampler2D fs0;\n"
"layout(location=0) in vec2 tc0;\n"
"layout(location=1) in vec4 color;\n"
"layout(location=2) in vec4 mask;\n"

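A usage note on the overlay refactor above: run() now takes a vector of input views, bound in order as fs0, fs1, ..., with a single-view wrapper kept for existing callers. A hypothetical two-input invocation:

// Bind separate depth and stencil views as fs0/fs1 (names assumed);
// the pass resolves each view against its numbered sampler binding.
std::vector<VkImageView> inputs = { depth_view, stencil_view };
pass->run(cmd, width, height, target_image, inputs, render_pass, m_framebuffers_to_clean);
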
View File

@ -19,7 +19,7 @@ namespace vk
u16 surface_height = 0;
VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
std::unordered_map<u32, std::unique_ptr<vk::image_view>> views;
std::unordered_multimap<u32, std::unique_ptr<vk::image_view>> views;
u64 frame_tag = 0; //frame id when invalidated, 0 if not invalid
@ -40,12 +40,16 @@ namespace vk
mipmaps, layers, samples, initial_layout, tiling, usage, image_flags)
{}
vk::image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap)
vk::image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap,
VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)
{
auto found = views.find(remap_encoding);
if (found != views.end())
auto found = views.equal_range(remap_encoding);
for (auto It = found.first; It != found.second; ++It)
{
return found->second.get();
if (It->second->info.subresourceRange.aspectMask & mask)
{
return It->second.get();
}
}
VkComponentMapping real_mapping = vk::apply_swizzle_remap
@ -55,10 +59,10 @@ namespace vk
);
auto view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), value, VK_IMAGE_VIEW_TYPE_2D, info.format,
real_mapping, vk::get_image_subresource_range(0, 0, 1, 1, attachment_aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT)));
real_mapping, vk::get_image_subresource_range(0, 0, 1, 1, attachment_aspect_flag & mask));
auto result = view.get();
views[remap_encoding] = std::move(view);
views.emplace(remap_encoding, std::move(view));
return result;
}
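
With the view cache changed to a multimap above, one remap encoding can now yield distinct views per aspect. For example, requesting a depth-only view of a packed depth-stencil render target (encoding and remap vector as used elsewhere in this commit):

// The mask parameter defaults to COLOR|DEPTH; restricting it to DEPTH
// returns (or creates) a view that excludes the stencil aspect.
auto depth_view = surface->get_view(0xAAE4, rsx::default_remap_vector,
	VK_IMAGE_ASPECT_DEPTH_BIT);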

View File

@ -55,30 +55,39 @@ namespace vk
}
}
void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout, u32 width, u32 height, u32 mipmaps, VkImageAspectFlagBits aspect)
void copy_image(VkCommandBuffer cmd, VkImage &src, VkImage &dst, VkImageLayout srcLayout, VkImageLayout dstLayout,
const areai& src_rect, const areai& dst_rect, u32 mipmaps, VkImageAspectFlags src_aspect, VkImageAspectFlags dst_aspect,
VkImageAspectFlags src_transfer_mask, VkImageAspectFlags dst_transfer_mask)
{
// NOTE: src_aspect should match dst_aspect according to spec but drivers seem to work just fine with the mismatch
// TODO: Implement separate pixel transfer for drivers that refuse this workaround
VkImageSubresourceLayers a_src = {}, a_dst = {};
a_src.aspectMask = aspect;
a_src.aspectMask = src_aspect & src_transfer_mask;
a_src.baseArrayLayer = 0;
a_src.layerCount = 1;
a_src.mipLevel = 0;
a_dst = a_src;
a_dst.aspectMask = dst_aspect & dst_transfer_mask;
VkImageCopy rgn = {};
rgn.extent.depth = 1;
rgn.extent.width = width;
rgn.extent.height = height;
rgn.dstOffset = { 0, 0, 0 };
rgn.srcOffset = { 0, 0, 0 };
rgn.extent.width = u32(src_rect.x2 - src_rect.x1);
rgn.extent.height = u32(src_rect.y2 - src_rect.y1);
rgn.dstOffset = { dst_rect.x1, dst_rect.y1, 0 };
rgn.srcOffset = { src_rect.x1, src_rect.y1, 0 };
rgn.srcSubresource = a_src;
rgn.dstSubresource = a_dst;
if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
change_image_layout(cmd, src, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
if (dstLayout != preferred_dst_format)
change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
{
@ -88,11 +97,11 @@ namespace vk
rgn.dstSubresource.mipLevel++;
}
if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (dstLayout != preferred_dst_format)
change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
}
void copy_scaled_image(VkCommandBuffer cmd,
@ -100,7 +109,8 @@ namespace vk
VkImageLayout srcLayout, VkImageLayout dstLayout,
u32 src_x_offset, u32 src_y_offset, u32 src_width, u32 src_height,
u32 dst_x_offset, u32 dst_y_offset, u32 dst_width, u32 dst_height,
u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats)
u32 mipmaps, VkImageAspectFlags aspect, bool compatible_formats,
VkFilter filter, VkFormat src_format, VkFormat dst_format)
{
VkImageSubresourceLayers a_src = {}, a_dst = {};
a_src.aspectMask = aspect;
@ -110,22 +120,129 @@ namespace vk
a_dst = a_src;
auto preferred_src_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
auto preferred_dst_format = (src == dst) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
//TODO: Use an array of offsets/dimensions for mipmapped blits (mipmap count > 1) since subimages will have different dimensions
if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
change_image_layout(cmd, src, srcLayout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
change_image_layout(cmd, dst, dstLayout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (dstLayout != preferred_dst_format)
change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (src_width != dst_width || src_height != dst_height || mipmaps > 1 || !compatible_formats)
if (compatible_formats && src_width == dst_width && src_height == dst_height)
{
if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0)
{
//Most depth/stencil formats cannot be scaled using hw blit
LOG_ERROR(RSX, "Cannot perform scaled blit for depth/stencil images");
return;
}
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 };
copy_rgn.dstOffset = { (int32_t)dst_x_offset, (int32_t)dst_y_offset, 0 };
copy_rgn.dstSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
copy_rgn.extent = { src_width, src_height, 1 };
vkCmdCopyImage(cmd, src, preferred_src_format, dst, preferred_dst_format, 1, &copy_rgn);
}
else if ((aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0)
{
//Most depth/stencil formats cannot be scaled using hw blit
if (src_format == VK_FORMAT_UNDEFINED || dst_width > 4096 || (src_height + dst_height) > 4096)
{
LOG_ERROR(RSX, "Could not blit depth/stencil image. src_fmt=0x%x, src=%dx%d, dst=%dx%d",
(u32)src_format, src_width, src_height, dst_width, dst_height);
}
else
{
auto stretch_image_typeless = [&cmd, preferred_src_format, preferred_dst_format](VkImage src, VkImage dst, VkImage typeless,
const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF)
{
const u32 src_w = u32(src_rect.x2 - src_rect.x1);
const u32 src_h = u32(src_rect.y2 - src_rect.y1);
const u32 dst_w = u32(dst_rect.x2 - dst_rect.x1);
const u32 dst_h = u32(dst_rect.y2 - dst_rect.y1);
// Drivers are not very accepting of aspect COLOR -> aspect DEPTH or aspect STENCIL separately
// However, this works okay for D24S8 (nvidia-only format)
// To work around the problem we use the non-existent DEPTH/STENCIL/DEPTH_STENCIL aspect of the color texture instead
VkImageAspectFlags typeless_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
if (transfer_flags == VK_IMAGE_ASPECT_DEPTH_BIT || transfer_flags == VK_IMAGE_ASPECT_STENCIL_BIT)
{
// NOTE: This path is only taken for VK_FORMAT_D32_SFLOAT_S8_UINT as there is no 36-bit format available
// On Nvidia, the default format is VK_FORMAT_D24_UNORM_S8_UINT which does not require this workaround
switch (vk::get_driver_vendor())
{
case driver_vendor::AMD:
// Quirks: This workaround allows proper transfer of stencil data
case driver_vendor::NVIDIA:
// Quirks: This workaround allows only transfer of depth data, stencil is ignored
typeless_aspect = aspect;
break;
default:
break;
}
}
//1. Copy unscaled to typeless surface
copy_image(cmd, src, typeless, preferred_src_format, VK_IMAGE_LAYOUT_GENERAL,
src_rect, { 0, 0, (s32)src_w, (s32)src_h }, 1, aspect, typeless_aspect, transfer_flags, 0xFF);
//2. Blit typeless surface to self
copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, true, VK_FILTER_NEAREST);
//3. Copy back the aspect bits
copy_image(cmd, typeless, dst, VK_IMAGE_LAYOUT_GENERAL, preferred_dst_format,
{0, (s32)src_h, (s32)dst_w, s32(src_h + dst_h) }, dst_rect, 1, typeless_aspect, aspect, 0xFF, transfer_flags);
};
areai src_rect = { (s32)src_x_offset, (s32)src_y_offset, s32(src_x_offset + src_width), s32(src_y_offset + src_height) };
areai dst_rect = { (s32)dst_x_offset, (s32)dst_y_offset, s32(dst_x_offset + dst_width), s32(dst_y_offset + dst_height) };
switch (src_format)
{
case VK_FORMAT_D16_UNORM:
{
auto typeless = vk::get_typeless_helper(VK_FORMAT_R16_UNORM);
change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL);
stretch_image_typeless(src, dst, typeless->value, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT);
break;
}
case VK_FORMAT_D24_UNORM_S8_UINT:
{
auto typeless = vk::get_typeless_helper(VK_FORMAT_B8G8R8A8_UNORM);
change_image_layout(cmd, typeless, VK_IMAGE_LAYOUT_GENERAL);
stretch_image_typeless(src, dst, typeless->value, src_rect, dst_rect, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
break;
}
case VK_FORMAT_D32_SFLOAT_S8_UINT:
{
// NOTE: Typeless transfer (Depth/Stencil->Equivalent Color->Depth/Stencil) of single aspects does not work on AMD when done from a non-depth texture
// Since the typeless transfer itself violates spec, the only way to make it work is to use a D32S8 intermediate
// Copy from src->intermediate then intermediate->dst for each aspect separately
auto typeless_depth = vk::get_typeless_helper(VK_FORMAT_R32_SFLOAT);
auto typeless_stencil = vk::get_typeless_helper(VK_FORMAT_R8_UINT);
change_image_layout(cmd, typeless_depth, VK_IMAGE_LAYOUT_GENERAL);
change_image_layout(cmd, typeless_stencil, VK_IMAGE_LAYOUT_GENERAL);
auto intermediate = vk::get_typeless_helper(VK_FORMAT_D32_SFLOAT_S8_UINT);
change_image_layout(cmd, intermediate, preferred_dst_format);
const areai intermediate_rect = { 0, 0, (s32)dst_width, (s32)dst_height };
const VkImageAspectFlags depth_stencil = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
// Blit DEPTH aspect
stretch_image_typeless(src, intermediate->value, typeless_depth->value, src_rect, intermediate_rect, depth_stencil, VK_IMAGE_ASPECT_DEPTH_BIT);
copy_image(cmd, intermediate->value, dst, preferred_dst_format, preferred_dst_format, intermediate_rect, dst_rect, 1, depth_stencil, depth_stencil, VK_IMAGE_ASPECT_DEPTH_BIT, VK_IMAGE_ASPECT_DEPTH_BIT);
// Blit STENCIL aspect
stretch_image_typeless(src, intermediate->value, typeless_stencil->value, src_rect, intermediate_rect, depth_stencil, VK_IMAGE_ASPECT_STENCIL_BIT);
copy_image(cmd, intermediate->value, dst, preferred_dst_format, preferred_dst_format, intermediate_rect, dst_rect, 1, depth_stencil, depth_stencil, VK_IMAGE_ASPECT_STENCIL_BIT, VK_IMAGE_ASPECT_STENCIL_BIT);
break;
}
}
}
}
else
{
VkImageBlit rgn = {};
rgn.srcOffsets[0] = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 };
rgn.srcOffsets[1] = { (int32_t)(src_width + src_x_offset), (int32_t)(src_height + src_y_offset), 1 };
@ -136,29 +253,18 @@ namespace vk
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
{
vkCmdBlitImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &rgn, VK_FILTER_LINEAR);
vkCmdBlitImage(cmd, src, preferred_src_format, dst, preferred_dst_format, 1, &rgn, filter);
rgn.srcSubresource.mipLevel++;
rgn.dstSubresource.mipLevel++;
}
}
else
{
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { (int32_t)src_x_offset, (int32_t)src_y_offset, 0 };
copy_rgn.dstOffset = { (int32_t)dst_x_offset, (int32_t)dst_y_offset, 0 };
copy_rgn.dstSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { (VkImageAspectFlags)aspect, 0, 0, 1 };
copy_rgn.extent = { src_width, src_height, 1 };
vkCmdCopyImage(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_rgn);
}
if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (srcLayout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
change_image_layout(cmd, src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (dstLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
change_image_layout(cmd, dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (dstLayout != preferred_dst_format)
change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
}
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image,

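Summarizing the routing in the transfer code above: the scratch format is picked to match the depth format's texel size, with VK_FORMAT_D32_SFLOAT_S8_UINT split across two helpers plus a depth-stencil intermediate. As a hypothetical lookup restating the switch (not code from the commit):

// Scratch (typeless) format per depth format, per the switch above.
VkFormat scratch_format_for(VkFormat depth_format)
{
	switch (depth_format)
	{
	case VK_FORMAT_D16_UNORM:         return VK_FORMAT_R16_UNORM;      // 16bpp
	case VK_FORMAT_D24_UNORM_S8_UINT: return VK_FORMAT_B8G8R8A8_UNORM; // packed 32bpp
	default:
		// D32_SFLOAT_S8_UINT has no single color equivalent; it uses
		// R32_SFLOAT + R8_UINT helpers and a D32S8 intermediate instead.
		return VK_FORMAT_UNDEFINED;
	}
}
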
View File

@ -1039,18 +1039,6 @@ namespace vk
}
}
public:
struct vk_blit_op_result : public blit_op_result
{
bool deferred = false;
vk::image *src_image = nullptr;
vk::image *dst_image = nullptr;
vk::image_view *src_view = nullptr;
using blit_op_result::blit_op_result;
};
public:
void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap)
@ -1133,14 +1121,14 @@ namespace vk
return upload_texture(cmd, tex, m_rtts, cmd, const_cast<const VkQueue>(m_submit_queue));
}
vk::image *upload_image_simple(vk::command_buffer& /*cmd*/, u32 address, u32 width, u32 height)
vk::image *upload_image_simple(vk::command_buffer& cmd, u32 address, u32 width, u32 height)
{
//Uploads a linear memory range as a BGRA8 texture
auto image = std::make_unique<vk::image>(*m_device, m_memory_types.host_visible_coherent,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
VK_IMAGE_TYPE_2D,
VK_FORMAT_B8G8R8A8_UNORM,
width, height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
width, height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_PREINITIALIZED,
VK_IMAGE_TILING_LINEAR, VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 0);
VkImageSubresource subresource{};
@ -1170,6 +1158,8 @@ namespace vk
image->memory->unmap();
vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
auto result = image.get();
const u32 resource_memory = width * height * 4; //Rough approximate
m_discardable_storage.push_back(image);
@ -1179,22 +1169,16 @@ namespace vk
return result;
}
vk_blit_op_result blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
{
struct blit_helper
{
vk::command_buffer* commands;
VkFormat format;
blit_helper(vk::command_buffer *c) : commands(c) {}
bool deferred = false;
vk::image* deferred_op_src = nullptr;
vk::image* deferred_op_dst = nullptr;
void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool /*interpolate*/, bool is_depth, const rsx::typeless_xfer& /*typeless*/)
void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& /*typeless*/)
{
VkImageAspectFlagBits aspect = VK_IMAGE_ASPECT_COLOR_BIT;
if (is_depth) aspect = (VkImageAspectFlagBits)(src->info.format == VK_FORMAT_D16_UNORM ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
//Checks
if (src_area.x2 <= src_area.x1 || src_area.y2 <= src_area.y1 || dst_area.x2 <= dst_area.x1 || dst_area.y2 <= dst_area.y1)
{
@ -1214,54 +1198,35 @@ namespace vk
return;
}
const auto aspect = vk::get_aspect_flags(src->info.format);
const auto src_width = src_area.x2 - src_area.x1;
const auto src_height = src_area.y2 - src_area.y1;
const auto dst_width = dst_area.x2 - dst_area.x1;
const auto dst_height = dst_area.y2 - dst_area.y1;
deferred_op_src = src;
deferred_op_dst = dst;
if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))
{
if (src_width != dst_width || src_height != dst_height || src->info.format != dst->info.format)
{
//Scaled depth scaling
deferred = true;
}
}
if (!deferred)
{
copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format);
}
copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_width, src_height,
dst_area.x1, dst_area.y1, dst_width, dst_height, 1, aspect, src->info.format == dst->info.format,
interpolate? VK_FILTER_LINEAR : VK_FILTER_NEAREST, src->info.format, dst->info.format);
change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, {(VkImageAspectFlags)aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers});
format = dst->info.format;
}
}
helper(&cmd);
auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, const_cast<const VkQueue>(m_submit_queue));
vk_blit_op_result result = reply.succeeded;
result.real_dst_address = reply.real_dst_address;
result.real_dst_size = reply.real_dst_size;
result.is_depth = reply.is_depth;
result.deferred = helper.deferred;
result.dst_image = helper.deferred_op_dst;
result.src_image = helper.deferred_op_src;
if (reply.succeeded)
{
if (reply.real_dst_size)
{
flush_if_cache_miss_likely(helper.format, reply.real_dst_address, reply.real_dst_size, cmd, m_submit_queue);
}
if (!helper.deferred)
return result;
return true;
}
VkImageSubresourceRange view_range = { VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, 1 };
auto tmp_view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), helper.deferred_op_src->value, VK_IMAGE_VIEW_TYPE_2D,
helper.deferred_op_src->info.format, helper.deferred_op_src->native_component_map, view_range);
result.src_view = tmp_view.get();
m_discardable_storage.push_back(tmp_view);
return result;
return false;
}
const u32 get_unreleased_textures_count() const override

View File

@ -502,7 +502,7 @@ namespace rsx
entries.push_back(tmp);
}
if ((entry_count = entries.size()) <= 2)
if ((entry_count = (u32)entries.size()) <= 2)
return;
root.rewind();