rsx: Enable MSAA

- vk: Enable depth buffer resolve+unresolve
- vk: Add AMD stenciling extension support
- rsx: Temporarily disables MSAA-compatible hacks such as transparency AA
- TODO: Add paths to optionally disable MSAA
This commit is contained in:
kd-11 2019-05-30 18:38:18 +03:00 committed by kd-11
parent f6f3b40ecc
commit 4a5bbba277
20 changed files with 1214 additions and 181 deletions

View File

@ -38,7 +38,8 @@ namespace rsx
enum surface_access : u32
{
read = 0,
write = 1
write = 1,
transfer = 2
};
//Sampled image descriptor

View File

@ -677,11 +677,11 @@ namespace rsx
{
for (auto &tex_info : data)
{
auto this_address = std::get<0>(tex_info);
const auto this_address = tex_info.first;
if (this_address >= limit)
continue;
auto surface = std::get<1>(tex_info).get();
auto surface = tex_info.second.get();
const auto pitch = surface->get_rsx_pitch();
if (!rsx::pitch_compatible(surface, required_pitch, required_height))
continue;

View File

@ -11,7 +11,9 @@ namespace rsx
enum surface_state_flags : u32
{
ready = 0,
erase_bkgnd = 1
erase_bkgnd = 1,
require_resolve = 2,
require_unresolve = 4
};
template <typename surface_type>
@ -119,8 +121,11 @@ namespace rsx
u8 samples_x = 1;
u8 samples_y = 1;
std::unique_ptr<typename std::remove_pointer<image_storage_type>::type> resolve_surface;
flags32_t memory_usage_flags = surface_usage_flags::unknown;
flags32_t state_flags = surface_state_flags::ready;
flags32_t msaa_flags = surface_state_flags::ready;
union
{
@ -410,7 +415,7 @@ namespace rsx
}
}
void on_write(u64 write_tag = 0)
void on_write(u64 write_tag = 0, rsx::surface_state_flags resolve_flags = surface_state_flags::require_resolve)
{
if (write_tag)
{
@ -424,12 +429,22 @@ namespace rsx
// HACK!! This should be cleared through memory barriers only
state_flags = rsx::surface_state_flags::ready;
if (spp > 1)
{
msaa_flags = resolve_flags;
}
if (old_contents.source)
{
clear_rw_barrier();
}
}
void on_write_copy(u64 write_tag = 0)
{
on_write(write_tag, rsx::surface_state_flags::require_unresolve);
}
// Returns the rect area occupied by this surface expressed as an 8bpp image with no AA
areau get_normalized_memory_area() const
{
@ -456,6 +471,17 @@ namespace rsx
area.y2 /= samples_y;
}
template <typename T>
void transform_pixels_to_samples(area_base<T>& area)
{
if (LIKELY(spp == 1)) return;
area.x1 *= samples_x;
area.x2 *= samples_x;
area.y1 *= samples_y;
area.y2 *= samples_y;
}
template <typename T>
void transform_samples_to_pixels(T& x1, T& x2, T& y1, T& y2)
{
@ -466,5 +492,16 @@ namespace rsx
y1 /= samples_y;
y2 /= samples_y;
}
template <typename T>
void transform_pixels_to_samples(T& x1, T& x2, T& y1, T& y2)
{
if (LIKELY(spp == 1)) return;
x1 *= samples_x;
x2 *= samples_x;
y1 *= samples_y;
y2 *= samples_y;
}
};
}

View File

@ -2564,7 +2564,7 @@ namespace rsx
// Destination dimensions are relaxed (true)
dst_area = dst_subres.get_src_area();
dest_texture = dst_subres.surface->get_surface(rsx::surface_access::write);
dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer);
typeless_info.dst_context = texture_upload_context::framebuffer_storage;
max_dst_width = (u16)(dst_subres.surface->get_surface_width(rsx::surface_metrics::samples) * typeless_info.dst_scaling_hint);
@ -2851,7 +2851,7 @@ namespace rsx
}
else
{
dst_subres.surface->on_write(rsx::get_shared_tag());
dst_subres.surface->on_write_copy(rsx::get_shared_tag());
m_rtts.notify_memory_structure_changed();
}
@ -2889,13 +2889,13 @@ namespace rsx
}
}
if (src_is_render_target)
if (0)//src_is_render_target)
{
// TODO: Specify typeless for high sample counts
src_subres.surface->transform_samples_to_pixels(src_area);
}
if (dst_is_render_target)
if (0)//dst_is_render_target)
{
// TODO: Specify typeless for high sample counts
dst_subres.surface->transform_samples_to_pixels(dst_area);

View File

@ -94,7 +94,7 @@ namespace gl
static_cast<gl::render_target*>(t)->release();
}
texture* get_surface(rsx::surface_access access_type) override
texture* get_surface(rsx::surface_access /*access_type*/) override
{
// TODO
return (gl::texture*)this;
@ -259,7 +259,6 @@ struct gl_render_target_traits
void invalidate_surface_contents(gl::command_context&, gl::render_target *surface, u32 address, size_t pitch)
{
surface->set_rsx_pitch((u16)pitch);
surface->set_aa_mode(rsx::surface_antialiasing::center_1_sample);
surface->queue_tag(address);
surface->last_use_tag = 0;
surface->memory_usage_flags = rsx::surface_usage_flags::unknown;
@ -278,7 +277,7 @@ struct gl_render_target_traits
}
static
void notify_surface_persist(const std::unique_ptr<gl::render_target>& surface)
void notify_surface_persist(const std::unique_ptr<gl::render_target>& /*surface*/)
{}
static

View File

@ -658,8 +658,8 @@ namespace rsx
auto alpha_ref = rsx::method_registers.alpha_ref() / 255.f;
auto rop_control = rsx::method_registers.alpha_test_enabled()? 1u : 0u;
if (rsx::method_registers.msaa_alpha_to_coverage_enabled() &&
rsx::method_registers.msaa_enabled() &&
if (0 &&
rsx::method_registers.msaa_alpha_to_coverage_enabled() &&
rsx::method_registers.surface_antialias() != rsx::surface_antialiasing::center_1_sample)
{
// Alpha values generate a coverage mask for order independent blending

View File

@ -25,35 +25,49 @@ namespace vk
u32 optimal_group_size = 1;
u32 optimal_kernel_size = 1;
virtual std::vector<std::pair<VkDescriptorType, u8>> get_descriptor_layout()
{
std::vector<std::pair<VkDescriptorType, u8>> result;
result.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 });
if (uniform_inputs)
{
result.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1 });
}
return result;
}
void init_descriptors()
{
VkDescriptorPoolSize descriptor_pool_sizes[2] =
std::vector<VkDescriptorPoolSize> descriptor_pool_sizes;
std::vector<VkDescriptorSetLayoutBinding> bindings;
const auto layout = get_descriptor_layout();
for (const auto &e : layout)
{
{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_MAX_COMPUTE_TASKS },
{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_MAX_COMPUTE_TASKS }
};
descriptor_pool_sizes.push_back({e.first, u32(VK_MAX_COMPUTE_TASKS * e.second)});
for (unsigned n = 0; n < e.second; ++n)
{
bindings.push_back
({
uint32_t(bindings.size()),
e.first,
1,
VK_SHADER_STAGE_COMPUTE_BIT,
nullptr
});
}
}
// Reserve descriptor pools
m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes, 2, VK_MAX_COMPUTE_TASKS, 2);
std::vector<VkDescriptorSetLayoutBinding> bindings(2);
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
bindings[0].binding = 0;
bindings[0].pImmutableSamplers = nullptr;
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[1].descriptorCount = 1;
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
bindings[1].binding = 1;
bindings[1].pImmutableSamplers = nullptr;
m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes.data(), (u32)descriptor_pool_sizes.size(), VK_MAX_COMPUTE_TASKS, 2);
VkDescriptorSetLayoutCreateInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
infos.pBindings = bindings.data();
infos.bindingCount = uniform_inputs? 2u : 1u;
infos.bindingCount = (u32)bindings.size();
CHECK_RESULT(vkCreateDescriptorSetLayout(*get_current_renderer(), &infos, nullptr, &m_descriptor_layout));
@ -120,6 +134,9 @@ namespace vk
virtual void bind_resources()
{}
virtual void declare_inputs()
{}
void load_program(VkCommandBuffer cmd)
{
if (!m_program)
@ -143,8 +160,8 @@ namespace vk
VkPipeline pipeline;
vkCreateComputePipelines(*get_current_renderer(), nullptr, 1, &info, nullptr, &pipeline);
std::vector<vk::glsl::program_input> inputs;
m_program = std::make_unique<vk::glsl::program>(*get_current_renderer(), pipeline, inputs, inputs);
m_program = std::make_unique<vk::glsl::program>(*get_current_renderer(), pipeline);
declare_inputs();
}
verify(HERE), m_used_descriptors < VK_MAX_COMPUTE_TASKS;
@ -164,10 +181,15 @@ namespace vk
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline_layout, 0, 1, &m_descriptor_set, 0, nullptr);
}
virtual void run(VkCommandBuffer cmd, u32 num_invocations)
virtual void run(VkCommandBuffer cmd, u32 invocations_x, u32 invocations_y)
{
load_program(cmd);
vkCmdDispatch(cmd, num_invocations, 1, 1);
vkCmdDispatch(cmd, invocations_x, invocations_y, 1);
}
virtual void run(VkCommandBuffer cmd, u32 num_invocations)
{
run(cmd, num_invocations, 1);
}
};

View File

@ -89,10 +89,10 @@ namespace vk
return std::make_pair(VK_FORMAT_R32G32B32A32_SFLOAT, vk::default_component_map());
case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, o_rgb);
return std::make_pair(VK_FORMAT_A1R5G5B5_UNORM_PACK16, o_rgb);
case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, z_rgb);
return std::make_pair(VK_FORMAT_A1R5G5B5_UNORM_PACK16, z_rgb);
case rsx::surface_color_format::b8:
{
@ -539,7 +539,7 @@ VKGSRender::VKGSRender() : GSRender()
else
m_vertex_cache = std::make_unique<vk::weak_vertex_cache>();
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.7");
m_shaders_cache = std::make_unique<vk::shader_cache>(*m_prog_buffer, "vulkan", "v1.8");
open_command_buffer();
@ -1566,7 +1566,11 @@ void VKGSRender::end()
if (!image_ptr)
{
LOG_ERROR(RSX, "Texture upload failed to vtexture index %d. Binding null sampler.", i);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, rsx::constants::vertex_texture_names[i], m_current_frame->descriptor_set);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
i,
::glsl::program_domain::glsl_vertex_program,
m_current_frame->descriptor_set);
continue;
}
@ -1623,16 +1627,6 @@ void VKGSRender::end()
m_current_command_buffer->flags |= vk::command_buffer::cb_has_occlusion_task;
}
// Final heap check...
check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE);
// While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point
// Only textures are synchronized tightly with the GPU and they have been read back above
vk::enter_uninterruptible();
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
update_draw_state();
// Apply write memory barriers
if (true)//g_cfg.video.strict_rendering_mode)
{
@ -1682,6 +1676,16 @@ void VKGSRender::end()
}
}
// Final heap check...
check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE);
// While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point
// Only textures are synchronized tightly with the GPU and they have been read back above
vk::enter_uninterruptible();
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
update_draw_state();
u32 sub_index = 0;
rsx::method_registers.current_draw_clause.begin();
do
@ -2238,7 +2242,7 @@ void VKGSRender::frame_context_cleanup(frame_context_t *ctx, bool free_resources
m_overlay_manager->dispose(uids_to_dispose);
}
vk::reset_compute_tasks();
vk::reset_global_resources();
m_attachment_clear_pass->free_resources();
m_depth_converter->free_resources();
@ -2472,8 +2476,18 @@ bool VKGSRender::load_program()
}
}
const auto rasterization_samples = u8((m_current_renderpass_key >> 16) & 0xF);
if (rasterization_samples > 1)
{
properties.state.set_multisample_state(
rasterization_samples,
rsx::method_registers.msaa_sample_mask(),
rsx::method_registers.msaa_enabled(),
rsx::method_registers.msaa_alpha_to_coverage_enabled(),
rsx::method_registers.msaa_alpha_to_one_enabled());
}
properties.renderpass_key = m_current_renderpass_key;
properties.num_targets = (u32)m_draw_buffers.size();
vk::enter_uninterruptible();

View File

@ -3,6 +3,7 @@
#include "VKCompute.h"
#include "VKRenderPass.h"
#include "VKFramebuffer.h"
#include "VKResolveHelper.h"
#include "Utilities/mutex.h"
namespace vk
@ -234,11 +235,18 @@ namespace vk
}
}
void reset_global_resources()
{
vk::reset_compute_tasks();
vk::reset_resolve_resources();
}
void destroy_global_resources()
{
VkDevice dev = *g_current_renderer;
vk::clear_renderpass_cache(dev);
vk::clear_framebuffer_cache();
vk::clear_resolve_helpers();
g_null_texture.reset();
g_null_image_view.reset();
@ -416,6 +424,27 @@ namespace vk
vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 1, &barrier, 0, nullptr);
}
void insert_image_memory_barrier(
VkCommandBuffer cmd, VkImage image,
VkImageLayout current_layout, VkImageLayout new_layout,
VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage,
VkAccessFlags src_mask, VkAccessFlags dst_mask,
const VkImageSubresourceRange& range)
{
VkImageMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.newLayout = new_layout;
barrier.oldLayout = current_layout;
barrier.image = image;
barrier.srcAccessMask = src_mask;
barrier.dstAccessMask = dst_mask;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange = range;
vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &barrier);
}
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range)
{
//Prepare an image to match the new layout..
@ -615,6 +644,12 @@ namespace vk
void insert_texture_barrier(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout)
{
if (image->samples() > 1)
{
// This barrier is pointless for multisampled images as they require a resolve operation before access anyway
return;
}
insert_texture_barrier(cmd, image->value, image->current_layout, new_layout, { image->aspect(), 0, 1, 0, 1 });
image->current_layout = new_layout;
}
@ -835,6 +870,8 @@ namespace vk
{
if (msgFlags & VK_DEBUG_REPORT_ERROR_BIT_EXT)
{
if (strstr(pMsg, "IMAGE_VIEW_TYPE_1D")) return false;
LOG_ERROR(RSX, "ERROR: [%s] Code %d : %s", pLayerPrefix, msgCode, pMsg);
}
else if (msgFlags & VK_DEBUG_REPORT_WARNING_BIT_EXT)

View File

@ -133,6 +133,7 @@ namespace vk
void reset_compute_tasks();
void destroy_global_resources();
void reset_global_resources();
/**
* Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer.
@ -171,6 +172,10 @@ namespace vk
void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length,
VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask);
void insert_image_memory_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout,
VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask,
const VkImageSubresourceRange& range);
//Manage 'uninterruptible' state where secondary operations (e.g. violation handlers) will have to wait
void enter_uninterruptible();
@ -1217,7 +1222,7 @@ namespace vk
public:
using image::image;
image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap,
virtual image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap,
VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)
{
auto found = views.equal_range(remap_encoding);
@ -2799,6 +2804,13 @@ public:
VkPipelineColorBlendAttachmentState att_state[4];
VkPipelineColorBlendStateCreateInfo cs;
VkPipelineRasterizationStateCreateInfo rs;
VkPipelineMultisampleStateCreateInfo ms;
struct extra_parameters
{
VkSampleMask msaa_sample_mask;
}
temp_storage;
graphics_pipeline_state()
{
@ -2814,6 +2826,10 @@ public:
rs.cullMode = VK_CULL_MODE_NONE;
rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
rs.lineWidth = 1.f;
ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
temp_storage.msaa_sample_mask = 0xFFFFFFFF;
}
graphics_pipeline_state(const graphics_pipeline_state& other)
@ -2973,6 +2989,27 @@ public:
cs.attachmentCount = count;
cs.pAttachments = att_state;
}
void set_multisample_state(u8 sample_count, u32 sample_mask, bool msaa_enabled, bool alpha_to_coverage, bool alpha_to_one)
{
temp_storage.msaa_sample_mask = sample_mask;
ms.rasterizationSamples = static_cast<VkSampleCountFlagBits>(sample_count);
ms.alphaToCoverageEnable = alpha_to_coverage;
ms.alphaToOneEnable = alpha_to_one;
if (!msaa_enabled)
{
// This register is likely glMinSampleShading but in reverse; probably sets max sample shading rate of 1
// I (kd-11) suspect its what the control panel setting affects when MSAA is set to disabled
}
}
void set_multisample_shading_rate(float shading_rate)
{
ms.sampleShadingEnable = VK_TRUE;
ms.minSampleShading = shading_rate;
}
};
namespace glsl
@ -3095,21 +3132,24 @@ public:
std::array<u32, 4> vs_texture_bindings;
bool linked;
void create_impl();
public:
VkPipeline pipeline;
u64 attribute_location_mask;
u64 vertex_attributes_mask;
program(VkDevice dev, VkPipeline p, const std::vector<program_input> &vertex_input, const std::vector<program_input>& fragment_inputs);
program(VkDevice dev, VkPipeline p);
program(const program&) = delete;
program(program&& other) = delete;
~program();
program& load_uniforms(::glsl::program_domain domain, const std::vector<program_input>& inputs);
program& load_uniforms(const std::vector<program_input>& inputs);
program& link();
bool has_uniform(program_input_type type, const std::string &uniform_name);
void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorSet &descriptor_set);
void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorType type, VkDescriptorSet &descriptor_set);
void bind_uniform(const VkDescriptorImageInfo &image_descriptor, int texture_unit, ::glsl::program_domain domain, VkDescriptorSet &descriptor_set, bool is_stencil_mirror = false);
void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorSet &descriptor_set);
void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set);

View File

@ -80,7 +80,7 @@ namespace vk
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[0].binding = 0;
bindings[0].pImmutableSamplers = nullptr;
@ -187,11 +187,6 @@ namespace vk
vp.scissorCount = 1;
vp.viewportCount = 1;
VkPipelineMultisampleStateCreateInfo ms = {};
ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
ms.pSampleMask = NULL;
ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
VkPipeline pipeline;
VkGraphicsPipelineCreateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
@ -199,7 +194,7 @@ namespace vk
info.pInputAssemblyState = &renderpass_config.ia;
info.pRasterizationState = &renderpass_config.rs;
info.pColorBlendState = &renderpass_config.cs;
info.pMultisampleState = &ms;
info.pMultisampleState = &renderpass_config.ms;
info.pViewportState = &vp;
info.pDepthStencilState = &renderpass_config.ds;
info.stageCount = 2;
@ -253,7 +248,7 @@ namespace vk
for (int n = 0; n < src.size(); ++n)
{
VkDescriptorImageInfo info = { m_sampler->value, src[n]->value, src[n]->image()->current_layout };
program->bind_uniform(info, "fs" + std::to_string(n), m_descriptor_set);
program->bind_uniform(info, "fs" + std::to_string(n), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_descriptor_set);
}
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, program->pipeline);

View File

@ -11,36 +11,38 @@ namespace vk
struct pipeline_props
{
graphics_pipeline_state state;
int num_targets;
u64 renderpass_key;
bool operator==(const pipeline_props& other) const
{
if (memcmp(&state.att_state[0], &other.state.att_state[0], sizeof(VkPipelineColorBlendAttachmentState)))
return false;
if (renderpass_key != other.renderpass_key)
return false;
if (memcmp(&state.rs, &other.state.rs, sizeof(VkPipelineRasterizationStateCreateInfo)))
return false;
//Cannot memcmp cs due to pAttachments being a pointer to memory
if (state.cs.attachmentCount != other.state.cs.attachmentCount ||
state.cs.flags != other.state.cs.flags ||
state.cs.logicOp != other.state.cs.logicOp ||
state.cs.logicOpEnable != other.state.cs.logicOpEnable ||
state.cs.sType != other.state.cs.sType ||
memcmp(state.cs.blendConstants, other.state.cs.blendConstants, 4 * sizeof(f32)))
return false;
if (memcmp(&state.ia, &other.state.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo)))
return false;
if (memcmp(&state.att_state[0], &other.state.att_state[0], sizeof(VkPipelineColorBlendAttachmentState)))
return false;
if (memcmp(&state.rs, &other.state.rs, sizeof(VkPipelineRasterizationStateCreateInfo)))
return false;
// Cannot memcmp cs due to pAttachments being a pointer to memory
if (state.cs.logicOp != other.state.cs.logicOp ||
state.cs.logicOpEnable != other.state.cs.logicOpEnable ||
memcmp(state.cs.blendConstants, other.state.cs.blendConstants, 4 * sizeof(f32)))
return false;
if (memcmp(&state.ds, &other.state.ds, sizeof(VkPipelineDepthStencilStateCreateInfo)))
return false;
return num_targets == other.num_targets;
if (state.ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT)
{
if (memcmp(&state.ms, &other.state.ms, sizeof(VkPipelineMultisampleStateCreateInfo)))
return false;
}
return true;
}
};
}
@ -50,10 +52,11 @@ namespace rpcs3
template <>
size_t hash_struct<vk::pipeline_props>(const vk::pipeline_props &pipelineProperties)
{
size_t seed = hash_base(pipelineProperties.num_targets);
size_t seed = hash_base(pipelineProperties.renderpass_key);
seed ^= hash_struct(pipelineProperties.state.ia);
seed ^= hash_struct(pipelineProperties.state.ds);
seed ^= hash_struct(pipelineProperties.state.rs);
seed ^= hash_struct(pipelineProperties.state.ms);
// Do not compare pointers to memory!
VkPipelineColorBlendStateCreateInfo tmp;
@ -134,10 +137,13 @@ struct VKTraits
vp.viewportCount = 1;
vp.scissorCount = 1;
VkPipelineMultisampleStateCreateInfo ms = {};
ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
ms.pSampleMask = NULL;
ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
VkPipelineMultisampleStateCreateInfo ms = pipelineProperties.state.ms;
verify("Multisample state mismatch!" HERE), ms.rasterizationSamples == VkSampleCountFlagBits((pipelineProperties.renderpass_key >> 16) & 0xF);
if (ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT)
{
// Update the sample mask pointer
ms.pSampleMask = &pipelineProperties.state.temp_storage.msaa_sample_mask;
}
// Rebase pointers from pipeline structure in case it is moved/copied
VkPipelineColorBlendStateCreateInfo cs = pipelineProperties.state.cs;

View File

@ -9,13 +9,9 @@ namespace vk
{
using namespace ::glsl;
program::program(VkDevice dev, VkPipeline p, const std::vector<program_input> &vertex_input, const std::vector<program_input>& fragment_inputs)
: m_device(dev), pipeline(p)
void program::create_impl()
{
linked = false;
load_uniforms(program_domain::glsl_vertex_program, vertex_input);
load_uniforms(program_domain::glsl_vertex_program, fragment_inputs);
attribute_location_mask = 0;
vertex_attributes_mask = 0;
@ -24,12 +20,26 @@ namespace vk
vs_texture_bindings.fill(~0u);
}
program::program(VkDevice dev, VkPipeline p, const std::vector<program_input> &vertex_input, const std::vector<program_input>& fragment_inputs)
: m_device(dev), pipeline(p)
{
create_impl();
load_uniforms(vertex_input);
load_uniforms(fragment_inputs);
}
program::program(VkDevice dev, VkPipeline p)
: m_device(dev), pipeline(p)
{
create_impl();
}
program::~program()
{
vkDestroyPipeline(m_device, pipeline, nullptr);
}
program& program::load_uniforms(program_domain domain, const std::vector<program_input>& inputs)
program& program::load_uniforms(const std::vector<program_input>& inputs)
{
verify("Cannot change uniforms in already linked program!" HERE), !linked;
@ -92,7 +102,7 @@ namespace vk
return false;
}
void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorSet &descriptor_set)
void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorType type, VkDescriptorSet &descriptor_set)
{
for (const auto &uniform : uniforms[program_input_type::input_type_texture])
{
@ -106,7 +116,7 @@ namespace vk
uniform.location, // dstBinding
0, // dstArrayElement
1, // descriptorCount
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, // descriptorType
type, // descriptorType
&image_descriptor, // pImageInfo
nullptr, // pBufferInfo
nullptr // pTexelBufferView

View File

@ -119,7 +119,7 @@ namespace vk
}
// Decode
VkSampleCountFlagBits samples = VkSampleCountFlagBits((renderpass_key >> 16) & 0x1F);
VkSampleCountFlagBits samples = VkSampleCountFlagBits((renderpass_key >> 16) & 0xF);
std::vector<VkImageLayout> rtv_layouts;
VkImageLayout dsv_layout;

View File

@ -11,6 +11,9 @@
namespace vk
{
void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
struct render_target : public viewable_image, public rsx::ref_counted, public rsx::render_target_descriptor<vk::viewable_image*>
{
u64 frame_tag = 0; // frame id when invalidated, 0 if not invalid
@ -19,8 +22,14 @@ namespace vk
vk::viewable_image* get_surface(rsx::surface_access access_type) override
{
// TODO
return (vk::viewable_image*)this;
if (spp == 1 || access_type == rsx::surface_access::write)
{
return this;
}
// A read barrier should have been called before this!
verify("Read access without explicit barrier" HERE), resolve_surface, !(msaa_flags & rsx::surface_state_flags::require_resolve);
return resolve_surface.get();
}
bool is_depth_surface() const override
@ -39,58 +48,282 @@ namespace vk
return (rsx::apply_resolution_scale(_width, true) == width()) && (rsx::apply_resolution_scale(_height, true) == height());
}
void memory_barrier(vk::command_buffer& cmd, bool force_init = false)
image_view* get_view(u32 remap_encoding, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap,
VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) override
{
if (remap_encoding != 0xDEADBEEF && resolve_surface)
{
return resolve_surface->get_view(remap_encoding, remap, mask);
}
else
{
if (remap_encoding == 0xDEADBEEF)
{
// Special encoding to skip the resolve target fetch
remap_encoding = 0xAAE4;
}
return vk::viewable_image::get_view(remap_encoding, remap, mask);
}
}
void resolve(vk::command_buffer& cmd)
{
VkImageSubresourceRange range = { aspect(), 0, 1, 0, 1 };
// NOTE: This surface can only be in the ATTACHMENT_OPTIMAL layout
// The resolve surface can be in any type of access, but we have to assume it is likely in read-only mode like shader read-only
if (LIKELY(!is_depth_surface()))
{
verify(HERE), current_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
// This is the source; finish writing before reading
vk::insert_image_memory_barrier(
cmd, this->value,
this->current_layout, VK_IMAGE_LAYOUT_GENERAL,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
VK_ACCESS_SHADER_READ_BIT,
range);
// This is the target; finish reading before writing
vk::insert_image_memory_barrier(
cmd, resolve_surface->value,
resolve_surface->current_layout, VK_IMAGE_LAYOUT_GENERAL,
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
range);
this->current_layout = VK_IMAGE_LAYOUT_GENERAL;
resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL;
}
else
{
this->push_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
}
vk::resolve_image(cmd, resolve_surface.get(), this);
if (LIKELY(!is_depth_surface()))
{
vk::insert_image_memory_barrier(
cmd, this->value,
this->current_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
range);
vk::insert_image_memory_barrier(
cmd, resolve_surface->value,
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
range);
this->current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL;
}
else
{
this->pop_layout(cmd);
resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL);
}
msaa_flags &= ~(rsx::surface_state_flags::require_resolve);
}
void unresolve(vk::command_buffer& cmd)
{
verify(HERE), !(msaa_flags & rsx::surface_state_flags::require_resolve);
VkImageSubresourceRange range = { aspect(), 0, 1, 0, 1 };
if (LIKELY(!is_depth_surface()))
{
verify(HERE), current_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
// This is the dest; finish reading before writing
vk::insert_image_memory_barrier(
cmd, this->value,
this->current_layout, VK_IMAGE_LAYOUT_GENERAL,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
range);
// This is the source; finish writing before reading
vk::insert_image_memory_barrier(
cmd, resolve_surface->value,
resolve_surface->current_layout, VK_IMAGE_LAYOUT_GENERAL,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_SHADER_READ_BIT,
range);
this->current_layout = VK_IMAGE_LAYOUT_GENERAL;
resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL;
}
else
{
this->push_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}
vk::unresolve_image(cmd, this, resolve_surface.get());
if (LIKELY(!is_depth_surface()))
{
vk::insert_image_memory_barrier(
cmd, this->value,
this->current_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT,
range);
vk::insert_image_memory_barrier(
cmd, resolve_surface->value,
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT,
range);
this->current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL;
}
else
{
this->pop_layout(cmd);
resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL);
}
msaa_flags &= ~(rsx::surface_state_flags::require_unresolve);
}
void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access)
{
// Helper to optionally clear/initialize memory contents depending on barrier type
auto clear_surface_impl = [&]()
auto clear_surface_impl = [&cmd, this](vk::image* surface)
{
push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
VkImageSubresourceRange range{ aspect(), 0, 1, 0, 1 };
const auto optimal_layout = (surface->current_layout == VK_IMAGE_LAYOUT_GENERAL) ?
VK_IMAGE_LAYOUT_GENERAL :
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
if (aspect() & VK_IMAGE_ASPECT_COLOR_BIT)
surface->push_layout(cmd, optimal_layout);
VkImageSubresourceRange range{ surface->aspect(), 0, 1, 0, 1 };
if (surface->aspect() & VK_IMAGE_ASPECT_COLOR_BIT)
{
VkClearColorValue color{};
vkCmdClearColorImage(cmd, value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &color, 1, &range);
vkCmdClearColorImage(cmd, surface->value, surface->current_layout, &color, 1, &range);
}
else
{
VkClearDepthStencilValue clear{ 1.f, 255 };
vkCmdClearDepthStencilImage(cmd, value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range);
vkCmdClearDepthStencilImage(cmd, surface->value, surface->current_layout, &clear, 1, &range);
}
pop_layout(cmd);
state_flags &= ~rsx::surface_state_flags::erase_bkgnd;
};
surface->pop_layout(cmd);
auto null_transfer_impl = [&]()
{
if (dirty() && (force_init || state_flags & rsx::surface_state_flags::erase_bkgnd))
if (surface == this)
{
// Initialize memory contents if we did not find anything usable
// TODO: Properly sync with Cell
clear_surface_impl();
on_write();
}
else
{
verify(HERE), state_flags == rsx::surface_state_flags::ready;
state_flags &= ~rsx::surface_state_flags::erase_bkgnd;
}
};
if (!old_contents)
auto get_resolve_target = [&]()
{
null_transfer_impl();
if (!resolve_surface)
{
// Create a resolve surface
auto pdev = vk::get_current_renderer();
const auto resolve_w = width() * samples_x;
const auto resolve_h = height() * samples_y;
VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
usage |= (this->info.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT));
resolve_surface.reset(new vk::viewable_image(
*pdev,
pdev->get_memory_mapping().device_local,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_IMAGE_TYPE_2D,
format(),
resolve_w, resolve_h, 1, 1, 1,
VK_SAMPLE_COUNT_1_BIT,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL,
usage,
0));
resolve_surface->native_component_map = native_component_map;
resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL);
}
return resolve_surface.get();
};
const bool read_access = (access != rsx::surface_access::write);
if (spp > 1 && read_access)
{
get_resolve_target();
}
if (old_contents && !rsx::pitch_compatible(this, static_cast<vk::render_target*>(old_contents.source)))
{
LOG_TRACE(RSX, "Pitch mismatch, could not transfer inherited memory");
clear_rw_barrier();
}
if (LIKELY(!old_contents))
{
if (state_flags & rsx::surface_state_flags::erase_bkgnd)
{
clear_surface_impl(this);
if (resolve_surface && read_access)
{
// Only clear the resolve surface if reading from it, otherwise it's a waste
clear_surface_impl(resolve_surface.get());
}
on_write(rsx::get_shared_tag(), rsx::surface_state_flags::ready);
}
else if (msaa_flags & rsx::surface_state_flags::require_resolve)
{
if (read_access)
{
// Only do this step when read access is required
resolve(cmd);
}
}
else if (msaa_flags & rsx::surface_state_flags::require_unresolve)
{
if (!read_access)
{
// Only do this step when it is needed to start rendering
unresolve(cmd);
}
}
return;
}
auto src_texture = static_cast<vk::render_target*>(old_contents.source);
if (!rsx::pitch_compatible(this, src_texture))
{
LOG_TRACE(RSX, "Pitch mismatch, could not transfer inherited memory");
clear_rw_barrier();
return;
}
src_texture->read_barrier(cmd);
const auto src_bpp = src_texture->get_bpp();
const auto dst_bpp = get_bpp();
@ -116,29 +349,52 @@ namespace vk
vk::blitter hw_blitter;
old_contents.init_transfer(this);
if (state_flags & rsx::surface_state_flags::erase_bkgnd)
auto src_area = old_contents.src_rect();
auto dst_area = old_contents.dst_rect();
src_texture->transform_pixels_to_samples(src_area);
this->transform_pixels_to_samples(dst_area);
vk::image *target_image = (spp > 1) ? get_resolve_target() : this;
if (dst_area.x1 == 0 && dst_area.y1 == 0 &&
unsigned(dst_area.x2) == target_image->width() && unsigned(dst_area.y2) == target_image->height())
{
const auto area = old_contents.dst_rect();
if (area.x1 > 0 || area.y1 > 0 || unsigned(area.x2) < width() || unsigned(area.y2) < height())
{
clear_surface_impl();
}
else
{
state_flags &= ~rsx::surface_state_flags::erase_bkgnd;
}
// Skip a bunch of useless work
state_flags &= ~(rsx::surface_state_flags::erase_bkgnd);
msaa_flags = rsx::surface_state_flags::ready;
}
else if (state_flags & rsx::surface_state_flags::erase_bkgnd)
{
clear_surface_impl(target_image);
state_flags &= ~(rsx::surface_state_flags::erase_bkgnd);
msaa_flags = rsx::surface_state_flags::ready;
}
else if (msaa_flags & rsx::surface_state_flags::require_resolve)
{
// Need to forward resolve this
resolve(cmd);
}
hw_blitter.scale_image(cmd, old_contents.source, this,
old_contents.src_rect(),
old_contents.dst_rect(),
hw_blitter.scale_image(
cmd,
src_texture->get_surface(rsx::surface_access::read),
this->get_surface(rsx::surface_access::transfer),
src_area,
dst_area,
/*linear?*/false, /*depth?(unused)*/false, typeless_info);
on_write();
on_write_copy();
if (!read_access && spp > 1)
{
// Write barrier, must initialize
unresolve(cmd);
}
}
void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, true); }
void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, false); }
// Prepare this surface for reading: memory_barrier() with read access will
// resolve pending MSAA data into the single-sample resolve target if needed.
void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::read); }
// Prepare this surface for rendering: memory_barrier() with write access will
// unresolve previously resolved data back into the MSAA image if needed.
void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::write); }
};
static inline vk::render_target* as_rtt(vk::image* t)
@ -164,8 +420,19 @@ namespace rsx
rsx::surface_antialiasing antialias,
vk::render_device &device, vk::command_buffer& cmd)
{
auto fmt = vk::get_compatible_surface_format(format);
const auto fmt = vk::get_compatible_surface_format(format);
const auto spp = get_format_sample_count(antialias);
VkFormat requested_format = fmt.first;
VkImageUsageFlags usage_flags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
if (antialias == rsx::surface_antialiasing::center_1_sample)
{
usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
}
else
{
usage_flags |= VK_IMAGE_USAGE_STORAGE_BIT;
}
std::unique_ptr<vk::render_target> rtt;
rtt = std::make_unique<vk::render_target>(device, device.get_memory_mapping().device_local,
@ -173,13 +440,13 @@ namespace rsx
VK_IMAGE_TYPE_2D,
requested_format,
static_cast<uint32_t>(rsx::apply_resolution_scale((u16)width, true)), static_cast<uint32_t>(rsx::apply_resolution_scale((u16)height, true)), 1, 1, 1,
VK_SAMPLE_COUNT_1_BIT,
static_cast<VkSampleCountFlagBits>(spp),
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_SAMPLED_BIT,
usage_flags,
0);
change_image_layout(cmd, rtt.get(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT));
rtt->change_layout(cmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
rtt->set_format(format);
rtt->set_aa_mode(antialias);
@ -187,7 +454,7 @@ namespace rsx
rtt->state_flags = rsx::surface_state_flags::erase_bkgnd;
rtt->native_component_map = fmt.second;
rtt->rsx_pitch = (u16)pitch;
rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format) * get_format_sample_count(antialias);
rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format) * rtt->samples_x;
rtt->surface_width = (u16)width;
rtt->surface_height = (u16)height;
rtt->queue_tag(address);
@ -203,13 +470,14 @@ namespace rsx
rsx::surface_antialiasing antialias,
vk::render_device &device, vk::command_buffer& cmd)
{
VkFormat requested_format = vk::get_compatible_depth_surface_format(device.get_formats_support(), format);
VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_DEPTH_BIT);
const VkFormat requested_format = vk::get_compatible_depth_surface_format(device.get_formats_support(), format);
const auto spp = get_format_sample_count(antialias);
VkImageUsageFlags usage_flags = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
if (requested_format != VK_FORMAT_D16_UNORM)
range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
const auto scale = rsx::get_resolution_scale();
if (antialias == rsx::surface_antialiasing::center_1_sample)
{
usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
}
std::unique_ptr<vk::render_target> ds;
ds = std::make_unique<vk::render_target>(device, device.get_memory_mapping().device_local,
@ -217,12 +485,13 @@ namespace rsx
VK_IMAGE_TYPE_2D,
requested_format,
static_cast<uint32_t>(rsx::apply_resolution_scale((u16)width, true)), static_cast<uint32_t>(rsx::apply_resolution_scale((u16)height, true)), 1, 1, 1,
VK_SAMPLE_COUNT_1_BIT,
static_cast<VkSampleCountFlagBits>(spp),
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT| VK_IMAGE_USAGE_TRANSFER_SRC_BIT| VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_SAMPLED_BIT,
usage_flags,
0);
ds->change_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
ds->set_format(format);
ds->set_aa_mode(antialias);
@ -230,9 +499,7 @@ namespace rsx
ds->state_flags = rsx::surface_state_flags::erase_bkgnd;
ds->native_component_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R };
change_image_layout(cmd, ds.get(), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range);
ds->native_pitch = (u16)width * 2 * get_format_sample_count(antialias);
ds->native_pitch = (u16)width * 2 * ds->samples_x;
if (format == rsx::surface_depth_format::z24s8)
ds->native_pitch *= 2;
@ -261,7 +528,7 @@ namespace rsx
VK_IMAGE_TYPE_2D,
ref->format(),
new_w, new_h, 1, 1, 1,
(VkSampleCountFlagBits)ref->samples(),
static_cast<VkSampleCountFlagBits>(ref->samples()),
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL,
ref->info.usage,
@ -273,12 +540,16 @@ namespace rsx
sink->memory_usage_flags = rsx::surface_usage_flags::storage;
sink->state_flags = rsx::surface_state_flags::erase_bkgnd;
sink->native_component_map = ref->native_component_map;
sink->native_pitch = u16(prev.width * ref->get_bpp() * ref->get_spp());
sink->native_pitch = u16(prev.width * ref->get_bpp() * ref->samples_x);
sink->surface_width = prev.width;
sink->surface_height = prev.height;
sink->queue_tag(address);
change_image_layout(cmd, sink.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
const auto best_layout = (ref->info.usage & VK_IMAGE_USAGE_SAMPLED_BIT) ?
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
ref->current_layout;
sink->change_layout(cmd, best_layout);
}
prev.target = sink.get();
@ -314,7 +585,10 @@ namespace rsx
static void prepare_surface_for_sampling(vk::command_buffer& cmd, vk::render_target *surface)
{
surface->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
if (surface->info.usage & VK_IMAGE_USAGE_SAMPLED_BIT)
{
surface->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}
}
static bool surface_is_pitch_compatible(const std::unique_ptr<vk::render_target> &surface, size_t pitch)
@ -325,7 +599,6 @@ namespace rsx
static void invalidate_surface_contents(vk::command_buffer& /*cmd*/, vk::render_target *surface, u32 address, size_t pitch)
{
surface->rsx_pitch = (u16)pitch;
surface->set_aa_mode(rsx::surface_antialiasing::center_1_sample);
surface->queue_tag(address);
surface->last_use_tag = 0;
surface->memory_usage_flags = rsx::surface_usage_flags::unknown;
@ -345,7 +618,7 @@ namespace rsx
surface->release();
}
static void notify_surface_persist(const std::unique_ptr<vk::render_target> &surface)
// Intentional no-op: persisting a surface requires no extra bookkeeping on the Vulkan backend.
static void notify_surface_persist(const std::unique_ptr<vk::render_target>& /*surface*/)
{}
static void notify_surface_reused(const std::unique_ptr<vk::render_target> &surface)

View File

@ -0,0 +1,181 @@
#include "stdafx.h"
#include "VKResolveHelper.h"
#include "VKRenderPass.h"
namespace
{
	// Maps a Vulkan color format to the GLSL image-format qualifier used when the
	// image is bound as a readonly storage image in the resolve/unresolve compute
	// kernels. The packed 16-bit formats are addressed through a r16ui alias.
	const char *get_format_prefix(VkFormat format)
	{
		switch (format)
		{
		case VK_FORMAT_R5G6B5_UNORM_PACK16:
		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
			// Packed 16bpp formats are handled as raw 16-bit unsigned texels
			return "r16ui";
		case VK_FORMAT_B8G8R8A8_UNORM:
			return "rgba8";
		case VK_FORMAT_R16G16B16A16_SFLOAT:
			return "rgba16f";
		case VK_FORMAT_R32G32B32A32_SFLOAT:
			return "rgba32f";
		case VK_FORMAT_R8_UNORM:
			return "r8";
		case VK_FORMAT_R8G8_UNORM:
			return "rg8";
		case VK_FORMAT_R32_SFLOAT:
			return "r32f";
		default:
			fmt::throw_exception("Unhandled VkFormat 0x%x" HERE, u32(format));
		}
	}
}
namespace vk
{
	// Lazily-built resolve/unresolve pipelines. The compute-based color paths are
	// keyed by format; the depth/stencil overlay passes are singletons.
	std::unordered_map<VkFormat, std::unique_ptr<vk::cs_resolve_task>> g_resolve_helpers;
	std::unordered_map<VkFormat, std::unique_ptr<vk::cs_unresolve_task>> g_unresolve_helpers;

	std::unique_ptr<vk::depthonly_resolve> g_depth_resolver;
	std::unique_ptr<vk::depthonly_unresolve> g_depth_unresolver;
	std::unique_ptr<vk::depthstencil_resolve_AMD> g_depthstencil_resolverAMD;
	std::unique_ptr<vk::depthstencil_unresolve_AMD> g_depthstencil_unresolverAMD;

	// Flattens the multisampled image 'src' into the single-sample image 'dst'.
	void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src)
	{
		if (src->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
		{
			// Color targets use a per-format compute kernel
			auto found = g_resolve_helpers.find(src->format());
			if (found == g_resolve_helpers.end())
			{
				auto task = std::make_unique<vk::cs_resolve_task>(get_format_prefix(src->format()));
				found = g_resolve_helpers.emplace(src->format(), std::move(task)).first;
			}

			found->second->run(cmd, src, dst);
		}
		else
		{
			// Depth/stencil targets are resolved with a fullscreen draw into 'dst'
			std::vector<vk::image*> attachments = { dst };
			auto& dev = cmd.get_command_pool().get_owner();
			auto renderpass = vk::get_renderpass(dev, vk::get_renderpass_key(attachments));

			if (src->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
			{
				// Stencil transfer needs the stencil-export path
				if (!g_depthstencil_resolverAMD)
				{
					g_depthstencil_resolverAMD.reset(new vk::depthstencil_resolve_AMD());
					g_depthstencil_resolverAMD->create(dev);
				}

				g_depthstencil_resolverAMD->run(cmd, src, dst, renderpass);
			}
			else
			{
				if (!g_depth_resolver)
				{
					g_depth_resolver.reset(new vk::depthonly_resolve());
					g_depth_resolver->create(dev);
				}

				g_depth_resolver->run(cmd, src, dst, renderpass);
			}
		}
	}

	// Scatters the single-sample image 'src' back into the multisampled image 'dst'.
	void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src)
	{
		if (src->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
		{
			auto found = g_unresolve_helpers.find(src->format());
			if (found == g_unresolve_helpers.end())
			{
				auto task = std::make_unique<vk::cs_unresolve_task>(get_format_prefix(src->format()));
				found = g_unresolve_helpers.emplace(src->format(), std::move(task)).first;
			}

			found->second->run(cmd, dst, src);
		}
		else
		{
			// Depth/stencil: fullscreen draw into the MSAA image 'dst'
			std::vector<vk::image*> attachments = { dst };
			auto& dev = cmd.get_command_pool().get_owner();
			auto renderpass = vk::get_renderpass(dev, vk::get_renderpass_key(attachments));

			if (src->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT)
			{
				if (!g_depthstencil_unresolverAMD)
				{
					g_depthstencil_unresolverAMD.reset(new vk::depthstencil_unresolve_AMD());
					g_depthstencil_unresolverAMD->create(dev);
				}

				g_depthstencil_unresolverAMD->run(cmd, dst, src, renderpass);
			}
			else
			{
				if (!g_depth_unresolver)
				{
					g_depth_unresolver.reset(new vk::depthonly_unresolve());
					g_depth_unresolver->create(dev);
				}

				g_depth_unresolver->run(cmd, dst, src, renderpass);
			}
		}
	}

	// Destroys all cached pipelines. Call before the device goes away.
	void clear_resolve_helpers()
	{
		const auto destroy_and_reset = [](auto& singleton)
		{
			if (singleton)
			{
				singleton->destroy();
				singleton.reset();
			}
		};

		for (auto& entry : g_resolve_helpers)
		{
			entry.second->destroy();
		}

		for (auto& entry : g_unresolve_helpers)
		{
			entry.second->destroy();
		}

		g_resolve_helpers.clear();
		g_unresolve_helpers.clear();

		destroy_and_reset(g_depth_resolver);
		destroy_and_reset(g_depthstencil_resolverAMD);
		destroy_and_reset(g_depth_unresolver);
		destroy_and_reset(g_depthstencil_unresolverAMD);
	}

	// Releases per-frame resources held by the helpers; the pipelines themselves survive.
	void reset_resolve_resources()
	{
		const auto free_singleton = [](const auto& singleton)
		{
			if (singleton)
			{
				singleton->free_resources();
			}
		};

		for (auto& entry : g_resolve_helpers)
		{
			entry.second->free_resources();
		}

		for (auto& entry : g_unresolve_helpers)
		{
			entry.second->free_resources();
		}

		free_singleton(g_depth_resolver);
		free_singleton(g_depth_unresolver);
		free_singleton(g_depthstencil_resolverAMD);
		free_singleton(g_depthstencil_unresolverAMD);
	}
}

View File

@ -0,0 +1,406 @@
#pragma once
#include "VKHelpers.h"
#include "VKCompute.h"
#include "VKOverlays.h"
namespace vk
{
struct cs_resolve_base : compute_task
{
vk::viewable_image* multisampled;
vk::viewable_image* resolve;
u32 cs_wave_x = 1;
u32 cs_wave_y = 1;
cs_resolve_base()
{}
virtual ~cs_resolve_base()
{}
void build(const std::string& kernel, const std::string& format_prefix, int direction)
{
create();
// TODO: Tweak occupancy
switch (optimal_group_size)
{
default:
case 64:
cs_wave_x = 8;
cs_wave_y = 8;
break;
case 32:
cs_wave_x = 8;
cs_wave_y = 4;
break;
}
const std::pair<std::string, std::string> syntax_replace[] =
{
{ "%wx", std::to_string(cs_wave_x) },
{ "%wy", std::to_string(cs_wave_y) },
};
m_src =
"#version 430\n"
"layout(local_size_x=%wx, local_size_y=%wy, local_size_z=1) in;\n"
"\n";
m_src = fmt::replace_all(m_src, syntax_replace);
if (direction == 0)
{
m_src +=
"layout(set=0, binding=0, " + format_prefix + ") uniform readonly restrict image2DMS multisampled;\n"
"layout(set=0, binding=1) uniform writeonly restrict image2D resolve;\n";
}
else
{
m_src +=
"layout(set=0, binding=0) uniform writeonly restrict image2DMS multisampled;\n"
"layout(set=0, binding=1, " + format_prefix + ") uniform readonly restrict image2D resolve;\n";
}
m_src +=
"\n"
"void main()\n"
"{\n"
" ivec2 resolve_size = imageSize(resolve);\n"
" ivec2 aa_size = imageSize(multisampled);\n"
" ivec2 sample_count = resolve_size / aa_size;\n"
"\n"
" if (any(greaterThanEqual(gl_GlobalInvocationID.xy, uvec2(resolve_size)))) return;"
"\n"
" ivec2 resolve_coords = ivec2(gl_GlobalInvocationID.xy);\n"
" ivec2 aa_coords = resolve_coords / sample_count;\n"
" ivec2 sample_loc = ivec2(resolve_coords % sample_count);\n"
" int sample_index = sample_loc.x + (sample_loc.y * sample_count.y);\n"
+ kernel +
"}\n";
LOG_ERROR(RSX, "Compute shader:\n%s", m_src);
}
std::vector<std::pair<VkDescriptorType, u8>> get_descriptor_layout() override
{
return
{
{ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2 }
};
}
void declare_inputs() override
{
std::vector<vk::glsl::program_input> inputs =
{
{
::glsl::program_domain::glsl_compute_program,
vk::glsl::program_input_type::input_type_texture,
{}, {},
0,
"multisampled"
},
{
::glsl::program_domain::glsl_compute_program,
vk::glsl::program_input_type::input_type_texture,
{}, {},
1,
"resolve"
}
};
m_program->load_uniforms(inputs);
}
void bind_resources() override
{
auto msaa_view = multisampled->get_view(0xDEADBEEF, rsx::default_remap_vector);
auto resolved_view = resolve->get_view(0xAAE4, rsx::default_remap_vector);
m_program->bind_uniform({ VK_NULL_HANDLE, msaa_view->value, multisampled->current_layout }, "multisampled", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set);
m_program->bind_uniform({ VK_NULL_HANDLE, resolved_view->value, resolve->current_layout }, "resolve", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set);
}
void run(VkCommandBuffer cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image)
{
verify(HERE), msaa_image->samples() > 1, resolve_image->samples() == 1;
multisampled = msaa_image;
resolve = resolve_image;
const u32 invocations_x = align(resolve_image->width(), cs_wave_x) / cs_wave_x;
const u32 invocations_y = align(resolve_image->height(), cs_wave_y) / cs_wave_y;
compute_task::run(cmd, invocations_x, invocations_y);
}
};
// Color resolve: copies every sample of the MS image into its texel of the
// flattened resolve image (the surrounding shell computes aa_coords/sample_index).
struct cs_resolve_task : cs_resolve_base
{
	cs_resolve_task(const std::string& format_prefix)
	{
		// Read one sample from the MS image, write it to the resolved location
		std::string kernel =
			" vec4 aa_sample = imageLoad(multisampled, aa_coords, sample_index);\n"
			" imageStore(resolve, resolve_coords, aa_sample);\n";

		// direction 0 = MS image is the readonly input
		build(kernel, format_prefix, 0);
	}
};
// Color unresolve: scatters texels of the flattened resolve image back into the
// corresponding samples of the MS image (inverse of cs_resolve_task).
struct cs_unresolve_task : cs_resolve_base
{
	cs_unresolve_task(const std::string& format_prefix)
	{
		// Read the resolved texel, write it to its sample slot in the MS image
		std::string kernel =
			" vec4 resolved_sample = imageLoad(resolve, resolve_coords);\n"
			" imageStore(multisampled, aa_coords, sample_index, resolved_sample);\n";

		// direction 1 = resolve image is the readonly input
		build(kernel, format_prefix, 1);
	}
};
struct depth_resolve_base : public overlay_pass
{
u8 samples_x = 1;
u8 samples_y = 1;
depth_resolve_base()
{
renderpass_config.set_depth_mask(true);
renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS);
}
void build(const std::string& kernel, const std::string& extensions, bool stencil_texturing, bool input_is_multisampled)
{
vs_src =
"#version 450\n"
"#extension GL_ARB_separate_shader_objects : enable\n\n"
"\n"
"void main()\n"
"{\n"
" vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n"
" gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n"
"}\n";
fs_src =
"#version 420\n"
"#extension GL_ARB_separate_shader_objects : enable\n";
fs_src += extensions +
"\n"
"layout(std140, set=0, binding=0) uniform static_data{ ivec4 regs[8]; };\n"
"layout(set=0, binding=1) uniform sampler2D fs0;\n";
if (stencil_texturing)
{
m_num_usable_samplers = 2;
fs_src +=
"layout(set=0, binding=2) uniform usampler2D fs1;\n";
}
fs_src +=
"layout(pixel_center_integer) in vec4 gl_FragCoord;\n"
"\n"
"void main()\n"
"{\n";
fs_src += kernel +
"}\n";
if (input_is_multisampled)
{
auto sampler_loc = fs_src.find("sampler2D fs0");
fs_src.insert(sampler_loc + 9, "MS");
if (stencil_texturing)
{
sampler_loc = fs_src.find("sampler2D fs1");
fs_src.insert(sampler_loc + 9, "MS");
}
}
LOG_ERROR(RSX, "Resolve shader:\n%s", fs_src);
}
void update_uniforms(vk::glsl::program* /*program*/) override
{
m_ubo_offset = (u32)m_ubo.alloc<256>(8);
auto dst = (s32*)m_ubo.map(m_ubo_offset, 128);
dst[0] = samples_x;
dst[1] = samples_y;
m_ubo.unmap();
}
void update_sample_configuration(vk::image* msaa_image)
{
switch (msaa_image->samples())
{
case 1:
fmt::throw_exception("MSAA input not multisampled!" HERE);
case 2:
samples_x = 2;
samples_y = 1;
break;
case 4:
samples_x = samples_y = 2;
break;
default:
fmt::throw_exception("Unsupported sample count %d" HERE, msaa_image->samples());
}
}
};
// Depth-only resolve: one fragment per resolved texel fetches its sample from
// the MSAA depth input (fs0) and writes it as this fragment's depth.
struct depthonly_resolve : depth_resolve_base
{
	depthonly_resolve()
	{
		// regs[0].xy holds the sample grid (samples_x, samples_y); the input is MS
		build(
			" ivec2 out_coord = ivec2(gl_FragCoord.xy);\n"
			" ivec2 in_coord = (out_coord / regs[0].xy);\n"
			" ivec2 sample_loc = out_coord % ivec2(regs[0].xy);\n"
			" int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n"
			" float frag_depth = texelFetch(fs0, in_coord, sample_index).x;\n"
			" gl_FragDepth = frag_depth;\n",
			"",
			false,
			true);
	}

	void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
	{
		update_sample_configuration(msaa_image);

		// MS depth image is sampled; the resolve image is the render target
		auto src_view = msaa_image->get_view(0xDEADBEEF, rsx::default_remap_vector);

		overlay_pass::run(
			cmd,
			(u16)resolve_image->width(), (u16)resolve_image->height(),
			resolve_image, src_view,
			render_pass);
	}
};
// Depth-only unresolve: runs at sample rate over the MSAA target; each sample
// (selected via gl_SampleID) fetches its texel from the resolved input (fs0).
struct depthonly_unresolve : depth_resolve_base
{
	depthonly_unresolve()
	{
		// Map (pixel, gl_SampleID) back to the flattened resolve-image coordinate
		build(
			" ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n"
			" pixel_coord *= regs[0].xy;\n"
			" pixel_coord.x += (gl_SampleID % regs[0].x);\n"
			" pixel_coord.y += (gl_SampleID / regs[0].x);\n"
			" float frag_depth = texelFetch(fs0, pixel_coord, 0).x;\n"
			" gl_FragDepth = frag_depth;\n",
			"",
			false,
			false);
	}

	void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
	{
		// Force per-sample shading so gl_SampleID addresses every sample
		renderpass_config.set_multisample_state(msaa_image->samples(), 0xFFFF, true, false, false);
		renderpass_config.set_multisample_shading_rate(1.f);
		update_sample_configuration(msaa_image);

		// Resolved image is sampled; the MSAA image is the render target
		auto src_view = resolve_image->get_view(0xAAE4, rsx::default_remap_vector);

		overlay_pass::run(
			cmd,
			(u16)msaa_image->width(), (u16)msaa_image->height(),
			msaa_image, src_view,
			render_pass);
	}
};
// Depth+stencil resolve in a single pass. Stencil is written from the fragment
// shader through gl_FragStencilRefARB (GL_ARB_shader_stencil_export); per the
// commit, this path is named for AMD, whose drivers expose the extension.
struct depthstencil_resolve_AMD : depth_resolve_base
{
	depthstencil_resolve_AMD()
	{
		// Stencil test is abused as a write-through: always pass, always replace
		renderpass_config.enable_stencil_test(
			VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, // Always replace
			VK_COMPARE_OP_ALWAYS, // Always pass
			0xFF, // Full write-through
			0); // Unused

		// Two samplers: fs0 = depth aspect, fs1 = stencil aspect
		// (also set by build() when stencil_texturing is true; harmless repeat)
		m_num_usable_samplers = 2;

		build(
			" ivec2 out_coord = ivec2(gl_FragCoord.xy);\n"
			" ivec2 in_coord = (out_coord / regs[0].xy);\n"
			" ivec2 sample_loc = out_coord % ivec2(regs[0].xy);\n"
			" int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n"
			" float frag_depth = texelFetch(fs0, in_coord, sample_index).x;\n"
			" uint frag_stencil = texelFetch(fs1, in_coord, sample_index).x;\n"
			" gl_FragDepth = frag_depth;\n"
			" gl_FragStencilRefARB = int(frag_stencil);\n",
			"#extension GL_ARB_shader_stencil_export : enable\n",
			true,
			true);
	}

	void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
	{
		update_sample_configuration(msaa_image);

		// Separate views over the depth and stencil aspects of the MSAA input
		auto depth_view = msaa_image->get_view(0xDEADBEEF, rsx::default_remap_vector, VK_IMAGE_ASPECT_DEPTH_BIT);
		auto stencil_view = msaa_image->get_view(0xDEADBEEF, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT);

		overlay_pass::run(
			cmd,
			(u16)resolve_image->width(), (u16)resolve_image->height(),
			resolve_image, { depth_view, stencil_view },
			render_pass);
	}
};
// Depth+stencil unresolve (inverse of depthstencil_resolve_AMD): per-sample
// shading writes each sample's depth and, via gl_FragStencilRefARB
// (GL_ARB_shader_stencil_export), its stencil value back into the MSAA target.
struct depthstencil_unresolve_AMD : depth_resolve_base
{
	depthstencil_unresolve_AMD()
	{
		// Stencil test as write-through: always pass, always replace
		renderpass_config.enable_stencil_test(
			VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, // Always replace
			VK_COMPARE_OP_ALWAYS, // Always pass
			0xFF, // Full write-through
			0); // Unused

		// Two samplers: fs0 = depth aspect, fs1 = stencil aspect
		m_num_usable_samplers = 2;

		// Map (pixel, gl_SampleID) back to the flattened resolve-image coordinate
		build(
			" ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n"
			" pixel_coord *= regs[0].xy;\n"
			" pixel_coord.x += (gl_SampleID % regs[0].x);\n"
			" pixel_coord.y += (gl_SampleID / regs[0].x);\n"
			" float frag_depth = texelFetch(fs0, pixel_coord, 0).x;\n"
			" uint frag_stencil = texelFetch(fs1, pixel_coord, 0).x;\n"
			" gl_FragDepth = frag_depth;\n"
			" gl_FragStencilRefARB = int(frag_stencil);\n",
			"#extension GL_ARB_shader_stencil_export : enable\n",
			true,
			false);
	}

	void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass)
	{
		// Force per-sample shading so gl_SampleID addresses every sample
		renderpass_config.set_multisample_state(msaa_image->samples(), 0xFFFF, true, false, false);
		renderpass_config.set_multisample_shading_rate(1.f);
		update_sample_configuration(msaa_image);

		// Separate views over the depth and stencil aspects of the resolved input
		auto depth_view = resolve_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_DEPTH_BIT);
		auto stencil_view = resolve_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT);

		overlay_pass::run(
			cmd,
			(u16)msaa_image->width(), (u16)msaa_image->height(),
			msaa_image, { depth_view, stencil_view },
			render_pass);
	}
};
void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
void reset_resolve_resources();
void clear_resolve_helpers();
}

View File

@ -577,20 +577,24 @@ namespace rsx
template <typename SurfaceType>
std::tuple<u16, u16, u16, u16> get_transferable_region(const SurfaceType* surface)
{
const u16 src_w = surface->old_contents.source->width();
const u16 src_h = surface->old_contents.source->height();
u16 dst_w = src_w;
u16 dst_h = src_h;
auto src = static_cast<const SurfaceType*>(surface->old_contents.source);
dst_w = (dst_w * src->samples_x) / surface->samples_x;
dst_h = (dst_h * src->samples_y) / surface->samples_y;
auto area1 = surface->get_normalized_memory_area();
auto area2 = surface->get_normalized_memory_area();
const f32 scale_x = (f32)dst_w / src_w;
const f32 scale_y = (f32)dst_h / src_h;
auto w = std::min(area1.x2, area2.x2);
auto h = std::min(area1.y2, area2.y2);
std::tie(std::ignore, std::ignore, dst_w, dst_h) = clip_region<u16>(dst_w, dst_h, 0, 0, surface->width(), surface->height(), true);
return std::make_tuple(u16(dst_w / scale_x), u16(dst_h / scale_y), dst_w, dst_h);
const auto src_scale_x = src->get_bpp() * src->samples_x;
const auto src_scale_y = src->samples_y;
const auto dst_scale_x = surface->get_bpp() * surface->samples_x;
const auto dst_scale_y = surface->samples_y;
const u16 src_w = u16(w / src_scale_x);
const u16 src_h = u16(h / src_scale_y);
const u16 dst_w = u16(w / dst_scale_x);
const u16 dst_h = u16(h / dst_scale_y);
return std::make_tuple(src_w, src_h, dst_w, dst_h);
}
template <typename SurfaceType>

View File

@ -34,6 +34,7 @@
<ClInclude Include="Emu\RSX\VK\VKProgramBuffer.h" />
<ClInclude Include="Emu\RSX\VK\VKRenderPass.h" />
<ClInclude Include="Emu\RSX\VK\VKRenderTargets.h" />
<ClInclude Include="Emu\RSX\VK\VKResolveHelper.h" />
<ClInclude Include="Emu\RSX\VK\VKTextOut.h" />
<ClInclude Include="Emu\RSX\VK\VKTextureCache.h" />
<ClInclude Include="Emu\RSX\VK\VKVertexProgram.h" />
@ -45,9 +46,10 @@
<ClCompile Include="Emu\RSX\VK\VKFragmentProgram.cpp" />
<ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" />
<ClCompile Include="Emu\RSX\VK\VKGSRender.cpp" />
<ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" />
<ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" />
<ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" />
<ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" />
<ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp" />
<ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp" />
<ClCompile Include="Emu\RSX\VK\VKTexture.cpp" />
<ClCompile Include="Emu\RSX\VK\VKVertexBuffers.cpp" />
<ClCompile Include="Emu\RSX\VK\VKVertexProgram.cpp" />

View File

@ -45,9 +45,12 @@
</ClInclude>
<ClInclude Include="Emu\RSX\VK\VKCompute.h">
<Filter>Source Files</Filter>
</ClInclude>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\VKRenderPass.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\VKResolveHelper.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\VK\VKFramebuffer.h">
<Filter>Source Files</Filter>
@ -86,9 +89,12 @@
</ClCompile>
<ClCompile Include="Emu\RSX\VK\VKMemAlloc.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp">
<Filter>Source Files</Filter>