vk: Emulate conditional rendering for AMD

This commit is contained in:
kd-11 2019-12-10 09:10:13 +03:00 committed by kd-11
parent 93895838c7
commit cdd9c12132
6 changed files with 109 additions and 20 deletions

View File

@ -319,7 +319,7 @@ namespace rsx
else
{
zcull_ctrl->read_barrier(this, cond_render_ctrl.eval_address, 4, reports::sync_no_notify);
cond_render_ctrl.eval_result(this);
verify(HERE), !cond_render_ctrl.eval_pending();
}
}
@ -2959,8 +2959,6 @@ namespace rsx
{
if (hint || ptimer->async_tasks_pending >= max_safe_queue_depth)
{
verify(HERE), !active || !hint;
// Prepare the whole queue for reading. This happens when zcull activity is disabled or queue is too long
for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); ++It)
{

View File

@ -286,6 +286,13 @@ namespace
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[idx].binding = CONDITIONAL_RENDER_PREDICATE_SLOT;
idx++;
for (int i = 0; i < rsx::limits::fragment_textures_count; i++)
{
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@ -311,6 +318,12 @@ namespace
push_constants[0].size = 16;
push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
if (vk::emulate_conditional_rendering())
{
// Conditional render toggle
push_constants[0].size = 20;
}
VkDescriptorSetLayoutCreateInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
infos.pBindings = bindings.data();
@ -439,11 +452,13 @@ VKGSRender::VKGSRender() : GSRender()
m_occlusion_query_data[n].driver_handle = n;
//Generate frame contexts
VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * DESCRIPTOR_MAX_DRAW_CALLS };
VkDescriptorPoolSize uniform_texel_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS };
VkDescriptorPoolSize texture_pool = { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 20 * DESCRIPTOR_MAX_DRAW_CALLS };
std::vector<VkDescriptorPoolSize> sizes;
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * DESCRIPTOR_MAX_DRAW_CALLS });
sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS });
sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 20 * DESCRIPTOR_MAX_DRAW_CALLS });
std::vector<VkDescriptorPoolSize> sizes{ uniform_buffer_pool, uniform_texel_pool, texture_pool };
// Conditional rendering predicate slot; refactor to allow skipping this when not needed
sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 * DESCRIPTOR_MAX_DRAW_CALLS });
VkSemaphoreCreateInfo semaphore_info = {};
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
@ -1153,7 +1168,7 @@ void VKGSRender::emit_geometry(u32 sub_index)
update_draw_state();
begin_render_pass();
if (cond_render_ctrl.hw_cond_active)
if (cond_render_ctrl.hw_cond_active && m_device->get_conditional_render_support())
{
// It is inconvenient that conditional rendering breaks other things like compute dispatch
// TODO: If this is heavy, refactor the resources into global and add checks around compute dispatch
@ -2802,6 +2817,12 @@ void VKGSRender::load_program_env()
m_program->bind_uniform(m_fragment_texture_params_buffer_info, FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, m_current_frame->descriptor_set);
}
if (vk::emulate_conditional_rendering())
{
auto predicate = m_cond_render_buffer ? m_cond_render_buffer->value : vk::get_scratch_buffer()->value;
m_program->bind_buffer({ predicate, 0, 4 }, CONDITIONAL_RENDER_PREDICATE_SLOT, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set);
}
//Clear flags
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty);
m_graphics_state &= ~handled_flags;
@ -2826,13 +2847,21 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
base_offset = 0;
}
u32 draw_info[4];
u8 data_size = 16;
u32 draw_info[5];
draw_info[0] = vertex_info.vertex_index_base;
draw_info[1] = vertex_info.vertex_index_offset;
draw_info[2] = id;
draw_info[3] = (id * 16) + (base_offset / 8);
vkCmdPushConstants(*m_current_command_buffer, pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 16, draw_info);
if (vk::emulate_conditional_rendering())
{
draw_info[4] = cond_render_ctrl.hw_cond_active ? 1 : 0;
data_size = 20;
}
vkCmdPushConstants(*m_current_command_buffer, pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, data_size, draw_info);
const size_t data_offset = (id * 128) + m_vertex_layout_stream_info.offset;
auto dst = m_vertex_layout_ring_info.map(data_offset, 128);
@ -3792,10 +3821,31 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
if (!m_cond_render_buffer)
{
auto& memory_props = m_device->get_memory_mapping();
auto usage_flags = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
if (m_device->get_conditional_render_support())
{
usage_flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT;
}
m_cond_render_buffer = std::make_unique<vk::buffer>(
*m_device, 4,
memory_props.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
usage_flags, 0);
}
VkPipelineStageFlags dst_stage;
VkAccessFlags dst_access;
if (m_device->get_conditional_render_support())
{
dst_stage = VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT;
dst_access = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT;
}
else
{
dst_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
dst_access = VK_ACCESS_SHADER_READ_BIT;
}
if (sources.size() == 1)
@ -3809,8 +3859,8 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
m_occlusion_query_pool.get_query_result_indirect(*m_current_command_buffer, index, m_cond_render_buffer->value, 0);
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT);
VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage,
VK_ACCESS_TRANSFER_WRITE_BIT, dst_access);
rsx::thread::begin_conditional_rendering(sources);
return;
@ -3863,8 +3913,8 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
vk::get_compute_task<vk::cs_aggregator>()->run(*m_current_command_buffer, m_cond_render_buffer.get(), scratch, dst_offset / 4);
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT,
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT);
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dst_stage,
VK_ACCESS_SHADER_WRITE_BIT, dst_access);
}
else
{

View File

@ -90,6 +90,7 @@ namespace vk
bool g_drv_no_primitive_restart_flag = false;
bool g_drv_sanitize_fp_values = false;
bool g_drv_disable_fence_reset = false;
bool g_drv_emulate_cond_render = false;
u64 g_num_processed_frames = 0;
u64 g_num_total_frames = 0;
@ -425,6 +426,7 @@ namespace vk
g_drv_no_primitive_restart_flag = false;
g_drv_sanitize_fp_values = false;
g_drv_disable_fence_reset = false;
g_drv_emulate_cond_render = (g_cfg.video.relaxed_zcull_sync && !g_current_renderer->get_conditional_render_support());
g_num_processed_frames = 0;
g_num_total_frames = 0;
g_heap_compatible_buffer_types = 0;
@ -533,6 +535,11 @@ namespace vk
return g_drv_disable_fence_reset;
}
// Reports whether conditional rendering has to be emulated.
// Simply exposes the g_drv_emulate_cond_render driver flag to callers outside this file.
bool emulate_conditional_rendering()
{
	const bool emulation_required = g_drv_emulate_cond_render;
	return emulation_required;
}
void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask)
{
VkBufferMemoryBarrier barrier = {};

View File

@ -39,8 +39,9 @@
#define FRAGMENT_STATE_BIND_SLOT 3
#define FRAGMENT_TEXTURE_PARAMS_BIND_SLOT 4
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 5
#define TEXTURES_FIRST_BIND_SLOT 8
#define VERTEX_TEXTURES_FIRST_BIND_SLOT 24 //8+16
#define CONDITIONAL_RENDER_PREDICATE_SLOT 8
#define TEXTURES_FIRST_BIND_SLOT 9
#define VERTEX_TEXTURES_FIRST_BIND_SLOT (TEXTURES_FIRST_BIND_SLOT + 16)
#define VK_NUM_DESCRIPTOR_BINDINGS (VERTEX_TEXTURES_FIRST_BIND_SLOT + 4)
@ -138,6 +139,7 @@ namespace vk
bool emulate_primitive_restart(rsx::primitive_type type);
bool sanitize_fp_values();
bool fence_reset_disabled();
bool emulate_conditional_rendering();
VkFlags get_heap_compatible_buffer_types();
driver_vendor get_driver_vendor();
chip_class get_chip_family(uint32_t vendor_id, uint32_t device_id);

View File

@ -43,12 +43,26 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << " float z_far;\n";
OS << "};\n\n";
if (m_device_props.emulate_conditional_rendering)
{
OS << "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n";
OS << "{\n";
OS << " uint conditional_rendering_predicate;\n";
OS << "};\n\n";
}
OS << "layout(push_constant) uniform VertexLayoutBuffer\n";
OS << "{\n";
OS << " uint vertex_base_index;\n";
OS << " uint vertex_index_offset;\n";
OS << " uint draw_id;\n";
OS << " uint layout_ptr_offset;\n";
if (m_device_props.emulate_conditional_rendering)
{
OS << " uint conditional_rendering_enabled;\n";
}
OS << "};\n\n";
vk::glsl::program_input in;
@ -238,9 +252,18 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
OS << "}\n\n";
OS << "void main ()\n";
OS << "{\n";
OS << "{\n\n";
OS << "\n" << " vs_main();\n\n";
if (m_device_props.emulate_conditional_rendering)
{
OS << " if (conditional_rendering_enabled != 0 && conditional_rendering_predicate == 0)\n";
OS << " {\n";
OS << " gl_Position = vec4(0.);\n";
OS << " return;\n";
OS << "}\n\n";
}
OS << " vs_main();\n\n";
for (auto &i : reg_table)
{
@ -286,6 +309,8 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
// Runs the decompilation job: records whether conditional rendering is emulated,
// stores the result of Decompile() in m_shader, then hands the collected inputs to vk_prog.
void VKVertexDecompilerThread::Task()
{
// Must be assigned before Decompile(): insertHeader() and insertMainEnd() branch on this flag
// (presumably they are invoked as part of Decompile() - confirm against the base decompiler).
m_device_props.emulate_conditional_rendering = vk::emulate_conditional_rendering();
m_shader = Decompile();
vk_prog->SetInputs(inputs);
}

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include "../Common/VertexProgramDecompiler.h"
#include "Emu/RSX/RSXVertexProgram.h"
#include "Utilities/Thread.h"
@ -10,6 +10,13 @@ struct VKVertexDecompilerThread : public VertexProgramDecompiler
std::string &m_shader;
std::vector<vk::glsl::program_input> inputs;
class VKVertexProgram *vk_prog;
// Device-dependent switches consulted while emitting the vertex shader source.
struct
{
	// When true, the emitted GLSL includes the conditional-rendering predicate buffer,
	// the extra push-constant field and the early-out at the top of main().
	// Defaults to false so the flag is well-defined even before Task() assigns it.
	bool emulate_conditional_rendering = false;
}
m_device_props;
protected:
std::string getFloatTypeName(size_t elementCount) override;
std::string getIntTypeName(size_t elementCount) override;