vk: Implement hw conditional rendering

This commit is contained in:
kd-11 2019-12-10 07:56:44 +03:00 committed by kd-11
parent a51395370e
commit 93895838c7
6 changed files with 328 additions and 35 deletions

View File

@ -314,7 +314,7 @@ namespace rsx
verify(HERE), !cond_render_ctrl.hw_cond_active;
// Pending evaluation, use hardware test
begin_conditional_rendering();
begin_conditional_rendering(cond_render_ctrl.eval_sources);
}
else
{
@ -2158,13 +2158,13 @@ namespace rsx
{
cond_render_ctrl.enable_conditional_render(this, ref);
auto result = zcull_ctrl->find_query(ref);
auto result = zcull_ctrl->find_query(ref, true);
if (result.found)
{
if (result.query)
if (!result.queries.empty())
{
cond_render_ctrl.set_sync_tag(result.query->sync_tag);
sync_hint(FIFO_hint::hint_conditional_render_eval, result.query);
cond_render_ctrl.set_eval_sources(result.queries);
sync_hint(FIFO_hint::hint_conditional_render_eval, cond_render_ctrl.eval_sources.front());
}
else
{
@ -2183,9 +2183,10 @@ namespace rsx
cond_render_ctrl.disable_conditional_render(this);
}
// NOTE(review): this span comes from a rendered diff, so two signatures appear back to back
// (presumably the parameterless one is the superseded form - confirm against the real file).
//
// Base-class hook invoked when a pending predicate must be evaluated by the hardware test.
// It does not inspect the query list itself (the parameter is left unnamed); it only flags the
// hardware predicate as active and discards the cached evaluation sources.
void thread::begin_conditional_rendering()
void thread::begin_conditional_rendering(const std::vector<reports::occlusion_query_info*>& /*sources*/)
{
// Mark the hardware conditional-render path as engaged
cond_render_ctrl.hw_cond_active = true;
// Drop the cached source list. If the caller passed cond_render_ctrl.eval_sources as the
// argument, that reference is empty after this line.
cond_render_ctrl.eval_sources.clear();
}
void thread::end_conditional_rendering()
@ -2709,6 +2710,12 @@ namespace rsx
}
ptimer->async_tasks_pending++;
if (m_statistics_map[m_statistics_tag_id] != 0)
{
// Flush guaranteed results; only one positive is needed
update(ptimer);
}
}
void ZCULL_control::allocate_new_query(::rsx::thread* ptimer)
@ -2888,7 +2895,7 @@ namespace rsx
// No other queries in the chain, write result
write(&writer, ptimer->timestamp(), result);
if (query && ptimer->cond_render_ctrl.sync_tag == query->sync_tag)
if (query && query->sync_tag == ptimer->cond_render_ctrl.eval_sync_tag)
{
const bool eval_failed = (result == 0);
ptimer->cond_render_ctrl.set_eval_result(ptimer, eval_failed);
@ -3083,7 +3090,7 @@ namespace rsx
// No other queries in the chain, write result
write(&writer, ptimer->timestamp(), result);
if (query && ptimer->cond_render_ctrl.sync_tag == query->sync_tag)
if (query && query->sync_tag == ptimer->cond_render_ctrl.eval_sync_tag)
{
const bool eval_failed = (result == 0);
ptimer->cond_render_ctrl.set_eval_result(ptimer, eval_failed);
@ -3175,36 +3182,56 @@ namespace rsx
return result_zcull_intr;
}
// NOTE(review): this span is a rendered diff hunk. Superseded and replacement lines are interleaved
// (two signatures, paired 'if (It->query...)' tests, paired return statements), so it is not
// compilable exactly as shown - confirm against the real file before relying on any single line.
//
// Searches m_pending_writes, newest entry first, for the write whose sink equals sink_address and
// gathers the occlusion queries that contribute to it.
//   sink_address - address of the report sink to look up
//   all          - when false, stop at the first usable query; when true, keep collecting
// Returns a query_search_result; 'found' is set once at least one query with draws was collected,
// or when the result is known to be zero because the counter tag changed.
query_search_result ZCULL_control::find_query(vm::addr_t sink_address)
query_search_result ZCULL_control::find_query(vm::addr_t sink_address, bool all)
{
query_search_result result{};
// Counter tag of the matched sink; stays 0 until the sink has been located
u32 stat_id = 0;
// Reverse iteration: the most recent pending write is visited first
for (auto It = m_pending_writes.crbegin(); It != m_pending_writes.crend(); ++It)
{
// Sink already located: keep walking older entries that share its counter tag
if (UNLIKELY(stat_id))
{
if (It->counter_tag != stat_id)
{
// Zcull stats were cleared between this query and the required one
return { true, 0, nullptr };
if (result.found)
{
// Some result was found, return it instead
break;
}
// Zcull stats were cleared between this query and the required stats, result can only be 0
return { true, 0, {} };
}
// Only queries that exist and recorded at least one draw are collected
if (It->query)
if (It->query && It->query->num_draws)
{
return { true, 0, It->query };
result.found = true;
result.queries.push_back(It->query);
if (!all)
{
break;
}
}
}
// Still looking for the entry that writes to the requested sink
else if (It->sink == sink_address)
{
if (It->query)
if (It->query && It->query->num_draws)
{
return { true, 0, It->query };
result.found = true;
result.queries.push_back(It->query);
if (!all)
{
break;
}
}
// Remember the counter tag so older entries of the same statistics run are considered too
stat_id = It->counter_tag;
}
}
return {};
return result;
}
u32 ZCULL_control::copy_reports_to(u32 start, u32 range, u32 dest)
@ -3228,6 +3255,15 @@ namespace rsx
// Conditional rendering helpers
// Returns the evaluation bookkeeping (sources, sync tag, address, result) to its idle state.
// The 'enabled' and 'hw_cond_active' flags are intentionally left untouched here.
void conditional_render_eval::reset()
{
	// Forget the queries that were feeding the predicate
	eval_sources.clear();

	// Clear the scalar state that identifies and describes the evaluation
	eval_sync_tag = 0;
	eval_address = 0;
	eval_failed = false;
}
bool conditional_render_eval::disable_rendering() const
{
return (enabled && eval_failed);
@ -3246,10 +3282,10 @@ namespace rsx
pthr->end_conditional_rendering();
}
reset();
enabled = true;
eval_failed = false;
eval_address = address;
sync_tag = 0;
}
void conditional_render_eval::disable_conditional_render(::rsx::thread* pthr)
@ -3260,15 +3296,14 @@ namespace rsx
pthr->end_conditional_rendering();
}
reset();
enabled = false;
eval_failed = false;
eval_address = 0;
sync_tag = 0;
}
// NOTE(review): rendered diff hunk - lines belonging to set_sync_tag(u64) and to
// set_eval_sources(...) are interleaved (presumably the former is the superseded version - confirm).
//
// set_eval_sources takes over the given query list and latches the sync tag of its first entry.
//   sources - list of occlusion queries feeding the predicate; it is moved-from on return.
// front() is called unconditionally, so the list must not be empty; the visible call site
// checks !result.queries.empty() before calling.
void conditional_render_eval::set_sync_tag(u64 value)
void conditional_render_eval::set_eval_sources(std::vector<occlusion_query_info*>& sources)
{
sync_tag = value;
// Steal the caller's vector instead of copying it
eval_sources = std::move(sources);
// The first entry supplies the tag used to match results against this evaluation
eval_sync_tag = eval_sources.front()->sync_tag;
}
void conditional_render_eval::set_eval_result(::rsx::thread* pthr, bool failed)
@ -3279,9 +3314,8 @@ namespace rsx
pthr->end_conditional_rendering();
}
reset();
eval_failed = failed;
eval_address = 0;
sync_tag = 0;
}
void conditional_render_eval::eval_result(::rsx::thread* pthr)

View File

@ -362,7 +362,7 @@ namespace rsx
{
bool found;
u32 raw_zpass_result;
occlusion_query_info* query;
std::vector<occlusion_query_info*> queries;
};
enum sync_control
@ -443,7 +443,7 @@ namespace rsx
bool has_pending() const { return !m_pending_writes.empty(); }
// Search for query synchronized at address
query_search_result find_query(vm::addr_t sink_address);
query_search_result find_query(vm::addr_t sink_address, bool all);
// Copies queries in range rebased from source range to destination range
u32 copy_reports_to(u32 start, u32 range, u32 dest);
@ -463,8 +463,13 @@ namespace rsx
bool eval_failed = false;
bool hw_cond_active = false;
bool reserved = false;
u32 eval_address = 0;
u64 sync_tag = 0;
std::vector<occlusion_query_info*> eval_sources;
u32 eval_sync_tag = 0;
u32 eval_address = 0;
// Resets common data
void reset();
// Returns true if rendering is disabled as per conditional render test
bool disable_rendering() const;
@ -478,8 +483,8 @@ namespace rsx
// Disable conditional rendering
void disable_conditional_render(thread* pthr);
// Sets up the zcull sync tag
void set_sync_tag(u64 value);
// Sets data sources for predicate evaluation
void set_eval_sources(std::vector<occlusion_query_info*>& sources);
// Sets evaluation result. Result is true if conditional evaluation failed
void set_eval_result(thread* pthr, bool failed);
@ -765,7 +770,7 @@ namespace rsx
void enable_conditional_rendering(vm::addr_t ref);
void disable_conditional_rendering();
virtual void begin_conditional_rendering();
virtual void begin_conditional_rendering(const std::vector<reports::occlusion_query_info*>& sources);
virtual void end_conditional_rendering();
// sync

View File

@ -784,6 +784,60 @@ namespace vk
}
};
// Compute task that sums a run of 32-bit words from one buffer into a single 32-bit word
// at the start of another buffer. Each invocation atomically adds one source word to the result.
// The destination is NOT cleared by this task; the caller decides whether to zero it first
// or to accumulate on top of a previous value.
struct cs_aggregator : compute_task
{
	// Buffer holding the words to be summed (bound at binding 0)
	const buffer* src = nullptr;
	// Buffer receiving the sum in its first 4 bytes (bound at binding 1)
	const buffer* dst = nullptr;
	// Size in bytes of the source range that gets bound
	u32 block_length = 0;
	// Number of 32-bit words in the source range
	u32 word_count = 0;

	cs_aggregator()
	{
		ssbo_count = 2;

		create();

		// The result block must not be qualified 'writeonly': atomicAdd is a read-modify-write
		// operation, so the shader needs read access to 'result' as well.
		m_src =
			"#version 450\n"
			"layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;\n\n"

			"layout(set=0, binding=0, std430) readonly buffer ssbo0{ uint src[]; };\n"
			"layout(set=0, binding=1, std430) buffer ssbo1{ uint result; };\n\n"

			"void main()\n"
			"{\n"
			"	if (gl_GlobalInvocationID.x < src.length())\n"
			"	{\n"
			"		atomicAdd(result, src[gl_GlobalInvocationID.x]);\n"
			"	}\n"
			"}\n";

		// Substitute the workgroup size placeholder
		const std::pair<std::string, std::string> syntax_replace[] =
		{
			{ "%ws", std::to_string(optimal_group_size) },
		};

		m_src = fmt::replace_all(m_src, syntax_replace);
	}

	// Binds the source range (block_length bytes) and the 4-byte result word.
	// The bound source range determines src.length() inside the shader.
	void bind_resources() override
	{
		m_program->bind_buffer({ src->value, 0, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
		m_program->bind_buffer({ dst->value, 0, 4 }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
	}

	// Records the aggregation into cmd.
	//   dst       - buffer whose first word accumulates the sum
	//   src       - buffer containing num_words consecutive 32-bit words starting at offset 0
	//   num_words - number of words to add
	void run(VkCommandBuffer cmd, const vk::buffer* dst, const vk::buffer* src, u32 num_words)
	{
		this->dst = dst;
		this->src = src;
		word_count = num_words;
		block_length = num_words * 4;

		// One invocation per word, rounded up to whole workgroups
		const u32 linear_invocations = aligned_div(word_count, optimal_group_size);
		compute_task::run(cmd, linear_invocations);
	}
};
// TODO: Replace with a proper manager
extern std::unordered_map<u32, std::unique_ptr<vk::compute_task>> g_compute_tasks;

View File

@ -643,6 +643,7 @@ VKGSRender::~VKGSRender()
//Queries
m_occlusion_query_pool.destroy();
m_cond_render_buffer.reset();
//Command buffer
for (auto &cb : m_primary_cb_list)
@ -1151,6 +1152,18 @@ void VKGSRender::emit_geometry(u32 sub_index)
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
update_draw_state();
begin_render_pass();
if (cond_render_ctrl.hw_cond_active)
{
// It is inconvenient that conditional rendering breaks other things like compute dispatch
// TODO: If this is heavy, refactor the resources into globals and add checks around compute dispatch
VkConditionalRenderingBeginInfoEXT info{};
info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
info.buffer = m_cond_render_buffer->value;
m_device->cmdBeginConditionalRenderingEXT(*m_current_command_buffer, &info);
m_current_command_buffer->flags |= vk::command_buffer::cb_has_conditional_render;
}
}
// Bind the new set of descriptors for use with this draw call
@ -1787,6 +1800,12 @@ void VKGSRender::end()
}
while (rsx::method_registers.current_draw_clause.next());
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render)
{
m_device->cmdEndConditionalRenderingEXT(*m_current_command_buffer);
m_current_command_buffer->flags &= ~(vk::command_buffer::cb_has_conditional_render);
}
// Close any open passes unconditionally
close_render_pass();
@ -2702,7 +2721,7 @@ void VKGSRender::load_program_env()
// Vertex state
const auto mem = m_vertex_env_ring_info.alloc<256>(256);
auto buf = static_cast<u8*>(m_vertex_env_ring_info.map(mem, 144));
auto buf = static_cast<u8*>(m_vertex_env_ring_info.map(mem, 148));
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
@ -2866,6 +2885,14 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
vk::clear_status_interrupt(vk::heap_dirty);
}
#if 0 // Currently unreachable
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render)
{
verify(HERE), m_render_pass_open;
m_device->cmdEndConditionalRenderingEXT(*m_current_command_buffer);
}
#endif
// End any active renderpasses; the caller should handle reopening
if (m_render_pass_open)
{
@ -3689,7 +3716,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info*
busy_wait();
}
data.command_buffer_to_wait->wait();
data.command_buffer_to_wait->flush();
// Gather data
for (const auto occlusion_id : data.indices)
@ -3734,6 +3761,124 @@ void VKGSRender::emergency_query_cleanup(vk::command_buffer* commands)
}
}
// Prepares the GPU-side predicate used for hardware conditional rendering.
//
// The occlusion results of every query in 'sources' are reduced into the first word of
// m_cond_render_buffer, entirely on the GPU:
//   - one query with one index: the result is copied straight into the predicate buffer;
//   - otherwise: results are copied into the scratch buffer and summed by vk::cs_aggregator.
//
// sources - non-empty list of queries feeding the predicate, newest first.
//
// The base implementation is always invoked last. It clears cond_render_ctrl.eval_sources, so if
// 'sources' refers to that vector it is empty afterwards and must not be read after the call.
void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occlusion_query_info*>& sources)
{
	verify(HERE), !sources.empty();

	// Flag check whether to calculate all entries or only one
	bool partial_eval;

	// Try and avoid regenerating the data if it's a repeat/spam
	// NOTE: The incoming list is reversed with the first entry being the newest
	if (m_cond_render_sync_tag == sources.front()->sync_tag)
	{
		// Already synched, check subdraw which is possible if last sync happened while query was active
		if (!m_active_query_info || m_active_query_info != sources.front())
		{
			rsx::thread::begin_conditional_rendering(sources);
			return;
		}

		// Partial evaluation only
		partial_eval = true;
	}
	else
	{
		m_cond_render_sync_tag = sources.front()->sync_tag;
		partial_eval = false;
	}

	// Time to aggregate
	if (!m_cond_render_buffer)
	{
		// Lazily create the 4-byte predicate buffer on first use
		auto& memory_props = m_device->get_memory_mapping();
		m_cond_render_buffer = std::make_unique<vk::buffer>(
			*m_device, 4,
			memory_props.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
	}

	if (sources.size() == 1)
	{
		const auto query = sources.front();
		const auto& query_info = m_occlusion_map[query->driver_handle];

		if (query_info.indices.size() == 1)
		{
			// Fast path: a single result word can be written directly into the predicate buffer
			const auto& index = query_info.indices.front();
			m_occlusion_query_pool.get_query_result_indirect(*m_current_command_buffer, index, m_cond_render_buffer->value, 0);

			vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT,
				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT);

			rsx::thread::begin_conditional_rendering(sources);
			return;
		}
	}

	auto scratch = vk::get_scratch_buffer();
	u32 dst_offset = 0;

	// A partial evaluation only re-reads the newest (still active) query
	const size_t first = 0;
	const size_t last = partial_eval ? 1 : sources.size();

	// Gather every hardware query result into consecutive words of the scratch buffer
	for (size_t i = first; i < last; ++i)
	{
		auto& query_info = m_occlusion_map[sources[i]->driver_handle];
		for (const auto& index : query_info.indices)
		{
			m_occlusion_query_pool.get_query_result_indirect(*m_current_command_buffer, index, scratch->value, dst_offset);
			dst_offset += 4;
		}
	}

	if (dst_offset)
	{
		// A full evaluation producing a single word should have been caught by the fast path above.
		// A partial evaluation may legitimately gather exactly one word (several sources, newest one
		// holding a single index); the aggregator then adds that word to the existing result.
		verify(HERE), (partial_eval || dst_offset > 4);

		if (!partial_eval)
		{
			// Clear result to zero
			vkCmdFillBuffer(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4, 0);

			// The aggregator uses atomicAdd (read-modify-write), so the cleared value must be
			// visible to shader reads as well as ordered before shader writes.
			vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
		}

		// Make the copied query results visible to the compute shader
		vk::insert_buffer_memory_barrier(*m_current_command_buffer, scratch->value, 0, dst_offset,
			VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
			VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

		vk::get_compute_task<vk::cs_aggregator>()->run(*m_current_command_buffer, m_cond_render_buffer.get(), scratch, dst_offset / 4);

		// Make the aggregated value visible to the conditional rendering stage
		vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4,
			VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT,
			VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT);
	}
	else
	{
		// None of the considered queries owns a hardware query index
		LOG_ERROR(RSX, "Dubious query data pushed to cond render!, Please report to developers(q.pending=%d)", sources.front()->pending);
	}

	rsx::thread::begin_conditional_rendering(sources);
}
void VKGSRender::end_conditional_rendering()
{
thread::end_conditional_rendering();
}
bool VKGSRender::on_decompiler_task()
{
return m_prog_buffer->async_update(8, *m_device, pipeline_layout).first;

View File

@ -321,6 +321,9 @@ private:
std::unique_ptr<vk::attachment_clear_pass> m_attachment_clear_pass;
std::unique_ptr<vk::video_out_calibration_pass> m_video_output_pass;
std::unique_ptr<vk::buffer> m_cond_render_buffer;
u64 m_cond_render_sync_tag = 0;
shared_mutex m_sampler_mutex;
u64 surface_store_tag = 0;
std::atomic_bool m_samplers_dirty = { true };
@ -479,6 +482,10 @@ public:
// External callback in case we need to suddenly submit a commandlist unexpectedly, e.g in a violation handler
void emergency_query_cleanup(vk::command_buffer* commands);
// Conditional rendering
void begin_conditional_rendering(const std::vector<rsx::reports::occlusion_query_info*>& sources) override;
void end_conditional_rendering() override;
protected:
void clear_surface(u32 mask) override;
void begin() override;

View File

@ -545,6 +545,8 @@ namespace vk
gpu_shader_types_support shader_types_support{};
VkPhysicalDeviceDriverPropertiesKHR driver_properties{};
bool stencil_export_support = false;
bool conditional_render_support = false;
bool host_query_reset_support = false;
friend class render_device;
private:
@ -594,6 +596,8 @@ private:
}
stencil_export_support = device_extensions.is_supported(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
conditional_render_support = device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
host_query_reset_support = device_extensions.is_supported(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
}
public:
@ -764,6 +768,12 @@ private:
std::unique_ptr<mem_allocator_base> m_allocator;
VkDevice dev = VK_NULL_HANDLE;
public:
// Exported device endpoints
PFN_vkCmdBeginConditionalRenderingEXT cmdBeginConditionalRenderingEXT = nullptr;
PFN_vkCmdEndConditionalRenderingEXT cmdEndConditionalRenderingEXT = nullptr;
PFN_vkResetQueryPoolEXT resetQueryPoolEXT = nullptr;
public:
render_device() = default;
~render_device() = default;
@ -797,6 +807,16 @@ private:
requested_extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (pgpu->conditional_render_support)
{
requested_extensions.push_back(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
}
if (pgpu->host_query_reset_support)
{
requested_extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
}
enabled_features.robustBufferAccess = VK_TRUE;
enabled_features.fullDrawIndexUint32 = VK_TRUE;
enabled_features.independentBlend = VK_TRUE;
@ -880,6 +900,18 @@ private:
CHECK_RESULT(vkCreateDevice(*pgpu, &device, nullptr, &dev));
// Import optional function endpoints
if (pgpu->conditional_render_support)
{
cmdBeginConditionalRenderingEXT = (PFN_vkCmdBeginConditionalRenderingEXT)vkGetDeviceProcAddr(dev, "vkCmdBeginConditionalRenderingEXT");
cmdEndConditionalRenderingEXT = (PFN_vkCmdEndConditionalRenderingEXT)vkGetDeviceProcAddr(dev, "vkCmdEndConditionalRenderingEXT");
}
if (pgpu->host_query_reset_support)
{
resetQueryPoolEXT = (PFN_vkResetQueryPoolEXT)vkGetDeviceProcAddr(dev, "vkResetQueryPoolEXT");
}
memory_map = vk::get_memory_mapping(pdev);
m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
@ -979,6 +1011,16 @@ private:
return pgpu->features.alphaToOne != VK_FALSE;
}
// Reports the conditional_render_support flag recorded on the physical device.
bool get_conditional_render_support() const
{
	const bool supported = pgpu->conditional_render_support;
	return supported;
}
// Reports the host_query_reset_support flag recorded on the physical device.
bool get_host_query_reset_support() const
{
	const bool supported = pgpu->host_query_reset_support;
	return supported;
}
mem_allocator_base* get_allocator() const
{
return m_allocator.get();
@ -1097,7 +1139,8 @@ private:
cb_has_blit_transfer = 2,
cb_has_dma_transfer = 4,
cb_has_open_query = 8,
cb_load_occluson_task = 16
cb_load_occluson_task = 16,
cb_has_conditional_render = 32
};
u32 flags = 0;
@ -3045,6 +3088,11 @@ public:
while (true);
}
// Records a GPU-side copy of a single query result from this pool into a buffer.
//   cmd        - command buffer to record into
//   index      - index of the query inside the pool
//   dst        - destination buffer
//   dst_offset - byte offset inside dst where the result is written
// The copy waits for the query result to become available (VK_QUERY_RESULT_WAIT_BIT).
void get_query_result_indirect(vk::command_buffer &cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset)
{
	// Exactly one query is copied; 4 is the stride between consecutive results
	const u32 query_count = 1;
	const VkDeviceSize result_stride = 4;
	const VkQueryResultFlags copy_flags = VK_QUERY_RESULT_WAIT_BIT;

	vkCmdCopyQueryPoolResults(cmd, query_pool, index, query_count, dst, dst_offset, result_stride, copy_flags);
}
void reset_query(vk::command_buffer &cmd, u32 index)
{
if (query_active_status[index])