rsx: Synchronization rewritten

- Do not do a full sync on a texture read barrier
- Avoid calling zcull sync in FIFO spin wait
- Do not flush memory to cache from the renderer side; this method is now obsolete
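In practice, the first and third points reduce the renderer-side do_method() handlers for both semaphore-release methods to cheap no-ops, with the generic rsx::thread handlers in rsx_methods.cpp performing the actual semaphore write (see the hunks below). A minimal sketch of the resulting shape, with illustrative scaffolding around the two cases taken from this commit (the method ID values here are placeholders, not the real RSX register offsets):

    #include <cstdint>

    using u32 = std::uint32_t;

    // Placeholder method IDs; the real values live in the RSX method tables.
    enum : u32
    {
        NV4097_TEXTURE_READ_SEMAPHORE_RELEASE   = 0x100, // placeholder
        NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE = 0x101, // placeholder
    };

    // Hedged sketch of the post-commit renderer-side handling.
    bool do_method(u32 cmd, u32 /*arg*/)
    {
        switch (cmd)
        {
        case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
            // Texture barrier only; no longer triggers a full sync
            return true;
        case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
            // No renderer-side flush-to-cache; the semaphore value is
            // written by the generic rsx::thread method implementation
            return true;
        default:
            return false;
        }
    }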
Authored by kd-11 on 2018-07-19 09:08:20 +03:00, committed by kd-11
parent 23b52e1b1c
commit 3b47e43380
7 changed files with 32 additions and 128 deletions

View File

@@ -593,7 +593,6 @@ void GLGSRender::end()
     m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
     m_draw_calls++;
-    synchronize_buffers();
     rsx::thread::end();
 }
@@ -1100,7 +1099,6 @@ bool GLGSRender::do_method(u32 cmd, u32 arg)
         if (arg & 0x3) ctx |= rsx::framebuffer_creation_context::context_clear_depth;
         init_buffers((rsx::framebuffer_creation_context)ctx, true);
-        synchronize_buffers();
         clear_surface(arg);
     }
@@ -1113,10 +1111,16 @@ bool GLGSRender::do_method(u32 cmd, u32 arg)
         return true;
     }
     case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
-    case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
-        flush_draw_buffers = true;
-        return true;
+    {
+        // Texture barrier, seemingly not very useful
+        return true;
+    }
+    case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
+    {
+        //flush_draw_buffers = true;
+        return true;
+    }
     }

     return false;
 }
@@ -1695,15 +1699,6 @@ work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data)
     return result;
 }
 
-void GLGSRender::synchronize_buffers()
-{
-    if (flush_draw_buffers)
-    {
-        write_buffers();
-        flush_draw_buffers = false;
-    }
-}
-
 bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
 {
     if (m_gl_texture_cache.blit(src, dst, interpolate, m_rtts))

View File

@@ -325,7 +325,6 @@ private:
     shared_mutex queue_guard;
     std::list<work_item> work_queue;
 
-    bool flush_draw_buffers = false;
     std::thread::id m_thread_id;
 
     GLProgramBuffer m_prog_buffer;
@@ -369,10 +368,8 @@ private:
 public:
     void read_buffers();
-    void write_buffers();
     void set_viewport();
-    void synchronize_buffers();
     work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data);
     bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;

View File

@@ -179,9 +179,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
         return;
     }
 
-    //We are about to change buffers, flush any pending requests for the old buffers
-    synchronize_buffers();
-
     m_rtts_dirty = false;
     zcull_surface_active = false;
@@ -475,28 +472,28 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
     case rsx::surface_target::none: break;
 
     case rsx::surface_target::surface_a:
-        __glcheck draw_fbo.draw_buffer(draw_fbo.color[0]);
-        __glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
+        draw_fbo.draw_buffer(draw_fbo.color[0]);
+        draw_fbo.read_buffer(draw_fbo.color[0]);
         break;
 
     case rsx::surface_target::surface_b:
-        __glcheck draw_fbo.draw_buffer(draw_fbo.color[1]);
-        __glcheck draw_fbo.read_buffer(draw_fbo.color[1]);
+        draw_fbo.draw_buffer(draw_fbo.color[1]);
+        draw_fbo.read_buffer(draw_fbo.color[1]);
         break;
 
     case rsx::surface_target::surfaces_a_b:
-        __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
-        __glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
+        draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
+        draw_fbo.read_buffer(draw_fbo.color[0]);
         break;
 
     case rsx::surface_target::surfaces_a_b_c:
-        __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
-        __glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
+        draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
+        draw_fbo.read_buffer(draw_fbo.color[0]);
         break;
 
     case rsx::surface_target::surfaces_a_b_c_d:
-        __glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
-        __glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
+        draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
+        draw_fbo.read_buffer(draw_fbo.color[0]);
         break;
     }
@@ -590,7 +587,7 @@ void GLGSRender::read_buffers()
         {
             if (!color_buffer.tile)
             {
-                __glcheck std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(color_buffer.ptr, color_format.format, color_format.type);
+                std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(color_buffer.ptr, color_format.format, color_format.type);
             }
             else
             {
@@ -599,7 +596,7 @@ void GLGSRender::read_buffers()
                 std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
                 color_buffer.read(buffer.get(), width, height, pitch);
 
-                __glcheck std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(buffer.get(), color_format.format, color_format.type);
+                std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(buffer.get(), color_format.format, color_format.type);
             }
         }
     }
@@ -654,8 +651,8 @@ void GLGSRender::read_buffers()
         int pixel_size = rsx::internals::get_pixel_size(rsx::method_registers.surface_depth_fmt());
         gl::buffer pbo_depth;
 
-        __glcheck pbo_depth.create(width * height * pixel_size);
-        __glcheck pbo_depth.map([&](GLubyte* pixels)
+        pbo_depth.create(width * height * pixel_size);
+        pbo_depth.map([&](GLubyte* pixels)
         {
             u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
@@ -679,42 +676,6 @@ void GLGSRender::read_buffers()
             }
         }, gl::buffer::access::write);
 
-        __glcheck std::get<1>(m_rtts.m_bound_depth_stencil)->copy_from(pbo_depth, depth_format.format, depth_format.type);
-    }
-}
-
-void GLGSRender::write_buffers()
-{
-    if (!draw_fbo)
-        return;
-
-    if (g_cfg.video.write_color_buffers)
-    {
-        auto write_color_buffers = [&](int index, int count)
-        {
-            for (int i = index; i < index + count; ++i)
-            {
-                if (m_surface_info[i].pitch == 0)
-                    continue;
-
-                /**Even tiles are loaded as whole textures during read_buffers from testing.
-                * Need further evaluation to determine correct behavior. Separate paths for both show no difference,
-                * but using the GPU to perform the caching is many times faster.
-                */
-
-                const u32 range = m_surface_info[i].pitch * m_surface_info[i].height;
-                m_gl_texture_cache.flush_memory_to_cache(m_surface_info[i].address, range, true, 0xFF);
-            }
-        };
-
-        write_color_buffers(0, 4);
-    }
-
-    if (g_cfg.video.write_depth_buffer)
-    {
-        if (m_depth_surface_info.pitch == 0) return;
-
-        const u32 range = m_depth_surface_info.pitch * m_depth_surface_info.height;
-        m_gl_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, range, true, 0xFF);
-    }
-}
+        std::get<1>(m_rtts.m_bound_depth_stencil)->copy_from(pbo_depth, depth_format.format, depth_format.type);
+    }
+}

View File

@@ -587,7 +587,7 @@ namespace rsx
             }
             else if (zcull_ctrl->has_pending())
             {
-                zcull_ctrl->sync(this);
+                //zcull_ctrl->sync(this);
             }
             else
             {

View File

@@ -1491,7 +1491,6 @@ void VKGSRender::end()
     std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
     m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - textures_end).count();
 
-    copy_render_targets_to_dma_location();
     m_draw_calls++;
 
     rsx::thread::end();
@@ -1638,8 +1637,6 @@ void VKGSRender::clear_surface(u32 mask)
     if (!framebuffer_status_valid) return;
 
-    copy_render_targets_to_dma_location();
-
     float depth_clear = 1.f;
     u32 stencil_clear = 0;
     u32 depth_stencil_mask = 0;
@@ -1793,53 +1790,6 @@
     }
 }
 
-void VKGSRender::sync_at_semaphore_release()
-{
-    m_flush_draw_buffers = true;
-}
-
-void VKGSRender::copy_render_targets_to_dma_location()
-{
-    if (!m_flush_draw_buffers)
-        return;
-
-    if (!g_cfg.video.write_color_buffers && !g_cfg.video.write_depth_buffer)
-        return;
-
-    //TODO: Make this asynchronous. Should be similar to a glFlush() but in this case its similar to glFinish
-    //This is due to all the hard waits for fences
-    //TODO: Use a command buffer array to allow explicit draw command tracking
-    vk::enter_uninterruptible();
-
-    if (g_cfg.video.write_color_buffers)
-    {
-        for (u8 index = 0; index < rsx::limits::color_buffers_count; index++)
-        {
-            if (!m_surface_info[index].pitch)
-                continue;
-
-            m_texture_cache.flush_memory_to_cache(m_surface_info[index].address, m_surface_info[index].pitch * m_surface_info[index].height, true, 0xFF,
-                *m_current_command_buffer, m_swapchain->get_graphics_queue());
-        }
-    }
-
-    if (g_cfg.video.write_depth_buffer)
-    {
-        if (m_depth_surface_info.pitch)
-        {
-            m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, true, 0xFF,
-                *m_current_command_buffer, m_swapchain->get_graphics_queue());
-        }
-    }
-
-    vk::leave_uninterruptible();
-
-    flush_command_queue();
-    m_flush_draw_buffers = false;
-}
 
 void VKGSRender::flush_command_queue(bool hard_sync)
 {
     close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence);
@@ -2192,9 +2142,11 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)
         clear_surface(arg);
         return true;
     case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
+        // Texture barrier, seemingly not very useful
+        return true;
     case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
-        sync_at_semaphore_release();
-        return false; //call rsx::thread method implementation
+        //sync_at_semaphore_release();
+        return true;
     default:
         return false;
     }
@@ -2541,7 +2493,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
     if (m_draw_fbo && !m_rtts_dirty)
         return;
 
-    copy_render_targets_to_dma_location();
     m_rtts_dirty = false;
 
     u32 clip_width = rsx::method_registers.surface_clip_width();

View File

@@ -354,7 +354,6 @@ private:
     s64 m_flip_time = 0;
     u8 m_draw_buffers_count = 0;
-    bool m_flush_draw_buffers = false;
 
     shared_mutex m_flush_queue_mutex;
     flush_request_task m_flush_requests;
@@ -380,9 +379,7 @@ private:
     void clear_surface(u32 mask);
     void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
     void open_command_buffer();
-    void sync_at_semaphore_release();
     void prepare_rtts(rsx::framebuffer_creation_context context);
-    void copy_render_targets_to_dma_location();
     void flush_command_queue(bool hard_sync = false);
     void queue_swap_request();

View File

@@ -160,6 +160,8 @@ namespace rsx
     void texture_read_semaphore_release(thread* rsx, u32 _reg, u32 arg)
     {
+        // Pipeline barrier seems to be equivalent to a SHADER_READ stage barrier
+
         const u32 index = method_registers.semaphore_offset_4097() >> 4;
 
         // lle-gcm likes to inject system reserved semaphores, presumably for system/vsh usage
         // Avoid calling render to avoid any havoc(flickering) they may cause from invalid flush/write
@@ -169,7 +171,6 @@ namespace rsx
             //
         }
 
-        rsx->sync();
         auto& sema = vm::_ref<RsxReports>(rsx->label_addr);
         sema.semaphore[index].val = arg;
         sema.semaphore[index].pad = 0;
@@ -178,6 +179,8 @@ namespace rsx
     void back_end_write_semaphore_release(thread* rsx, u32 _reg, u32 arg)
     {
+        // Full pipeline barrier
+
         const u32 index = method_registers.semaphore_offset_4097() >> 4;
         if (index > 63 && !rsx->do_method(NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE, arg))
         {