From a17025c465d71c67a0c6efd0d78d46fbd1e33b1d Mon Sep 17 00:00:00 2001 From: pauls-gh Date: Tue, 27 Mar 2018 09:50:22 -0700 Subject: [PATCH] Strict Rendering Mode (SRM) fix. Move old surface copy before texture upload. Fixes the following issues on Tales of Vesperia which requires SRM. - Blacked out scene after the sleeping dog now renders correctly - Ghosting effect. The ghosting was most noticeable as a delay between the character rendering and the cell shading around the character. This appears to be gone with this change. --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 214 ++++++++++++++++---------------- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 150 +++++++++++----------- 2 files changed, 183 insertions(+), 181 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index d049b482a3..d5be463872 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -205,6 +205,113 @@ void GLGSRender::end() //Do vertex upload before RTT prep / texture lookups to give the driver time to push data auto upload_info = set_vertex_buffer(); + //Check if depth buffer is bound and valid + //If ds is not initialized clear it; it seems new depth textures should have depth cleared + auto copy_rtt_contents = [](gl::render_target *surface) + { + if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format()) + { + //Copy data from old contents onto this one + //1. Clip a rectangular region defning the data + //2. Perform a GPU blit + u16 parent_w = surface->old_contents->width(); + u16 parent_h = surface->old_contents->height(); + u16 copy_w, copy_h; + + std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true); + glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1); + surface->set_cleared(); + } + //TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible + + surface->old_contents = nullptr; + }; + + //Check if we have any 'recycled' surfaces in memory and if so, clear them + std::vector buffers_to_clear; + bool clear_all_color = true; + bool clear_depth = false; + + for (int index = 0; index < 4; index++) + { + if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0) + { + if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared()) + clear_all_color = false; + else + buffers_to_clear.push_back(index); + } + } + + gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); + if (ds && !ds->cleared()) + { + clear_depth = true; + } + + //Temporarily disable pixel tests + glDisable(GL_SCISSOR_TEST); + + if (clear_depth || buffers_to_clear.size() > 0) + { + GLenum mask = 0; + + if (clear_depth) + { + gl_state.depth_mask(GL_TRUE); + gl_state.clear_depth(1.0); + gl_state.clear_stencil(255); + mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } + + if (clear_all_color) + mask |= GL_COLOR_BUFFER_BIT; + + glClear(mask); + + if (buffers_to_clear.size() > 0 && !clear_all_color) + { + GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f }; + //It is impossible for the render target to be typa A or B here (clear all would have been flagged) + for (auto &i : buffers_to_clear) + glClearBufferfv(draw_fbo.id(), i, colors); + } + + if (clear_depth) + gl_state.depth_mask(rsx::method_registers.depth_write_enabled()); + + ds->set_cleared(); + } + + if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() && + ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8) + { + m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id()); + ds->old_contents = nullptr; + } + + if (g_cfg.video.strict_rendering_mode) + { + if (ds && ds->old_contents != nullptr) + copy_rtt_contents(ds); + + for (auto &rtt : m_rtts.m_bound_render_targets) + { + if (auto surface = std::get<1>(rtt)) + { + if (surface->old_contents != nullptr) + copy_rtt_contents(surface); + } + } + } + else + { + // Old contents are one use only. Keep the depth conversion check from firing over and over + if (ds) ds->old_contents = nullptr; + } + + glEnable(GL_SCISSOR_TEST); + //Load textures { std::chrono::time_point textures_start = steady_clock::now(); @@ -359,113 +466,6 @@ void GLGSRender::end() update_draw_state(); - //Check if depth buffer is bound and valid - //If ds is not initialized clear it; it seems new depth textures should have depth cleared - auto copy_rtt_contents = [](gl::render_target *surface) - { - if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format()) - { - //Copy data from old contents onto this one - //1. Clip a rectangular region defning the data - //2. Perform a GPU blit - u16 parent_w = surface->old_contents->width(); - u16 parent_h = surface->old_contents->height(); - u16 copy_w, copy_h; - - std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true); - glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1); - surface->set_cleared(); - } - //TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible - - surface->old_contents = nullptr; - }; - - //Check if we have any 'recycled' surfaces in memory and if so, clear them - std::vector buffers_to_clear; - bool clear_all_color = true; - bool clear_depth = false; - - for (int index = 0; index < 4; index++) - { - if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0) - { - if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared()) - clear_all_color = false; - else - buffers_to_clear.push_back(index); - } - } - - gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); - if (ds && !ds->cleared()) - { - clear_depth = true; - } - - //Temporarily disable pixel tests - glDisable(GL_SCISSOR_TEST); - - if (clear_depth || buffers_to_clear.size() > 0) - { - GLenum mask = 0; - - if (clear_depth) - { - gl_state.depth_mask(GL_TRUE); - gl_state.clear_depth(1.0); - gl_state.clear_stencil(255); - mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - } - - if (clear_all_color) - mask |= GL_COLOR_BUFFER_BIT; - - glClear(mask); - - if (buffers_to_clear.size() > 0 && !clear_all_color) - { - GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f }; - //It is impossible for the render target to be typa A or B here (clear all would have been flagged) - for (auto &i: buffers_to_clear) - glClearBufferfv(draw_fbo.id(), i, colors); - } - - if (clear_depth) - gl_state.depth_mask(rsx::method_registers.depth_write_enabled()); - - ds->set_cleared(); - } - - if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() && - ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8) - { - m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id()); - ds->old_contents = nullptr; - } - - if (g_cfg.video.strict_rendering_mode) - { - if (ds && ds->old_contents != nullptr) - copy_rtt_contents(ds); - - for (auto &rtt : m_rtts.m_bound_render_targets) - { - if (auto surface = std::get<1>(rtt)) - { - if (surface->old_contents != nullptr) - copy_rtt_contents(surface); - } - } - } - else - { - // Old contents are one use only. Keep the depth conversion check from firing over and over - if (ds) ds->old_contents = nullptr; - } - - glEnable(GL_SCISSOR_TEST); - std::chrono::time_point draw_start = steady_clock::now(); if (g_cfg.video.debug_output) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 6eadd24bd3..abf834d912 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1072,6 +1072,82 @@ void VKGSRender::end() m_vertex_upload_time += std::chrono::duration_cast(vertex_end - vertex_start).count(); std::chrono::time_point textures_start = vertex_end; + + + auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); + + //Check for data casts + if (ds && ds->old_contents) + { + if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM) + { + auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0); + auto render_pass = m_render_passes[rp]; + m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean); + + ds->old_contents = nullptr; + ds->dirty = false; + } + else if (!g_cfg.video.strict_rendering_mode) + { + //Clear this to avoid dereferencing stale ptr + ds->old_contents = nullptr; + } + } + + if (g_cfg.video.strict_rendering_mode) + { + auto copy_rtt_contents = [&](vk::render_target* surface) + { + if (surface->info.format == surface->old_contents->info.format) + { + const VkImageAspectFlags aspect = surface->attachment_aspect_flag; + + const u16 parent_w = surface->old_contents->width(); + const u16 parent_h = surface->old_contents->height(); + u16 copy_w, copy_h; + + std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true); + + VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; + VkImageLayout old_layout = surface->current_layout; + + vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); + vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); + + VkImageCopy copy_rgn; + copy_rgn.srcOffset = { 0, 0, 0 }; + copy_rgn.dstOffset = { 0, 0, 0 }; + copy_rgn.dstSubresource = { aspect, 0, 0, 1 }; + copy_rgn.srcSubresource = { aspect, 0, 0, 1 }; + copy_rgn.extent = { copy_w, copy_h, 1 }; + + vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, ©_rgn); + vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range); + + surface->dirty = false; + } + //TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible + + surface->old_contents = nullptr; + }; + + //Prepare surfaces if needed + for (auto &rtt : m_rtts.m_bound_render_targets) + { + if (auto surface = std::get<1>(rtt)) + { + if (surface->old_contents != nullptr) + copy_rtt_contents(surface); + } + } + + if (ds && ds->old_contents) + { + copy_rtt_contents(ds); + } + } + //Load textures { std::lock_guard lock(m_sampler_mutex); @@ -1288,80 +1364,6 @@ void VKGSRender::end() //Only textures are synchronized tightly with the GPU and they have been read back above vk::enter_uninterruptible(); - auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); - - //Check for data casts - if (ds && ds->old_contents) - { - if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM) - { - auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0); - auto render_pass = m_render_passes[rp]; - m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean); - - ds->old_contents = nullptr; - ds->dirty = false; - } - else if (!g_cfg.video.strict_rendering_mode) - { - //Clear this to avoid dereferencing stale ptr - ds->old_contents = nullptr; - } - } - - if (g_cfg.video.strict_rendering_mode) - { - auto copy_rtt_contents = [&](vk::render_target* surface) - { - if (surface->info.format == surface->old_contents->info.format) - { - const VkImageAspectFlags aspect = surface->attachment_aspect_flag; - - const u16 parent_w = surface->old_contents->width(); - const u16 parent_h = surface->old_contents->height(); - u16 copy_w, copy_h; - - std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true); - - VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 }; - VkImageLayout old_layout = surface->current_layout; - - vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); - vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); - - VkImageCopy copy_rgn; - copy_rgn.srcOffset = { 0, 0, 0 }; - copy_rgn.dstOffset = { 0, 0, 0 }; - copy_rgn.dstSubresource = { aspect, 0, 0, 1 }; - copy_rgn.srcSubresource = { aspect, 0, 0, 1 }; - copy_rgn.extent = { copy_w, copy_h, 1 }; - - vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, ©_rgn); - vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range); - - surface->dirty = false; - } - //TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible - - surface->old_contents = nullptr; - }; - - //Prepare surfaces if needed - for (auto &rtt : m_rtts.m_bound_render_targets) - { - if (auto surface = std::get<1>(rtt)) - { - if (surface->old_contents != nullptr) - copy_rtt_contents(surface); - } - } - - if (ds && ds->old_contents) - { - copy_rtt_contents(ds); - } - } - u32 occlusion_id = 0; if (m_occlusion_query_active) {