Strict Rendering Mode (SRM) fix. Move old surface copy before texture upload.

Fixes the following issues in Tales of Vesperia, which requires SRM:
- The blacked-out scene after the sleeping dog now renders correctly.
- Ghosting effect. The ghosting was most noticeable as a delay between the character rendering and the cel shading around the character. This appears to be gone with this change.
This commit is contained in:
pauls-gh 2018-03-27 09:50:22 -07:00 committed by kd-11
parent 70cc2e3665
commit a17025c465
2 changed files with 183 additions and 181 deletions

View File

@ -205,6 +205,113 @@ void GLGSRender::end()
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
auto upload_info = set_vertex_buffer();
//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
auto copy_rtt_contents = [](gl::render_target *surface)
if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format())
//Copy data from old contents onto this one
//1. Clip a rectangular region defining the data
//2. Perform a GPU blit
u16 parent_w = surface->old_contents->width();
u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1);
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
surface->old_contents = nullptr;
//Check if we have any 'recycled' surfaces in memory and if so, clear them
std::vector<int> buffers_to_clear;
bool clear_all_color = true;
bool clear_depth = false;
for (int index = 0; index < 4; index++)
if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0)
if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared())
clear_all_color = false;
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
clear_depth = true;
//Temporarily disable pixel tests
if (clear_depth || buffers_to_clear.size() > 0)
GLenum mask = 0;
if (clear_depth)
if (clear_all_color)
if (buffers_to_clear.size() > 0 && !clear_all_color)
GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
//It is impossible for the render target to be type A or B here (clear all would have been flagged)
for (auto &i : buffers_to_clear)
glClearBufferfv(, i, colors);
if (clear_depth)
if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() &&
ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8)
{>width(), ds->height(), ds->id(), ds->old_contents->id());
ds->old_contents = nullptr;
if (
if (ds && ds->old_contents != nullptr)
for (auto &rtt : m_rtts.m_bound_render_targets)
if (auto surface = std::get<1>(rtt))
if (surface->old_contents != nullptr)
// Old contents are one use only. Keep the depth conversion check from firing over and over
if (ds) ds->old_contents = nullptr;
//Load textures
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
@ -359,113 +466,6 @@ void GLGSRender::end()
//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
auto copy_rtt_contents = [](gl::render_target *surface)
if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format())
//Copy data from old contents onto this one
//1. Clip a rectangular region defining the data
//2. Perform a GPU blit
u16 parent_w = surface->old_contents->width();
u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1);
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
surface->old_contents = nullptr;
//Check if we have any 'recycled' surfaces in memory and if so, clear them
std::vector<int> buffers_to_clear;
bool clear_all_color = true;
bool clear_depth = false;
for (int index = 0; index < 4; index++)
if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0)
if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared())
clear_all_color = false;
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
clear_depth = true;
//Temporarily disable pixel tests
if (clear_depth || buffers_to_clear.size() > 0)
GLenum mask = 0;
if (clear_depth)
if (clear_all_color)
if (buffers_to_clear.size() > 0 && !clear_all_color)
GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
//It is impossible for the render target to be type A or B here (clear all would have been flagged)
for (auto &i: buffers_to_clear)
glClearBufferfv(, i, colors);
if (clear_depth)
if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() &&
ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8)
{>width(), ds->height(), ds->id(), ds->old_contents->id());
ds->old_contents = nullptr;
if (
if (ds && ds->old_contents != nullptr)
for (auto &rtt : m_rtts.m_bound_render_targets)
if (auto surface = std::get<1>(rtt))
if (surface->old_contents != nullptr)
// Old contents are one use only. Keep the depth conversion check from firing over and over
if (ds) ds->old_contents = nullptr;
std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
if (

View File

@ -1072,6 +1072,82 @@ void VKGSRender::end()
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
std::chrono::time_point<steady_clock> textures_start = vertex_end;
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
//Check for data casts
if (ds && ds->old_contents)
if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
auto render_pass = m_render_passes[rp];
m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);
ds->old_contents = nullptr;
ds->dirty = false;
else if (!
//Clear this to avoid dereferencing stale ptr
ds->old_contents = nullptr;
if (
auto copy_rtt_contents = [&](vk::render_target* surface)
if (surface->info.format == surface->old_contents->info.format)
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
const u16 parent_w = surface->old_contents->width();
const u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 };
VkImageLayout old_layout = surface->current_layout;
vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { 0, 0, 0 };
copy_rgn.dstOffset = { 0, 0, 0 };
copy_rgn.dstSubresource = { aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { aspect, 0, 0, 1 };
copy_rgn.extent = { copy_w, copy_h, 1 };
vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, &copy_rgn);
vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range);
surface->dirty = false;
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
surface->old_contents = nullptr;
//Prepare surfaces if needed
for (auto &rtt : m_rtts.m_bound_render_targets)
if (auto surface = std::get<1>(rtt))
if (surface->old_contents != nullptr)
if (ds && ds->old_contents)
//Load textures
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
@ -1288,80 +1364,6 @@ void VKGSRender::end()
//Only textures are synchronized tightly with the GPU and they have been read back above
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
//Check for data casts
if (ds && ds->old_contents)
if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
auto render_pass = m_render_passes[rp];
m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);
ds->old_contents = nullptr;
ds->dirty = false;
else if (!
//Clear this to avoid dereferencing stale ptr
ds->old_contents = nullptr;
if (
auto copy_rtt_contents = [&](vk::render_target* surface)
if (surface->info.format == surface->old_contents->info.format)
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
const u16 parent_w = surface->old_contents->width();
const u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 };
VkImageLayout old_layout = surface->current_layout;
vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { 0, 0, 0 };
copy_rgn.dstOffset = { 0, 0, 0 };
copy_rgn.dstSubresource = { aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { aspect, 0, 0, 1 };
copy_rgn.extent = { copy_w, copy_h, 1 };
vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, &copy_rgn);
vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range);
surface->dirty = false;
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
surface->old_contents = nullptr;
//Prepare surfaces if needed
for (auto &rtt : m_rtts.m_bound_render_targets)
if (auto surface = std::get<1>(rtt))
if (surface->old_contents != nullptr)
if (ds && ds->old_contents)
u32 occlusion_id = 0;
if (m_occlusion_query_active)