Strict Rendering Mode (SRM) fix. Move old surface copy before texture upload.

Fixes the following issues on Tales of Vesperia which requires SRM.
- Blacked out scene after the sleeping dog now renders correctly
- Ghosting effect. The ghosting was most noticeable as a delay between the character rendering and the cell shading around the character. This appears to be gone with this change.
This commit is contained in:
pauls-gh 2018-03-27 09:50:22 -07:00 committed by kd-11
parent 70cc2e3665
commit a17025c465
2 changed files with 183 additions and 181 deletions

View File

@ -205,6 +205,113 @@ void GLGSRender::end()
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
auto upload_info = set_vertex_buffer();
//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
auto copy_rtt_contents = [](gl::render_target *surface)
{
if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format())
{
//Copy data from old contents onto this one
//1. Clip a rectangular region defning the data
//2. Perform a GPU blit
u16 parent_w = surface->old_contents->width();
u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1);
surface->set_cleared();
}
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
surface->old_contents = nullptr;
};
//Check if we have any 'recycled' surfaces in memory and if so, clear them
std::vector<int> buffers_to_clear;
bool clear_all_color = true;
bool clear_depth = false;
for (int index = 0; index < 4; index++)
{
if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0)
{
if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared())
clear_all_color = false;
else
buffers_to_clear.push_back(index);
}
}
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
{
clear_depth = true;
}
//Temporarily disable pixel tests
glDisable(GL_SCISSOR_TEST);
if (clear_depth || buffers_to_clear.size() > 0)
{
GLenum mask = 0;
if (clear_depth)
{
gl_state.depth_mask(GL_TRUE);
gl_state.clear_depth(1.0);
gl_state.clear_stencil(255);
mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
}
if (clear_all_color)
mask |= GL_COLOR_BUFFER_BIT;
glClear(mask);
if (buffers_to_clear.size() > 0 && !clear_all_color)
{
GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
//It is impossible for the render target to be typa A or B here (clear all would have been flagged)
for (auto &i : buffers_to_clear)
glClearBufferfv(draw_fbo.id(), i, colors);
}
if (clear_depth)
gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
ds->set_cleared();
}
if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() &&
ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8)
{
m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id());
ds->old_contents = nullptr;
}
if (g_cfg.video.strict_rendering_mode)
{
if (ds && ds->old_contents != nullptr)
copy_rtt_contents(ds);
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
if (surface->old_contents != nullptr)
copy_rtt_contents(surface);
}
}
}
else
{
// Old contents are one use only. Keep the depth conversion check from firing over and over
if (ds) ds->old_contents = nullptr;
}
glEnable(GL_SCISSOR_TEST);
//Load textures
{
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
@ -359,113 +466,6 @@ void GLGSRender::end()
update_draw_state();
//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
auto copy_rtt_contents = [](gl::render_target *surface)
{
if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format())
{
//Copy data from old contents onto this one
//1. Clip a rectangular region defning the data
//2. Perform a GPU blit
u16 parent_w = surface->old_contents->width();
u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1);
surface->set_cleared();
}
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
surface->old_contents = nullptr;
};
//Check if we have any 'recycled' surfaces in memory and if so, clear them
std::vector<int> buffers_to_clear;
bool clear_all_color = true;
bool clear_depth = false;
for (int index = 0; index < 4; index++)
{
if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0)
{
if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared())
clear_all_color = false;
else
buffers_to_clear.push_back(index);
}
}
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
{
clear_depth = true;
}
//Temporarily disable pixel tests
glDisable(GL_SCISSOR_TEST);
if (clear_depth || buffers_to_clear.size() > 0)
{
GLenum mask = 0;
if (clear_depth)
{
gl_state.depth_mask(GL_TRUE);
gl_state.clear_depth(1.0);
gl_state.clear_stencil(255);
mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
}
if (clear_all_color)
mask |= GL_COLOR_BUFFER_BIT;
glClear(mask);
if (buffers_to_clear.size() > 0 && !clear_all_color)
{
GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
//It is impossible for the render target to be typa A or B here (clear all would have been flagged)
for (auto &i: buffers_to_clear)
glClearBufferfv(draw_fbo.id(), i, colors);
}
if (clear_depth)
gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
ds->set_cleared();
}
if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() &&
ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8)
{
m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id());
ds->old_contents = nullptr;
}
if (g_cfg.video.strict_rendering_mode)
{
if (ds && ds->old_contents != nullptr)
copy_rtt_contents(ds);
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
if (surface->old_contents != nullptr)
copy_rtt_contents(surface);
}
}
}
else
{
// Old contents are one use only. Keep the depth conversion check from firing over and over
if (ds) ds->old_contents = nullptr;
}
glEnable(GL_SCISSOR_TEST);
std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
if (g_cfg.video.debug_output)

View File

@ -1072,6 +1072,82 @@ void VKGSRender::end()
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
std::chrono::time_point<steady_clock> textures_start = vertex_end;
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
//Check for data casts
if (ds && ds->old_contents)
{
if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)
{
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
auto render_pass = m_render_passes[rp];
m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);
ds->old_contents = nullptr;
ds->dirty = false;
}
else if (!g_cfg.video.strict_rendering_mode)
{
//Clear this to avoid dereferencing stale ptr
ds->old_contents = nullptr;
}
}
if (g_cfg.video.strict_rendering_mode)
{
auto copy_rtt_contents = [&](vk::render_target* surface)
{
if (surface->info.format == surface->old_contents->info.format)
{
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
const u16 parent_w = surface->old_contents->width();
const u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 };
VkImageLayout old_layout = surface->current_layout;
vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { 0, 0, 0 };
copy_rgn.dstOffset = { 0, 0, 0 };
copy_rgn.dstSubresource = { aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { aspect, 0, 0, 1 };
copy_rgn.extent = { copy_w, copy_h, 1 };
vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, &copy_rgn);
vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range);
surface->dirty = false;
}
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
surface->old_contents = nullptr;
};
//Prepare surfaces if needed
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
if (surface->old_contents != nullptr)
copy_rtt_contents(surface);
}
}
if (ds && ds->old_contents)
{
copy_rtt_contents(ds);
}
}
//Load textures
{
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
@ -1288,80 +1364,6 @@ void VKGSRender::end()
//Only textures are synchronized tightly with the GPU and they have been read back above
vk::enter_uninterruptible();
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
//Check for data casts
if (ds && ds->old_contents)
{
if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)
{
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
auto render_pass = m_render_passes[rp];
m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);
ds->old_contents = nullptr;
ds->dirty = false;
}
else if (!g_cfg.video.strict_rendering_mode)
{
//Clear this to avoid dereferencing stale ptr
ds->old_contents = nullptr;
}
}
if (g_cfg.video.strict_rendering_mode)
{
auto copy_rtt_contents = [&](vk::render_target* surface)
{
if (surface->info.format == surface->old_contents->info.format)
{
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
const u16 parent_w = surface->old_contents->width();
const u16 parent_h = surface->old_contents->height();
u16 copy_w, copy_h;
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 };
VkImageLayout old_layout = surface->current_layout;
vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
VkImageCopy copy_rgn;
copy_rgn.srcOffset = { 0, 0, 0 };
copy_rgn.dstOffset = { 0, 0, 0 };
copy_rgn.dstSubresource = { aspect, 0, 0, 1 };
copy_rgn.srcSubresource = { aspect, 0, 0, 1 };
copy_rgn.extent = { copy_w, copy_h, 1 };
vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, &copy_rgn);
vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range);
surface->dirty = false;
}
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
surface->old_contents = nullptr;
};
//Prepare surfaces if needed
for (auto &rtt : m_rtts.m_bound_render_targets)
{
if (auto surface = std::get<1>(rtt))
{
if (surface->old_contents != nullptr)
copy_rtt_contents(surface);
}
}
if (ds && ds->old_contents)
{
copy_rtt_contents(ds);
}
}
u32 occlusion_id = 0;
if (m_occlusion_query_active)
{