forked from ShuriZma/suyu
1
0
Fork 0

Merge pull request #3805 from ReinUsesLisp/preserve-contents

texture_cache: Reintroduce preserve_contents accurately
This commit is contained in:
bunnei 2020-04-30 12:56:19 -04:00 committed by GitHub
commit da2b8295e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 81 additions and 41 deletions

View File

@ -348,7 +348,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
texture_cache.GuardRenderTargets(true); texture_cache.GuardRenderTargets(true);
View depth_surface = texture_cache.GetDepthBufferSurface(); View depth_surface = texture_cache.GetDepthBufferSurface(true);
const auto& regs = gpu.regs; const auto& regs = gpu.regs;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
@ -357,7 +357,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
FramebufferCacheKey key; FramebufferCacheKey key;
const auto colors_count = static_cast<std::size_t>(regs.rt_control.count); const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
for (std::size_t index = 0; index < colors_count; ++index) { for (std::size_t index = 0; index < colors_count; ++index) {
View color_surface{texture_cache.GetColorBufferSurface(index)}; View color_surface{texture_cache.GetColorBufferSurface(index, true)};
if (!color_surface) { if (!color_surface) {
continue; continue;
} }
@ -381,28 +381,52 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
} }
void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
bool using_stencil_fb) {
auto& gpu = system.GPU().Maxwell3D(); auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs; const auto& regs = gpu.regs;
texture_cache.GuardRenderTargets(true); texture_cache.GuardRenderTargets(true);
View color_surface; View color_surface;
if (using_color_fb) {
if (using_color) {
// Determine if we have to preserve the contents.
// First we have to make sure all clear masks are enabled.
bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
!regs.clear_buffers.B || !regs.clear_buffers.A;
const std::size_t index = regs.clear_buffers.RT; const std::size_t index = regs.clear_buffers.RT;
color_surface = texture_cache.GetColorBufferSurface(index); if (regs.clear_flags.scissor) {
// Then we have to confirm scissor testing clears the whole image.
const auto& scissor = regs.scissor_test[0];
preserve_contents |= scissor.min_x > 0;
preserve_contents |= scissor.min_y > 0;
preserve_contents |= scissor.max_x < regs.rt[index].width;
preserve_contents |= scissor.max_y < regs.rt[index].height;
}
color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
texture_cache.MarkColorBufferInUse(index); texture_cache.MarkColorBufferInUse(index);
} }
View depth_surface; View depth_surface;
if (using_depth_fb || using_stencil_fb) { if (using_depth_stencil) {
depth_surface = texture_cache.GetDepthBufferSurface(); bool preserve_contents = false;
if (regs.clear_flags.scissor) {
// For depth stencil clears we only have to confirm scissor test covers the whole image.
const auto& scissor = regs.scissor_test[0];
preserve_contents |= scissor.min_x > 0;
preserve_contents |= scissor.min_y > 0;
preserve_contents |= scissor.max_x < regs.zeta_width;
preserve_contents |= scissor.max_y < regs.zeta_height;
}
depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
texture_cache.MarkDepthBufferInUse(); texture_cache.MarkDepthBufferInUse();
} }
texture_cache.GuardRenderTargets(false); texture_cache.GuardRenderTargets(false);
FramebufferCacheKey key; FramebufferCacheKey key;
key.colors[0] = color_surface; key.colors[0] = std::move(color_surface);
key.zeta = depth_surface; key.zeta = std::move(depth_surface);
state_tracker.NotifyFramebuffer(); state_tracker.NotifyFramebuffer();
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
@ -422,8 +446,7 @@ void RasterizerOpenGL::Clear() {
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) { regs.clear_buffers.A) {
use_color = true; use_color = true;
}
if (use_color) {
state_tracker.NotifyColorMask0(); state_tracker.NotifyColorMask0();
glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
@ -461,7 +484,7 @@ void RasterizerOpenGL::Clear() {
UNIMPLEMENTED_IF(regs.clear_flags.viewport); UNIMPLEMENTED_IF(regs.clear_flags.viewport);
ConfigureClearFramebuffer(use_color, use_depth, use_stencil); ConfigureClearFramebuffer(use_color, use_depth || use_stencil);
if (use_color) { if (use_color) {
glClearBufferfv(GL_COLOR, 0, regs.clear_color); glClearBufferfv(GL_COLOR, 0, regs.clear_color);

View File

@ -95,7 +95,8 @@ private:
/// Configures the color and depth framebuffer states. /// Configures the color and depth framebuffer states.
void ConfigureFramebuffers(); void ConfigureFramebuffers();
void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb); /// Configures the color and depth framebuffer for clearing.
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
/// Configures the current constbuffers to use for the draw command. /// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);

View File

@ -656,7 +656,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
Texceptions texceptions; Texceptions texceptions;
for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
if (update_rendertargets) { if (update_rendertargets) {
color_attachments[rt] = texture_cache.GetColorBufferSurface(rt); color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
} }
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
texceptions[rt] = true; texceptions[rt] = true;
@ -664,7 +664,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
} }
if (update_rendertargets) { if (update_rendertargets) {
zeta_attachment = texture_cache.GetDepthBufferSurface(); zeta_attachment = texture_cache.GetDepthBufferSurface(true);
} }
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
texceptions[ZETA_TEXCEPTION_INDEX] = true; texceptions[ZETA_TEXCEPTION_INDEX] = true;

View File

@ -143,7 +143,7 @@ public:
} }
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false); const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
if (guard_samplers) { if (guard_samplers) {
sampled_textures.push_back(surface); sampled_textures.push_back(surface);
} }
@ -163,7 +163,7 @@ public:
return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
} }
const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false); const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
if (guard_samplers) { if (guard_samplers) {
sampled_textures.push_back(surface); sampled_textures.push_back(surface);
} }
@ -178,7 +178,7 @@ public:
return any_rt; return any_rt;
} }
TView GetDepthBufferSurface() { TView GetDepthBufferSurface(bool preserve_contents) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
auto& maxwell3d = system.GPU().Maxwell3D(); auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
@ -199,7 +199,7 @@ public:
return {}; return {};
} }
const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true); auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target) if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT); depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = surface_view.first; depth_buffer.target = surface_view.first;
@ -209,7 +209,7 @@ public:
return surface_view.second; return surface_view.second;
} }
TView GetColorBufferSurface(std::size_t index) { TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
auto& maxwell3d = system.GPU().Maxwell3D(); auto& maxwell3d = system.GPU().Maxwell3D();
@ -239,8 +239,9 @@ public:
return {}; return {};
} }
auto surface_view = GetSurface(gpu_addr, *cpu_addr, auto surface_view =
SurfaceParams::CreateForFramebuffer(system, index), true); GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents, true);
if (render_targets[index].target) { if (render_targets[index].target) {
auto& surface = render_targets[index].target; auto& surface = render_targets[index].target;
surface->MarkAsRenderTarget(false, NO_RT); surface->MarkAsRenderTarget(false, NO_RT);
@ -300,9 +301,9 @@ public:
const std::optional<VAddr> src_cpu_addr = const std::optional<VAddr> src_cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
std::pair<TSurface, TView> dst_surface = std::pair<TSurface, TView> dst_surface =
GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false); GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
std::pair<TSurface, TView> src_surface = std::pair<TSurface, TView> src_surface =
GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false); GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
ImageBlit(src_surface.second, dst_surface.second, copy_config); ImageBlit(src_surface.second, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick()); dst_surface.first->MarkAsModified(true, Tick());
} }
@ -532,18 +533,22 @@ private:
* @param overlaps The overlapping surfaces registered in the cache. * @param overlaps The overlapping surfaces registered in the cache.
* @param params The parameters for the new surface. * @param params The parameters for the new surface.
* @param gpu_addr The starting address of the new surface. * @param gpu_addr The starting address of the new surface.
* @param preserve_contents Indicates that the new surface should be loaded from memory or left
* blank.
* @param untopological Indicates to the recycler that the texture has no way to match the * @param untopological Indicates to the recycler that the texture has no way to match the
* overlaps due to topological reasons. * overlaps due to topological reasons.
**/ **/
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const GPUVAddr gpu_addr, const SurfaceParams& params, const GPUVAddr gpu_addr,
const bool preserve_contents,
const MatchTopologyResult untopological) { const MatchTopologyResult untopological) {
const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
for (auto& surface : overlaps) { for (auto& surface : overlaps) {
Unregister(surface); Unregister(surface);
} }
switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
case RecycleStrategy::Ignore: { case RecycleStrategy::Ignore: {
return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme()); return InitializeSurface(gpu_addr, params, do_load);
} }
case RecycleStrategy::Flush: { case RecycleStrategy::Flush: {
std::sort(overlaps.begin(), overlaps.end(), std::sort(overlaps.begin(), overlaps.end(),
@ -553,7 +558,7 @@ private:
for (auto& surface : overlaps) { for (auto& surface : overlaps) {
FlushSurface(surface); FlushSurface(surface);
} }
return InitializeSurface(gpu_addr, params); return InitializeSurface(gpu_addr, params, preserve_contents);
} }
case RecycleStrategy::BufferCopy: { case RecycleStrategy::BufferCopy: {
auto new_surface = GetUncachedSurface(gpu_addr, params); auto new_surface = GetUncachedSurface(gpu_addr, params);
@ -562,7 +567,7 @@ private:
} }
default: { default: {
UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
return InitializeSurface(gpu_addr, params); return InitializeSurface(gpu_addr, params, do_load);
} }
} }
} }
@ -700,11 +705,14 @@ private:
* @param params The parameters on the new surface. * @param params The parameters on the new surface.
* @param gpu_addr The starting address of the new surface. * @param gpu_addr The starting address of the new surface.
* @param cpu_addr The starting address of the new surface on physical memory. * @param cpu_addr The starting address of the new surface on physical memory.
* @param preserve_contents Indicates that the new surface should be loaded from memory or
* left blank.
*/ */
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const SurfaceParams& params,
const GPUVAddr gpu_addr, const GPUVAddr gpu_addr,
const VAddr cpu_addr) { const VAddr cpu_addr,
bool preserve_contents) {
if (params.target == SurfaceTarget::Texture3D) { if (params.target == SurfaceTarget::Texture3D) {
bool failed = false; bool failed = false;
if (params.num_levels > 1) { if (params.num_levels > 1) {
@ -754,7 +762,7 @@ private:
return std::nullopt; return std::nullopt;
} }
Unregister(surface); Unregister(surface);
return InitializeSurface(gpu_addr, params); return InitializeSurface(gpu_addr, params, preserve_contents);
} }
return std::nullopt; return std::nullopt;
} }
@ -765,7 +773,7 @@ private:
return {{surface, surface->GetMainView()}}; return {{surface, surface->GetMainView()}};
} }
} }
return InitializeSurface(gpu_addr, params); return InitializeSurface(gpu_addr, params, preserve_contents);
} }
} }
@ -788,10 +796,13 @@ private:
* *
* @param gpu_addr The starting address of the candidate surface. * @param gpu_addr The starting address of the candidate surface.
* @param params The parameters on the candidate surface. * @param params The parameters on the candidate surface.
* @param preserve_contents Indicates that the new surface should be loaded from memory or
* left blank.
* @param is_render Whether or not the surface is a render target. * @param is_render Whether or not the surface is a render target.
**/ **/
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
const SurfaceParams& params, bool is_render) { const SurfaceParams& params, bool preserve_contents,
bool is_render) {
// Step 1 // Step 1
// Check Level 1 Cache for a fast structural match. If candidate surface // Check Level 1 Cache for a fast structural match. If candidate surface
// matches at certain level we are pretty much done. // matches at certain level we are pretty much done.
@ -800,7 +811,8 @@ private:
const auto topological_result = current_surface->MatchesTopology(params); const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) { if (topological_result != MatchTopologyResult::FullMatch) {
std::vector<TSurface> overlaps{current_surface}; std::vector<TSurface> overlaps{current_surface};
return RecycleSurface(overlaps, params, gpu_addr, topological_result); return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
} }
const auto struct_result = current_surface->MatchesStructure(params); const auto struct_result = current_surface->MatchesStructure(params);
@ -825,7 +837,7 @@ private:
// If none are found, we are done. we just load the surface and create it. // If none are found, we are done. we just load the surface and create it.
if (overlaps.empty()) { if (overlaps.empty()) {
return InitializeSurface(gpu_addr, params); return InitializeSurface(gpu_addr, params, preserve_contents);
} }
// Step 3 // Step 3
@ -835,13 +847,15 @@ private:
for (const auto& surface : overlaps) { for (const auto& surface : overlaps) {
const auto topological_result = surface->MatchesTopology(params); const auto topological_result = surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) { if (topological_result != MatchTopologyResult::FullMatch) {
return RecycleSurface(overlaps, params, gpu_addr, topological_result); return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
} }
} }
// Check if it's a 3D texture // Check if it's a 3D texture
if (params.block_depth > 0) { if (params.block_depth > 0) {
auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr); auto surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
if (surface) { if (surface) {
return *surface; return *surface;
} }
@ -861,7 +875,8 @@ private:
return *view; return *view;
} }
} }
return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch); return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
} }
// Now we check if the candidate is a mipmap/layer of the overlap // Now we check if the candidate is a mipmap/layer of the overlap
std::optional<TView> view = std::optional<TView> view =
@ -885,7 +900,7 @@ private:
pair.first->EmplaceView(params, gpu_addr, candidate_size); pair.first->EmplaceView(params, gpu_addr, candidate_size);
if (mirage_view) if (mirage_view)
return {pair.first, *mirage_view}; return {pair.first, *mirage_view};
return RecycleSurface(overlaps, params, gpu_addr, return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch); MatchTopologyResult::FullMatch);
} }
return {current_surface, *view}; return {current_surface, *view};
@ -901,7 +916,8 @@ private:
} }
} }
// We failed all the tests, recycle the overlaps into a new texture. // We failed all the tests, recycle the overlaps into a new texture.
return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch); return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
} }
/** /**
@ -1059,10 +1075,10 @@ private:
} }
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
bool do_load = true) { bool preserve_contents) {
auto new_surface{GetUncachedSurface(gpu_addr, params)}; auto new_surface{GetUncachedSurface(gpu_addr, params)};
Register(new_surface); Register(new_surface);
if (do_load) { if (preserve_contents) {
LoadSurface(new_surface); LoadSurface(new_surface);
} }
return {new_surface, new_surface->GetMainView()}; return {new_surface, new_surface->GetMainView()};