From de940a5fd6a07728c51d53ebdab7489aa03cdb55 Mon Sep 17 00:00:00 2001 From: Michael Maltese Date: Sun, 5 Mar 2017 15:17:54 -0800 Subject: [PATCH 1/6] VideoConfig: add bSupportsFragmentStoresAndAtomics --- Source/Core/Core/Analytics.cpp | 2 ++ Source/Core/VideoBackends/D3D/main.cpp | 3 ++- Source/Core/VideoBackends/D3D12/main.cpp | 3 ++- Source/Core/VideoBackends/OGL/BoundingBox.cpp | 6 ++++-- Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp | 2 +- Source/Core/VideoBackends/OGL/Render.cpp | 4 +++- Source/Core/VideoBackends/Vulkan/VulkanContext.cpp | 4 +++- Source/Core/VideoCommon/PixelShaderGen.cpp | 1 + Source/Core/VideoCommon/VideoConfig.h | 1 + 9 files changed, 19 insertions(+), 7 deletions(-) diff --git a/Source/Core/Core/Analytics.cpp b/Source/Core/Core/Analytics.cpp index 8638b79dba..a3060833c9 100644 --- a/Source/Core/Core/Analytics.cpp +++ b/Source/Core/Core/Analytics.cpp @@ -239,6 +239,8 @@ void DolphinAnalytics::MakePerGameBuilder() builder.AddData("gpu-has-early-z", g_Config.backend_info.bSupportsEarlyZ); builder.AddData("gpu-has-binding-layout", g_Config.backend_info.bSupportsBindingLayout); builder.AddData("gpu-has-bbox", g_Config.backend_info.bSupportsBBox); + builder.AddData("gpu-has-fragment-stores-and-atomics", + g_Config.backend_info.bSupportsFragmentStoresAndAtomics); builder.AddData("gpu-has-gs-instancing", g_Config.backend_info.bSupportsGSInstancing); builder.AddData("gpu-has-post-processing", g_Config.backend_info.bSupportsPostProcessing); builder.AddData("gpu-has-palette-conversion", g_Config.backend_info.bSupportsPaletteConversion); diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index 2fc00dc037..e8a51d3441 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -112,7 +112,8 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsEarlyZ = shader_model_5_supported; // Requires full UAV functionality (only available in shader model 5) 
- g_Config.backend_info.bSupportsBBox = shader_model_5_supported; + g_Config.backend_info.bSupportsBBox = + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = shader_model_5_supported; // Requires the instance attribute (only available in shader model 5) g_Config.backend_info.bSupportsGSInstancing = shader_model_5_supported; diff --git a/Source/Core/VideoBackends/D3D12/main.cpp b/Source/Core/VideoBackends/D3D12/main.cpp index bae916e0f8..7a48647c0e 100644 --- a/Source/Core/VideoBackends/D3D12/main.cpp +++ b/Source/Core/VideoBackends/D3D12/main.cpp @@ -120,7 +120,8 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsEarlyZ = true; // Requires full UAV functionality (only available in shader model 5) - g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsBBox = + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; // Requires the instance attribute (only available in shader model 5) g_Config.backend_info.bSupportsGSInstancing = true; diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index 0ee4bd2afe..6bce10754e 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -17,7 +17,7 @@ namespace OGL { void BoundingBox::Init() { - if (g_ActiveConfig.backend_info.bSupportsBBox) + if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) { int initial_values[4] = {0, 0, 0, 0}; glGenBuffers(1, &s_bbox_buffer_id); @@ -29,8 +29,10 @@ void BoundingBox::Init() void BoundingBox::Shutdown() { - if (g_ActiveConfig.backend_info.bSupportsBBox) + if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) + { glDeleteBuffers(1, &s_bbox_buffer_id); + } } void BoundingBox::Set(int index, int value) diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index ef91b01f02..508d6e41ed 100644 --- 
a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -618,7 +618,7 @@ void ProgramShaderCache::CreateHeader() "#define SAMPLER_BINDING(x)\n", // Input/output blocks are matched by name during program linking "#define VARYING_LOCATION(x)\n", - !is_glsles && g_ActiveConfig.backend_info.bSupportsBBox ? + !is_glsles && g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics ? "#extension GL_ARB_shader_storage_buffer_object : enable" : "", v < GLSL_400 && g_ActiveConfig.backend_info.bSupportsGSInstancing ? diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index f898d73722..341e6b3686 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -411,7 +411,7 @@ Renderer::Renderer() g_Config.backend_info.bSupportsPrimitiveRestart = !DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART) && ((GLExtensions::Version() >= 310) || GLExtensions::Supports("GL_NV_primitive_restart")); - g_Config.backend_info.bSupportsBBox = + g_Config.backend_info.bSupportsBBox = g_Config.backend_info.bSupportsFragmentStoresAndAtomics = GLExtensions::Supports("GL_ARB_shader_storage_buffer_object"); g_Config.backend_info.bSupportsGSInstancing = GLExtensions::Supports("GL_ARB_gpu_shader5"); g_Config.backend_info.bSupportsSSAA = GLExtensions::Supports("GL_ARB_gpu_shader5") && @@ -498,6 +498,7 @@ Renderer::Renderer() g_Config.backend_info.bSupportsGeometryShaders && g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP; g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; g_ogl_config.bSupportsMSAA = true; g_ogl_config.bSupports2DTextureStorage = true; if (g_ActiveConfig.iStereoMode > 0 && g_ActiveConfig.iMultisamples > 1 && @@ -519,6 +520,7 @@ Renderer::Renderer() g_Config.backend_info.bSupportsPaletteConversion = true; 
g_Config.backend_info.bSupportsSSAA = true; g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; g_ogl_config.bSupportsCopySubImage = true; g_ogl_config.bSupportsGLBaseVertex = true; g_ogl_config.bSupportsDebug = true; diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 1487e09afc..74e7786130 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -240,6 +240,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsGeometryShaders = false; // Dependent on features. config->backend_info.bSupportsGSInstancing = false; // Dependent on features. config->backend_info.bSupportsBBox = false; // Dependent on features. + config->backend_info.bSupportsFragmentStoresAndAtomics = false; // Dependent on features. config->backend_info.bSupportsSSAA = false; // Dependent on features. config->backend_info.bSupportsDepthClamp = false; // Dependent on features. config->backend_info.bSupportsReversedDepthRange = false; // No support yet due to driver bugs. @@ -264,7 +265,8 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD config->backend_info.bSupportsDualSourceBlend = (features.dualSrcBlend == VK_TRUE); config->backend_info.bSupportsGeometryShaders = (features.geometryShader == VK_TRUE); config->backend_info.bSupportsGSInstancing = (features.geometryShader == VK_TRUE); - config->backend_info.bSupportsBBox = (features.fragmentStoresAndAtomics == VK_TRUE); + config->backend_info.bSupportsBBox = config->backend_info.bSupportsFragmentStoresAndAtomics = + (features.fragmentStoresAndAtomics == VK_TRUE); config->backend_info.bSupportsSSAA = (features.sampleRateShading == VK_TRUE); // Disable geometry shader when shaderTessellationAndGeometryPointSize is not supported. 
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 608248fab8..6360a68a37 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -172,6 +172,7 @@ PixelShaderUid GetPixelShaderUid() uid_data->genMode_numtexgens = bpmem.genMode.numtexgens; uid_data->per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting; uid_data->bounding_box = g_ActiveConfig.backend_info.bSupportsBBox && + g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics && g_ActiveConfig.bBBoxEnable && BoundingBox::active; uid_data->rgba6_format = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor; diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index ec79e6de48..b8e348157d 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -189,6 +189,7 @@ struct VideoConfig final bool bSupportsPaletteConversion; bool bSupportsClipControl; // Needed by VertexShaderGen, so must stay in VideoCommon bool bSupportsSSAA; + bool bSupportsFragmentStoresAndAtomics; // a.k.a. OpenGL SSBOs a.k.a. 
Direct3D UAVs bool bSupportsDepthClamp; // Needed by VertexShaderGen, so must stay in VideoCommon bool bSupportsReversedDepthRange; bool bSupportsMultithreading; From 69cedf4144293153d0b40749ae178501db529c89 Mon Sep 17 00:00:00 2001 From: Michael Maltese Date: Thu, 9 Mar 2017 14:14:47 -0800 Subject: [PATCH 2/6] extract OGL::FramebufferManager::CreateTexture --- .../VideoBackends/OGL/FramebufferManager.cpp | 150 ++++++------------ .../VideoBackends/OGL/FramebufferManager.h | 2 + 2 files changed, 49 insertions(+), 103 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp index 45a4b196a0..144fa20d36 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp @@ -12,6 +12,7 @@ #include "Common/CommonTypes.h" #include "Common/GL/GLInterfaceBase.h" #include "Common/Logging/Log.h" +#include "Common/MsgHandler.h" #include "Core/HW/Memmap.h" @@ -49,6 +50,44 @@ GLuint FramebufferManager::m_EfbPokes_VBO; GLuint FramebufferManager::m_EfbPokes_VAO; SHADER FramebufferManager::m_EfbPokes; +GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_format, + GLenum pixel_format, GLenum data_type) +{ + GLuint texture; + glGenTextures(1, &texture); + glBindTexture(texture_type, texture); + if (texture_type == GL_TEXTURE_2D_ARRAY) + { + glTexParameteri(texture_type, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage3D(texture_type, 0, internal_format, m_targetWidth, m_targetHeight, m_EFBLayers, 0, + pixel_format, data_type, nullptr); + } + else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) + { + if (g_ogl_config.bSupports3DTextureStorage) + glTexStorage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, + m_targetHeight, m_EFBLayers, false); + else + glTexImage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, + m_targetHeight, m_EFBLayers, false); + } + else if (texture_type == 
GL_TEXTURE_2D_MULTISAMPLE) + { + if (g_ogl_config.bSupports2DTextureStorage) + glTexStorage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, + m_targetHeight, false); + else + glTexImage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, + m_targetHeight, false); + } + else + { + PanicAlert("Unhandled texture type %d", texture_type); + } + glBindTexture(texture_type, 0); + return texture; +} + FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int msaaSamples) { m_xfbFramebuffer = 0; @@ -76,132 +115,33 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glActiveTexture(GL_TEXTURE9); - GLuint glObj[3]; - glGenTextures(3, glObj); - m_efbColor = glObj[0]; - m_efbDepth = glObj[1]; - m_efbColorSwap = glObj[2]; - m_EFBLayers = (g_ActiveConfig.iStereoMode > 0) ? 2 : 1; m_efbFramebuffer.resize(m_EFBLayers); m_resolvedFramebuffer.resize(m_EFBLayers); - // OpenGL MSAA textures are a different kind of texture type and must be allocated - // with a different function, so we create them separately. if (m_msaaSamples <= 1) { m_textureType = GL_TEXTURE_2D_ARRAY; - - glBindTexture(m_textureType, m_efbColor); - glTexParameteri(m_textureType, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(m_textureType, 0, GL_RGBA, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_RGBA, - GL_UNSIGNED_BYTE, nullptr); - - glBindTexture(m_textureType, m_efbDepth); - glTexParameteri(m_textureType, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(m_textureType, 0, GL_DEPTH_COMPONENT32F, m_targetWidth, m_targetHeight, - m_EFBLayers, 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); - - glBindTexture(m_textureType, m_efbColorSwap); - glTexParameteri(m_textureType, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(m_textureType, 0, GL_RGBA, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_RGBA, - GL_UNSIGNED_BYTE, nullptr); } else { - GLenum resolvedType = GL_TEXTURE_2D_ARRAY; - // Only use a layered multisample texture if needed. 
Some drivers // slow down significantly with single-layered multisample textures. if (m_EFBLayers > 1) - { m_textureType = GL_TEXTURE_2D_MULTISAMPLE_ARRAY; - - if (g_ogl_config.bSupports3DTextureStorage) - { - glBindTexture(m_textureType, m_efbColor); - glTexStorage3DMultisample(m_textureType, m_msaaSamples, GL_RGBA8, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - - glBindTexture(m_textureType, m_efbDepth); - glTexStorage3DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT32F, - m_targetWidth, m_targetHeight, m_EFBLayers, false); - - glBindTexture(m_textureType, m_efbColorSwap); - glTexStorage3DMultisample(m_textureType, m_msaaSamples, GL_RGBA8, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - glBindTexture(m_textureType, 0); - } - else - { - glBindTexture(m_textureType, m_efbColor); - glTexImage3DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - - glBindTexture(m_textureType, m_efbDepth); - glTexImage3DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT32F, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - - glBindTexture(m_textureType, m_efbColorSwap); - glTexImage3DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - glBindTexture(m_textureType, 0); - } - } else - { m_textureType = GL_TEXTURE_2D_MULTISAMPLE; - if (g_ogl_config.bSupports2DTextureStorage) - { - glBindTexture(m_textureType, m_efbColor); - glTexStorage2DMultisample(m_textureType, m_msaaSamples, GL_RGBA8, m_targetWidth, - m_targetHeight, false); - - glBindTexture(m_textureType, m_efbDepth); - glTexStorage2DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT32F, - m_targetWidth, m_targetHeight, false); - - glBindTexture(m_textureType, m_efbColorSwap); - glTexStorage2DMultisample(m_textureType, m_msaaSamples, GL_RGBA8, m_targetWidth, - m_targetHeight, false); - glBindTexture(m_textureType, 0); - } - else - { - glBindTexture(m_textureType, 
m_efbColor); - glTexImage2DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, - m_targetHeight, false); - - glBindTexture(m_textureType, m_efbDepth); - glTexImage2DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT32F, m_targetWidth, - m_targetHeight, false); - - glBindTexture(m_textureType, m_efbColorSwap); - glTexImage2DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, - m_targetHeight, false); - glBindTexture(m_textureType, 0); - } - } - // Although we are able to access the multisampled texture directly, we don't do it everywhere. // The old way is to "resolve" this multisampled texture by copying it into a non-sampled // texture. // This would lead to an unneeded copy of the EFB, so we are going to avoid it. // But as this job isn't done right now, we do need that texture for resolving: - glGenTextures(2, glObj); - m_resolvedColorTexture = glObj[0]; - m_resolvedDepthTexture = glObj[1]; + GLenum resolvedType = GL_TEXTURE_2D_ARRAY; - glBindTexture(resolvedType, m_resolvedColorTexture); - glTexParameteri(resolvedType, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(resolvedType, 0, GL_RGBA, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_RGBA, - GL_UNSIGNED_BYTE, nullptr); - - glBindTexture(resolvedType, m_resolvedDepthTexture); - glTexParameteri(resolvedType, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(resolvedType, 0, GL_DEPTH_COMPONENT32F, m_targetWidth, m_targetHeight, m_EFBLayers, - 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); + m_resolvedColorTexture = CreateTexture(resolvedType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); + m_resolvedDepthTexture = + CreateTexture(resolvedType, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT); // Bind resolved textures to resolved framebuffer. 
glGenFramebuffers(m_EFBLayers, m_resolvedFramebuffer.data()); @@ -220,6 +160,10 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms } } + m_efbColor = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); + m_efbDepth = CreateTexture(m_textureType, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT); + m_efbColorSwap = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); + // Create XFB framebuffer; targets will be created elsewhere. glGenFramebuffers(1, &m_xfbFramebuffer); diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.h b/Source/Core/VideoBackends/OGL/FramebufferManager.h index 84270777c4..6083d97f34 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.h +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.h @@ -102,6 +102,8 @@ public: static void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points); private: + GLuint CreateTexture(GLenum texture_type, GLenum internal_format, GLenum pixel_format, + GLenum data_type); std::unique_ptr CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) override; From 56fe938366950062984e11364af5ce1ffe474f27 Mon Sep 17 00:00:00 2001 From: Michael Maltese Date: Thu, 9 Mar 2017 14:55:31 -0800 Subject: [PATCH 3/6] extract OGL::FramebufferManager::BindLayeredTexture --- .../VideoBackends/OGL/FramebufferManager.cpp | 42 ++++++++----------- .../VideoBackends/OGL/FramebufferManager.h | 1 + 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp index 144fa20d36..001cbb59fc 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp @@ -88,6 +88,17 @@ GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_fo return texture; } +void FramebufferManager::BindLayeredTexture(GLuint texture, const 
std::vector& framebuffers, GLenum attachment, GLenum texture_type) +{ + glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[0]); + FramebufferTexture(GL_FRAMEBUFFER, attachment, texture_type, texture, 0); + // Bind all the other layers as separate FBOs for blitting. + for (unsigned int i = 1; i < m_EFBLayers; i++) { + glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[i]); + glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, texture, 0, i); + } +} + FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int msaaSamples) { m_xfbFramebuffer = 0; @@ -145,23 +156,13 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms // Bind resolved textures to resolved framebuffer. glGenFramebuffers(m_EFBLayers, m_resolvedFramebuffer.data()); - glBindFramebuffer(GL_FRAMEBUFFER, m_resolvedFramebuffer[0]); - FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, resolvedType, m_resolvedColorTexture, - 0); - FramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, resolvedType, m_resolvedDepthTexture, - 0); - - // Bind all the other layers as separate FBOs for blitting. 
- for (unsigned int i = 1; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_FRAMEBUFFER, m_resolvedFramebuffer[i]); - glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_resolvedColorTexture, 0, i); - glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, m_resolvedDepthTexture, 0, i); - } + BindLayeredTexture(m_resolvedColorTexture, m_resolvedFramebuffer, GL_COLOR_ATTACHMENT0, resolvedType); + BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_DEPTH_ATTACHMENT, resolvedType); } m_efbColor = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); - m_efbDepth = CreateTexture(m_textureType, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT); + m_efbDepth = CreateTexture(m_textureType, GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV); m_efbColorSwap = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); // Create XFB framebuffer; targets will be created elsewhere. @@ -169,17 +170,8 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms // Bind target textures to EFB framebuffer. glGenFramebuffers(m_EFBLayers, m_efbFramebuffer.data()); - glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); - FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textureType, m_efbColor, 0); - FramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, m_textureType, m_efbDepth, 0); - - // Bind all the other layers as separate FBOs for blitting. 
- for (unsigned int i = 1; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[i]); - glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_efbColor, 0, i); - glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, m_efbDepth, 0, i); - } + BindLayeredTexture(m_efbColor, m_efbFramebuffer, GL_COLOR_ATTACHMENT0, m_textureType); + BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_DEPTH_ATTACHMENT, m_textureType); // EFB framebuffer is currently bound, make sure to clear it before use. glViewport(0, 0, m_targetWidth, m_targetHeight); diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.h b/Source/Core/VideoBackends/OGL/FramebufferManager.h index 6083d97f34..54dd93936b 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.h +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.h @@ -104,6 +104,7 @@ public: private: GLuint CreateTexture(GLenum texture_type, GLenum internal_format, GLenum pixel_format, GLenum data_type); + void BindLayeredTexture(GLuint texture, const std::vector& framebuffers, GLenum attachment, GLenum texture_type); std::unique_ptr CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) override; From ba6e917b49fcb192bc658557742106e29159dff7 Mon Sep 17 00:00:00 2001 From: Michael Maltese Date: Sun, 5 Mar 2017 15:34:30 -0800 Subject: [PATCH 4/6] OGL: implement Bounding Box on systems w/o SSBO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit should have zero performance effect if SSBOs are supported. If they aren't (e.g. on all Macs), this commit alters FramebufferManager to attach a new stencil buffer and VertexManager to draw to it when bounding box is active. `BBoxRead` gets the pixel data from the buffer and dumbly loops through it to find the bounding box. 
This patch can run Paper Mario: The Thousand-Year Door at almost full speed (50–60 FPS) without Dual-Core enabled for all common bounding box-using actions I tested (going through pipes, Plane Mode, Paper Mode, Prof. Frankly's gate, combat, walking around the overworld, etc.) on my computer (macOS 10.12.3, 2.8 GHz Intel Core i7, 16 GB 1600 MHz DDR3, and Intel Iris 1536 MB). A few more demanding scenes (e.g. the self-building bridge on the way to Petalburg) slow to ~15% of the speed they run at without this patch (though they don't run quite at full speed even on master). The slowdown is caused almost solely by `glReadPixels` in `OGL::BoundingBox::Get`. Other implementation ideas: - Use a stencil buffer that's separate from the depth buffer. This would require ARB_texture_stencil8 / OpenGL 4.4, which isn't available on macOS. - Use `glGetTexImage` instead of `glReadPixels`. This is ~5 FPS slower on my computer, presumably because it has to transfer the entire combined depth-stencil buffer instead of only the stencil data. Getting only stencil data from `glGetTexImage` requires ARB_texture_stencil8 / OpenGL 4.4, which (again) is not available on macOS. - Don't use a PBO, and use `glReadPixels` synchronously. This has no visible performance effect on my computer, and is theoretically slower. 
--- Source/Core/VideoBackends/OGL/BoundingBox.cpp | 142 +++++++++++++++--- Source/Core/VideoBackends/OGL/BoundingBox.h | 9 +- .../VideoBackends/OGL/FramebufferManager.cpp | 32 +++- .../VideoBackends/OGL/FramebufferManager.h | 3 +- Source/Core/VideoBackends/OGL/Render.cpp | 18 ++- .../Core/VideoBackends/OGL/VertexManager.cpp | 14 +- Source/Core/VideoBackends/OGL/main.cpp | 2 +- 7 files changed, 184 insertions(+), 36 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index 6bce10754e..44ba0ab118 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -2,20 +2,45 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. +#include +#include #include #include "Common/GL/GLUtil.h" #include "VideoBackends/OGL/BoundingBox.h" +#include "VideoBackends/OGL/FramebufferManager.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/VideoConfig.h" static GLuint s_bbox_buffer_id; +static GLuint s_pbo; + +static std::array s_stencil_bounds; +static bool s_stencil_updated; +static bool s_stencil_cleared; + +static int s_target_width; +static int s_target_height; namespace OGL { -void BoundingBox::Init() +void BoundingBox::SetTargetSizeChanged(int target_width, int target_height) +{ + if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) + return; + + s_target_width = target_width; + s_target_height = target_height; + s_stencil_updated = false; + + glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo); + glBufferData(GL_PIXEL_PACK_BUFFER, s_target_width * s_target_height, nullptr, GL_STREAM_READ); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); +} + +void BoundingBox::Init(int target_width, int target_height) { if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) { @@ -25,6 +50,12 @@ void BoundingBox::Init() glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW); 
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id); } + else + { + s_stencil_bounds = {{0, 0, 0, 0}}; + glGenBuffers(1, &s_pbo); + SetTargetSizeChanged(target_width, target_height); + } } void BoundingBox::Shutdown() @@ -33,40 +64,107 @@ void BoundingBox::Shutdown() { glDeleteBuffers(1, &s_bbox_buffer_id); } + else + { + glDeleteBuffers(1, &s_pbo); + } } void BoundingBox::Set(int index, int value) { - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) + { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + } + else + { + s_stencil_bounds[index] = value; + + if (!s_stencil_cleared) + { + // Assumes that the EFB framebuffer is currently bound + glClearStencil(0); + glClear(GL_STENCIL_BUFFER_BIT); + s_stencil_updated = false; + s_stencil_cleared = true; + } + } } int BoundingBox::Get(int index) { - int data = 0; - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - - if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA)) + if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) { - // Using glMapBufferRange to read back the contents of the SSBO is extremely slow - // on nVidia drivers. This is more noticeable at higher internal resolutions. - // Using glGetBufferSubData instead does not seem to exhibit this slowdown. - glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); + int data = 0; + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA)) + { + // Using glMapBufferRange to read back the contents of the SSBO is extremely slow + // on nVidia drivers. 
This is more noticeable at higher internal resolutions. + // Using glGetBufferSubData instead does not seem to exhibit this slowdown. + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); + } + else + { + // Using glMapBufferRange is faster on AMD cards by a measurable margin. + void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), + GL_MAP_READ_BIT); + if (ptr) + { + memcpy(&data, ptr, sizeof(int)); + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + } + } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + return data; } else { - // Using glMapBufferRange is faster on AMD cards by a measurable margin. - void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), - GL_MAP_READ_BIT); - if (ptr) + if (s_stencil_updated) { - memcpy(&data, ptr, sizeof(int)); - glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); - } - } + s_stencil_updated = false; - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); - return data; + FramebufferManager::ResolveEFBStencilTexture(); + glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetResolvedFramebuffer()); + glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo); + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glReadPixels(0, 0, s_target_width, s_target_height, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0); + glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetEFBFramebuffer()); + + // Eke every bit of performance out of the compiler that we can + std::array bounds = s_stencil_bounds; + + u8* data = static_cast(glMapBufferRange( + GL_PIXEL_PACK_BUFFER, 0, s_target_height * s_target_width, GL_MAP_READ_BIT)); + + for (int row = 0; row < s_target_height; row++) + { + for (int col = 0; col < s_target_width; col++) + { + if (data[row * s_target_width + col] == 0) + continue; + bounds[0] = std::min(bounds[0], col); + bounds[1] = std::max(bounds[1], col); + bounds[2] = std::min(bounds[2], row); + bounds[3] = std::max(bounds[3], row); + } + } + + s_stencil_bounds = bounds; + + 
glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + } + + return s_stencil_bounds[index]; + } +} + +void BoundingBox::StencilWasUpdated() +{ + s_stencil_updated = true; + s_stencil_cleared = false; } }; diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.h b/Source/Core/VideoBackends/OGL/BoundingBox.h index 0aedff54df..033ea56cd5 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.h +++ b/Source/Core/VideoBackends/OGL/BoundingBox.h @@ -9,9 +9,16 @@ namespace OGL class BoundingBox { public: - static void Init(); + static void Init(int target_width, int target_height); static void Shutdown(); + static void SetTargetSizeChanged(int target_width, int target_height); + + // When SSBO isn't available, the bounding box is calculated directly from the + // stencil buffer. When the stencil buffer is changed, this function needs to + // be called to invalidate the cached bounding box data. + static void StencilWasUpdated(); + static void Set(int index, int value); static int Get(int index); }; diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp index 001cbb59fc..13b6759a01 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp @@ -152,12 +152,13 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms m_resolvedColorTexture = CreateTexture(resolvedType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); m_resolvedDepthTexture = - CreateTexture(resolvedType, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT); + CreateTexture(resolvedType, GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV); // Bind resolved textures to resolved framebuffer. 
glGenFramebuffers(m_EFBLayers, m_resolvedFramebuffer.data()); BindLayeredTexture(m_resolvedColorTexture, m_resolvedFramebuffer, GL_COLOR_ATTACHMENT0, resolvedType); BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_DEPTH_ATTACHMENT, resolvedType); + BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_STENCIL_ATTACHMENT, resolvedType); } m_efbColor = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); @@ -172,13 +173,15 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glGenFramebuffers(m_EFBLayers, m_efbFramebuffer.data()); BindLayeredTexture(m_efbColor, m_efbFramebuffer, GL_COLOR_ATTACHMENT0, m_textureType); BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_DEPTH_ATTACHMENT, m_textureType); + BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_STENCIL_ATTACHMENT, m_textureType); // EFB framebuffer is currently bound, make sure to clear it before use. glViewport(0, 0, m_targetWidth, m_targetHeight); glScissor(0, 0, m_targetWidth, m_targetHeight); glClearColor(0.f, 0.f, 0.f, 0.f); glClearDepthf(1.0f); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + glClearStencil(0); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); // reinterpret pixel format const char* vs = m_EFBLayers > 1 ? "void main(void) {\n" @@ -478,6 +481,24 @@ GLuint FramebufferManager::GetEFBDepthTexture(const EFBRectangle& sourceRc) } } +void FramebufferManager::ResolveEFBStencilTexture() +{ + if (m_msaaSamples <= 1) + return; + + // Resolve. + for (unsigned int i = 0; i < m_EFBLayers; i++) + { + glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer[i]); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer[i]); + glBlitFramebuffer(0, 0, m_targetWidth, m_targetHeight, 0, 0, m_targetWidth, m_targetHeight, + GL_STENCIL_BUFFER_BIT, GL_NEAREST); + } + + // Return to EFB. 
+ glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); +} + void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma) { @@ -493,6 +514,13 @@ void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, sourceRc.GetWidth(), fbStride, fbHeight); } +GLuint FramebufferManager::GetResolvedFramebuffer() +{ + if (m_msaaSamples <= 1) + return m_efbFramebuffer[0]; + return m_resolvedFramebuffer[0]; +} + void FramebufferManager::SetFramebuffer(GLuint fb) { glBindFramebuffer(GL_FRAMEBUFFER, fb != 0 ? fb : GetEFBFramebuffer()); diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.h b/Source/Core/VideoBackends/OGL/FramebufferManager.h index 54dd93936b..93f8e5bad3 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.h +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.h @@ -70,6 +70,7 @@ public: // the EFB to a resolved texture first. static GLuint GetEFBColorTexture(const EFBRectangle& sourceRc); static GLuint GetEFBDepthTexture(const EFBRectangle& sourceRc); + static void ResolveEFBStencilTexture(); static GLuint GetEFBFramebuffer(unsigned int layer = 0) { @@ -77,7 +78,7 @@ public: } static GLuint GetXFBFramebuffer() { return m_xfbFramebuffer; } // Resolved framebuffer is only used in MSAA mode. 
- static GLuint GetResolvedFramebuffer() { return m_resolvedFramebuffer[0]; } + static GLuint GetResolvedFramebuffer(); static void SetFramebuffer(GLuint fb); static void FramebufferTexture(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 341e6b3686..d2124c6c5c 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -411,7 +411,8 @@ Renderer::Renderer() g_Config.backend_info.bSupportsPrimitiveRestart = !DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART) && ((GLExtensions::Version() >= 310) || GLExtensions::Supports("GL_NV_primitive_restart")); - g_Config.backend_info.bSupportsBBox = g_Config.backend_info.bSupportsFragmentStoresAndAtomics = + g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = GLExtensions::Supports("GL_ARB_shader_storage_buffer_object"); g_Config.backend_info.bSupportsGSInstancing = GLExtensions::Supports("GL_ARB_gpu_shader5"); g_Config.backend_info.bSupportsSSAA = GLExtensions::Supports("GL_ARB_gpu_shader5") && @@ -497,7 +498,6 @@ Renderer::Renderer() g_Config.backend_info.bSupportsGSInstancing = g_Config.backend_info.bSupportsGeometryShaders && g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP; - g_Config.backend_info.bSupportsBBox = true; g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; g_ogl_config.bSupportsMSAA = true; g_ogl_config.bSupports2DTextureStorage = true; @@ -519,7 +519,6 @@ Renderer::Renderer() g_Config.backend_info.bSupportsGSInstancing = g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsSSAA = true; - g_Config.backend_info.bSupportsBBox = true; g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; g_ogl_config.bSupportsCopySubImage = 
true; g_ogl_config.bSupportsGLBaseVertex = true; @@ -657,10 +656,13 @@ Renderer::Renderer() // options while running g_Config.bRunning = true; - glStencilFunc(GL_ALWAYS, 0, 0); - glBlendFunc(GL_ONE, GL_ONE); + // The stencil is used for bounding box emulation when SSBOs are not available + glDisable(GL_STENCIL_TEST); + glStencilFunc(GL_ALWAYS, 1, 0xFF); + glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); - glViewport(0, 0, GetTargetWidth(), GetTargetHeight()); // Reset The Current Viewport + // Reset The Current Viewport + glViewport(0, 0, GetTargetWidth(), GetTargetHeight()); if (g_ActiveConfig.backend_info.bSupportsClipControl) glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); @@ -677,10 +679,9 @@ Renderer::Renderer() glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment - glDisable(GL_STENCIL_TEST); glEnable(GL_SCISSOR_TEST); - glScissor(0, 0, GetTargetWidth(), GetTargetHeight()); + glBlendFunc(GL_ONE, GL_ONE); glBlendColor(0, 0, 0, 0.5f); glClearDepthf(1.0f); @@ -1364,6 +1365,7 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, g_framebuffer_manager.reset(); g_framebuffer_manager = std::make_unique(m_target_width, m_target_height, s_MSAASamples); + BoundingBox::SetTargetSizeChanged(m_target_width, m_target_height); } } diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp index bd9878b692..d8d9249965 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp @@ -14,9 +14,11 @@ #include "Common/GL/GLExtensions/GLExtensions.h" #include "Common/StringUtil.h" +#include "VideoBackends/OGL/BoundingBox.h" #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/StreamBuffer.h" +#include "VideoCommon/BoundingBox.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/Statistics.h" @@ -156,8 +158,19 @@ void VertexManager::vFlush() // setup the pointers 
nativeVertexFmt->SetupVertexPointers(); + if (!g_Config.backend_info.bSupportsFragmentStoresAndAtomics && ::BoundingBox::active) + { + glEnable(GL_STENCIL_TEST); + } + Draw(stride); + if (!g_Config.backend_info.bSupportsFragmentStoresAndAtomics && ::BoundingBox::active) + { + OGL::BoundingBox::StencilWasUpdated(); + glDisable(GL_STENCIL_TEST); + } + #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS) { @@ -177,7 +190,6 @@ void VertexManager::vFlush() } #endif g_Config.iSaveTargetId++; - ClearEFBCache(); } diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 3be16a6cbf..c1cf73f413 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -212,7 +212,7 @@ void VideoBackend::Video_Prepare() g_sampler_cache = std::make_unique(); static_cast(g_renderer.get())->Init(); TextureConverter::Init(); - BoundingBox::Init(); + BoundingBox::Init(g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight()); } void VideoBackend::Shutdown() From 53224d41d05a9ec70d0a2715e1a7601a0e60bb14 Mon Sep 17 00:00:00 2001 From: Michael Maltese Date: Thu, 9 Mar 2017 15:33:10 -0800 Subject: [PATCH 5/6] OGL: only create stencil buffer when needed --- Source/Core/VideoBackends/OGL/BoundingBox.cpp | 6 +++ Source/Core/VideoBackends/OGL/BoundingBox.h | 6 ++- .../VideoBackends/OGL/FramebufferManager.cpp | 54 ++++++++++++++----- .../VideoBackends/OGL/FramebufferManager.h | 9 +++- Source/Core/VideoBackends/OGL/Render.cpp | 53 +++++++++--------- 5 files changed, 86 insertions(+), 42 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index 44ba0ab118..2d370a115d 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -167,4 +167,10 @@ void BoundingBox::StencilWasUpdated() s_stencil_updated = true; s_stencil_cleared = false; } + +bool 
BoundingBox::NeedsStencilBuffer() +{ + return g_ActiveConfig.bBBoxEnable && + !g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics; +} }; diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.h b/Source/Core/VideoBackends/OGL/BoundingBox.h index 033ea56cd5..44365c9fbc 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.h +++ b/Source/Core/VideoBackends/OGL/BoundingBox.h @@ -15,8 +15,10 @@ public: static void SetTargetSizeChanged(int target_width, int target_height); // When SSBO isn't available, the bounding box is calculated directly from the - // stencil buffer. When the stencil buffer is changed, this function needs to - // be called to invalidate the cached bounding box data. + // stencil buffer. + static bool NeedsStencilBuffer(); + // When the stencil buffer is changed, this function needs to be called to + // invalidate the cached bounding box data. static void StencilWasUpdated(); static void Set(int index, int value); diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp index 13b6759a01..17244db24e 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp @@ -29,6 +29,7 @@ namespace OGL int FramebufferManager::m_targetWidth; int FramebufferManager::m_targetHeight; int FramebufferManager::m_msaaSamples; +bool FramebufferManager::m_enable_stencil_buffer; GLenum FramebufferManager::m_textureType; std::vector FramebufferManager::m_efbFramebuffer; @@ -88,18 +89,26 @@ GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_fo return texture; } -void FramebufferManager::BindLayeredTexture(GLuint texture, const std::vector& framebuffers, GLenum attachment, GLenum texture_type) +void FramebufferManager::BindLayeredTexture(GLuint texture, const std::vector& framebuffers, + GLenum attachment, GLenum texture_type) { glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[0]); 
FramebufferTexture(GL_FRAMEBUFFER, attachment, texture_type, texture, 0); // Bind all the other layers as separate FBOs for blitting. - for (unsigned int i = 1; i < m_EFBLayers; i++) { + for (unsigned int i = 1; i < m_EFBLayers; i++) + { glBindFramebuffer(GL_FRAMEBUFFER, m_resolvedFramebuffer[i]); glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, texture, 0, i); } } -FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int msaaSamples) +bool FramebufferManager::HasStencilBuffer() +{ + return m_enable_stencil_buffer; +} + +FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int msaaSamples, + bool enable_stencil_buffer) { m_xfbFramebuffer = 0; m_efbColor = 0; @@ -110,8 +119,8 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms m_targetWidth = targetWidth; m_targetHeight = targetHeight; - m_msaaSamples = msaaSamples; + m_enable_stencil_buffer = enable_stencil_buffer; // The EFB can be set to different pixel formats by the game through the // BPMEM_ZCOMPARE register (which should probably have a different name). 
@@ -130,6 +139,16 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms m_efbFramebuffer.resize(m_EFBLayers); m_resolvedFramebuffer.resize(m_EFBLayers); + GLenum depth_internal_format = GL_DEPTH_COMPONENT32F; + GLenum depth_pixel_format = GL_DEPTH_COMPONENT; + GLenum depth_data_type = GL_FLOAT; + if (m_enable_stencil_buffer) + { + depth_internal_format = GL_DEPTH32F_STENCIL8; + depth_pixel_format = GL_DEPTH_STENCIL; + depth_data_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + } + if (m_msaaSamples <= 1) { m_textureType = GL_TEXTURE_2D_ARRAY; @@ -152,18 +171,22 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms m_resolvedColorTexture = CreateTexture(resolvedType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); m_resolvedDepthTexture = - CreateTexture(resolvedType, GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + CreateTexture(resolvedType, depth_internal_format, depth_pixel_format, depth_data_type); // Bind resolved textures to resolved framebuffer. 
glGenFramebuffers(m_EFBLayers, m_resolvedFramebuffer.data()); - BindLayeredTexture(m_resolvedColorTexture, m_resolvedFramebuffer, GL_COLOR_ATTACHMENT0, resolvedType); - BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_DEPTH_ATTACHMENT, resolvedType); - BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_STENCIL_ATTACHMENT, resolvedType); + BindLayeredTexture(m_resolvedColorTexture, m_resolvedFramebuffer, GL_COLOR_ATTACHMENT0, + resolvedType); + BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_DEPTH_ATTACHMENT, + resolvedType); + if (m_enable_stencil_buffer) + BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_STENCIL_ATTACHMENT, + resolvedType); } m_efbColor = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); - m_efbDepth = CreateTexture(m_textureType, GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, - GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + m_efbDepth = + CreateTexture(m_textureType, depth_internal_format, depth_pixel_format, depth_data_type); m_efbColorSwap = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); // Create XFB framebuffer; targets will be created elsewhere. @@ -173,15 +196,20 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glGenFramebuffers(m_EFBLayers, m_efbFramebuffer.data()); BindLayeredTexture(m_efbColor, m_efbFramebuffer, GL_COLOR_ATTACHMENT0, m_textureType); BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_DEPTH_ATTACHMENT, m_textureType); - BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_STENCIL_ATTACHMENT, m_textureType); + if (m_enable_stencil_buffer) + BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_STENCIL_ATTACHMENT, m_textureType); // EFB framebuffer is currently bound, make sure to clear it before use. 
glViewport(0, 0, m_targetWidth, m_targetHeight); glScissor(0, 0, m_targetWidth, m_targetHeight); glClearColor(0.f, 0.f, 0.f, 0.f); glClearDepthf(1.0f); - glClearStencil(0); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + if (m_enable_stencil_buffer) + { + glClearStencil(0); + glClear(GL_STENCIL_BUFFER_BIT); + } // reinterpret pixel format const char* vs = m_EFBLayers > 1 ? "void main(void) {\n" diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.h b/Source/Core/VideoBackends/OGL/FramebufferManager.h index 93f8e5bad3..62b2f5ce3c 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.h +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.h @@ -63,7 +63,8 @@ struct XFBSource : public XFBSourceBase class FramebufferManager : public FramebufferManagerBase { public: - FramebufferManager(int targetWidth, int targetHeight, int msaaSamples); + FramebufferManager(int targetWidth, int targetHeight, int msaaSamples, + bool enable_stencil_buffer); ~FramebufferManager(); // To get the EFB in texture form, these functions may have to transfer @@ -101,11 +102,13 @@ public: static void ReinterpretPixelData(unsigned int convtype); static void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points); + static bool HasStencilBuffer(); private: GLuint CreateTexture(GLenum texture_type, GLenum internal_format, GLenum pixel_format, GLenum data_type); - void BindLayeredTexture(GLuint texture, const std::vector& framebuffers, GLenum attachment, GLenum texture_type); + void BindLayeredTexture(GLuint texture, const std::vector& framebuffers, + GLenum attachment, GLenum texture_type); std::unique_ptr CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) override; @@ -126,6 +129,8 @@ private: static GLuint m_efbColorSwap; // will be hot swapped with m_efbColor when reinterpreting EFB pixel formats + static bool m_enable_stencil_buffer; 
+ // Only used in MSAA mode, TODO: try to avoid them static std::vector m_resolvedFramebuffer; static GLuint m_resolvedColorTexture; diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index d2124c6c5c..69554269cb 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -734,8 +734,8 @@ void Renderer::Shutdown() void Renderer::Init() { // Initialize the FramebufferManager - g_framebuffer_manager = - std::make_unique(m_target_width, m_target_height, s_MSAASamples); + g_framebuffer_manager = std::make_unique( + m_target_width, m_target_height, s_MSAASamples, BoundingBox::NeedsStencilBuffer()); m_post_processor = std::make_unique(); s_raster_font = std::make_unique(); @@ -1338,35 +1338,38 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, } bool target_size_changed = CalculateTargetSize(); - if (target_size_changed || xfbchanged || window_resized || - (s_last_multisamples != g_ActiveConfig.iMultisamples) || - (s_last_stereo_mode != (g_ActiveConfig.iStereoMode > 0))) + bool stencil_buffer_enabled = + static_cast(g_framebuffer_manager.get())->HasStencilBuffer(); + + bool fb_needs_update = target_size_changed || + s_last_multisamples != g_ActiveConfig.iMultisamples || + stencil_buffer_enabled != BoundingBox::NeedsStencilBuffer() || + s_last_stereo_mode != (g_ActiveConfig.iStereoMode > 0); + + if (xfbchanged || window_resized || fb_needs_update) { s_last_xfb_mode = g_ActiveConfig.bUseRealXFB; - UpdateDrawRectangle(); + } + if (fb_needs_update) + { + s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0; + s_last_multisamples = g_ActiveConfig.iMultisamples; + s_MSAASamples = s_last_multisamples; - if (target_size_changed || s_last_multisamples != g_ActiveConfig.iMultisamples || - s_last_stereo_mode != (g_ActiveConfig.iStereoMode > 0)) + if (s_MSAASamples > 1 && s_MSAASamples > g_ogl_config.max_samples) { - s_last_stereo_mode = g_ActiveConfig.iStereoMode > 
0; - s_last_multisamples = g_ActiveConfig.iMultisamples; - s_MSAASamples = s_last_multisamples; - - if (s_MSAASamples > 1 && s_MSAASamples > g_ogl_config.max_samples) - { - s_MSAASamples = g_ogl_config.max_samples; - OSD::AddMessage(StringFromFormat( - "%d Anti Aliasing samples selected, but only %d supported by your GPU.", - s_last_multisamples, g_ogl_config.max_samples), - 10000); - } - - g_framebuffer_manager.reset(); - g_framebuffer_manager = - std::make_unique(m_target_width, m_target_height, s_MSAASamples); - BoundingBox::SetTargetSizeChanged(m_target_width, m_target_height); + s_MSAASamples = g_ogl_config.max_samples; + OSD::AddMessage( + StringFromFormat("%d Anti Aliasing samples selected, but only %d supported by your GPU.", + s_last_multisamples, g_ogl_config.max_samples), + 10000); } + + g_framebuffer_manager.reset(); + g_framebuffer_manager = std::make_unique( + m_target_width, m_target_height, s_MSAASamples, BoundingBox::NeedsStencilBuffer()); + BoundingBox::SetTargetSizeChanged(m_target_width, m_target_height); } // --------------------------------------------------------------------- From 134317e07f7c371000389e7c3f45ceaf046eea69 Mon Sep 17 00:00:00 2001 From: Michael Maltese Date: Fri, 10 Mar 2017 00:07:49 -0800 Subject: [PATCH 6/6] videoconfig: add BBoxPreferStencilImplementation @JMC47 requested this to be able to do performance comparisons. 
--- Source/Core/VideoBackends/OGL/BoundingBox.cpp | 13 ++++++------- Source/Core/VideoBackends/OGL/VertexManager.cpp | 4 ++-- Source/Core/VideoCommon/PixelShaderGen.cpp | 3 +-- Source/Core/VideoCommon/VideoConfig.cpp | 2 ++ Source/Core/VideoCommon/VideoConfig.h | 7 +++++++ 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index 2d370a115d..ebf8433d92 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -28,7 +28,7 @@ namespace OGL { void BoundingBox::SetTargetSizeChanged(int target_width, int target_height) { - if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) + if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) return; s_target_width = target_width; @@ -42,7 +42,7 @@ void BoundingBox::SetTargetSizeChanged(int target_width, int target_height) void BoundingBox::Init(int target_width, int target_height) { - if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) + if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) { int initial_values[4] = {0, 0, 0, 0}; glGenBuffers(1, &s_bbox_buffer_id); @@ -60,7 +60,7 @@ void BoundingBox::Init(int target_width, int target_height) void BoundingBox::Shutdown() { - if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) + if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) { glDeleteBuffers(1, &s_bbox_buffer_id); } @@ -72,7 +72,7 @@ void BoundingBox::Shutdown() void BoundingBox::Set(int index, int value) { - if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) + if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) { glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); @@ -95,7 +95,7 @@ void BoundingBox::Set(int index, int value) int BoundingBox::Get(int index) { - if 
(g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) + if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) { int data = 0; glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); @@ -170,7 +170,6 @@ void BoundingBox::StencilWasUpdated() bool BoundingBox::NeedsStencilBuffer() { - return g_ActiveConfig.bBBoxEnable && - !g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics; + return g_ActiveConfig.bBBoxEnable && !g_ActiveConfig.BBoxUseFragmentShaderImplementation(); } }; diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp index d8d9249965..58d7b19ad9 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp @@ -158,14 +158,14 @@ void VertexManager::vFlush() // setup the pointers nativeVertexFmt->SetupVertexPointers(); - if (!g_Config.backend_info.bSupportsFragmentStoresAndAtomics && ::BoundingBox::active) + if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation()) { glEnable(GL_STENCIL_TEST); } Draw(stride); - if (!g_Config.backend_info.bSupportsFragmentStoresAndAtomics && ::BoundingBox::active) + if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation()) { OGL::BoundingBox::StencilWasUpdated(); glDisable(GL_STENCIL_TEST); diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 6360a68a37..9eb0d625cb 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -171,8 +171,7 @@ PixelShaderUid GetPixelShaderUid() uid_data->genMode_numtevstages = bpmem.genMode.numtevstages; uid_data->genMode_numtexgens = bpmem.genMode.numtexgens; uid_data->per_pixel_lighting = g_ActiveConfig.bEnablePixelLighting; - uid_data->bounding_box = g_ActiveConfig.backend_info.bSupportsBBox && - g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics && + uid_data->bounding_box = 
g_ActiveConfig.BBoxUseFragmentShaderImplementation() && g_ActiveConfig.bBBoxEnable && BoundingBox::active; uid_data->rgba6_format = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor; diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 2db2494a05..7579c90c6e 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -119,6 +119,7 @@ void VideoConfig::Load(const std::string& ini_file) IniFile::Section* hacks = iniFile.GetOrCreateSection("Hacks"); hacks->Get("EFBAccessEnable", &bEFBAccessEnable, true); hacks->Get("BBoxEnable", &bBBoxEnable, false); + hacks->Get("BBoxPreferStencilImplementation", &bBBoxPreferStencilImplementation, false); hacks->Get("ForceProgressive", &bForceProgressive, true); hacks->Get("EFBToTextureEnable", &bSkipEFBCopyToRam, true); hacks->Get("EFBScaledCopy", &bCopyEFBScaled, true); @@ -342,6 +343,7 @@ void VideoConfig::Save(const std::string& ini_file) IniFile::Section* hacks = iniFile.GetOrCreateSection("Hacks"); hacks->Set("EFBAccessEnable", bEFBAccessEnable); hacks->Set("BBoxEnable", bBBoxEnable); + hacks->Set("BBoxPreferStencilImplementation", bBBoxPreferStencilImplementation); hacks->Set("ForceProgressive", bForceProgressive); hacks->Set("EFBToTextureEnable", bSkipEFBCopyToRam); hacks->Set("EFBScaledCopy", bCopyEFBScaled); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index b8e348157d..a3a44f2ec7 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -114,6 +114,7 @@ struct VideoConfig final bool bEFBAccessEnable; bool bPerfQueriesEnable; bool bBBoxEnable; + bool bBBoxPreferStencilImplementation; // OpenGL-only, to see how slow it is compared to SSBOs bool bForceProgressive; bool bEFBEmulateFormatChanges; @@ -203,6 +204,12 @@ struct VideoConfig final { return backend_info.bSupportsExclusiveFullscreen && !bBorderlessFullscreen; 
} + bool BBoxUseFragmentShaderImplementation() const + { + if (backend_info.api_type == APIType::OpenGL && bBBoxPreferStencilImplementation) + return false; + return backend_info.bSupportsBBox && backend_info.bSupportsFragmentStoresAndAtomics; + } }; extern VideoConfig g_Config;