diff --git a/src/common/gl/context_wgl.cpp b/src/common/gl/context_wgl.cpp
index 7dcde8f5e..2cfbecf42 100644
--- a/src/common/gl/context_wgl.cpp
+++ b/src/common/gl/context_wgl.cpp
@@ -1,6 +1,7 @@
 #include "context_wgl.h"
 #include "../assert.h"
 #include "../log.h"
+#include "../make_array.h"
 #include "glad.h"
 #include "glad_wgl.h"
 Log_SetChannel(GL::ContextWGL);
@@ -29,8 +30,17 @@ ContextWGL::~ContextWGL()
   if (m_rc)
     wglDeleteContext(m_rc);
 
-  if (m_dc)
-    ReleaseDC(GetHWND(), m_dc);
+  if (m_pbuffer)
+  {
+    if (m_dc)
+      wglReleasePbufferDCARB(m_pbuffer, m_dc);
+    wglDestroyPbufferARB(m_pbuffer);
+  }
+  else
+  {
+    if (m_dc)
+      ReleaseDC(GetHWND(), m_dc);
+  }
 }
 
 std::unique_ptr<Context> ContextWGL::Create(const WindowInfo& wi, const Version* versions_to_try,
@@ -153,7 +163,8 @@ std::unique_ptr<Context> ContextWGL::CreateSharedContext(const WindowInfo& wi)
   }
   else
   {
-    Panic("Create pbuffer");
+    if (!context->InitializePBuffer(m_dc, wi.surface_width, wi.surface_height))
+      return nullptr;
   }
 
   if (m_version.profile == Profile::NoProfile)
@@ -232,6 +243,54 @@ bool ContextWGL::InitializeDC()
   return true;
 }
 
+bool ContextWGL::InitializePBuffer(HDC onscreen_dc, u32 width, u32 height)
+{
+  if (!GLAD_WGL_ARB_pbuffer)
+  {
+    Log_ErrorPrintf("WGL_ARB_pbuffer not supported");
+    return false;
+  }
+
+  static constexpr auto attribs =
+    make_array(WGL_DRAW_TO_PBUFFER_ARB, 1, WGL_RED_BITS_ARB, 0, WGL_GREEN_BITS_ARB, 0, WGL_BLUE_BITS_ARB, 0,
+               WGL_DEPTH_BITS_ARB, 0, WGL_STENCIL_BITS_ARB, 0, 0, 0);
+
+  static constexpr auto fattribs = make_array(0.0f, 0.0f);
+
+  int pixel_format = 0;
+  UINT num_pixel_formats = 0;
+  if (!wglChoosePixelFormatARB(onscreen_dc, attribs.data(), fattribs.data(), 1, &pixel_format, &num_pixel_formats))
+  {
+    Log_ErrorPrintf("wglChoosePixelFormatARB() failed: %08X", GetLastError());
+    return false;
+  }
+
+  // The call also succeeds when nothing matches, in which case pixel_format is not written.
+  if (num_pixel_formats == 0)
+  {
+    Log_ErrorPrintf("wglChoosePixelFormatARB() returned no pbuffer-capable pixel formats");
+    return false;
+  }
+
+  static constexpr auto pbattribs = make_array(0, 0);
+  m_pbuffer =
+    wglCreatePbufferARB(onscreen_dc, pixel_format, std::max<u32>(width, 1), std::max<u32>(height, 1), pbattribs.data());
+  if (!m_pbuffer)
+  {
+    Log_ErrorPrintf("wglCreatePbufferARB() failed: %08X", GetLastError());
+    return false;
+  }
+
+  m_dc = wglGetPbufferDCARB(m_pbuffer);
+  if (!m_dc)
+  {
+    Log_ErrorPrintf("wglGetPbufferDCARB() failed: %08X", GetLastError());
+    return false;
+  }
+
+  return true;
+}
+
 bool ContextWGL::CreateAnyContext(HGLRC share_context, bool make_current)
 {
   m_rc = wglCreateContext(m_dc);
@@ -294,7 +353,7 @@ bool ContextWGL::CreateVersionContext(const Version& version, HGLRC share_contex
       0,
       0};
 
-    new_rc = wglCreateContextAttribsARB(m_dc, share_context, attribs);
+    new_rc = wglCreateContextAttribsARB(m_dc, nullptr, attribs);
   }
   else if (version.profile == Profile::ES)
   {
@@ -308,7 +367,7 @@ bool ContextWGL::CreateVersionContext(const Version& version, HGLRC share_contex
       0,
       0};
 
-    new_rc = wglCreateContextAttribsARB(m_dc, share_context, attribs);
+    new_rc = wglCreateContextAttribsARB(m_dc, nullptr, attribs);
   }
   else
   {
@@ -316,6 +375,14 @@ bool ContextWGL::CreateVersionContext(const Version& version, HGLRC share_contex
     return false;
   }
 
+  // Only attempt sharing when a context was actually created; the null check below reports creation failures.
+  if (new_rc && share_context && !wglShareLists(share_context, new_rc))
+  {
+    Log_ErrorPrintf("wglShareLists() failed: %08X", GetLastError());
+    wglDeleteContext(new_rc);
+    return false;
+  }
+
   if (!new_rc)
     return false;
 
diff --git a/src/common/gl/context_wgl.h b/src/common/gl/context_wgl.h
index 6303235e2..f5e218737 100644
--- a/src/common/gl/context_wgl.h
+++ b/src/common/gl/context_wgl.h
@@ -2,6 +2,7 @@
 #include "../windows_headers.h"
 #include "context.h"
 #include <glad.h>
+#include <glad_wgl.h>
 
 namespace GL {
 
@@ -28,11 +29,13 @@ private:
 
   bool Initialize(const Version* versions_to_try, size_t num_versions_to_try);
   bool InitializeDC();
+  bool InitializePBuffer(HDC onscreen_dc, u32 width, u32 height);
   bool CreateAnyContext(HGLRC share_context, bool make_current);
   bool CreateVersionContext(const Version& version, HGLRC share_context, bool make_current);
 
   HDC m_dc = {};
   HGLRC m_rc = {};
+  HPBUFFERARB m_pbuffer = {};
 };
 
 } // namespace GL
\ No newline at end of file
diff --git a/src/common/gl/stream_buffer.cpp b/src/common/gl/stream_buffer.cpp
index 6de7702fe..ec7323b57 100644
--- a/src/common/gl/stream_buffer.cpp
+++ b/src/common/gl/stream_buffer.cpp
@@ -279,12 +279,14 @@ private:
 std::unique_ptr<StreamBuffer> StreamBuffer::Create(GLenum target, u32 size)
 {
   std::unique_ptr<StreamBuffer> buf;
+#if 0
   if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage)
   {
     buf = detail::BufferStorageStreamBuffer::Create(target, size);
     if (buf)
       return buf;
   }
+#endif
 
   // BufferSubData is slower on all drivers except NVIDIA...
 #if 0
diff --git a/src/common/gl/texture.cpp b/src/common/gl/texture.cpp
index 819223c0a..880ef9516 100644
--- a/src/common/gl/texture.cpp
+++ b/src/common/gl/texture.cpp
@@ -61,10 +61,10 @@ void Texture::SetLinearFilter(bool enabled)
   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, enabled ? GL_LINEAR : GL_NEAREST);
 }
 
-bool Texture::CreateFramebuffer()
+GLuint Texture::CreateAndReturnFramebuffer()
 {
   if (!IsValid())
-    return false;
+    return 0;
 
   glGetError();
 
@@ -75,9 +75,18 @@ bool Texture::CreateFramebuffer()
   if (glGetError() != GL_NO_ERROR || glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE)
   {
     glDeleteFramebuffers(1, &fbo_id);
-    return false;
+    return 0;
   }
 
+  return fbo_id;
+}
+
+bool Texture::CreateFramebuffer()
+{
+  GLuint fbo_id = CreateAndReturnFramebuffer();
+  if (fbo_id == 0)
+    return false;
+
   if (m_fbo_id != 0)
     glDeleteFramebuffers(1, &m_fbo_id);
 
diff --git a/src/common/gl/texture.h b/src/common/gl/texture.h
index 531974c05..42a6645a3 100644
--- a/src/common/gl/texture.h
+++ b/src/common/gl/texture.h
@@ -13,6 +13,7 @@ public:
   bool Create(u32 width, u32 height, GLenum internal_format, GLenum format, GLenum type, const void* data = nullptr,
               bool linear_filter = false, bool wrap = false);
   bool CreateFramebuffer();
+  GLuint CreateAndReturnFramebuffer();
 
   void Destroy();
 
diff --git a/src/frontend-common/opengl_host_display.cpp b/src/frontend-common/opengl_host_display.cpp
index 69f385fb1..d0f4a0637 100644
--- a/src/frontend-common/opengl_host_display.cpp
+++ b/src/frontend-common/opengl_host_display.cpp
@@ -124,12 +124,17 @@ bool OpenGLHostDisplay::DownloadTexture(const void* texture_handle, u32 x, u32 y
 
 void OpenGLHostDisplay::SetVSync(bool enabled)
 {
-  // Window framebuffer has to be bound to call SetSwapInterval.
-  GLint current_fbo = 0;
-  glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &current_fbo);
-  glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
-  m_gl_context->SetSwapInterval(enabled ? 1 : 0);
-  glBindFramebuffer(GL_DRAW_FRAMEBUFFER, current_fbo);
+  if (!m_present_context)
+  {
+    // Window framebuffer has to be bound to call SetSwapInterval.
+    GLint current_fbo = 0;
+    glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &current_fbo);
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+    m_gl_context->SetSwapInterval(enabled ? 1 : 0);
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, current_fbo);
+  }
+
+  m_vsync = enabled;
 }
 
 const char* OpenGLHostDisplay::GetGLSLVersionString() const
@@ -205,6 +210,11 @@ bool OpenGLHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_vie
   m_window_info = wi;
   m_window_info.surface_width = m_gl_context->GetSurfaceWidth();
   m_window_info.surface_height = m_gl_context->GetSurfaceHeight();
+
+#ifndef LIBRETRO
+  InitializeAsyncPresentation();
+#endif
+
   return true;
 }
 
@@ -255,6 +265,11 @@ void OpenGLHostDisplay::DestroyRenderDevice()
   if (!m_gl_context)
     return;
 
+#ifndef LIBRETRO
+  if (m_present_context)
+    StopPresentThread();
+#endif
+
 #ifdef WITH_IMGUI
   if (ImGui::GetCurrentContext())
     DestroyImGuiContext();
@@ -443,8 +458,20 @@ void OpenGLHostDisplay::DestroyResources()
 
 bool OpenGLHostDisplay::Render()
 {
+#ifndef LIBRETRO
+  if (m_present_context)
+  {
+    if (!CheckPresentDrawFramebuffer())
+      return false;
+
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_present_draw_framebuffer->draw_fbo);
+  }
+  else
+  {
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+  }
+
   glDisable(GL_SCISSOR_TEST);
-  glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
   glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
   glClear(GL_COLOR_BUFFER_BIT);
 
@@ -457,13 +484,17 @@ bool OpenGLHostDisplay::Render()
 
   RenderSoftwareCursor();
 
-  m_gl_context->SwapBuffers();
+  if (m_present_context)
+    PresentDrawFramebuffer();
+  else
+    m_gl_context->SwapBuffers();
 
 #ifdef WITH_IMGUI
   if (ImGui::GetCurrentContext())
     ImGui_ImplOpenGL3_NewFrame();
 #endif
 
+#endif
   return true;
 }
 
@@ -486,16 +517,17 @@ void OpenGLHostDisplay::RenderDisplay()
 #ifndef LIBRETRO
   if (!m_post_processing_chain.IsEmpty())
   {
-    ApplyPostProcessingChain(0, left, GetWindowHeight() - top - height, width, height, m_display_texture_handle,
+    ApplyPostProcessingChain(m_present_context ? m_present_draw_framebuffer->draw_fbo : 0, left,
+                             GetWindowHeight() - top - height, width, height, m_display_texture_handle,
                              m_display_texture_width, m_display_texture_height, m_display_texture_view_x,
                              m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height);
     return;
   }
 #endif
 
-  RenderDisplay(left, GetWindowHeight() - top - height, width, height, m_display_texture_handle, m_display_texture_width, m_display_texture_height,
-                m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width,
-                m_display_texture_view_height, m_display_linear_filtering);
+  RenderDisplay(left, GetWindowHeight() - top - height, width, height, m_display_texture_handle,
+                m_display_texture_width, m_display_texture_height, m_display_texture_view_x, m_display_texture_view_y,
+                m_display_texture_view_width, m_display_texture_view_height, m_display_linear_filtering);
 }
 
 void OpenGLHostDisplay::RenderDisplay(s32 left, s32 bottom, s32 width, s32 height, void* texture_handle,
@@ -719,6 +751,247 @@ void OpenGLHostDisplay::ApplyPostProcessingChain(GLuint final_target, s32 final_
   m_post_processing_ubo->Unbind();
 }
 
+bool OpenGLHostDisplay::InitializeAsyncPresentation()
+{
+  WindowInfo shared_wi;
+  std::unique_ptr<GL::Context> shared_context = m_gl_context->CreateSharedContext(shared_wi);
+  if (!shared_context)
+    return false;
+
+  // Textures are shared between the two contexts, but framebuffer objects are not, so every texture gets one FBO
+  // in the window context (for presenting) and one in the shared context (for drawing).
+  m_gl_context->MakeCurrent();
+
+  bool result = true;
+  for (PresentFramebuffer& fb : m_present_framebuffers)
+  {
+    if (!fb.texture.Create(m_window_info.surface_width, m_window_info.surface_height, GL_RGBA8, GL_RGBA,
+                           GL_UNSIGNED_BYTE) ||
+        (fb.present_fbo = fb.texture.CreateAndReturnFramebuffer()) == 0)
+    {
+      result = false;
+      break;
+    }
+
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fb.present_fbo);
+    glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
+    glClear(GL_COLOR_BUFFER_BIT);
+  }
+
+  if (result)
+  {
+    glFinish();
+    m_gl_context->DoneCurrent();
+    shared_context->MakeCurrent();
+
+    for (PresentFramebuffer& fb : m_present_framebuffers)
+    {
+      if ((fb.draw_fbo = fb.texture.CreateAndReturnFramebuffer()) == 0)
+      {
+        result = false;
+        break;
+      }
+    }
+
+    glFinish();
+
+    if (!result)
+    {
+      // Draw FBOs belong to the shared context, so release them while it is still current.
+      for (PresentFramebuffer& fb : m_present_framebuffers)
+      {
+        if (fb.draw_fbo != 0)
+          glDeleteFramebuffers(1, &fb.draw_fbo);
+      }
+
+      shared_context->DoneCurrent();
+      m_gl_context->MakeCurrent();
+    }
+  }
+
+  if (!result)
+  {
+    // The window context is current here. Release what was created and leave it current, so the caller can fall
+    // back to presenting synchronously.
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+    for (PresentFramebuffer& fb : m_present_framebuffers)
+    {
+      if (fb.present_fbo != 0)
+        glDeleteFramebuffers(1, &fb.present_fbo);
+
+      fb.texture.Destroy();
+    }
+
+    m_present_framebuffers = {};
+    return false;
+  }
+
+  // The shared context stays current on this thread, since it is the one used for rendering from now on.
+  Log_InfoPrintf("Using shared context for async presentation");
+  m_present_context = std::move(m_gl_context);
+  m_gl_context = std::move(shared_context);
+  m_present_thread_stop.store(false);
+  m_present_thread = std::thread(&OpenGLHostDisplay::PresentThread, this);
+  return true;
+}
+
+bool OpenGLHostDisplay::CheckPresentDrawFramebuffer()
+{
+  PresentFramebuffer* fb = m_present_draw_framebuffer;
+  if (fb->texture.GetWidth() == m_window_info.surface_width && fb->texture.GetHeight() == m_window_info.surface_height)
+    return true;
+
+  // The surface size changed, so the texture and its draw FBO have to be recreated.
+  fb->texture.Destroy();
+  if (fb->draw_fbo)
+  {
+    glDeleteFramebuffers(1, &fb->draw_fbo);
+    fb->draw_fbo = 0;
+  }
+
+  if (!fb->texture.Create(m_window_info.surface_width, m_window_info.surface_height, GL_RGBA8, GL_RGBA,
+                          GL_UNSIGNED_BYTE) ||
+      (fb->draw_fbo = fb->texture.CreateAndReturnFramebuffer()) == 0)
+  {
+    fb->texture.Destroy();
+    return false;
+  }
+
+  // Tells the present thread that its own FBO for this texture is stale.
+  fb->changed = true;
+  return true;
+}
+
+void OpenGLHostDisplay::PresentDrawFramebuffer()
+{
+  m_present_draw_framebuffer->draw_sync_id = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+  glFlush();
+
+  {
+    std::unique_lock<std::mutex> guard(m_present_lock);
+    m_present_draw_framebuffer->ready = true;
+    std::swap(m_present_draw_framebuffer, m_present_next_present_framebuffer);
+
+    if (m_vsync && !m_frame_presented.load())
+    {
+      // last frame not presented yet, we can wait
+      m_present_complete_cv.wait(guard, [this]() { return m_frame_presented.load(); });
+    }
+
+    m_frame_presented.store(false);
+  }
+
+  // block until the presenter thread is done with it
+  // glWaitSync() only queues the wait on the GPU, the sync object still has to be deleted afterwards.
+  if (m_present_draw_framebuffer->present_sync_id)
+  {
+    glWaitSync(m_present_draw_framebuffer->present_sync_id, 0, GL_TIMEOUT_IGNORED);
+    glDeleteSync(m_present_draw_framebuffer->present_sync_id);
+    m_present_draw_framebuffer->present_sync_id = {};
+  }
+
+  // if the last frame wasn't rendered, don't leak the sync
+  if (m_present_draw_framebuffer->draw_sync_id)
+  {
+    glDeleteSync(m_present_draw_framebuffer->draw_sync_id);
+    m_present_draw_framebuffer->draw_sync_id = {};
+  }
+}
+
+void OpenGLHostDisplay::PresentThread()
+{
+  if (!m_present_context->MakeCurrent())
+    Panic("Failed to make present context current");
+
+  if (!m_present_context->SetSwapInterval(1))
+    Log_ErrorPrint("Failed to set swap interval to 1");
+
+  glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+
+  while (!m_present_thread_stop.load())
+  {
+    {
+      std::unique_lock<std::mutex> guard(m_present_lock);
+      if (m_present_next_present_framebuffer->ready)
+      {
+        std::swap(m_present_next_present_framebuffer, m_present_current_present_framebuffer);
+        m_present_current_present_framebuffer->ready = false;
+      }
+
+      PresentFramebuffer* fb = m_present_current_present_framebuffer;
+      if (fb->draw_sync_id)
+      {
+        // glWaitSync() only queues the wait on the GPU, the sync object still has to be deleted afterwards.
+        glWaitSync(fb->draw_sync_id, 0, GL_TIMEOUT_IGNORED);
+        glDeleteSync(fb->draw_sync_id);
+        fb->draw_sync_id = {};
+      }
+
+      if (fb->changed)
+      {
+        // The draw thread recreated the texture, so the FBO wrapping it is recreated once here as well.
+        // The new FBO is validated through GL_DRAW_FRAMEBUFFER, so make the window the draw target again.
+        glDeleteFramebuffers(1, &fb->present_fbo);
+        fb->present_fbo = fb->texture.CreateAndReturnFramebuffer();
+        fb->changed = false;
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+      }
+
+      if (fb->present_fbo != 0)
+      {
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, fb->present_fbo);
+        glBlitFramebuffer(0, 0, fb->texture.GetWidth(), fb->texture.GetHeight(), 0, 0, m_window_info.surface_width,
+                          m_window_info.surface_height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
+      }
+
+      // this will be non-null if we're duplicating frames
+      if (fb->present_sync_id)
+        glDeleteSync(fb->present_sync_id);
+      fb->present_sync_id = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+      glFlush();
+
+      m_frame_presented.store(true);
+      m_present_complete_cv.notify_one();
+    }
+
+    m_present_context->SwapBuffers();
+  }
+
+  m_present_context->DoneCurrent();
+}
+
+void OpenGLHostDisplay::StopPresentThread()
+{
+  m_present_thread_stop.store(true);
+  m_present_thread.join();
+
+  for (PresentFramebuffer& fb : m_present_framebuffers)
+  {
+    if (fb.draw_sync_id)
+    {
+      glClientWaitSync(fb.draw_sync_id, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
+      glDeleteSync(fb.draw_sync_id);
+      fb.draw_sync_id = {};
+    }
+
+    if (fb.present_sync_id)
+    {
+      glDeleteSync(fb.present_sync_id);
+      fb.present_sync_id = {};
+    }
+
+    // Draw FBOs live in the drawing context, which is assumed to be current here (the wait above needs it too).
+    if (fb.draw_fbo != 0)
+    {
+      glDeleteFramebuffers(1, &fb.draw_fbo);
+      fb.draw_fbo = 0;
+    }
+
+    fb.texture.Destroy();
+  }
+}
+
 #else
 
 bool OpenGLHostDisplay::SetPostProcessingChain(const std::string_view& config)
diff --git a/src/frontend-common/opengl_host_display.h b/src/frontend-common/opengl_host_display.h
index 7f01307bc..ac7c0d082 100644
--- a/src/frontend-common/opengl_host_display.h
+++ b/src/frontend-common/opengl_host_display.h
@@ -14,7 +14,12 @@
 #include "common/gl/texture.h"
 #include "common/window_info.h"
 #include "core/host_display.h"
+#include <array>
+#include <atomic>
+#include <condition_variable>
 #include <memory>
+#include <mutex>
+#include <thread>
 
 #ifndef LIBRETRO
 #include "postprocessing_chain.h"
@@ -106,6 +111,35 @@ protected:
   GL::Texture m_post_processing_input_texture;
   std::unique_ptr<GL::StreamBuffer> m_post_processing_ubo;
   std::vector<PostProcessingStage> m_post_processing_stages;
+
+  struct PresentFramebuffer
+  {
+    GL::Texture texture;
+    GLuint draw_fbo = 0;
+    GLuint present_fbo = 0;
+    GLsync draw_sync_id = nullptr;
+    GLsync present_sync_id = nullptr;
+    bool changed = false;
+    bool ready = false;
+  };
+
+  bool InitializeAsyncPresentation();
+  bool CheckPresentDrawFramebuffer();
+  void PresentDrawFramebuffer();
+  void PresentThread();
+  void StopPresentThread();
+
+  std::unique_ptr<GL::Context> m_present_context;
+  std::array<PresentFramebuffer, 3> m_present_framebuffers{};
+  PresentFramebuffer* m_present_draw_framebuffer = &m_present_framebuffers[0];
+  PresentFramebuffer* m_present_next_present_framebuffer = &m_present_framebuffers[1];
+  PresentFramebuffer* m_present_current_present_framebuffer = &m_present_framebuffers[2];
+  std::thread m_present_thread;
+  std::mutex m_present_lock;
+  std::condition_variable m_present_complete_cv;
+  std::atomic_bool m_frame_presented{false};
+  std::atomic_bool m_present_thread_stop{false};
+  bool m_vsync = true;
 #endif
 };
 