From 4fa79f15030ea396493f1eae3f50f99342f449fe Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Fri, 4 Oct 2019 22:10:43 +1000 Subject: [PATCH] GL: Eliminate most redundant state setting calls at draw time --- src/common/gl_program.cpp | 10 +++ src/common/gl_program.h | 1 + src/core/gpu.cpp | 28 ++++++-- src/core/gpu.h | 5 ++ src/core/gpu_hw_opengl.cpp | 108 ++++++++++++++++++------------ src/core/gpu_hw_opengl.h | 12 ++-- src/duckstation/sdl_interface.cpp | 6 +- 7 files changed, 118 insertions(+), 52 deletions(-) diff --git a/src/common/gl_program.cpp b/src/common/gl_program.cpp index c5bcd5d52..480310584 100644 --- a/src/common/gl_program.cpp +++ b/src/common/gl_program.cpp @@ -6,6 +6,7 @@ Log_SetChannel(GL); static u32 s_next_bad_shader_id = 1; +static GLuint s_last_program_id = 0; namespace GL { @@ -63,6 +64,11 @@ GLuint Program::CompileShader(GLenum type, const char* source) return id; } +void Program::ResetLastProgram() +{ + s_last_program_id = 0; +} + bool Program::Compile(const char* vertex_shader, const char* fragment_shader) { GLuint vertex_shader_id = CompileShader(GL_VERTEX_SHADER, vertex_shader); @@ -138,7 +144,11 @@ bool Program::Link() void Program::Bind() const { + if (s_last_program_id == m_program_id) + return; + glUseProgram(m_program_id); + s_last_program_id = m_program_id; } void Program::Destroy() diff --git a/src/common/gl_program.h b/src/common/gl_program.h index c8c1cc321..fdf628375 100644 --- a/src/common/gl_program.h +++ b/src/common/gl_program.h @@ -11,6 +11,7 @@ public: ~Program(); static GLuint CompileShader(GLenum type, const char* source); + static void ResetLastProgram(); bool IsVaild() const { return m_program_id != 0; } diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index ba4d51ecd..5356e4a06 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -135,6 +135,10 @@ bool GPU::DoState(StateWrapper& sw) return !sw.HasError(); } +void GPU::ResetGraphicsAPIState() {} + +void GPU::RestoreGraphicsAPIState() {} + void 
GPU::RenderStatistics() {} void GPU::RenderDebugMenu() @@ -445,17 +449,29 @@ void GPU::WriteGP0(u32 value) case 0xE3: // Set drawing area top left { - m_drawing_area.left = param & UINT32_C(0x3FF); - m_drawing_area.top = (param >> 10) & UINT32_C(0x1FF); - Log_DebugPrintf("Set drawing area top-left: (%u, %u)", m_drawing_area.left, m_drawing_area.top); + const u32 left = param & UINT32_C(0x3FF); + const u32 top = (param >> 10) & UINT32_C(0x1FF); + Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top); + if (m_drawing_area.left != left || m_drawing_area.top != top) + { + m_drawing_area.left = left; + m_drawing_area.top = top; + UpdateDrawingArea(); + } } break; case 0xE4: // Set drawing area bottom right { - m_drawing_area.right = param & UINT32_C(0x3FF); - m_drawing_area.bottom = (param >> 10) & UINT32_C(0x1FF); - Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, m_drawing_area.bottom); + const u32 right = param & UINT32_C(0x3FF); + const u32 bottom = (param >> 10) & UINT32_C(0x1FF); + Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", right, bottom); + if (m_drawing_area.right != right || m_drawing_area.bottom != bottom) + { + m_drawing_area.right = right; + m_drawing_area.bottom = bottom; + UpdateDrawingArea(); + } } break; @@ -772,6 +788,8 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand() void GPU::UpdateDisplay() {} +void GPU::UpdateDrawingArea() {} + void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) {} void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) {} diff --git a/src/core/gpu.h b/src/core/gpu.h index 67544e8d9..1ee1a868e 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -35,6 +35,10 @@ public: virtual void Reset(); virtual bool DoState(StateWrapper& sw); + // Graphics API state reset/restore - call when drawing the UI etc. + virtual void ResetGraphicsAPIState(); + virtual void RestoreGraphicsAPIState(); + // Render statistics debug window. 
virtual void RenderStatistics(); @@ -198,6 +202,7 @@ protected: // Rendering in the backend virtual void UpdateDisplay(); + virtual void UpdateDrawingArea(); virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer); virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color); virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data); diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index f2e713416..5468549d5 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -26,6 +26,7 @@ bool GPU_HW_OpenGL::Initialize(System* system, DMA* dma, InterruptController* in return false; m_system->GetHostInterface()->SetDisplayTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 1.0f); + RestoreGraphicsAPIState(); return true; } @@ -36,6 +37,37 @@ void GPU_HW_OpenGL::Reset() ClearFramebuffer(); } +void GPU_HW_OpenGL::ResetGraphicsAPIState() +{ + GPU_HW::ResetGraphicsAPIState(); + + glEnable(GL_CULL_FACE); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_BLEND); + glDepthMask(GL_TRUE); + glLineWidth(1.0f); + glBindVertexArray(0); +} + +void GPU_HW_OpenGL::RestoreGraphicsAPIState() +{ + glBindFramebuffer(GL_FRAMEBUFFER, m_vram_fbo); + glViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glEnable(GL_SCISSOR_TEST); + glDepthMask(GL_FALSE); + glLineWidth(static_cast(m_resolution_scale)); + UpdateDrawingArea(); + + m_last_transparency_enable = false; + glDisable(GL_BLEND); + + glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer); + glBindVertexArray(m_vao_id); +} + void GPU_HW_OpenGL::RenderStatistics() { GPU_HW::RenderStatistics(); @@ -156,6 +188,7 @@ void GPU_HW_OpenGL::CreateFramebuffer() linear_filter ? 
GL_LINEAR : GL_NEAREST); glDeleteFramebuffers(1, &old_vram_fbo); + glEnable(GL_SCISSOR_TEST); old_vram_texture.reset(); } @@ -187,15 +220,16 @@ void GPU_HW_OpenGL::CreateFramebuffer() glBindFramebuffer(GL_FRAMEBUFFER, m_display_fbo); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_display_texture->GetGLId(), 0); Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + glBindFramebuffer(GL_FRAMEBUFFER, m_vram_fbo); } void GPU_HW_OpenGL::ClearFramebuffer() { - // TODO: get rid of the FBO switches - glBindFramebuffer(GL_FRAMEBUFFER, m_vram_fbo); + glDisable(GL_SCISSOR_TEST); glClearColor(0.0f, 0.0f, 0.0f, 0.0f); glClear(GL_COLOR_BUFFER_BIT); - glBindFramebuffer(GL_FRAMEBUFFER, 0); + glEnable(GL_SCISSOR_TEST); m_vram_read_texture_dirty = true; } @@ -305,7 +339,7 @@ bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blendi return true; } -void GPU_HW_OpenGL::SetProgram() +void GPU_HW_OpenGL::SetDrawState() { const GL::Program& prog = m_render_programs[BoolToUInt32(m_batch.texture_enable)][BoolToUInt32(m_batch.texture_blending_enable)] @@ -326,14 +360,30 @@ void GPU_HW_OpenGL::SetProgram() if (m_batch.texture_enable) m_vram_read_texture->Bind(); + + if (m_last_transparency_enable != m_batch.transparency_enable || + (m_batch.transparency_enable && m_last_transparency_mode != m_batch.transparency_mode)) + { + m_last_transparency_enable = m_batch.transparency_enable; + m_last_transparency_mode = m_batch.transparency_mode; + + if (!m_batch.transparency_enable) + { + glDisable(GL_BLEND); + } + else + { + glEnable(GL_BLEND); + glBlendEquationSeparate(m_batch.transparency_mode == GPU::TransparencyMode::BackgroundMinusForeground ? 
+ GL_FUNC_REVERSE_SUBTRACT : + GL_FUNC_ADD, + GL_FUNC_ADD); + glBlendFuncSeparate(GL_ONE, GL_SRC_ALPHA, GL_ONE, GL_ZERO); + } + } } -void GPU_HW_OpenGL::SetViewport() -{ - glViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); -} - -void GPU_HW_OpenGL::SetScissor() +void GPU_HW_OpenGL::UpdateDrawingArea() { int left, top, right, bottom; CalcScissorRect(&left, &top, &right, &bottom); @@ -347,22 +397,6 @@ void GPU_HW_OpenGL::SetScissor() glScissor(x, y, width, height); } -void GPU_HW_OpenGL::SetBlendState() -{ - if (!m_batch.transparency_enable) - { - glDisable(GL_BLEND); - return; - } - - glEnable(GL_BLEND); - glBlendEquationSeparate(m_batch.transparency_mode == GPU::TransparencyMode::BackgroundMinusForeground ? - GL_FUNC_REVERSE_SUBTRACT : - GL_FUNC_ADD, - GL_FUNC_ADD); - glBlendFuncSeparate(GL_ONE, GL_SRC_ALPHA, GL_ONE, GL_ZERO); -} - void GPU_HW_OpenGL::UpdateDisplay() { GPU_HW::UpdateDisplay(); @@ -415,7 +449,9 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) glBlitFramebuffer(scaled_x, texture_height - scaled_y - height, scaled_x + scaled_width, scaled_y + scaled_height, 0, 0, width, height, GL_COLOR_BUFFER_BIT, GL_LINEAR); glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_downsample_fbo); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo); glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, temp_buffer.data()); + glEnable(GL_SCISSOR_TEST); } else { @@ -457,8 +493,6 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) width *= m_resolution_scale; height *= m_resolution_scale; - glBindFramebuffer(GL_FRAMEBUFFER, m_vram_fbo); - glEnable(GL_SCISSOR_TEST); glScissor(x, m_vram_texture->GetHeight() - y - height, width, height); @@ -466,6 +500,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) glClearColor(r, g, b, a); glClear(GL_COLOR_BUFFER_BIT); + UpdateDrawingArea(); InvalidateVRAMReadCache(); } @@ -517,9 +552,9 @@ void 
GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height; glDisable(GL_SCISSOR_TEST); glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_downsample_fbo); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo); glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y, scaled_x + scaled_width, scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST); + glEnable(GL_SCISSOR_TEST); } } @@ -540,6 +575,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid glBindFramebuffer(GL_FRAMEBUFFER, m_vram_fbo); glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height, GL_COLOR_BUFFER_BIT, GL_NEAREST); + glEnable(GL_SCISSOR_TEST); InvalidateVRAMReadCache(); } @@ -565,21 +601,9 @@ void GPU_HW_OpenGL::FlushRender() m_stats.num_batches++; m_stats.num_vertices += static_cast(m_batch.vertices.size()); - glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glEnable(GL_SCISSOR_TEST); - glDepthMask(GL_FALSE); - glLineWidth(static_cast(m_resolution_scale)); - SetProgram(); - SetViewport(); - SetScissor(); - SetBlendState(); - - glBindFramebuffer(GL_FRAMEBUFFER, m_vram_fbo); - glBindVertexArray(m_vao_id); + SetDrawState(); Assert((m_batch.vertices.size() * sizeof(HWVertex)) <= VERTEX_BUFFER_SIZE); - glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer); glBufferSubData(GL_ARRAY_BUFFER, 0, static_cast(sizeof(HWVertex) * m_batch.vertices.size()), m_batch.vertices.data()); diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index c90d9b969..a117230d0 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -15,11 +15,16 @@ public: bool Initialize(System* system, DMA* dma, InterruptController* interrupt_controller, Timers* timers) override; void Reset() override; + + void ResetGraphicsAPIState() override; + void RestoreGraphicsAPIState() override; + void 
RenderStatistics() override; void UpdateSettings() override; protected: void UpdateDisplay() override; + void UpdateDrawingArea() override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; @@ -48,10 +53,7 @@ private: bool CompilePrograms(); bool CompileProgram(GL::Program& prog, bool textured, bool blending, bool transparent, TextureColorMode texture_color_mode); - void SetProgram(); - void SetViewport(); - void SetScissor(); - void SetBlendState(); + void SetDrawState(); // downsample texture - used for readbacks at >1xIR. std::unique_ptr m_vram_texture; @@ -69,6 +71,8 @@ private: GLuint m_attributeless_vao_id = 0; bool m_vram_read_texture_dirty = true; + bool m_last_transparency_enable = false; + TransparencyMode m_last_transparency_mode = TransparencyMode::BackgroundMinusForeground; std::array, 2>, 2>, 2> m_render_programs; std::array m_texture_page_programs; diff --git a/src/duckstation/sdl_interface.cpp b/src/duckstation/sdl_interface.cpp index e5dcc1229..58a60bd50 100644 --- a/src/duckstation/sdl_interface.cpp +++ b/src/duckstation/sdl_interface.cpp @@ -388,8 +388,9 @@ bool SDLInterface::PassEventToImGui(const SDL_Event* event) void SDLInterface::Render() { + m_system->GetGPU()->ResetGraphicsAPIState(); + glBindFramebuffer(GL_FRAMEBUFFER, 0); - glDisable(GL_SCISSOR_TEST); glClearColor(0.0f, 0.0f, 0.0f, 0.0f); glClear(GL_COLOR_BUFFER_BIT); @@ -405,6 +406,9 @@ void SDLInterface::Render() ImGui_ImplOpenGL3_NewFrame(); ImGui::NewFrame(); + + GL::Program::ResetLastProgram(); + m_system->GetGPU()->RestoreGraphicsAPIState(); } static std::tuple CalculateDrawRect(int window_width, int window_height, float display_ratio)