diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 4387a36d8c..9b5328bc94 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -728,7 +728,6 @@ set(pcsx2GSHeaders GS/Renderers/OpenGL/GSTextureCacheOGL.h GS/Renderers/OpenGL/GSTextureOGL.h GS/Renderers/OpenGL/GSUniformBufferOGL.h - GS/Renderers/OpenGL/GSVertexArrayOGL.h GS/Window/GSCaptureDlg.h GS/Window/GSDialog.h GS/Window/GSSetting.h diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index fbdf91d89a..963670d8e3 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -42,6 +42,9 @@ static constexpr uint32 g_convert_index = 15; static constexpr uint32 g_vs_cb_index = 20; static constexpr uint32 g_ps_cb_index = 21; +static constexpr u32 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024; +static constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024; + bool GSDeviceOGL::m_debug_gl_call = false; int GSDeviceOGL::m_shader_inst = 0; int GSDeviceOGL::m_shader_reg = 0; @@ -51,7 +54,6 @@ GSDeviceOGL::GSDeviceOGL() : m_force_texture_clear(0) , m_fbo(0) , m_fbo_read(0) - , m_va(NULL) , m_apitrace(0) , m_palette_ss(0) , m_vs_cb(NULL) @@ -105,7 +107,10 @@ GSDeviceOGL::~GSDeviceOGL() GL_PUSH("GSDeviceOGL destructor"); // Clean vertex buffer state - delete m_va; + if (m_vertex_array_object) + glDeleteVertexArrays(1, &m_vertex_array_object); // first argument is the number of names to delete; passing 0 would leave the VAO alive + m_vertex_stream_buffer.reset(); + m_index_stream_buffer.reset(); // Clean m_merge_obj delete m_merge_obj.cb; @@ -381,18 +386,33 @@ bool GSDeviceOGL::Create(const WindowInfo& wi) { GL_PUSH("GSDeviceOGL::Vertex Buffer"); + glGenVertexArrays(1, &m_vertex_array_object); + glBindVertexArray(m_vertex_array_object); + + m_vertex_stream_buffer = GL::StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE); + m_index_stream_buffer = GL::StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE); + if (!m_vertex_stream_buffer || !m_index_stream_buffer) + { + Console.Error("Failed to create 
vertex/index streaming buffers"); + return false; + } + + // rebind because of VAO state + m_vertex_stream_buffer->Bind(); + m_index_stream_buffer->Bind(); + static_assert(sizeof(GSVertexPT1) == sizeof(GSVertex), "wrong GSVertex size"); - std::vector il_convert = { - {0, 2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)( 0) } , - {1, 2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(16) } , - {2, 4 , GL_UNSIGNED_BYTE , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)( 8) } , - {3, 1 , GL_FLOAT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(12) } , - {4, 2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(16) } , - {5, 1 , GL_UNSIGNED_INT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(20) } , - {6, 2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(24) } , - {7, 4 , GL_UNSIGNED_BYTE , GL_TRUE , sizeof(GSVertex) , (const GLvoid*)(28) } , // Only 1 byte is useful but hardware unit only support 4B - }; - m_va = new GSVertexBufferStateOGL(il_convert); + for (u32 i = 0; i < 8; i++) + glEnableVertexAttribArray(i); + + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GSVertexPT1), (const GLvoid*)(0)); + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(GSVertexPT1), (const GLvoid*)(16)); + glVertexAttribPointer(2, 4, GL_UNSIGNED_BYTE, GL_FALSE, sizeof(GSVertex), (const GLvoid*)(8)); + glVertexAttribPointer(3, 1, GL_FLOAT, GL_FALSE, sizeof(GSVertex), (const GLvoid*)(12)); + glVertexAttribIPointer(4, 2, GL_UNSIGNED_SHORT, sizeof(GSVertex), (const GLvoid*)(16)); + glVertexAttribIPointer(5, 1, GL_UNSIGNED_INT, sizeof(GSVertex), (const GLvoid*)(20)); + glVertexAttribIPointer(6, 2, GL_UNSIGNED_SHORT, sizeof(GSVertex), (const GLvoid*)(24)); + glVertexAttribPointer(7, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(GSVertex), (const GLvoid*)(28)); } // **************************************************************** @@ -686,18 +706,16 @@ void GSDeviceOGL::Flip() void GSDeviceOGL::DrawPrimitive() { - 
m_va->DrawPrimitive(); -} - -void GSDeviceOGL::DrawPrimitive(int offset, int count) -{ - m_va->DrawPrimitive(offset, count); + glDrawArrays(m_draw_topology, m_vertex.start, m_vertex.count); } void GSDeviceOGL::DrawIndexedPrimitive() { if (!m_disable_hw_gl_draw) - m_va->DrawIndexedPrimitive(); + { + glDrawElementsBaseVertex(m_draw_topology, static_cast(m_index.count), GL_UNSIGNED_INT, + reinterpret_cast(static_cast(m_index.start) * sizeof(u32)), static_cast(m_vertex.start)); + } } void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count) @@ -705,7 +723,11 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count) //ASSERT(offset + count <= (int)m_index.count); if (!m_disable_hw_gl_draw) - m_va->DrawIndexedPrimitive(offset, count); + { + glDrawElementsBaseVertex(m_draw_topology, count, GL_UNSIGNED_INT, + reinterpret_cast((static_cast(m_index.start) + static_cast(offset)) * sizeof(u32)), + static_cast(m_vertex.start)); + } } void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) @@ -1472,9 +1494,11 @@ void GSDeviceOGL::RenderOsd(GSTexture* dt) // Note scaling could also be done in shader (require gl3/dx10) size_t count = m_osd.Size(); - GSVertexPT1* dst = (GSVertexPT1*)m_va->MapVB(count); - count = m_osd.GeneratePrimitives(dst, count); - m_va->UnmapVB(); + auto res = m_vertex_stream_buffer->Map(sizeof(GSVertexPT1), static_cast(count) * sizeof(GSVertexPT1)); + count = m_osd.GeneratePrimitives(reinterpret_cast(res.pointer), count); + m_vertex.start = res.index_aligned; + m_vertex.count = count; + m_vertex_stream_buffer->Unmap(static_cast(count) * sizeof(GSVertexPT1)); DrawPrimitive(); @@ -1707,24 +1731,29 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver EndScene(); } -void GSDeviceOGL::EndScene() -{ - m_va->EndScene(); -} - void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count) { - m_va->UploadVB(vertices, count); + const u32 size = static_cast(count) * sizeof(GSVertexPT1); + auto res = 
m_vertex_stream_buffer->Map(sizeof(GSVertexPT1), size); + std::memcpy(res.pointer, vertices, size); + m_vertex.start = res.index_aligned; + m_vertex.count = count; + m_vertex_stream_buffer->Unmap(size); } void GSDeviceOGL::IASetIndexBuffer(const void* index, size_t count) { - m_va->UploadIB(index, count); + const u32 size = static_cast(count) * sizeof(u32); + auto res = m_index_stream_buffer->Map(sizeof(u32), size); + m_index.start = res.index_aligned; + m_index.count = count; + std::memcpy(res.pointer, index, size); + m_index_stream_buffer->Unmap(size); } void GSDeviceOGL::IASetPrimitiveTopology(GLenum topology) { - m_va->SetTopology(topology); + m_draw_topology = topology; } void GSDeviceOGL::PSSetShaderResource(int i, GSTexture* sr) diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h index 0d1be2a0fd..beace42426 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h @@ -16,10 +16,10 @@ #pragma once #include "common/GL/Context.h" +#include "common/GL/StreamBuffer.h" #include "GS/Renderers/Common/GSDevice.h" #include "GSTextureOGL.h" #include "GS/GS.h" -#include "GSVertexArrayOGL.h" #include "GSUniformBufferOGL.h" #include "GSShaderOGL.h" #include "GLState.h" @@ -482,7 +482,12 @@ private: GLuint m_fbo; // frame buffer container GLuint m_fbo_read; // frame buffer container only for reading - GSVertexBufferStateOGL* m_va; // state of the vertex buffer/array + std::unique_ptr m_vertex_stream_buffer; + std::unique_ptr m_index_stream_buffer; + GLuint m_vertex_array_object = 0; + u32 m_vertex_buffer_base_vertex = 0; + u32 m_index_buffer_offset = 0; + GLenum m_draw_topology = 0; struct { @@ -589,7 +594,6 @@ public: void SetVSync(int vsync) override; void DrawPrimitive() final; - void DrawPrimitive(int offset, int count); void DrawIndexedPrimitive() final; void DrawIndexedPrimitive(int offset, int count) final; @@ -612,9 +616,6 @@ public: void SetupDATE(GSTexture* rt, GSTexture* ds, 
const GSVertexPT1* vertices, bool datm); - void BeginScene() final {} - void EndScene() final; - void IASetPrimitiveTopology(GLenum topology); void IASetVertexBuffer(const void* vertices, size_t count); void IASetIndexBuffer(const void* index, size_t count); diff --git a/pcsx2/GS/Renderers/OpenGL/GSVertexArrayOGL.h b/pcsx2/GS/Renderers/OpenGL/GSVertexArrayOGL.h deleted file mode 100644 index 7528a2e95b..0000000000 --- a/pcsx2/GS/Renderers/OpenGL/GSVertexArrayOGL.h +++ /dev/null @@ -1,367 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2021 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . 
- */ - -#pragma once - -#include "GS/config.h" - -#ifdef ENABLE_OGL_DEBUG_MEM_BW -extern uint64 g_vertex_upload_byte; -#endif - -struct GSInputLayoutOGL -{ - GLint location; - GLint size; - GLenum type; - GLboolean normalize; - GLsizei stride; - const GLvoid* offset; -}; - -template -class GSBufferOGL -{ - size_t m_start; - size_t m_count; - size_t m_limit; - size_t m_quarter_shift; - const GLenum m_target; - GLuint m_buffer_name; - uint8* m_buffer_ptr; - GLsync m_fence[5]; - -public: - GSBufferOGL(GLenum target, size_t count) - : m_start(0) - , m_count(0) - , m_limit(0) - , m_target(target) - { - glGenBuffers(1, &m_buffer_name); - // Warning m_limit is the number of object (not the size in Bytes) - // Round it to next power of 2 - m_limit = static_cast(1) << (1u + (size_t)std::log2(count - 1u)); - m_quarter_shift = (size_t)std::log2(m_limit * STRIDE) - 2; - - for (size_t i = 0; i < 5; i++) - { - m_fence[i] = 0; - } - - // TODO: if we do manually the synchronization, I'm not sure size is important. It worths to investigate it. 
- // => bigger buffer => less sync - bind(); - - if (STRIDE <= 4) - glObjectLabel(GL_BUFFER, m_buffer_name, -1, "IBO"); - else - glObjectLabel(GL_BUFFER, m_buffer_name, -1, "VBO"); - - // coherency will be done by flushing - const GLbitfield common_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; - const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT; - const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT; - - glBufferStorage(m_target, STRIDE * m_limit, NULL, create_flags); - m_buffer_ptr = (uint8*)glMapBufferRange(m_target, 0, STRIDE * m_limit, map_flags); - if (!m_buffer_ptr) - { - fprintf(stderr, "Failed to map buffer\n"); - throw GSError(); - } - } - - ~GSBufferOGL() - { - for (size_t i = 0; i < 5; i++) - { - glDeleteSync(m_fence[i]); - } - glDeleteBuffers(1, &m_buffer_name); - } - - void bind() - { - glBindBuffer(m_target, m_buffer_name); - } - - void* map(size_t count) - { - m_count = count; - - if (m_count >= m_limit) - throw GSErrorGlVertexArrayTooSmall(); - - size_t offset = m_start * STRIDE; - size_t length = m_count * STRIDE; - - if (m_count > (m_limit - m_start)) - { - size_t current_chunk = offset >> m_quarter_shift; -#ifdef ENABLE_OGL_DEBUG_FENCE - fprintf(stderr, "%x: Wrap buffer\n", m_target); - fprintf(stderr, "%x: Insert a fence in chunk %zu\n", m_target, current_chunk); -#endif - ASSERT(current_chunk > 0 && current_chunk < 5); - if (m_fence[current_chunk] == 0) - { - m_fence[current_chunk] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - } - - // Wrap at startup - m_start = 0; - offset = 0; - - // Only check first chunk - if (m_fence[0]) - { -#ifdef ENABLE_OGL_DEBUG_FENCE - GLenum status = glClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); - if (status != GL_ALREADY_SIGNALED) - { - fprintf(stderr, "%x: Sync Sync! 
Buffer too small\n", m_target); - } -#else - glClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); -#endif - glDeleteSync(m_fence[0]); - m_fence[0] = 0; - } - } - - // Protect buffer with fences - size_t current_chunk = offset >> m_quarter_shift; - size_t next_chunk = (offset + length) >> m_quarter_shift; - for (size_t c = current_chunk + 1; c <= next_chunk; c++) - { -#ifdef ENABLE_OGL_DEBUG_FENCE - fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, c - 1); -#endif - ASSERT(c > 0 && c < 5); - m_fence[c - 1] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - if (m_fence[c]) - { -#ifdef ENABLE_OGL_DEBUG_FENCE - GLenum status = glClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); -#else - glClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); -#endif - glDeleteSync(m_fence[c]); - m_fence[c] = 0; - -#ifdef ENABLE_OGL_DEBUG_FENCE - if (status != GL_ALREADY_SIGNALED) - { - fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target); - } -#endif - } - } - - return m_buffer_ptr + offset; - } - - void unmap() - { - glFlushMappedBufferRange(m_target, m_start * STRIDE, m_count * STRIDE); - } - - void upload(const void* src, size_t count) - { -#ifdef ENABLE_OGL_DEBUG_MEM_BW - g_vertex_upload_byte += count * STRIDE; -#endif - - void* dst = map(count); - memcpy(dst, src, count * STRIDE); - unmap(); - } - - void EndScene() - { - m_start += m_count; - m_count = 0; - } - - void Draw(GLenum mode) - { - glDrawArrays(mode, m_start, m_count); - } - - void Draw(GLenum mode, int offset, int count) - { - glDrawArrays(mode, m_start + offset, count); - } - - - void Draw(GLenum mode, GLint basevertex) - { - glDrawElementsBaseVertex(mode, m_count, GL_UNSIGNED_INT, (void*)(m_start * STRIDE), basevertex); - } - - void Draw(GLenum mode, GLint basevertex, int offset, int count) - { - glDrawElementsBaseVertex(mode, count, GL_UNSIGNED_INT, (void*)((m_start + offset) * STRIDE), basevertex); - } - - size_t 
GetStart() { return m_start; } -}; - -class GSVertexBufferStateOGL -{ - std::unique_ptr> m_vb; - std::unique_ptr> m_ib; - - GLuint m_va; - GLenum m_topology; - std::vector m_layout; - - // No copy constructor please - GSVertexBufferStateOGL(const GSVertexBufferStateOGL&) = delete; - -public: - GSVertexBufferStateOGL(const std::vector& layout) - : m_topology(0), m_layout(layout) - { - glGenVertexArrays(1, &m_va); - glBindVertexArray(m_va); - - m_vb.reset(new GSBufferOGL(GL_ARRAY_BUFFER, 256 * 1024)); - m_ib.reset(new GSBufferOGL(GL_ELEMENT_ARRAY_BUFFER, 2 * 1024 * 1024)); - - m_vb->bind(); - m_ib->bind(); - - set_internal_format(); - } - - void bind() - { - // Note: index array are part of the VA state so it need to be bound only once. - glBindVertexArray(m_va); - if (m_vb) - m_vb->bind(); - } - - void set_internal_format() - { - for (const auto& l : m_layout) - { - // Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer - glEnableVertexAttribArray(l.location); - switch (l.type) - { - case GL_UNSIGNED_SHORT: - case GL_UNSIGNED_INT: - if (l.normalize) - { - glVertexAttribPointer(l.location, l.size, l.type, l.normalize, l.stride, l.offset); - } - else - { - // Rule: when shader use integral (not normalized) you must use glVertexAttribIPointer (note the extra I) - glVertexAttribIPointer(l.location, l.size, l.type, l.stride, l.offset); - } - break; - default: - glVertexAttribPointer(l.location, l.size, l.type, l.normalize, l.stride, l.offset); - break; - } - } - } - - void EndScene() - { - m_vb->EndScene(); - m_ib->EndScene(); - } - - void DrawPrimitive() { m_vb->Draw(m_topology); } - - void DrawPrimitive(int offset, int count) { m_vb->Draw(m_topology, offset, count); } - - void DrawIndexedPrimitive() { m_ib->Draw(m_topology, m_vb->GetStart()); } - - void DrawIndexedPrimitive(int offset, int count) { m_ib->Draw(m_topology, m_vb->GetStart(), offset, count); } - - void SetTopology(GLenum topology) { m_topology = topology; } - - void* 
MapVB(size_t count) - { - void* ptr; - while (true) - { - try - { - ptr = m_vb->map(count); - break; - } - catch (GSErrorGlVertexArrayTooSmall) - { - GL_INS("GL vertex buffer is too small"); - - m_vb.reset(new GSBufferOGL(GL_ARRAY_BUFFER, count)); - - set_internal_format(); - } - } - - return ptr; - } - void UnmapVB() { m_vb->unmap(); } - void UploadVB(const void* vertices, size_t count) - { - while (true) - { - try - { - m_vb->upload(vertices, count); - break; - } - catch (GSErrorGlVertexArrayTooSmall) - { - GL_INS("GL vertex buffer is too small"); - - m_vb.reset(new GSBufferOGL(GL_ARRAY_BUFFER, count)); - - set_internal_format(); - } - } - } - - void UploadIB(const void* index, size_t count) - { - while (true) - { - try - { - m_ib->upload(index, count); - break; - } - catch (GSErrorGlVertexArrayTooSmall) - { - GL_INS("GL index buffer is too small"); - - m_ib.reset(new GSBufferOGL(GL_ELEMENT_ARRAY_BUFFER, count)); - } - } - } - - ~GSVertexBufferStateOGL() - { - glDeleteVertexArrays(1, &m_va); - } -}; diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 9038bf059d..e794b02cfa 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -875,7 +875,6 @@ - @@ -1171,4 +1170,4 @@ - \ No newline at end of file + diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 5128a2fca4..038d3a920a 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -2616,9 +2616,6 @@ System\Ps2\GS\Renderers\OpenGL - - System\Ps2\GS\Renderers\OpenGL - System\Ps2\GS\Renderers\OpenGL @@ -2914,4 +2911,4 @@ AppHost\Resources - \ No newline at end of file +