GS: Use stream buffer for vertices/indices

This commit is contained in:
Connor McLaughlin 2021-10-11 16:26:53 +10:00 committed by refractionpcsx2
parent 9d2bdd5681
commit 62b40b516c
6 changed files with 70 additions and 412 deletions

View File

@ -728,7 +728,6 @@ set(pcsx2GSHeaders
GS/Renderers/OpenGL/GSTextureCacheOGL.h
GS/Renderers/OpenGL/GSTextureOGL.h
GS/Renderers/OpenGL/GSUniformBufferOGL.h
GS/Renderers/OpenGL/GSVertexArrayOGL.h
GS/Window/GSCaptureDlg.h
GS/Window/GSDialog.h
GS/Window/GSSetting.h

View File

@ -42,6 +42,9 @@ static constexpr uint32 g_convert_index = 15;
static constexpr uint32 g_vs_cb_index = 20;
static constexpr uint32 g_ps_cb_index = 21;
static constexpr u32 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
static constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
bool GSDeviceOGL::m_debug_gl_call = false;
int GSDeviceOGL::m_shader_inst = 0;
int GSDeviceOGL::m_shader_reg = 0;
@ -51,7 +54,6 @@ GSDeviceOGL::GSDeviceOGL()
: m_force_texture_clear(0)
, m_fbo(0)
, m_fbo_read(0)
, m_va(NULL)
, m_apitrace(0)
, m_palette_ss(0)
, m_vs_cb(NULL)
@ -105,7 +107,10 @@ GSDeviceOGL::~GSDeviceOGL()
GL_PUSH("GSDeviceOGL destructor");
// Clean vertex buffer state
delete m_va;
// The first argument of glDeleteVertexArrays is the number of names to delete.
// Passing 0 deletes nothing and would leak the vertex array object, so pass 1.
if (m_vertex_array_object)
	glDeleteVertexArrays(1, &m_vertex_array_object);
m_vertex_stream_buffer.reset();
m_index_stream_buffer.reset();
// Clean m_merge_obj
delete m_merge_obj.cb;
@ -381,18 +386,33 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
{
GL_PUSH("GSDeviceOGL::Vertex Buffer");
glGenVertexArrays(1, &m_vertex_array_object);
glBindVertexArray(m_vertex_array_object);
m_vertex_stream_buffer = GL::StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE);
m_index_stream_buffer = GL::StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE);
if (!m_vertex_stream_buffer || !m_index_stream_buffer)
{
Console.Error("Failed to create vertex/index streaming buffers");
return false;
}
// rebind because of VAO state
m_vertex_stream_buffer->Bind();
m_index_stream_buffer->Bind();
static_assert(sizeof(GSVertexPT1) == sizeof(GSVertex), "wrong GSVertex size");
std::vector<GSInputLayoutOGL> il_convert = {
{0, 2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)( 0) } ,
{1, 2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(16) } ,
{2, 4 , GL_UNSIGNED_BYTE , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)( 8) } ,
{3, 1 , GL_FLOAT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(12) } ,
{4, 2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(16) } ,
{5, 1 , GL_UNSIGNED_INT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(20) } ,
{6, 2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(24) } ,
{7, 4 , GL_UNSIGNED_BYTE , GL_TRUE , sizeof(GSVertex) , (const GLvoid*)(28) } , // Only 1 byte is useful but hardware unit only support 4B
};
m_va = new GSVertexBufferStateOGL(il_convert);
for (u32 i = 0; i < 8; i++)
glEnableVertexAttribArray(i);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GSVertexPT1), (const GLvoid*)(0));
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(GSVertexPT1), (const GLvoid*)(16));
glVertexAttribPointer(2, 4, GL_UNSIGNED_BYTE, GL_FALSE, sizeof(GSVertex), (const GLvoid*)(8));
glVertexAttribPointer(3, 1, GL_FLOAT, GL_FALSE, sizeof(GSVertex), (const GLvoid*)(12));
glVertexAttribIPointer(4, 2, GL_UNSIGNED_SHORT, sizeof(GSVertex), (const GLvoid*)(16));
glVertexAttribIPointer(5, 1, GL_UNSIGNED_INT, sizeof(GSVertex), (const GLvoid*)(20));
glVertexAttribIPointer(6, 2, GL_UNSIGNED_SHORT, sizeof(GSVertex), (const GLvoid*)(24));
glVertexAttribPointer(7, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(GSVertex), (const GLvoid*)(28));
}
// ****************************************************************
@ -686,18 +706,16 @@ void GSDeviceOGL::Flip()
void GSDeviceOGL::DrawPrimitive()
{
m_va->DrawPrimitive();
}
void GSDeviceOGL::DrawPrimitive(int offset, int count)
{
m_va->DrawPrimitive(offset, count);
glDrawArrays(m_draw_topology, m_vertex.start, m_vertex.count);
}
void GSDeviceOGL::DrawIndexedPrimitive()
{
if (!m_disable_hw_gl_draw)
m_va->DrawIndexedPrimitive();
{
glDrawElementsBaseVertex(m_draw_topology, static_cast<u32>(m_index.count), GL_UNSIGNED_INT,
reinterpret_cast<void*>(static_cast<u32>(m_index.start) * sizeof(u32)), static_cast<GLint>(m_vertex.start));
}
}
void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
@ -705,7 +723,11 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
//ASSERT(offset + count <= (int)m_index.count);
if (!m_disable_hw_gl_draw)
m_va->DrawIndexedPrimitive(offset, count);
{
glDrawElementsBaseVertex(m_draw_topology, count, GL_UNSIGNED_INT,
reinterpret_cast<void*>((static_cast<u32>(m_index.start) + static_cast<u32>(offset)) * sizeof(u32)),
static_cast<GLint>(m_vertex.start));
}
}
void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
@ -1472,9 +1494,11 @@ void GSDeviceOGL::RenderOsd(GSTexture* dt)
// Note scaling could also be done in shader (require gl3/dx10)
size_t count = m_osd.Size();
GSVertexPT1* dst = (GSVertexPT1*)m_va->MapVB(count);
count = m_osd.GeneratePrimitives(dst, count);
m_va->UnmapVB();
auto res = m_vertex_stream_buffer->Map(sizeof(GSVertexPT1), static_cast<u32>(count) * sizeof(GSVertexPT1));
count = m_osd.GeneratePrimitives(reinterpret_cast<GSVertexPT1*>(res.pointer), count);
m_vertex.start = res.index_aligned;
m_vertex.count = count;
m_vertex_stream_buffer->Unmap(static_cast<u32>(count) * sizeof(GSVertexPT1));
DrawPrimitive();
@ -1707,24 +1731,29 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
EndScene();
}
void GSDeviceOGL::EndScene()
{
m_va->EndScene();
}
void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count)
{
m_va->UploadVB(vertices, count);
const u32 size = static_cast<u32>(count) * sizeof(GSVertexPT1);
auto res = m_vertex_stream_buffer->Map(sizeof(GSVertexPT1), size);
std::memcpy(res.pointer, vertices, size);
m_vertex.start = res.index_aligned;
m_vertex.count = count;
m_vertex_stream_buffer->Unmap(size);
}
void GSDeviceOGL::IASetIndexBuffer(const void* index, size_t count)
{
m_va->UploadIB(index, count);
const u32 size = static_cast<u32>(count) * sizeof(u32);
auto res = m_index_stream_buffer->Map(sizeof(u32), size);
m_index.start = res.index_aligned;
m_index.count = count;
std::memcpy(res.pointer, index, size);
m_index_stream_buffer->Unmap(size);
}
void GSDeviceOGL::IASetPrimitiveTopology(GLenum topology)
{
m_va->SetTopology(topology);
m_draw_topology = topology;
}
void GSDeviceOGL::PSSetShaderResource(int i, GSTexture* sr)

View File

@ -16,10 +16,10 @@
#pragma once
#include "common/GL/Context.h"
#include "common/GL/StreamBuffer.h"
#include "GS/Renderers/Common/GSDevice.h"
#include "GSTextureOGL.h"
#include "GS/GS.h"
#include "GSVertexArrayOGL.h"
#include "GSUniformBufferOGL.h"
#include "GSShaderOGL.h"
#include "GLState.h"
@ -482,7 +482,12 @@ private:
GLuint m_fbo; // frame buffer container
GLuint m_fbo_read; // frame buffer container only for reading
GSVertexBufferStateOGL* m_va; // state of the vertex buffer/array
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
std::unique_ptr<GL::StreamBuffer> m_index_stream_buffer;
GLuint m_vertex_array_object = 0;
u32 m_vertex_buffer_base_vertex = 0;
u32 m_index_buffer_offset = 0;
GLenum m_draw_topology = 0;
struct
{
@ -589,7 +594,6 @@ public:
void SetVSync(int vsync) override;
void DrawPrimitive() final;
void DrawPrimitive(int offset, int count);
void DrawIndexedPrimitive() final;
void DrawIndexedPrimitive(int offset, int count) final;
@ -612,9 +616,6 @@ public:
void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm);
void BeginScene() final {}
void EndScene() final;
void IASetPrimitiveTopology(GLenum topology);
void IASetVertexBuffer(const void* vertices, size_t count);
void IASetIndexBuffer(const void* index, size_t count);

View File

@ -1,367 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "GS/config.h"
#ifdef ENABLE_OGL_DEBUG_MEM_BW
extern uint64 g_vertex_upload_byte;
#endif
// Describes one vertex attribute of the input layout.
// GSVertexBufferStateOGL::set_internal_format() enables `location` and forwards the
// remaining fields to glVertexAttribPointer / glVertexAttribIPointer.
// Instances are brace-initialised positionally, so the member order matters.
struct GSInputLayoutOGL
{
GLint location; // attribute index passed to glEnableVertexAttribArray / glVertexAttrib*Pointer
GLint size; // component count argument of glVertexAttrib*Pointer
GLenum type; // component data type (e.g. GL_FLOAT, GL_UNSIGNED_SHORT)
GLboolean normalize; // `normalized` argument; not passed on the glVertexAttribIPointer path
GLsizei stride; // `stride` argument of glVertexAttrib*Pointer
const GLvoid* offset; // `pointer` argument of glVertexAttrib*Pointer
};
template <int STRIDE>
class GSBufferOGL
{
size_t m_start;
size_t m_count;
size_t m_limit;
size_t m_quarter_shift;
const GLenum m_target;
GLuint m_buffer_name;
uint8* m_buffer_ptr;
GLsync m_fence[5];
public:
GSBufferOGL(GLenum target, size_t count)
: m_start(0)
, m_count(0)
, m_limit(0)
, m_target(target)
{
glGenBuffers(1, &m_buffer_name);
// Warning m_limit is the number of object (not the size in Bytes)
// Round it to next power of 2
m_limit = static_cast<size_t>(1) << (1u + (size_t)std::log2(count - 1u));
m_quarter_shift = (size_t)std::log2(m_limit * STRIDE) - 2;
for (size_t i = 0; i < 5; i++)
{
m_fence[i] = 0;
}
// TODO: if we do manually the synchronization, I'm not sure size is important. It worths to investigate it.
// => bigger buffer => less sync
bind();
if (STRIDE <= 4)
glObjectLabel(GL_BUFFER, m_buffer_name, -1, "IBO");
else
glObjectLabel(GL_BUFFER, m_buffer_name, -1, "VBO");
// coherency will be done by flushing
const GLbitfield common_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT;
const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT;
glBufferStorage(m_target, STRIDE * m_limit, NULL, create_flags);
m_buffer_ptr = (uint8*)glMapBufferRange(m_target, 0, STRIDE * m_limit, map_flags);
if (!m_buffer_ptr)
{
fprintf(stderr, "Failed to map buffer\n");
throw GSError();
}
}
~GSBufferOGL()
{
for (size_t i = 0; i < 5; i++)
{
glDeleteSync(m_fence[i]);
}
glDeleteBuffers(1, &m_buffer_name);
}
void bind()
{
glBindBuffer(m_target, m_buffer_name);
}
void* map(size_t count)
{
m_count = count;
if (m_count >= m_limit)
throw GSErrorGlVertexArrayTooSmall();
size_t offset = m_start * STRIDE;
size_t length = m_count * STRIDE;
if (m_count > (m_limit - m_start))
{
size_t current_chunk = offset >> m_quarter_shift;
#ifdef ENABLE_OGL_DEBUG_FENCE
fprintf(stderr, "%x: Wrap buffer\n", m_target);
fprintf(stderr, "%x: Insert a fence in chunk %zu\n", m_target, current_chunk);
#endif
ASSERT(current_chunk > 0 && current_chunk < 5);
if (m_fence[current_chunk] == 0)
{
m_fence[current_chunk] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
// Wrap at startup
m_start = 0;
offset = 0;
// Only check first chunk
if (m_fence[0])
{
#ifdef ENABLE_OGL_DEBUG_FENCE
GLenum status = glClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
if (status != GL_ALREADY_SIGNALED)
{
fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target);
}
#else
glClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
#endif
glDeleteSync(m_fence[0]);
m_fence[0] = 0;
}
}
// Protect buffer with fences
size_t current_chunk = offset >> m_quarter_shift;
size_t next_chunk = (offset + length) >> m_quarter_shift;
for (size_t c = current_chunk + 1; c <= next_chunk; c++)
{
#ifdef ENABLE_OGL_DEBUG_FENCE
fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, c - 1);
#endif
ASSERT(c > 0 && c < 5);
m_fence[c - 1] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
if (m_fence[c])
{
#ifdef ENABLE_OGL_DEBUG_FENCE
GLenum status = glClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
#else
glClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
#endif
glDeleteSync(m_fence[c]);
m_fence[c] = 0;
#ifdef ENABLE_OGL_DEBUG_FENCE
if (status != GL_ALREADY_SIGNALED)
{
fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target);
}
#endif
}
}
return m_buffer_ptr + offset;
}
void unmap()
{
glFlushMappedBufferRange(m_target, m_start * STRIDE, m_count * STRIDE);
}
void upload(const void* src, size_t count)
{
#ifdef ENABLE_OGL_DEBUG_MEM_BW
g_vertex_upload_byte += count * STRIDE;
#endif
void* dst = map(count);
memcpy(dst, src, count * STRIDE);
unmap();
}
void EndScene()
{
m_start += m_count;
m_count = 0;
}
void Draw(GLenum mode)
{
glDrawArrays(mode, m_start, m_count);
}
void Draw(GLenum mode, int offset, int count)
{
glDrawArrays(mode, m_start + offset, count);
}
void Draw(GLenum mode, GLint basevertex)
{
glDrawElementsBaseVertex(mode, m_count, GL_UNSIGNED_INT, (void*)(m_start * STRIDE), basevertex);
}
void Draw(GLenum mode, GLint basevertex, int offset, int count)
{
glDrawElementsBaseVertex(mode, count, GL_UNSIGNED_INT, (void*)((m_start + offset) * STRIDE), basevertex);
}
size_t GetStart() { return m_start; }
};
class GSVertexBufferStateOGL
{
std::unique_ptr<GSBufferOGL<sizeof(GSVertexPT1)>> m_vb;
std::unique_ptr<GSBufferOGL<sizeof(uint32)>> m_ib;
GLuint m_va;
GLenum m_topology;
std::vector<GSInputLayoutOGL> m_layout;
// No copy constructor please
GSVertexBufferStateOGL(const GSVertexBufferStateOGL&) = delete;
public:
GSVertexBufferStateOGL(const std::vector<GSInputLayoutOGL>& layout)
: m_topology(0), m_layout(layout)
{
glGenVertexArrays(1, &m_va);
glBindVertexArray(m_va);
m_vb.reset(new GSBufferOGL<sizeof(GSVertexPT1)>(GL_ARRAY_BUFFER, 256 * 1024));
m_ib.reset(new GSBufferOGL<sizeof(uint32)>(GL_ELEMENT_ARRAY_BUFFER, 2 * 1024 * 1024));
m_vb->bind();
m_ib->bind();
set_internal_format();
}
void bind()
{
// Note: index array are part of the VA state so it need to be bound only once.
glBindVertexArray(m_va);
if (m_vb)
m_vb->bind();
}
void set_internal_format()
{
for (const auto& l : m_layout)
{
// Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer
glEnableVertexAttribArray(l.location);
switch (l.type)
{
case GL_UNSIGNED_SHORT:
case GL_UNSIGNED_INT:
if (l.normalize)
{
glVertexAttribPointer(l.location, l.size, l.type, l.normalize, l.stride, l.offset);
}
else
{
// Rule: when shader use integral (not normalized) you must use glVertexAttribIPointer (note the extra I)
glVertexAttribIPointer(l.location, l.size, l.type, l.stride, l.offset);
}
break;
default:
glVertexAttribPointer(l.location, l.size, l.type, l.normalize, l.stride, l.offset);
break;
}
}
}
void EndScene()
{
m_vb->EndScene();
m_ib->EndScene();
}
void DrawPrimitive() { m_vb->Draw(m_topology); }
void DrawPrimitive(int offset, int count) { m_vb->Draw(m_topology, offset, count); }
void DrawIndexedPrimitive() { m_ib->Draw(m_topology, m_vb->GetStart()); }
void DrawIndexedPrimitive(int offset, int count) { m_ib->Draw(m_topology, m_vb->GetStart(), offset, count); }
void SetTopology(GLenum topology) { m_topology = topology; }
void* MapVB(size_t count)
{
void* ptr;
while (true)
{
try
{
ptr = m_vb->map(count);
break;
}
catch (GSErrorGlVertexArrayTooSmall)
{
GL_INS("GL vertex buffer is too small");
m_vb.reset(new GSBufferOGL<sizeof(GSVertexPT1)>(GL_ARRAY_BUFFER, count));
set_internal_format();
}
}
return ptr;
}
void UnmapVB() { m_vb->unmap(); }
void UploadVB(const void* vertices, size_t count)
{
while (true)
{
try
{
m_vb->upload(vertices, count);
break;
}
catch (GSErrorGlVertexArrayTooSmall)
{
GL_INS("GL vertex buffer is too small");
m_vb.reset(new GSBufferOGL<sizeof(GSVertexPT1)>(GL_ARRAY_BUFFER, count));
set_internal_format();
}
}
}
void UploadIB(const void* index, size_t count)
{
while (true)
{
try
{
m_ib->upload(index, count);
break;
}
catch (GSErrorGlVertexArrayTooSmall)
{
GL_INS("GL index buffer is too small");
m_ib.reset(new GSBufferOGL<sizeof(uint32)>(GL_ELEMENT_ARRAY_BUFFER, count));
}
}
}
~GSVertexBufferStateOGL()
{
glDeleteVertexArrays(1, &m_va);
}
};

View File

@ -875,7 +875,6 @@
<ClInclude Include="GS\GSVector8i.h" />
<ClInclude Include="GS\GSVector8.h" />
<ClInclude Include="GS\Renderers\Common\GSVertex.h" />
<ClInclude Include="GS\Renderers\OpenGL\GSVertexArrayOGL.h" />
<ClInclude Include="GS\Renderers\HW\GSVertexHW.h" />
<ClInclude Include="GS\Renderers\Common\GSVertexList.h" />
<ClInclude Include="GS\Renderers\SW\GSVertexSW.h" />
@ -1171,4 +1170,4 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
</Project>

View File

@ -2616,9 +2616,6 @@
<ClInclude Include="GS\Renderers\OpenGL\GSTextureOGL.h">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\OpenGL\GSVertexArrayOGL.h">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\OpenGL\GLState.h">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClInclude>
@ -2914,4 +2911,4 @@
<Filter>AppHost\Resources</Filter>
</Manifest>
</ItemGroup>
</Project>
</Project>