From 91c99f02269dc74fa2326187043c5404b54ac4ea Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sun, 3 Nov 2019 13:15:17 +1000 Subject: [PATCH] GPU/HW: Use uniform blocks for batch rendering --- src/common/gl_stream_buffer.cpp | 2 +- src/common/gl_stream_buffer.h | 1 + src/core/gpu.cpp | 5 +++ src/core/gpu.h | 1 + src/core/gpu_commands.cpp | 1 + src/core/gpu_hw.cpp | 55 ++++++++++++++++++++++++------- src/core/gpu_hw.h | 24 ++++++++++---- src/core/gpu_hw_opengl.cpp | 58 +++++++++++++++++++-------------- src/core/gpu_hw_opengl.h | 15 ++++++--- 9 files changed, 114 insertions(+), 48 deletions(-) diff --git a/src/common/gl_stream_buffer.cpp b/src/common/gl_stream_buffer.cpp index 982500b0f..9e1c37736 100644 --- a/src/common/gl_stream_buffer.cpp +++ b/src/common/gl_stream_buffer.cpp @@ -24,7 +24,7 @@ void StreamBuffer::Unbind() StreamBuffer::MappingResult StreamBuffer::Map(u32 alignment, u32 min_size) { - return MappingResult{static_cast(m_cpu_buffer.data()), 0, m_size / alignment}; + return MappingResult{static_cast(m_cpu_buffer.data()), 0, 0, m_size / alignment}; } void StreamBuffer::Unmap(u32 used_size) diff --git a/src/common/gl_stream_buffer.h b/src/common/gl_stream_buffer.h index 9e3ea144c..5d6490f6c 100644 --- a/src/common/gl_stream_buffer.h +++ b/src/common/gl_stream_buffer.h @@ -22,6 +22,7 @@ public: struct MappingResult { void* pointer; + u32 buffer_offset; u32 index_aligned; // offset / alignment, suitable for base vertex u32 space_aligned; // remaining space / alignment }; diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 6d0216e54..1339ddc45 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -51,6 +51,8 @@ void GPU::SoftReset() m_render_state.texture_page_changed = true; UpdateGPUSTAT(); UpdateCRTCConfig(); + UpdateDrawingArea(); + UpdateDrawingOffset(); } bool GPU::DoState(StateWrapper& sw) @@ -115,6 +117,7 @@ bool GPU::DoState(StateWrapper& sw) m_render_state.texture_page_changed = true; m_render_state.texture_window_changed = true; 
UpdateDrawingArea(); + UpdateDrawingOffset(); UpdateGPUSTAT(); } @@ -665,6 +668,8 @@ void GPU::UpdateDisplay() {} void GPU::UpdateDrawingArea() {} +void GPU::UpdateDrawingOffset() {} + void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) {} void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) {} diff --git a/src/core/gpu.h b/src/core/gpu.h index 22314b22d..e17ac914d 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -283,6 +283,7 @@ protected: // Rendering in the backend virtual void UpdateDisplay(); virtual void UpdateDrawingArea(); + virtual void UpdateDrawingOffset(); virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer); virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color); virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data); diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index ee1219c1d..bdd77bd6a 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -151,6 +151,7 @@ bool GPU::HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_siz m_drawing_offset.x = x; m_drawing_offset.y = y; + UpdateDrawingOffset(); } return true; } diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 662145306..71f7bcd19 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -15,6 +15,8 @@ void GPU_HW::Reset() GPU::Reset(); m_batch = {}; + m_batch_ubo_data = {}; + m_batch_ubo_dirty = true; } bool GPU_HW::Initialize(System* system, DMA* dma, InterruptController* interrupt_controller, Timers* timers) @@ -38,6 +40,15 @@ void GPU_HW::UpdateSettings() m_true_color = m_system->GetSettings().gpu_true_color; } +void GPU_HW::UpdateDrawingOffset() +{ + GPU::UpdateDrawingOffset(); + + m_batch_ubo_data.u_pos_offset[0] = m_drawing_offset.x; + m_batch_ubo_data.u_pos_offset[1] = m_drawing_offset.y; + m_batch_ubo_dirty = true; +} + void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr) { const u32 texpage = @@ 
-216,11 +227,25 @@ vec4 RGBA5551ToRGBA8(uint v) )"; } +void GPU_HW::GenerateBatchUniformBuffer(std::stringstream& ss) +{ + ss << R"( +layout(std140) uniform UBOBlock { /* std140 fixes member offsets so they match HWBatchUBOData, which is memcpy'd into the uniform buffer */ + ivec2 u_pos_offset; + uvec2 u_texture_window_mask; + uvec2 u_texture_window_offset; + float u_src_alpha_factor; + float u_dst_alpha_factor; +}; +)"; +} + std::string GPU_HW::GenerateVertexShader(bool textured) { std::stringstream ss; GenerateShaderHeader(ss); DefineMacro(ss, "TEXTURED", textured); + GenerateBatchUniformBuffer(ss); ss << R"( in ivec2 a_pos; @@ -234,8 +259,6 @@ out vec3 v_col0; flat out ivec4 v_texpage; #endif -uniform ivec2 u_pos_offset; - void main() { // 0..+1023 -> -1..1 @@ -268,6 +291,7 @@ std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, Textu std::stringstream ss; GenerateShaderHeader(ss); + GenerateBatchUniformBuffer(ss); DefineMacro(ss, "TRANSPARENCY", transparency != HWBatchRenderMode::TransparencyDisabled); DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == HWBatchRenderMode::OnlyOpaque); DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == HWBatchRenderMode::OnlyTransparent); @@ -292,12 +316,10 @@ std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, Textu ss << R"( in vec3 v_col0; -uniform vec2 u_transparent_alpha; #if TEXTURED in vec2 v_tex0; flat in ivec4 v_texpage; uniform sampler2D samp0; - uniform uvec4 u_texture_window; #endif out vec4 o_col0; @@ -318,8 +340,8 @@ ivec3 TruncateTo15Bit(ivec3 icol) #if TEXTURED ivec2 ApplyNativeTextureWindow(ivec2 coords) { - uint x = (uint(coords.x) & ~(u_texture_window.x * 8u)) | ((u_texture_window.z & u_texture_window.x) * 8u); - uint y = (uint(coords.y) & ~(u_texture_window.y * 8u)) | ((u_texture_window.w & u_texture_window.y) * 8u); + uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u); + uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 
8u); return ivec2(int(x), int(y)); } @@ -419,7 +441,7 @@ void main() #if TRANSPARENCY_ONLY_OPAQUE discard; #endif - o_col0 = vec4(color * u_transparent_alpha.x, u_transparent_alpha.y); + o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor); } else { @@ -679,6 +701,15 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32 } } + // transparency mode change + if (m_batch.transparency_mode != transparency_mode && transparency_mode != TransparencyMode::Disabled) + { + static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; + m_batch_ubo_data.u_src_alpha_factor = transparent_alpha[static_cast(transparency_mode)][0]; + m_batch_ubo_data.u_dst_alpha_factor = transparent_alpha[static_cast(transparency_mode)][1]; + m_batch_ubo_dirty = true; + } + // map buffer if it's not already done if (!m_batch_current_vertex_ptr) MapBatchVertexPointer(max_added_vertices); @@ -691,11 +722,13 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32 if (m_render_state.IsTextureWindowChanged()) { - m_batch.texture_window_values[0] = m_render_state.texture_window_mask_x; - m_batch.texture_window_values[1] = m_render_state.texture_window_mask_y; - m_batch.texture_window_values[2] = m_render_state.texture_window_offset_x; - m_batch.texture_window_values[3] = m_render_state.texture_window_offset_y; m_render_state.ClearTextureWindowChangedFlag(); + + m_batch_ubo_data.u_texture_window_mask[0] = ZeroExtend32(m_render_state.texture_window_mask_x); + m_batch_ubo_data.u_texture_window_mask[1] = ZeroExtend32(m_render_state.texture_window_mask_y); + m_batch_ubo_data.u_texture_window_offset[0] = ZeroExtend32(m_render_state.texture_window_offset_x); + m_batch_ubo_data.u_texture_window_offset[1] = ZeroExtend32(m_render_state.texture_window_offset_y); + m_batch_ubo_dirty = true; } LoadVertices(rc, num_vertices, command_ptr); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 
0e739bed2..5af1a5242 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -60,7 +60,6 @@ protected: HWPrimitive primitive; TextureMode texture_mode; TransparencyMode transparency_mode; - std::array texture_window_values; bool dithering; // We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled @@ -79,14 +78,20 @@ protected: } }; + struct HWBatchUBOData + { + s32 u_pos_offset[2]; + u32 u_texture_window_mask[2]; + u32 u_texture_window_offset[2]; + float u_src_alpha_factor; + float u_dst_alpha_factor; + }; + static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32); static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024; static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6; static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex); - static constexpr u32 TEXTURE_TILE_SIZE = 256; - static constexpr u32 TEXTURE_TILE_X_COUNT = VRAM_WIDTH / TEXTURE_TILE_SIZE; - static constexpr u32 TEXTURE_TILE_Y_COUNT = VRAM_HEIGHT / TEXTURE_TILE_SIZE; - static constexpr u32 TEXTURE_TILE_COUNT = TEXTURE_TILE_X_COUNT * TEXTURE_TILE_Y_COUNT; + static constexpr u32 UNIFORM_BUFFER_SIZE = 512 * 1024; static constexpr std::tuple RGBA8ToFloat(u32 rgba) { @@ -96,6 +101,8 @@ protected: static_cast(rgba >> 24) * (1.0f / 255.0f)); } + virtual void UpdateDrawingOffset() override; + virtual void InvalidateVRAMReadCache() = 0; virtual void MapBatchVertexPointer(u32 required_vertices) = 0; @@ -121,8 +128,6 @@ protected: std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced); std::string GenerateVRAMWriteFragmentShader(); - HWBatchConfig m_batch = {}; - HWVertex* m_batch_start_vertex_ptr = nullptr; HWVertex* m_batch_end_vertex_ptr = nullptr; HWVertex* m_batch_current_vertex_ptr = nullptr; @@ -132,10 +137,15 @@ protected: u32 m_max_resolution_scale = 1; bool m_true_color = false; + HWBatchConfig m_batch = {}; + HWBatchUBOData m_batch_ubo_data = {}; + bool 
m_batch_ubo_dirty = true; + private: static HWPrimitive GetPrimitiveForCommand(RenderCommand rc); void GenerateShaderHeader(std::stringstream& ss); + void GenerateBatchUniformBuffer(std::stringstream& ss); void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr); void AddDuplicateVertex(); diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index d12a3009e..5548616d1 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -16,13 +16,14 @@ GPU_HW_OpenGL::~GPU_HW_OpenGL() bool GPU_HW_OpenGL::Initialize(System* system, DMA* dma, InterruptController* interrupt_controller, Timers* timers) { - SetMaxResolutionScale(); + SetCapabilities(); if (!GPU_HW::Initialize(system, dma, interrupt_controller, timers)) return false; CreateFramebuffer(); CreateVertexBuffer(); + CreateUniformBuffer(); CreateTextureBuffer(); if (!CompilePrograms()) return false; @@ -150,7 +151,7 @@ std::tuple GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y return std::make_tuple(x, static_cast(static_cast(VRAM_HEIGHT) - y)); } -void GPU_HW_OpenGL::SetMaxResolutionScale() +void GPU_HW_OpenGL::SetCapabilities() { GLint max_texture_size = VRAM_WIDTH; glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); @@ -163,6 +164,9 @@ void GPU_HW_OpenGL::SetMaxResolutionScale() m_max_resolution_scale = std::min(max_texture_scale, line_width_range[1]); Log_InfoPrintf("Maximum resolution scale is %u", m_max_resolution_scale); + + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast(&m_uniform_buffer_alignment)); + Log_InfoPrintf("Uniform buffer offset alignment: %u", m_uniform_buffer_alignment); } void GPU_HW_OpenGL::CreateFramebuffer() @@ -252,6 +256,13 @@ void GPU_HW_OpenGL::CreateVertexBuffer() glGenVertexArrays(1, &m_attributeless_vao_id); } +void GPU_HW_OpenGL::CreateUniformBuffer() +{ + m_uniform_stream_buffer = GL::StreamBuffer::Create(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE); + if (!m_uniform_stream_buffer) + Panic("Failed to 
create uniform buffer"); +} + void GPU_HW_OpenGL::CreateTextureBuffer() { // const GLenum target = GL_PIXEL_UNPACK_BUFFER; @@ -346,17 +357,13 @@ bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, HWBatchRenderMode render_m if (!prog.Link()) return false; - prog.Bind(); - prog.RegisterUniform("u_pos_offset"); - prog.RegisterUniform("u_transparent_alpha"); - prog.Uniform2i(0, 0, 0); - prog.Uniform2f(1, 1.0f, 0.0f); + prog.BindUniformBlock("UBOBlock", 1); if (textured) { - prog.RegisterUniform("u_texture_window"); + prog.Bind(); prog.RegisterUniform("samp0"); - prog.Uniform1i(3, 0); + prog.Uniform1i(0, 0); } return true; @@ -368,24 +375,8 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode) [BoolToUInt8(m_batch.dithering)]; prog.Bind(); - prog.Uniform2i(0, m_drawing_offset.x, m_drawing_offset.y); - if (m_batch.transparency_mode != TransparencyMode::Disabled) - { - static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; - prog.Uniform2fv(1, transparent_alpha[static_cast(m_batch.transparency_mode)]); - } - else - { - static constexpr float disabled_alpha[2] = {1.0f, 0.0f}; - prog.Uniform2fv(1, disabled_alpha); - } - if (m_batch.texture_mode != TextureMode::Disabled) - { - prog.Uniform4ui(2, m_batch.texture_window_values[0], m_batch.texture_window_values[1], - m_batch.texture_window_values[2], m_batch.texture_window_values[3]); m_vram_read_texture->Bind(); - } if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == HWBatchRenderMode::OnlyOpaque) { @@ -415,6 +406,23 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode) Log_DebugPrintf("SetScissor: (%d-%d, %d-%d)", x, x + width, y, y + height); glScissor(x, y, width, height); } + + if (m_batch_ubo_dirty) + { + UploadUniformBlock(&m_batch_ubo_data, sizeof(m_batch_ubo_data)); + m_batch_ubo_dirty = false; + } +} + +void GPU_HW_OpenGL::UploadUniformBlock(const void* data, u32 data_size) +{ + const 
GL::StreamBuffer::MappingResult res = m_uniform_stream_buffer->Map(m_uniform_buffer_alignment, data_size); + std::memcpy(res.pointer, data, data_size); + m_uniform_stream_buffer->Unmap(data_size); + + glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_stream_buffer->GetGLBufferId(), res.buffer_offset, data_size); + + m_stats.num_uniform_buffer_updates++; } void GPU_HW_OpenGL::UpdateDrawingArea() diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index e4431886c..b9cac7ac8 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -42,22 +42,25 @@ private: u32 num_vram_reads; u32 num_vram_writes; u32 num_vram_read_texture_updates; + u32 num_uniform_buffer_updates; }; std::tuple ConvertToFramebufferCoordinates(s32 x, s32 y); - void SetMaxResolutionScale(); + void SetCapabilities(); void CreateFramebuffer(); void ClearFramebuffer(); void DestroyFramebuffer(); void UpdateVRAMReadTexture(); void CreateVertexBuffer(); + void CreateUniformBuffer(); void CreateTextureBuffer(); bool CompilePrograms(); bool CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, bool dithering); void SetDrawState(HWBatchRenderMode render_mode); + void UploadUniformBlock(const void* data, u32 data_size); // downsample texture - used for readbacks at >1xIR. 
std::unique_ptr m_vram_texture; @@ -69,12 +72,12 @@ private: GLuint m_vao_id = 0; GLuint m_attributeless_vao_id = 0; + std::unique_ptr m_uniform_stream_buffer; + std::unique_ptr m_texture_stream_buffer; GLuint m_texture_buffer_r16ui_texture = 0; - bool m_vram_read_texture_dirty = true; - bool m_drawing_area_changed = true; - bool m_show_renderer_statistics = false; + u32 m_uniform_buffer_alignment = 1; std::array, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering] std::array, 2> m_display_programs; // [depth_24][interlaced] @@ -82,4 +85,8 @@ private: GLStats m_stats = {}; GLStats m_last_stats = {}; + + bool m_vram_read_texture_dirty = true; + bool m_drawing_area_changed = true; + bool m_show_renderer_statistics = false; };