From 41434693538f8de1cdb88c880cc1a7a384e82283 Mon Sep 17 00:00:00 2001
From: Connor McLaughlin
Date: Sat, 2 Nov 2019 23:43:20 +1000
Subject: [PATCH] GPU: Use texture buffer/fragment shader for VRAM writes

---
Review notes (free text ahead of the diffstat; not part of the commit):

Changes relative to the patch as received:
  * The VRAM write shader takes a new u_buffer_base_offset uniform and adds
    it to the texelFetch index; UpdateVRAM() passes map_result.index_aligned
    for it. The texture buffer is attached to the whole stream buffer, and
    the previous glTexSubImage2D path had to offset by index_aligned too, so
    a shader that always reads from element 0 fetches the wrong texels
    whenever a mapping does not begin at the start of the buffer.
  * The new uniform is registered after samp0, so the existing uniform
    indices 0..2 keep their meaning.
  * Hunk sizes, new-side start lines and the diffstat totals are recomputed
    for the added lines. The "index" blob ids and the diffstat bars are the
    ones from the original commit and are now stale.

NOTE(review): this copy arrived with line breaks and indentation collapsed.
The layout below (2-space indent, blank context lines) is reconstructed from
the hunk line counts - verify against the tree before applying.
NOTE(review): the "viewport should be set..." comment in UpdateVRAM() had
lost its "+" marker; the hunk counts only add up if it is an added line.
NOTE(review): the context lines containing "reinterpret_cast(",
"std::unique_ptr m_texture_stream_buffer" and "std::array, ..." look like
they lost their template argument lists; they are reproduced as received.

 src/core/gpu_hw.cpp        | 45 ++++++++++++++++++++++++++++----
 src/core/gpu_hw.h          |  1 +
 src/core/gpu_hw_opengl.cpp | 55 +++++++++++++++++++++++++++++++++++---
 src/core/gpu_hw_opengl.h   |  2 ++
 4 files changed, 95 insertions(+), 8 deletions(-)

diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index 3c48e4eb9..e983bbace 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -201,12 +201,17 @@ uint RGBA8ToRGBA5551(vec4 v)
 
 vec4 RGBA5551ToRGBA8(uint v)
 {
-  uint r = (v & 0x1Fu);
-  uint g = ((v >> 5) & 0x1Fu);
-  uint b = ((v >> 10) & 0x1Fu);
-  uint a = ((v >> 15) & 0x01u);
+  uint r = (v & 31u);
+  uint g = ((v >> 5) & 31u);
+  uint b = ((v >> 10) & 31u);
+  uint a = ((v >> 15) & 1u);
 
-  return vec4(float(r) * 255.0, float(g) * 255.0, float(b) * 255.0, float(a) * 255.0);
+  // repeat lower bits
+  r = (r << 3) | (r & 7u);
+  g = (g << 3) | (g & 7u);
+  b = (b << 3) | (b & 7u);
+
+  return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a));
 }
 )";
 }
@@ -547,6 +552,36 @@ void main()
   return ss.str();
 }
 
+std::string GPU_HW::GenerateVRAMWriteFragmentShader()
+{
+  std::stringstream ss;
+  GenerateShaderHeader(ss);
+
+  ss << R"(
+
+uniform ivec2 u_base_coords;
+uniform ivec2 u_size;
+uniform int u_buffer_base_offset;
+uniform usamplerBuffer samp0;
+
+out vec4 o_col0;
+
+void main()
+{
+  ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE);
+  ivec2 offset = coords - u_base_coords;
+  offset.y = u_size.y - offset.y - 1;
+
+  // texels for this upload start u_buffer_base_offset elements into the stream buffer
+  int buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
+  uint value = texelFetch(samp0, buffer_offset).r;
+
+  o_col0 = RGBA5551ToRGBA8(value);
+})";
+
+  return ss.str();
+}
+
 GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
 {
   if (rc.primitive == Primitive::Line)
diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h
index 235074464..1a421bf64 100644
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@@ -123,6 +123,7 @@ protected:
   std::string GenerateScreenQuadVertexShader();
   std::string GenerateFillFragmentShader();
   std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
+  std::string GenerateVRAMWriteFragmentShader();
 
   HWBatchConfig m_batch = {};
 
diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp
index fac84ff4e..8eabc7172 100644
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -254,9 +254,17 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
 
 void GPU_HW_OpenGL::CreateTextureBuffer()
 {
-  m_texture_stream_buffer = GL::StreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, VRAM_UPDATE_TEXTURE_BUFFER_SIZE);
+  // const GLenum target = GL_PIXEL_UNPACK_BUFFER;
+  const GLenum target = GL_TEXTURE_BUFFER;
+  m_texture_stream_buffer = GL::StreamBuffer::Create(target, VRAM_UPDATE_TEXTURE_BUFFER_SIZE);
   if (!m_texture_stream_buffer)
     Panic("Failed to create texture stream buffer");
+
+  glGenTextures(1, &m_texture_buffer_r16ui_texture);
+  glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
+  glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, m_texture_stream_buffer->GetGLBufferId());
+
+  m_texture_stream_buffer->Unbind();
 }
 
 bool GPU_HW_OpenGL::CompilePrograms()
@@ -300,6 +308,21 @@ bool GPU_HW_OpenGL::CompilePrograms()
     }
   }
 
+  if (!m_vram_write_program.Compile(GenerateScreenQuadVertexShader(), GenerateVRAMWriteFragmentShader()))
+    return false;
+
+  m_vram_write_program.BindFragData(0, "o_col0");
+  if (!m_vram_write_program.Link())
+    return false;
+
+  // UpdateVRAM() addresses these uniforms by index, assumed to follow registration order
+  m_vram_write_program.Bind();
+  m_vram_write_program.RegisterUniform("u_base_coords");
+  m_vram_write_program.RegisterUniform("u_size");
+  m_vram_write_program.RegisterUniform("samp0");
+  m_vram_write_program.RegisterUniform("u_buffer_base_offset");
+  m_vram_write_program.Uniform1i(2, 0);
+
   return true;
 }
 
@@ -559,7 +582,6 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color)
   width *= m_resolution_scale;
   height *= m_resolution_scale;
 
-  glEnable(GL_SCISSOR_TEST);
   glScissor(x, m_vram_texture->GetHeight() - y - height, width, height);
 
   const auto [r, g, b, a] = RGBA8ToFloat(RGBA5551ToRGBA8888(color));
@@ -573,6 +595,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color)
 void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
 {
   const u32 num_pixels = width * height;
+#if 0
   const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
 
   // reverse copy the rows so it matches opengl's lower-left origin
@@ -596,6 +619,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
   }
 
   m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32));
+  m_texture_stream_buffer->Bind();
 
   // have to write to the 1x texture first
   if (m_resolution_scale > 1)
@@ -609,7 +633,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
   // update texture data
   glTexSubImage2D(GL_TEXTURE_2D, 0, x, flipped_y, width, height, GL_RGBA, GL_UNSIGNED_BYTE,
                   reinterpret_cast(map_result.index_aligned * sizeof(u32)));
-  InvalidateVRAMReadCache();
+  m_texture_stream_buffer->Unbind();
 
   if (m_resolution_scale > 1)
   {
@@ -625,7 +649,32 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
                       scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
     glEnable(GL_SCISSOR_TEST);
   }
+#else
+  const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
+  std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
+  m_texture_stream_buffer->Unmap(num_pixels * sizeof(u16));
 
+  // viewport should be set to the whole VRAM size, so we can just set the scissor
+  const u32 flipped_y = VRAM_HEIGHT - y - height;
+  const u32 scaled_width = width * m_resolution_scale;
+  const u32 scaled_height = height * m_resolution_scale;
+  const u32 scaled_x = x * m_resolution_scale;
+  const u32 scaled_y = y * m_resolution_scale;
+  const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height;
+  glScissor(scaled_x, scaled_flipped_y, scaled_width, scaled_height);
+
+  m_vram_write_program.Bind();
+  glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
+  m_vram_write_program.Uniform2i(0, x, flipped_y);
+  m_vram_write_program.Uniform2i(1, width, height);
+  // the mapping is not guaranteed to start at element 0 of the stream buffer
+  m_vram_write_program.Uniform1i(3, map_result.index_aligned);
+  glDrawArrays(GL_TRIANGLES, 0, 3);
+
+  UpdateDrawingArea();
+#endif
+
+  InvalidateVRAMReadCache();
   m_stats.num_vram_writes++;
 }
 
diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h
index 8ba2eac34..ca392faf7 100644
--- a/src/core/gpu_hw_opengl.h
+++ b/src/core/gpu_hw_opengl.h
@@ -70,6 +70,7 @@ private:
   GLuint m_attributeless_vao_id = 0;
 
   std::unique_ptr m_texture_stream_buffer;
+  GLuint m_texture_buffer_r16ui_texture = 0;
 
   bool m_vram_read_texture_dirty = true;
   bool m_drawing_area_changed = true;
@@ -77,6 +78,7 @@ private:
 
   std::array, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering]
   std::array, 2> m_display_programs; // [depth_24][interlaced]
+  GL::Program m_vram_write_program;
 
   GLStats m_stats = {};
   GLStats m_last_stats = {};