GPU/HW/OpenGL: Use geometry shaders for upscaled line rendering

This commit is contained in:
Connor McLaughlin 2020-04-16 21:29:11 +10:00
parent 02b1e084c1
commit 197b193ca3
4 changed files with 157 additions and 47 deletions

View File

@ -91,7 +91,8 @@ void GPU_HW_OpenGL::ResetGraphicsAPIState()
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glDepthMask(GL_TRUE);
glLineWidth(1.0f);
if (m_resolution_scale > 1 && !m_supports_geometry_shaders)
glLineWidth(1.0f);
glBindVertexArray(0);
}
@ -104,7 +105,8 @@ void GPU_HW_OpenGL::RestoreGraphicsAPIState()
glDisable(GL_DEPTH_TEST);
glEnable(GL_SCISSOR_TEST);
glDepthMask(GL_FALSE);
glLineWidth(static_cast<float>(m_resolution_scale));
if (m_resolution_scale > 1 && !m_supports_geometry_shaders)
glLineWidth(static_cast<float>(m_resolution_scale));
glBindVertexArray(m_vao_id);
SetScissorFromDrawingArea();
@ -148,13 +150,7 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display)
GLint max_texture_size = VRAM_WIDTH;
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size);
Log_InfoPrintf("Max texture size: %dx%d", max_texture_size, max_texture_size);
const int max_texture_scale = max_texture_size / VRAM_WIDTH;
std::array<int, 2> line_width_range = {{1, 1}};
glGetIntegerv(GL_ALIASED_LINE_WIDTH_RANGE, line_width_range.data());
Log_InfoPrintf("Max line width: %d", line_width_range[1]);
m_max_resolution_scale = std::min(max_texture_scale, line_width_range[1]);
m_max_resolution_scale = static_cast<u32>(max_texture_size / VRAM_WIDTH);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast<GLint*>(&m_uniform_buffer_alignment));
Log_InfoPrintf("Uniform buffer offset alignment: %u", m_uniform_buffer_alignment);
@ -180,6 +176,19 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display)
m_supports_dual_source_blend = (max_dual_source_draw_buffers > 0);
if (!m_supports_dual_source_blend)
Log_WarningPrintf("Dual-source blending is not supported, this may break some mask effects.");
m_supports_geometry_shaders = GLAD_GL_VERSION_3_2 || GLAD_GL_ARB_geometry_shader4 || GLAD_GL_ES_VERSION_3_2;
if (!m_supports_geometry_shaders)
{
Log_WarningPrintf("Geometry shaders are not supported, line rendering at higher resolutions may be incorrect. We "
"will try to use glLineWidth() to emulate this, but the accuracy depends on your driver.");
std::array<int, 2> line_width_range = {{1, 1}};
glGetIntegerv(GL_ALIASED_LINE_WIDTH_RANGE, line_width_range.data());
Log_InfoPrintf("Max line width: %d", line_width_range[1]);
m_max_resolution_scale = std::min<int>(m_max_resolution_scale, line_width_range[1]);
}
}
bool GPU_HW_OpenGL::CreateFramebuffer()
@ -305,12 +314,12 @@ bool GPU_HW_OpenGL::CompilePrograms()
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled);
const std::string vs = shadergen.GenerateBatchVertexShader(textured);
const std::string batch_vs = shadergen.GenerateBatchVertexShader(textured);
const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));
std::optional<GL::Program> prog = m_shader_cache.GetProgram(vs, {}, fs, [this, textured](GL::Program& prog) {
const auto link_callback = [this, textured](GL::Program& prog) {
prog.BindAttribute(0, "a_pos");
prog.BindAttribute(1, "a_col0");
if (textured)
@ -321,7 +330,9 @@ bool GPU_HW_OpenGL::CompilePrograms()
if (!m_is_gles)
prog.BindFragData(0, "o_col0");
});
};
std::optional<GL::Program> prog = m_shader_cache.GetProgram(batch_vs, {}, fs, link_callback);
if (!prog)
return false;
@ -333,6 +344,18 @@ bool GPU_HW_OpenGL::CompilePrograms()
}
m_render_programs[render_mode][texture_mode][dithering][interlacing] = std::move(*prog);
if (!textured && m_supports_geometry_shaders)
{
const std::string line_expand_gs = shadergen.GenerateBatchLineExpandGeometryShader();
prog = m_shader_cache.GetProgram(batch_vs, line_expand_gs, fs, link_callback);
if (!prog)
return false;
prog->BindUniformBlock("UBOBlock", 1);
m_line_render_programs[render_mode][dithering][interlacing] = std::move(*prog);
}
}
}
}
@ -418,8 +441,12 @@ bool GPU_HW_OpenGL::CompilePrograms()
void GPU_HW_OpenGL::SetDrawState(BatchRenderMode render_mode)
{
const GL::Program& prog = m_render_programs[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
[BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)];
const GL::Program& prog =
((m_batch.primitive < BatchPrimitive::Triangles && m_supports_geometry_shaders && m_resolution_scale > 1) ?
m_line_render_programs[static_cast<u8>(render_mode)][BoolToUInt8(m_batch.dithering)]
[BoolToUInt8(m_batch.interlacing)] :
m_render_programs[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
[BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]);
prog.Bind();
if (m_batch.texture_mode != TextureMode::Disabled)

View File

@ -77,7 +77,9 @@ private:
GLuint m_texture_buffer_r16ui_texture = 0;
std::array<std::array<std::array<std::array<GL::Program, 2>, 2>, 9>, 4>
m_render_programs; // [render_mode][texture_mode][dithering][interlacing]
m_render_programs; // [render_mode][texture_mode][dithering][interlacing]
std::array<std::array<std::array<GL::Program, 2>, 2>, 4>
m_line_render_programs; // [render_mode][dithering][interlacing]
std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
GL::Program m_vram_interlaced_fill_program;
GL::Program m_vram_read_program;
@ -88,4 +90,5 @@ private:
bool m_is_gles = false;
bool m_supports_texture_buffer = false;
bool m_supports_geometry_shaders = false;
};

View File

@ -9,10 +9,15 @@ GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolu
bool scaled_dithering, bool texture_filtering, bool supports_dual_source_blend)
: m_render_api(render_api), m_resolution_scale(resolution_scale), m_true_color(true_color),
m_scaled_dithering(scaled_dithering), m_texture_filering(texture_filtering),
m_glsl(render_api != HostDisplay::RenderAPI::D3D11), m_supports_dual_source_blend(supports_dual_source_blend)
m_glsl(render_api != HostDisplay::RenderAPI::D3D11), m_supports_dual_source_blend(supports_dual_source_blend),
m_use_glsl_interface_blocks(false)
{
if (m_glsl)
{
SetGLSLVersionString();
m_use_glsl_interface_blocks = (GLAD_GL_ES_VERSION_3_2 || GLAD_GL_VERSION_3_2);
}
}
GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default;
@ -216,25 +221,40 @@ void GPU_HW_ShaderGen::DeclareTextureBuffer(std::stringstream& ss, const char* n
}
}
void GPU_HW_ShaderGen::DeclareVertexEntryPoint(std::stringstream& ss,
const std::initializer_list<const char*>& attributes,
u32 num_color_outputs, u32 num_texcoord_outputs,
const std::initializer_list<const char*>& additional_outputs,
bool declare_vertex_id)
void GPU_HW_ShaderGen::DeclareVertexEntryPoint(
std::stringstream& ss, const std::initializer_list<const char*>& attributes, u32 num_color_outputs,
u32 num_texcoord_outputs, const std::initializer_list<std::pair<const char*, const char*>>& additional_outputs,
bool declare_vertex_id)
{
if (m_glsl)
{
for (const char* attribute : attributes)
ss << "in " << attribute << ";\n";
for (u32 i = 0; i < num_color_outputs; i++)
ss << "out float4 v_col" << i << ";\n";
if (m_use_glsl_interface_blocks)
{
ss << "out VertexData {\n";
for (u32 i = 0; i < num_color_outputs; i++)
ss << " float4 v_col" << i << ";\n";
for (u32 i = 0; i < num_texcoord_outputs; i++)
ss << "out float2 v_tex" << i << ";\n";
for (u32 i = 0; i < num_texcoord_outputs; i++)
ss << " float2 v_tex" << i << ";\n";
for (const char* output : additional_outputs)
ss << output << ";\n";
for (const auto [qualifiers, name] : additional_outputs)
ss << " " << qualifiers << " " << name << ";\n";
ss << "};\n";
}
else
{
for (u32 i = 0; i < num_color_outputs; i++)
ss << "out float4 v_col" << i << ";\n";
for (u32 i = 0; i < num_texcoord_outputs; i++)
ss << "out float2 v_tex" << i << ";\n";
for (const auto [qualifiers, name] : additional_outputs)
ss << qualifiers << " out " << name << ";\n";
}
ss << "#define v_pos gl_Position\n\n";
if (declare_vertex_id)
@ -264,9 +284,9 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint(std::stringstream& ss,
ss << " out float2 v_tex" << i << " : TEXCOORD" << i << ",\n";
u32 additional_counter = num_texcoord_outputs;
for (const char* output : additional_outputs)
for (const auto [qualifiers, name] : additional_outputs)
{
ss << " " << output << " : TEXCOORD" << additional_counter << ",\n";
ss << " " << qualifiers << " out " << name << " : TEXCOORD" << additional_counter << ",\n";
additional_counter++;
}
@ -274,20 +294,37 @@ void GPU_HW_ShaderGen::DeclareVertexEntryPoint(std::stringstream& ss,
}
}
void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs,
const std::initializer_list<const char*>& additional_inputs,
bool declare_fragcoord, bool dual_color_output)
void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(
std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs,
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
bool declare_fragcoord /* = false */, bool dual_color_output /* = false */)
{
if (m_glsl)
{
for (u32 i = 0; i < num_color_inputs; i++)
ss << "in float4 v_col" << i << ";\n";
if (m_use_glsl_interface_blocks)
{
ss << "in VertexData {\n";
for (u32 i = 0; i < num_color_inputs; i++)
ss << " float4 v_col" << i << ";\n";
for (u32 i = 0; i < num_texcoord_inputs; i++)
ss << "in float2 v_tex" << i << ";\n";
for (u32 i = 0; i < num_texcoord_inputs; i++)
ss << " float2 v_tex" << i << ";\n";
for (const char* input : additional_inputs)
ss << input << ";\n";
for (const auto [qualifiers, name] : additional_inputs)
ss << " " << qualifiers << " " << name << ";\n";
ss << "};\n";
}
else
{
for (u32 i = 0; i < num_color_inputs; i++)
ss << "in float4 v_col" << i << ";\n";
for (u32 i = 0; i < num_texcoord_inputs; i++)
ss << "in float2 v_tex" << i << ";\n";
for (const auto [qualifiers, name] : additional_inputs)
ss << qualifiers << " in " << name << ";\n";
}
if (declare_fragcoord)
ss << "#define v_pos gl_FragCoord\n";
@ -312,9 +349,9 @@ void GPU_HW_ShaderGen::DeclareFragmentEntryPoint(std::stringstream& ss, u32 num_
ss << " in float2 v_tex" << i << " : TEXCOORD" << i << ",\n";
u32 additional_counter = num_texcoord_inputs;
for (const char* output : additional_inputs)
for (const auto [qualifiers, name] : additional_inputs)
{
ss << " " << output << " : TEXCOORD" << additional_counter << ",\n";
ss << " " << qualifiers << " in " << name << " : TEXCOORD" << additional_counter << ",\n";
additional_counter++;
}
@ -353,7 +390,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
if (textured)
{
DeclareVertexEntryPoint(ss, {"int2 a_pos", "float4 a_col0", "int a_texcoord", "int a_texpage"}, 1, 1,
{"nointerpolation out int4 v_texpage"});
{{"nointerpolation", "int4 v_texpage"}});
}
else
{
@ -498,7 +535,7 @@ float4 SampleFromVRAM(int4 texpage, int2 icoord)
if (textured)
{
DeclareFragmentEntryPoint(ss, 1, 1, {"nointerpolation in int4 v_texpage"}, true, use_dual_source);
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "int4 v_texpage"}}, true, use_dual_source);
}
else
{
@ -638,15 +675,57 @@ std::string GPU_HW_ShaderGen::GenerateBatchLineExpandGeometryShader()
WriteHeader(ss);
WriteCommonFunctions(ss);
ss << R"(
CONSTANT float2 WIDTH = (1.0 / float2(VRAM_SIZE)) * float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
)";
// GS is a pain, too different between HLSL and GLSL...
if (m_glsl)
{
ss << R"(
in VertexData {
float4 v_col0;
} in_data[];
out VertexData {
float4 v_col0;
} out_data;
layout(lines) in;
layout(triangle_strip, max_vertices = 4) out;
void main() {
float2 dir = normalize(gl_in[1].gl_Position.xy - gl_in[0].gl_Position.xy);
float2 normal = cross(float3(dir, 0.0), float3(0.0, 0.0, 1.0)).xy * WIDTH;
float4 offset = float4(normal, 0.0, 0.0);
// top-left
out_data.v_col0 = in_data[0].v_col0;
gl_Position = gl_in[0].gl_Position - offset;
EmitVertex();
// top-right
out_data.v_col0 = in_data[0].v_col0;
gl_Position = gl_in[0].gl_Position + offset;
EmitVertex();
// bottom-left
out_data.v_col0 = in_data[1].v_col0;
gl_Position = gl_in[1].gl_Position - offset;
EmitVertex();
// bottom-right
out_data.v_col0 = in_data[1].v_col0;
gl_Position = gl_in[1].gl_Position + offset;
EmitVertex();
EndPrimitive();
}
)";
}
else
{
ss << R"(
CONSTANT float2 WIDTH = (1.0 / float2(VRAM_SIZE)) * float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
struct Vertex
{
float4 col0 : COLOR0;

View File

@ -8,7 +8,7 @@ class GPU_HW_ShaderGen
{
public:
GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color, bool scaled_dithering,
bool texture_filtering, bool supports_dual_source_belnd);
bool texture_filtering, bool supports_dual_source_blend);
~GPU_HW_ShaderGen();
std::string GenerateBatchVertexShader(bool textured);
@ -30,6 +30,7 @@ public:
bool m_texture_filering;
bool m_glsl;
bool m_supports_dual_source_blend;
bool m_use_glsl_interface_blocks;
std::string m_glsl_version_string;
@ -41,10 +42,10 @@ private:
void DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned);
void DeclareVertexEntryPoint(std::stringstream& ss, const std::initializer_list<const char*>& attributes,
u32 num_color_outputs, u32 num_texcoord_outputs,
const std::initializer_list<const char*>& additional_outputs,
const std::initializer_list<std::pair<const char*, const char*>>& additional_outputs,
bool declare_vertex_id = false);
void DeclareFragmentEntryPoint(std::stringstream& ss, u32 num_color_inputs, u32 num_texcoord_inputs,
const std::initializer_list<const char*>& additional_inputs,
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
bool declare_fragcoord = false, bool dual_color_output = false);
void WriteCommonFunctions(std::stringstream& ss);