flycast/core/rend/gl4/abuffer.cpp

578 lines
16 KiB
C++

/*
Copyright 2018 flyinghead
This file is part of Flycast.
Flycast is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
Flycast is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#include "gl4.h"
#include "rend/gles/glcache.h"
// File-local state of the A-buffer (per-pixel linked lists of translucent fragments).

// Shader storage buffer allocated by makePixelBuffer() and bound to SSBO binding point 0.
static GLuint pixels_buffer;
// R32UI texture created in initABuffer() and bound as image unit 4
// (presumably the abufferPointerImg the shaders below access -- the declaration is not in this file).
static GLuint pixels_pointers;
// 4-byte atomic counter buffer bound to atomic counter binding point 0; zeroed by checkOverflowAndReset().
static GLuint atomic_buffer;
// Program that sorts and blends the stored fragments into the output (see final_shader_source).
static gl4PipelineShader g_abuffer_final_shader;
// Program that resets every per-pixel list head (see clear_shader_source).
static gl4PipelineShader g_abuffer_clear_shader;
// One translucent modifier volume program per mode, indexed by the MV_MODE value it was compiled with.
static gl4PipelineShader g_abuffer_tr_modvol_shaders[ModeCount];
// Value of config::PerPixelLayers the final/modvol programs were last built for.
static int maxLayers;
// Value of config::PixelBufferSize the pixel buffer was last allocated for.
static int64_t pixelBufferSize;
// Vertex and index buffers of the full-screen quad, plus the vertex array object that references them.
static std::unique_ptr<GlBuffer> g_quadBuffer;
static std::unique_ptr<GlBuffer> g_quadIndexBuffer;
static GLuint g_quadVertexArray;
// Fragment shader of the final resolve pass.
//
// It is compiled after ShaderHeader with MAX_PIXELS_PER_FRAGMENT defined (see
// compileFinalAndModVolShaders()); Pixel, PolyParam, EOL, the blend constants and the
// get*/is*/unpackColors helpers are not defined here and are expected to come from that header.
//
// For the current fragment it:
//  - walks the per-pixel linked list starting at abufferPointerImg and insertion-sorts up to
//    MAX_PIXELS_PER_FRAGMENT entries by ascending depth, ties broken by ascending poly index,
//  - blends them in that order, starting from the color sampled from `tex`, honoring each
//    polygon's source/destination select (primary color vs. secondary accumulation buffer),
//    blend functions and shadow state,
//  - resets the list head to EOL.
//
// The "other color" source coefficient is taken from the selected destination (dstColor) so that
// OTHER_COLOR and INVERSE_OTHER_COLOR agree when the destination is the secondary buffer.
static const char *final_shader_source = R"(
layout(binding = 0) uniform sampler2D tex;
uniform float shade_scale_factor;
out vec4 FragColor;
uint pixel_list[MAX_PIXELS_PER_FRAGMENT];
int fillAndSortFragmentArray(ivec2 coords)
{
uint idx = imageLoad(abufferPointerImg, coords).x;
if (idx == EOL)
return 0;
int count = 1;
pixel_list[0] = idx;
idx = pixels[idx].next;
for (; idx != EOL && count < MAX_PIXELS_PER_FRAGMENT; count++)
{
const Pixel p = pixels[idx];
int j = count - 1;
Pixel jp = pixels[pixel_list[j]];
while (j >= 0
&& (jp.depth > p.depth
|| (jp.depth == p.depth && getPolyIndex(jp) > getPolyIndex(p))))
{
pixel_list[j + 1] = pixel_list[j];
j--;
if (j >= 0)
jp = pixels[pixel_list[j]];
}
pixel_list[j + 1] = idx;
idx = p.next;
}
return count;
}
// Blend fragments back-to-front
vec4 resolveAlphaBlend(ivec2 coords) {
// Copy and sort fragments into a local array
int num_frag = fillAndSortFragmentArray(coords);
vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0));
vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer
for (int i = 0; i < num_frag; i++)
{
const Pixel pixel = pixels[pixel_list[i]];
const PolyParam pp = tr_poly_params[getPolyNumber(pixel)];
bool area1 = false;
bool shadowed = false;
if (isShadowed(pixel))
{
if (isTwoVolumes(pp))
area1 = true;
else
shadowed = true;
}
vec4 srcColor;
if (getSrcSelect(pp, area1))
srcColor = secondaryBuffer;
else
{
srcColor = unpackColors(pixel.color);
if (shadowed)
srcColor.rgb *= shade_scale_factor;
}
vec4 dstColor = getDstSelect(pp, area1) ? secondaryBuffer : finalColor;
vec4 srcCoef;
vec4 dstCoef;
int srcBlend = getSrcBlendFunc(pp, area1);
switch (srcBlend)
{
case ZERO:
srcCoef = vec4(0.0);
break;
case ONE:
srcCoef = vec4(1.0);
break;
// For the source coefficient the "other" color is the selected destination
case OTHER_COLOR:
srcCoef = dstColor;
break;
case INVERSE_OTHER_COLOR:
srcCoef = vec4(1.0) - dstColor;
break;
case SRC_ALPHA:
srcCoef = vec4(srcColor.a);
break;
case INVERSE_SRC_ALPHA:
srcCoef = vec4(1.0 - srcColor.a);
break;
case DST_ALPHA:
srcCoef = vec4(dstColor.a);
break;
case INVERSE_DST_ALPHA:
srcCoef = vec4(1.0 - dstColor.a);
break;
}
int dstBlend = getDstBlendFunc(pp, area1);
switch (dstBlend)
{
case ZERO:
dstCoef = vec4(0.0);
break;
case ONE:
dstCoef = vec4(1.0);
break;
case OTHER_COLOR:
dstCoef = srcColor;
break;
case INVERSE_OTHER_COLOR:
dstCoef = vec4(1.0) - srcColor;
break;
case SRC_ALPHA:
dstCoef = vec4(srcColor.a);
break;
case INVERSE_SRC_ALPHA:
dstCoef = vec4(1.0 - srcColor.a);
break;
case DST_ALPHA:
dstCoef = vec4(dstColor.a);
break;
case INVERSE_DST_ALPHA:
dstCoef = vec4(1.0 - dstColor.a);
break;
}
const vec4 result = clamp(dstColor * dstCoef + srcColor * srcCoef, 0.0, 1.0);
if (getDstSelect(pp, area1))
secondaryBuffer = result;
else
finalColor = result;
}
return finalColor;
}
void main(void)
{
ivec2 coords = ivec2(gl_FragCoord.xy);
// Compute and output final color for the frame buffer
// Visualize the number of layers in use
//FragColor = vec4(float(fillAndSortFragmentArray(coords)) / MAX_PIXELS_PER_FRAGMENT * 4, 0, 0, 1);
FragColor = resolveAlphaBlend(coords);
// Reset pointers
imageStore(abufferPointerImg, coords, uvec4(EOL));
}
)";
// Fragment shader of the clear pass: stores EOL into abufferPointerImg at the fragment's
// coordinates and then discards the fragment. It is compiled after ShaderHeader
// (see initABuffer()), which is expected to declare abufferPointerImg and EOL.
static const char *clear_shader_source = R"(
void main(void)
{
ivec2 coords = ivec2(gl_FragCoord.xy);
// Reset pointers
imageStore(abufferPointerImg, coords, uvec4(EOL));
// Discard fragment so nothing is written to the framebuffer
discard;
}
)";
// Fragment shader applying a modifier volume to the translucent fragments stored in the A-buffer.
//
// It is compiled once per mode with MV_MODE, MAX_PIXELS_PER_FRAGMENT and DIV_POS_Z defined,
// after ShaderHeader (see compileFinalAndModVolShaders()).
//
// For each fragment it walks at most MAX_PIXELS_PER_FRAGMENT entries of the per-pixel list and,
// for entries whose polygon has shadows enabled, updates the entry's seq_num:
//  - MV_XOR / MV_OR: xor / or SHADOW_STENCIL into it when the volume fragment's depth is >= the
//    stored depth,
//  - MV_INCLUSION: clears SHADOW_STENCIL, then sets bit 31 if only SHADOW_STENCIL was set before,
//  - MV_EXCLUSION: clears SHADOW_STENCIL and SHADOW_ACC, then sets bit 31 if only SHADOW_ACC was
//    set before.
// The fragment itself is always discarded.
//
// NOTE(review): in the inclusion/exclusion modes the plain store writes back the seq_num value
// that was loaded before the atomicAnd, with bit 31 set -- confirm that this is the intended
// interaction with the preceding atomic clear.
static const char *tr_modvol_shader_source = R"(
in vec3 vtx_uv;
// Must match ModifierVolumeMode enum values
#define MV_XOR 0
#define MV_OR 1
#define MV_INCLUSION 2
#define MV_EXCLUSION 3
void main(void)
{
#if MV_MODE == MV_XOR || MV_MODE == MV_OR
setFragDepth(vtx_uv.z);
#endif
ivec2 coords = ivec2(gl_FragCoord.xy);
uint idx = imageLoad(abufferPointerImg, coords).x;
int list_len = 0;
while (idx != EOL && list_len < MAX_PIXELS_PER_FRAGMENT)
{
const Pixel pixel = pixels[idx];
const PolyParam pp = tr_poly_params[getPolyNumber(pixel)];
if (getShadowEnable(pp))
{
#if MV_MODE == MV_XOR
if (gl_FragDepth >= pixel.depth)
atomicXor(pixels[idx].seq_num, SHADOW_STENCIL);
#elif MV_MODE == MV_OR
if (gl_FragDepth >= pixel.depth)
atomicOr(pixels[idx].seq_num, SHADOW_STENCIL);
#elif MV_MODE == MV_INCLUSION
uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL));
if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_STENCIL)
pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1);
#elif MV_MODE == MV_EXCLUSION
uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL|SHADOW_ACC));
if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_ACC)
pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1);
#endif
}
idx = pixel.next;
list_len++;
}
discard;
}
)";
// Pass-through vertex shader used with the full-screen quad by the final and clear programs:
// the input position is emitted unchanged with w = 1.
static const char* VertexShaderSource = R"(
in vec3 in_pos;
void main()
{
gl_Position = vec4(in_pos, 1.0);
}
)";
// Forward declaration: initABuffer() draws the quad before abufferDrawQuad() is defined below.
static void abufferDrawQuad();
static void compileFinalAndModVolShaders()
{
if (maxLayers != config::PerPixelLayers)
{
maxLayers = config::PerPixelLayers;
glcache.DeleteProgram(g_abuffer_final_shader.program);
g_abuffer_final_shader.program = 0;
for (int mode = 0; mode < ModeCount; mode++)
{
glcache.DeleteProgram(g_abuffer_tr_modvol_shaders[mode].program);
g_abuffer_tr_modvol_shaders[mode].program = 0;
}
}
if (g_abuffer_final_shader.program == 0)
{
OpenGl4Source vertexShader;
vertexShader.addSource(VertexShaderSource);
OpenGl4Source finalShader;
finalShader.addConstant("MAX_PIXELS_PER_FRAGMENT", config::PerPixelLayers)
.addSource(ShaderHeader)
.addSource(final_shader_source);
gl4CompilePipelineShader(&g_abuffer_final_shader, finalShader.generate().c_str(), vertexShader.generate().c_str());
}
if (g_abuffer_tr_modvol_shaders[0].program == 0)
{
OpenGl4Source modVolShader;
modVolShader.addConstant("MAX_PIXELS_PER_FRAGMENT", config::PerPixelLayers)
.addConstant("DIV_POS_Z", config::NativeDepthInterpolation)
.addSource(ShaderHeader)
.addSource(tr_modvol_shader_source);
for (int mode = 0; mode < ModeCount; mode++)
{
modVolShader.setConstant("MV_MODE", mode);
g_abuffer_tr_modvol_shaders[mode].pp_Gouraud = false;
gl4CompilePipelineShader(&g_abuffer_tr_modvol_shaders[mode], modVolShader.generate().c_str(), nullptr);
}
}
}
static void makePixelBuffer()
{
if (pixels_buffer == 0 || pixelBufferSize != config::PixelBufferSize)
{
// get the max buffer size
GLint64 maxSize;
glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &maxSize);
pixelBufferSize = config::PixelBufferSize;
GLsizeiptr pixel_buffer_size = std::min<int64_t>(pixelBufferSize, maxSize);
// Create the buffer
if (pixels_buffer == 0)
glGenBuffers(1, &pixels_buffer);
// Bind it
glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer);
// Declare storage
glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer);
glCheck();
}
}
void initABuffer()
{
if (max_image_width > 0 && max_image_height > 0)
{
if (pixels_pointers == 0)
pixels_pointers = glcache.GenTexture();
glActiveTexture(GL_TEXTURE4);
glcache.BindTexture(GL_TEXTURE_2D, pixels_pointers);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, max_image_width, max_image_height, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
glBindImageTexture(4, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI);
glCheck();
}
makePixelBuffer();
if (atomic_buffer == 0 )
{
// Create the buffer
glGenBuffers(1, &atomic_buffer);
// Bind it
glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer);
// Declare storage
glBufferData(GL_ATOMIC_COUNTER_BUFFER, 4, NULL, GL_DYNAMIC_COPY);
glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomic_buffer);
GLint zero = 0;
glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLint), &zero);
glCheck();
}
compileFinalAndModVolShaders();
if (g_abuffer_clear_shader.program == 0)
{
OpenGl4Source vertexShader;
vertexShader.addSource(VertexShaderSource);
OpenGl4Source clearShader;
clearShader.addSource(ShaderHeader)
.addSource(clear_shader_source);
gl4CompilePipelineShader(&g_abuffer_clear_shader, clearShader.generate().c_str(), vertexShader.generate().c_str());
}
if (g_quadBuffer == nullptr)
{
g_quadBuffer = std::unique_ptr<GlBuffer>(new GlBuffer(GL_ARRAY_BUFFER, GL_STATIC_DRAW));
static const float vertices[] = {
-1, 1, 1,
-1, -1, 1,
1, 1, 1,
1, -1, 1,
};
g_quadBuffer->update(vertices, sizeof(vertices));
}
if (g_quadIndexBuffer == nullptr)
{
g_quadIndexBuffer = std::unique_ptr<GlBuffer>(new GlBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_STATIC_DRAW));
static const GLushort indices[] = { 0, 1, 2, 1, 3 };
g_quadIndexBuffer->update(indices, sizeof(indices));
}
if (g_quadVertexArray == 0)
{
glGenVertexArrays(1, &g_quadVertexArray);
glBindVertexArray(g_quadVertexArray);
g_quadBuffer->bind();
g_quadIndexBuffer->bind();
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(float) * 3, (void*)0);
glBindVertexArray(0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
}
glCheck();
// Clear A-buffer pointers
glcache.UseProgram(g_abuffer_clear_shader.program);
gl4ShaderUniforms.Set(&g_abuffer_clear_shader);
abufferDrawQuad();
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
glCheck();
}
// Releases every GL object owned by this file and zeroes the handles so that a later
// initABuffer() recreates them. maxLayers is reset as well.
void termABuffer()
{
	// List head texture
	if (pixels_pointers)
	{
		glcache.DeleteTextures(1, &pixels_pointers);
		pixels_pointers = 0;
	}

	// Fragment storage and atomic counter
	if (pixels_buffer)
	{
		glDeleteBuffers(1, &pixels_buffer);
		pixels_buffer = 0;
	}
	if (atomic_buffer)
	{
		glDeleteBuffers(1, &atomic_buffer);
		atomic_buffer = 0;
	}

	// Full-screen quad
	if (g_quadVertexArray)
	{
		glDeleteVertexArrays(1, &g_quadVertexArray);
		g_quadVertexArray = 0;
	}
	g_quadBuffer.reset();
	g_quadIndexBuffer.reset();

	// Programs
	glcache.DeleteProgram(g_abuffer_final_shader.program);
	g_abuffer_final_shader.program = 0;
	glcache.DeleteProgram(g_abuffer_clear_shader.program);
	g_abuffer_clear_shader.program = 0;
	for (auto& modVol : g_abuffer_tr_modvol_shaders)
	{
		glcache.DeleteProgram(modVol.program);
		modVol.program = 0;
	}

	maxLayers = 0;
}
// Drops the list head texture, if any, and runs initABuffer() again so it gets recreated.
// The w and h arguments are not used: initABuffer() sizes the texture from
// max_image_width and max_image_height.
void reshapeABuffer(int w, int h)
{
	(void)w;
	(void)h;

	if (pixels_pointers)
	{
		glcache.DeleteTextures(1, &pixels_pointers);
		pixels_pointers = 0;
	}
	initABuffer();
}
// Draws the full-screen quad with whatever program is currently in use, then unbinds
// the quad's vertex array.
static void abufferDrawQuad()
{
	// Matches the five 16-bit indices uploaded in initABuffer()
	const int stripIndexCount = 5;

	glBindVertexArray(g_quadVertexArray);
	g_quadBuffer->bind();
	g_quadIndexBuffer->bind();
	glDrawElements(GL_TRIANGLE_STRIP, stripIndexCount, GL_UNSIGNED_SHORT, (GLvoid *)nullptr);
	glBindVertexArray(0);
	glCheck();
}
void DrawTranslucentModVols(int first, int count, bool useOpaqueGeom)
{
if (count == 0 || pvrrc.modtrig.used() == 0)
return;
compileFinalAndModVolShaders();
glBindVertexArray(gl4.vbo.getModVolVAO());
gl4.vbo.getModVolBuffer()->bind();
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE3);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
glcache.BindTexture(GL_TEXTURE_2D, 0);
glcache.Disable(GL_DEPTH_TEST);
glcache.Disable(GL_STENCIL_TEST);
glCheck();
ModifierVolumeParam* params = useOpaqueGeom ? &pvrrc.global_param_mvo.head()[first] : &pvrrc.global_param_mvo_tr.head()[first];
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT);
int mod_base = -1;
for (int cmv = 0; cmv < count; cmv++)
{
ModifierVolumeParam& param = params[cmv];
if (param.count == 0)
continue;
u32 mv_mode = param.isp.DepthMode;
verify(param.first >= 0 && param.first + param.count <= (u32)pvrrc.modtrig.used());
if (mod_base == -1)
mod_base = param.first;
gl4PipelineShader *shader;
if (!param.isp.VolumeLast && mv_mode > 0)
shader = &g_abuffer_tr_modvol_shaders[Or]; // OR'ing (open volume or quad)
else
shader = &g_abuffer_tr_modvol_shaders[Xor]; // XOR'ing (closed volume)
glcache.UseProgram(shader->program);
gl4ShaderUniforms.Set(shader);
SetCull(param.isp.CullMode); glCheck();
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3); glCheck();
if (mv_mode == 1 || mv_mode == 2)
{
//Sum the area
shader = &g_abuffer_tr_modvol_shaders[mv_mode == 1 ? Inclusion : Exclusion];
glcache.UseProgram(shader->program);
gl4ShaderUniforms.Set(shader);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glDrawArrays(GL_TRIANGLES, mod_base * 3, (param.first + param.count - mod_base) * 3); glCheck();
mod_base = -1;
}
}
glBindVertexArray(gl4.vbo.getMainVAO());
}
void checkOverflowAndReset()
{
// Using atomic counter
GLuint max_pixel_index = 0;
// glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &max_pixel_index);
//// printf("ABUFFER %d pixels used\n", max_pixel_index);
// if ((max_pixel_index + 1) * 32 - 1 >= pixel_buffer_size)
// {
// GLint64 size;
// glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &size);
// if (pixel_buffer_size == size)
// printf("A-buffer overflow: %d pixels. Buffer size already maxed out\n", max_pixel_index);
// else
// {
// pixel_buffer_size = (GLuint)min(2 * (GLint64)pixel_buffer_size, size);
//
// printf("A-buffer overflow: %d pixels. Resizing buffer to %d MB\n", max_pixel_index, pixel_buffer_size / 1024 / 1024);
//
// glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer);
// glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY);
// glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer);
// glCheck();
// }
// }
// Reset counter
max_pixel_index = 0;
glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0 , sizeof(GLuint), &max_pixel_index);
}
// Resolves the A-buffer: draws the full-screen quad with the final program, with depth test,
// face culling and scissor test disabled.
//
// The pixel buffer is only re-checked against config::PixelBufferSize after the draw, so a
// changed size does not affect the resolve that has just been issued.
void renderABuffer()
{
	// Make sure the program exists and matches the current layer limit
	compileFinalAndModVolShaders();

	auto& resolveProgram = g_abuffer_final_shader;
	glcache.UseProgram(resolveProgram.program);
	gl4ShaderUniforms.Set(&resolveProgram);

	glcache.Disable(GL_DEPTH_TEST);
	glcache.Disable(GL_CULL_FACE);
	glcache.Disable(GL_SCISSOR_TEST);

	glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT);
	abufferDrawQuad();

	glActiveTexture(GL_TEXTURE0);
	makePixelBuffer();
	glCheck();
}