flycast/core/rend/gl4/abuffer.cpp

523 lines
13 KiB
C++

/*
* abuffer.cpp
*
* Created on: May 26, 2018
* Author: raph
*/
#include <cmath>
#include "gl4.h"
#include "rend/gles/glcache.h"
GLuint pixels_buffer;
GLuint pixels_pointers;
GLuint atomic_buffer;
gl4PipelineShader g_abuffer_final_shader;
gl4PipelineShader g_abuffer_clear_shader;
gl4PipelineShader g_abuffer_tr_modvol_shaders[ModeCount];
static GLuint g_quadBuffer = 0;
static GLuint g_quadVertexArray = 0;
GLuint pixel_buffer_size = 512 * 1024 * 1024; // Initial size 512 MB
#define MAX_PIXELS_PER_FRAGMENT "32"
static const char *final_shader_source = SHADER_HEADER
"#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT
R"(
layout(binding = 0) uniform sampler2D tex;
uniform highp float shade_scale_factor;
out vec4 FragColor;
uint pixel_list[MAX_PIXELS_PER_FRAGMENT];
int fillAndSortFragmentArray(ivec2 coords)
{
// Load fragments into a local memory array for sorting
uint idx = imageLoad(abufferPointerImg, coords).x;
int count = 0;
for (; idx != EOL && count < MAX_PIXELS_PER_FRAGMENT; count++)
{
const Pixel p = pixels[idx];
int j = count - 1;
Pixel jp = pixels[pixel_list[j]];
while (j >= 0
&& (jp.depth > p.depth
|| (jp.depth == p.depth && getPolyNumber(jp) > getPolyNumber(p))))
{
pixel_list[j + 1] = pixel_list[j];
j--;
jp = pixels[pixel_list[j]];
}
pixel_list[j + 1] = idx;
idx = p.next;
}
return count;
}
// Blend fragments back-to-front
vec4 resolveAlphaBlend(ivec2 coords) {
// Copy and sort fragments into a local array
int num_frag = fillAndSortFragmentArray(coords);
vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0));
vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer
for (int i = 0; i < num_frag; i++)
{
const Pixel pixel = pixels[pixel_list[i]];
const PolyParam pp = tr_poly_params[getPolyNumber(pixel)];
bool area1 = false;
bool shadowed = false;
if (isShadowed(pixel))
{
if (isTwoVolumes(pp))
area1 = true;
else
shadowed = true;
}
vec4 srcColor;
if (getSrcSelect(pp, area1))
srcColor = secondaryBuffer;
else
{
srcColor = pixel.color;
if (shadowed)
srcColor.rgb *= shade_scale_factor;
}
vec4 dstColor = getDstSelect(pp, area1) ? secondaryBuffer : finalColor;
vec4 srcCoef;
vec4 dstCoef;
int srcBlend = getSrcBlendFunc(pp, area1);
switch (srcBlend)
{
case ZERO:
srcCoef = vec4(0.0);
break;
case ONE:
srcCoef = vec4(1.0);
break;
case OTHER_COLOR:
srcCoef = finalColor;
break;
case INVERSE_OTHER_COLOR:
srcCoef = vec4(1.0) - dstColor;
break;
case SRC_ALPHA:
srcCoef = vec4(srcColor.a);
break;
case INVERSE_SRC_ALPHA:
srcCoef = vec4(1.0 - srcColor.a);
break;
case DST_ALPHA:
srcCoef = vec4(dstColor.a);
break;
case INVERSE_DST_ALPHA:
srcCoef = vec4(1.0 - dstColor.a);
break;
}
int dstBlend = getDstBlendFunc(pp, area1);
switch (dstBlend)
{
case ZERO:
dstCoef = vec4(0.0);
break;
case ONE:
dstCoef = vec4(1.0);
break;
case OTHER_COLOR:
dstCoef = srcColor;
break;
case INVERSE_OTHER_COLOR:
dstCoef = vec4(1.0) - srcColor;
break;
case SRC_ALPHA:
dstCoef = vec4(srcColor.a);
break;
case INVERSE_SRC_ALPHA:
dstCoef = vec4(1.0 - srcColor.a);
break;
case DST_ALPHA:
dstCoef = vec4(dstColor.a);
break;
case INVERSE_DST_ALPHA:
dstCoef = vec4(1.0 - dstColor.a);
break;
}
const vec4 result = clamp(dstColor * dstCoef + srcColor * srcCoef, 0.0, 1.0);
if (getDstSelect(pp, area1))
secondaryBuffer = result;
else
finalColor = result;
}
return finalColor;
}
void main(void)
{
ivec2 coords = ivec2(gl_FragCoord.xy);
// Compute and output final color for the frame buffer
// Visualize the number of layers in use
//FragColor = vec4(float(fillAndSortFragmentArray(coords)) / MAX_PIXELS_PER_FRAGMENT * 4, 0, 0, 1);
FragColor = resolveAlphaBlend(coords);
}
)";
static const char *clear_shader_source = SHADER_HEADER
R"(
void main(void)
{
ivec2 coords = ivec2(gl_FragCoord.xy);
// Reset pointers
imageStore(abufferPointerImg, coords, uvec4(EOL));
// Discard fragment so nothing is written to the framebuffer
discard;
}
)";
static const char *tr_modvol_shader_source = SHADER_HEADER
"#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT
R"(
#define MV_MODE %d
// Must match ModifierVolumeMode enum values
#define MV_XOR 0
#define MV_OR 1
#define MV_INCLUSION 2
#define MV_EXCLUSION 3
void main(void)
{
#if MV_MODE == MV_XOR || MV_MODE == MV_OR
setFragDepth();
#endif
ivec2 coords = ivec2(gl_FragCoord.xy);
uint idx = imageLoad(abufferPointerImg, coords).x;
int list_len = 0;
while (idx != EOL && list_len < MAX_PIXELS_PER_FRAGMENT)
{
const Pixel pixel = pixels[idx];
const PolyParam pp = tr_poly_params[getPolyNumber(pixel)];
if (getShadowEnable(pp))
{
#if MV_MODE == MV_XOR
if (gl_FragDepth >= pixel.depth)
atomicXor(pixels[idx].seq_num, SHADOW_STENCIL);
#elif MV_MODE == MV_OR
if (gl_FragDepth >= pixel.depth)
atomicOr(pixels[idx].seq_num, SHADOW_STENCIL);
#elif MV_MODE == MV_INCLUSION
uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL));
if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_STENCIL)
pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1);
#elif MV_MODE == MV_EXCLUSION
uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL|SHADOW_ACC));
if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_ACC)
pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1);
#endif
}
idx = pixel.next;
list_len++;
}
discard;
}
)";
static const char* VertexShaderSource =
R"(#version 430
in highp vec3 in_pos;
void main()
{
gl_Position = vec4(in_pos, 1.0);
}
)";
void initABuffer()
{
if (max_image_width > 0 && max_image_height > 0)
{
if (pixels_pointers == 0)
pixels_pointers = glcache.GenTexture();
glActiveTexture(GL_TEXTURE4);
glBindTexture(GL_TEXTURE_2D, pixels_pointers);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, max_image_width, max_image_height, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
glBindImageTexture(4, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI);
glCheck();
}
if (pixels_buffer == 0 )
{
// get the max buffer size
GLint64 size;
glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &size);
pixel_buffer_size = (GLuint)min((GLint64)pixel_buffer_size, size);
// Create the buffer
glGenBuffers(1, &pixels_buffer);
// Bind it
glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer);
// Declare storage
glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer);
glCheck();
}
if (atomic_buffer == 0 )
{
// Create the buffer
glGenBuffers(1, &atomic_buffer);
// Bind it
glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer);
// Declare storage
glBufferData(GL_ATOMIC_COUNTER_BUFFER, 4, NULL, GL_DYNAMIC_COPY);
glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomic_buffer);
GLint zero = 0;
glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLint), &zero);
glCheck();
}
if (g_abuffer_final_shader.program == 0)
gl4CompilePipelineShader(&g_abuffer_final_shader, final_shader_source, VertexShaderSource);
if (g_abuffer_clear_shader.program == 0)
gl4CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source, VertexShaderSource);
if (g_abuffer_tr_modvol_shaders[0].program == 0)
{
char source[16384];
for (int mode = 0; mode < ModeCount; mode++)
{
sprintf(source, tr_modvol_shader_source, mode);
gl4CompilePipelineShader(&g_abuffer_tr_modvol_shaders[mode], source, VertexShaderSource);
}
}
if (g_quadVertexArray == 0)
glGenVertexArrays(1, &g_quadVertexArray);
if (g_quadBuffer == 0)
{
glBindVertexArray(g_quadVertexArray);
glGenBuffers(1, &g_quadBuffer);
glBindBuffer(GL_ARRAY_BUFFER, g_quadBuffer); glCheck();
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(float) * 3, (void*)0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glCheck();
glBindVertexArray(0);
}
glCheck();
// Clear A-buffer pointers
glcache.UseProgram(g_abuffer_clear_shader.program);
gl4ShaderUniforms.Set(&g_abuffer_clear_shader);
abufferDrawQuad();
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
glCheck();
}
void termABuffer()
{
if (pixels_pointers != 0)
{
glcache.DeleteTextures(1, &pixels_pointers);
pixels_pointers = 0;
}
if (pixels_buffer != 0)
{
glDeleteBuffers(1, &pixels_buffer);
pixels_buffer = 0;
}
if (atomic_buffer != 0)
{
glDeleteBuffers(1, &atomic_buffer);
atomic_buffer = 0;
}
if (g_quadVertexArray != 0)
{
glDeleteVertexArrays(1, &g_quadVertexArray);
g_quadVertexArray = 0;
}
if (g_quadBuffer != 0)
{
glDeleteBuffers(1, &g_quadBuffer);
g_quadBuffer = 0;
}
glcache.DeleteProgram(g_abuffer_final_shader.program);
g_abuffer_final_shader.program = 0;
glcache.DeleteProgram(g_abuffer_clear_shader.program);
g_abuffer_clear_shader.program = 0;
for (int mode = 0; mode < ModeCount; mode++)
{
glcache.DeleteProgram(g_abuffer_tr_modvol_shaders[mode].program);
g_abuffer_tr_modvol_shaders[mode].program = 0;
}
}
void reshapeABuffer(int w, int h)
{
if (pixels_pointers != 0)
{
glcache.DeleteTextures(1, &pixels_pointers);
pixels_pointers = 0;
}
initABuffer();
}
void abufferDrawQuad()
{
glBindVertexArray(g_quadVertexArray);
float vertices[] = {
-1, 1, 1,
-1, -1, 1,
1, 1, 1,
1, -1, 1,
};
GLushort indices[] = { 0, 1, 2, 1, 3 };
glBindBuffer(GL_ARRAY_BUFFER, g_quadBuffer); glCheck();
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW); glCheck();
glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, indices); glCheck();
glBindVertexArray(0);
glCheck();
}
void DrawTranslucentModVols(int first, int count)
{
if (count == 0 || pvrrc.modtrig.used() == 0)
return;
glBindVertexArray(gl4.vbo.modvol_vao);
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE3);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);
glcache.Disable(GL_DEPTH_TEST);
glcache.Disable(GL_STENCIL_TEST);
glCheck();
ModifierVolumeParam* params = &pvrrc.global_param_mvo_tr.head()[first];
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT);
int mod_base = -1;
for (u32 cmv = 0; cmv < count; cmv++)
{
ModifierVolumeParam& param = params[cmv];
if (param.count == 0)
continue;
u32 mv_mode = param.isp.DepthMode;
verify(param.first >= 0 && param.first + param.count <= pvrrc.modtrig.used());
if (mod_base == -1)
mod_base = param.first;
gl4PipelineShader *shader;
if (!param.isp.VolumeLast && mv_mode > 0)
shader = &g_abuffer_tr_modvol_shaders[Or]; // OR'ing (open volume or quad)
else
shader = &g_abuffer_tr_modvol_shaders[Xor]; // XOR'ing (closed volume)
glcache.UseProgram(shader->program);
gl4ShaderUniforms.Set(shader);
SetCull(param.isp.CullMode); glCheck();
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3); glCheck();
if (mv_mode == 1 || mv_mode == 2)
{
//Sum the area
shader = &g_abuffer_tr_modvol_shaders[mv_mode == 1 ? Inclusion : Exclusion];
glcache.UseProgram(shader->program);
gl4ShaderUniforms.Set(shader);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glDrawArrays(GL_TRIANGLES, mod_base * 3, (param.first + param.count - mod_base) * 3); glCheck();
mod_base = -1;
}
}
glBindVertexArray(gl4.vbo.main_vao);
}
void checkOverflowAndReset()
{
// Using atomic counter
GLuint max_pixel_index = 0;
// glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &max_pixel_index);
//// printf("ABUFFER %d pixels used\n", max_pixel_index);
// if ((max_pixel_index + 1) * 32 - 1 >= pixel_buffer_size)
// {
// GLint64 size;
// glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &size);
// if (pixel_buffer_size == size)
// printf("A-buffer overflow: %d pixels. Buffer size already maxed out\n", max_pixel_index);
// else
// {
// pixel_buffer_size = (GLuint)min(2 * (GLint64)pixel_buffer_size, size);
//
// printf("A-buffer overflow: %d pixels. Resizing buffer to %d MB\n", max_pixel_index, pixel_buffer_size / 1024 / 1024);
//
// glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer);
// glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY);
// glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer);
// glCheck();
// }
// }
// Reset counter
max_pixel_index = 0;
glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0 , sizeof(GLuint), &max_pixel_index);
}
void renderABuffer()
{
// Render to output FBO
glcache.UseProgram(g_abuffer_final_shader.program);
gl4ShaderUniforms.Set(&g_abuffer_final_shader);
glcache.Disable(GL_DEPTH_TEST);
glcache.Disable(GL_CULL_FACE);
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT);
abufferDrawQuad();
glCheck();
// Clear A-buffer pointers
glcache.UseProgram(g_abuffer_clear_shader.program);
gl4ShaderUniforms.Set(&g_abuffer_clear_shader);
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
abufferDrawQuad();
glActiveTexture(GL_TEXTURE0);
glCheck();
}