Merge pull request #1448 from reicast/fh/wacky-races

Enable log2 depth at runtime when supported (GL, GLES 3)
This commit is contained in:
baka0815 2018-11-03 22:01:22 +01:00 committed by GitHub
commit 8ab66a44b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 235 additions and 175 deletions

View File

@ -22,6 +22,13 @@ int fbdev = -1;
#endif
#endif
#ifndef GL_RED
#define GL_RED 0x1903
#endif
#ifndef GL_MAJOR_VERSION
#define GL_MAJOR_VERSION 0x821B
#endif
/*
GL|ES 2
Slower, smaller subset of gl2
@ -61,58 +68,56 @@ Tile clip
void GenSorted();
float fb_scale_x,fb_scale_y;
float scale_x, scale_y;
#ifndef GLES
#define attr "in"
#define vary "out"
#else
#define attr "attribute"
#define vary "varying"
#endif
#if 1
//Fragment and vertex shaders code
//pretty much 1:1 copy of the d3d ones for now
const char* VertexShaderSource =
#ifndef GLES
"#version 140 \n"
#endif
"\
"%s \n\
\n\
#define TARGET_GL %s \n\
\n\
#define GLES2 0 \n\
#define GLES3 1 \n\
#define GL2 2 \n\
#define GL3 3 \n\
\n\
#if TARGET_GL == GLES2 || TARGET_GL == GL2 \n\
#define in attribute \n\
#define out varying \n\
#endif \n\
\n\
/* Vertex constants*/ \n\
uniform highp vec4 scale; \n\
uniform highp vec4 depth_scale; \n\
uniform highp float sp_FOG_DENSITY; \n\
/* Vertex input */ \n\
" attr " highp vec4 in_pos; \n\
" attr " lowp vec4 in_base; \n\
" attr " lowp vec4 in_offs; \n\
" attr " mediump vec2 in_uv; \n\
in highp vec4 in_pos; \n\
in lowp vec4 in_base; \n\
in lowp vec4 in_offs; \n\
in mediump vec2 in_uv; \n\
/* output */ \n\
" vary " lowp vec4 vtx_base; \n\
" vary " lowp vec4 vtx_offs; \n\
" vary " mediump vec2 vtx_uv; \n\
" vary " highp vec3 vtx_xyz; \n\
out lowp vec4 vtx_base; \n\
out lowp vec4 vtx_offs; \n\
out mediump vec2 vtx_uv; \n\
void main() \n\
{ \n\
vtx_base=in_base; \n\
vtx_offs=in_offs; \n\
vtx_uv=in_uv; \n\
vec4 vpos=in_pos; \n\
vtx_xyz.xy = vpos.xy; \n\
vtx_xyz.z = vpos.z*sp_FOG_DENSITY; \n\
vpos.w=1.0/vpos.z; \n"
#ifndef GLES
"\
vpos.w=1.0/vpos.z; \n\
#if TARGET_GL != GLES2 \n\
if (vpos.w < 0.0) { \n\
gl_Position = vec4(0.0, 0.0, 0.0, vpos.w); \n\
return; \n\
} \n\
vpos.z = vpos.w; \n"
#else
"\
vpos.z=depth_scale.x+depth_scale.y*vpos.w; \n"
#endif
"\
vpos.z = vpos.w; \n\
#else \n\
vpos.z=depth_scale.x+depth_scale.y*vpos.w; \n\
#endif \n\
\n\
vpos.xy=vpos.xy*scale.xy-scale.zw; \n\
vpos.xy*=vpos.w; \n\
gl_Position = vpos; \n\
@ -226,22 +231,10 @@ lowp float fog_mode2(highp float invW) \n\
} \n\
*/
#ifndef GLES
#define FRAGCOL "FragColor"
#define TEXLOOKUP "texture"
#define vary "in"
#else
#define FRAGCOL "gl_FragColor"
#define TEXLOOKUP "texture2D"
#endif
const char* PixelPipelineShader =
#ifndef GLES
"#version 140 \n"
"out vec4 FragColor; \n"
#endif
"\
"%s \n\
\n\
#define TARGET_GL %s \n\
\
#define cp_AlphaTest %d \n\
#define pp_ClipTestMode %d \n\
@ -251,22 +244,45 @@ const char* PixelPipelineShader =
#define pp_ShadInstr %d \n\
#define pp_Offset %d \n\
#define pp_FogCtrl %d \n\
\n\
#define GLES2 0 \n\
#define GLES3 1 \n\
#define GL2 2 \n\
#define GL3 3 \n\
\n\
#if TARGET_GL == GLES3 \n\
out highp vec4 FragColor; \n\
#define gl_FragColor FragColor \n\
#define FOG_CHANNEL a \n\
#elif TARGET_GL == GL3 \n\
out highp vec4 FragColor; \n\
#define gl_FragColor FragColor \n\
#define FOG_CHANNEL r \n\
#else \n\
#define in varying \n\
#define texture texture2D \n\
#define FOG_CHANNEL a \n\
#endif \n\
\n\
/* Shader program params*/ \n\
/* gles has no alpha test stage, so its emulated on the shader */ \n\
uniform lowp float cp_AlphaTestValue; \n\
uniform lowp vec4 pp_ClipTest; \n\
uniform lowp vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; \n\
uniform highp vec2 sp_LOG_FOG_COEFS; \n\
uniform highp float sp_FOG_DENSITY; \n\
uniform sampler2D tex,fog_table; \n\
/* Vertex input*/ \n\
" vary " lowp vec4 vtx_base; \n\
" vary " lowp vec4 vtx_offs; \n\
" vary " mediump vec2 vtx_uv; \n\
" vary " highp vec3 vtx_xyz; \n\
lowp float fog_mode2(highp float val) \n\
in lowp vec4 vtx_base; \n\
in lowp vec4 vtx_offs; \n\
in mediump vec2 vtx_uv; \n\
lowp float fog_mode2(highp float w) \n\
{ \n\
highp float fog_idx=clamp(val,0.0,127.99); \n\
return clamp(sp_LOG_FOG_COEFS.y*log2(fog_idx)+sp_LOG_FOG_COEFS.x,0.001,1.0); //the clamp is required due to yet another bug !\n\
highp float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999); \n\
highp float exp = floor(log2(z)); \n\
highp float m = z * 16.0 / pow(2.0, exp) - 16.0; \n\
lowp float idx = floor(m) + exp * 16.0 + 0.5; \n\
highp vec4 fog_coef = texture(fog_table, vec2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); \n\
return fog_coef.FOG_CHANNEL; \n\
} \n\
void main() \n\
{ \n\
@ -288,11 +304,11 @@ void main() \n\
color.a=1.0; \n\
#endif\n\
#if pp_FogCtrl==3 \n\
color=vec4(sp_FOG_COL_RAM.rgb,fog_mode2(vtx_xyz.z)); \n\
color=vec4(sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\
#endif\n\
#if pp_Texture==1 \n\
{ \n\
lowp vec4 texcol=" TEXLOOKUP "(tex,vtx_uv); \n\
lowp vec4 texcol=texture(tex,vtx_uv); \n\
\n\
#if pp_IgnoreTexA==1 \n\
texcol.a=1.0; \n\
@ -332,7 +348,7 @@ void main() \n\
#endif\n\
#if pp_FogCtrl==0 \n\
{ \n\
color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(vtx_xyz.z)); \n\
color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\
} \n\
#endif\n\
#if cp_AlphaTest == 1 \n\
@ -342,48 +358,67 @@ void main() \n\
else \n\
color.a = 1.0; \n\
#endif \n\
//color.rgb=vec3(vtx_xyz.z/255.0);\n"
#ifndef GLES
"\
#if TARGET_GL != GLES2 \n\
highp float w = gl_FragCoord.w * 100000.0; \n\
gl_FragDepth = log2(1.0 + w) / 34.0; \n"
#endif
FRAGCOL "=color; \n\
gl_FragDepth = log2(1.0 + w) / 34.0; \n\
#endif \n\
gl_FragColor=color; \n\
}";
const char* ModifierVolumeShader =
#ifndef GLES
"#version 140 \n"
"out vec4 FragColor; \n"
#endif
" \
"%s \n\
\n\
#define TARGET_GL %s \n\
\n\
\n\
#define GLES2 0 \n\
#define GLES3 1 \n\
#define GL2 2 \n\
#define GL3 3 \n\
\n\
#if TARGET_GL != GLES2 && TARGET_GL != GL2 \n\
out highp vec4 FragColor; \n\
#define gl_FragColor FragColor \n\
#endif \n\
\n\
uniform lowp float sp_ShaderColor; \n\
/* Vertex input*/ \n\
void main() \n\
{ \n"
#ifndef GLES
"\
{ \n\
#if TARGET_GL != GLES2 \n\
highp float w = gl_FragCoord.w * 100000.0; \n\
gl_FragDepth = log2(1.0 + w) / 34.0; \n"
#endif
FRAGCOL "=vec4(0.0, 0.0, 0.0, sp_ShaderColor); \n\
gl_FragDepth = log2(1.0 + w) / 34.0; \n\
#endif \n\
gl_FragColor=vec4(0.0, 0.0, 0.0, sp_ShaderColor); \n\
}";
const char* OSD_Shader =
#ifndef GLES
"#version 140 \n"
"out vec4 FragColor; \n"
#endif
" \
" vary " lowp vec4 vtx_base; \n\
" vary " mediump vec2 vtx_uv; \n\
"%s \n\
\n\
#define TARGET_GL %s \n\
\n\
#define GLES2 0 \n\
#define GLES3 1 \n\
#define GL2 2 \n\
#define GL3 3 \n\
\n\
#if TARGET_GL != GLES2 && TARGET_GL != GL2 \n\
out highp vec4 FragColor; \n\
#define gl_FragColor FragColor \n\
#else \n\
#define in varying \n\
#define texture texture2D \n\
#endif \n\
\n\
in lowp vec4 vtx_base; \n\
in mediump vec2 vtx_uv; \n\
/* Vertex input*/ \n\
uniform sampler2D tex; \n\
void main() \n\
{ \n\
mediump vec2 uv=vtx_uv; \n\
uv.y=1.0-uv.y; \n\
" FRAGCOL "=vtx_base*" TEXLOOKUP "(tex,uv.st); \n\n\
gl_FragColor=vtx_base*texture(tex,uv.st); \n\n\
}";
@ -391,6 +426,7 @@ gl_ctx gl;
int screen_width;
int screen_height;
GLuint fogTextureId;
#if (HOST_OS != OS_DARWIN) && !defined(TARGET_NACL32)
#if defined(GLES) && !defined(USE_SDL)
@ -696,6 +732,48 @@ int screen_height;
#endif
void findGLVersion()
{
while (true)
if (glGetError() == GL_NO_ERROR)
break;
glGetIntegerv(GL_MAJOR_VERSION, &gl.gl_major);
if (glGetError() == GL_INVALID_ENUM)
gl.gl_major = 2;
const char *version = (const char *)glGetString(GL_VERSION);
if (!strncmp(version, "OpenGL ES", 9))
{
gl.is_gles = true;
if (gl.gl_major >= 3)
{
gl.gl_version = "GLES3";
gl.glsl_version_header = "#version 300 es";
}
else
{
gl.gl_version = "GLES2";
gl.glsl_version_header = "";
}
gl.fog_image_format = GL_ALPHA;
}
else
{
gl.is_gles = false;
if (gl.gl_major >= 3)
{
gl.gl_version = "GL3";
gl.glsl_version_header = "#version 130";
gl.fog_image_format = GL_RED;
}
else
{
gl.gl_version = "GL2";
gl.glsl_version_header = "#version 120";
gl.fog_image_format = GL_ALPHA;
}
}
}
struct ShaderUniforms_t
{
float PT_ALPHA;
@ -704,7 +782,6 @@ struct ShaderUniforms_t
float fog_den_float;
float ps_FOG_COL_RAM[3];
float ps_FOG_COL_VERT[3];
float fog_coefs[2];
void Set(PipelineShader* s)
{
@ -725,9 +802,6 @@ struct ShaderUniforms_t
if (s->sp_FOG_COL_VERT!=-1)
glUniform3fv( s->sp_FOG_COL_VERT, 1, ps_FOG_COL_VERT);
if (s->sp_LOG_FOG_COEFS!=-1)
glUniform2fv(s->sp_LOG_FOG_COEFS,1, fog_coefs);
}
} ShaderUniforms;
@ -831,12 +905,15 @@ int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode,
bool CompilePipelineShader( PipelineShader* s)
{
char pshader[8192];
char vshader[8192];
sprintf(pshader,PixelPipelineShader,
sprintf(pshader,PixelPipelineShader, gl.glsl_version_header, gl.gl_version,
s->cp_AlphaTest,s->pp_ClipTestMode,s->pp_UseAlpha,
s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl);
s->program=gl_CompileAndLink(VertexShaderSource,pshader);
sprintf(vshader,VertexShaderSource, gl.glsl_version_header, gl.gl_version);
s->program=gl_CompileAndLink(vshader,pshader);
//setup texture 0 as the input for the shader
@ -863,14 +940,15 @@ bool CompilePipelineShader( PipelineShader* s)
if (s->pp_FogCtrl==0 || s->pp_FogCtrl==3)
{
s->sp_FOG_COL_RAM=glGetUniformLocation(s->program, "sp_FOG_COL_RAM");
s->sp_LOG_FOG_COEFS=glGetUniformLocation(s->program, "sp_LOG_FOG_COEFS");
}
else
{
s->sp_FOG_COL_RAM=-1;
s->sp_LOG_FOG_COEFS=-1;
}
// Setup texture 1 as the fog table
gu = glGetUniformLocation(s->program, "fog_table");
if (gu != -1)
glUniform1i(gu, 1);
ShaderUniforms.Set(s);
@ -940,15 +1018,22 @@ bool gl_create_resources()
}
}
findGLVersion();
char vshader[8192];
sprintf(vshader, VertexShaderSource, gl.glsl_version_header, gl.gl_version);
char fshader[8192];
sprintf(fshader, ModifierVolumeShader, gl.glsl_version_header, gl.gl_version);
gl.modvol_shader.program=gl_CompileAndLink(VertexShaderSource,ModifierVolumeShader);
gl.modvol_shader.program=gl_CompileAndLink(vshader,fshader);
gl.modvol_shader.scale = glGetUniformLocation(gl.modvol_shader.program, "scale");
gl.modvol_shader.sp_ShaderColor = glGetUniformLocation(gl.modvol_shader.program, "sp_ShaderColor");
gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale");
gl.OSD_SHADER.program=gl_CompileAndLink(VertexShaderSource,OSD_Shader);
sprintf(fshader, OSD_Shader, gl.glsl_version_header, gl.gl_version);
gl.OSD_SHADER.program=gl_CompileAndLink(vshader,fshader);
printf("OSD: %d\n",gl.OSD_SHADER.program);
gl.OSD_SHADER.scale=glGetUniformLocation(gl.OSD_SHADER.program, "scale");
gl.OSD_SHADER.depth_scale=glGetUniformLocation(gl.OSD_SHADER.program, "depth_scale");
@ -1015,70 +1100,35 @@ bool gles_init()
return true;
}
float fog_coefs[]={0,0};
void tryfit(float* x,float* y)
void UpdateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format)
{
//y=B*ln(x)+A
double sylnx=0,sy=0,slnx=0,slnx2=0;
u32 cnt=0;
for (int i=0;i<128;i++)
glActiveTexture(texture_slot);
if (fogTextureId == 0)
{
int rep=1;
//discard values clipped to 0 or 1
if (y[i]==1 && y[i+1]==1)
continue;
if (i>0 && y[i]==0 && y[i-1]==0)
continue;
//Add many samples for first and last value (fog-in, fog-out -> important)
if (i>0 && y[i]!=1 && y[i-1]==1)
rep=10000;
if (y[i]!=0 && y[i+1]==0)
rep=10000;
for (int j=0;j<rep;j++)
{
cnt++;
sylnx+=y[i]*log((double)x[i]);
sy+=y[i];
slnx+=log((double)x[i]);
slnx2+=log((double)x[i])*log((double)x[i]);
}
glGenTextures(1, &fogTextureId);
glBindTexture(GL_TEXTURE_2D, fogTextureId);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
else
glBindTexture(GL_TEXTURE_2D, fogTextureId);
double a,b;
b=(cnt*sylnx-sy*slnx)/(cnt*slnx2-slnx*slnx);
a=(sy-b*slnx)/(cnt);
//We use log2 and not ln on calculations //B*log(x)+A
//log2(x)=log(x)/log(2)
//log(x)=log2(x)*log(2)
//B*log(2)*log(x)+A
b*=logf(2.0);
float maxdev=0;
for (int i=0;i<128;i++)
u8 temp_tex_buffer[256];
for (int i = 0; i < 128; i++)
{
float diff=min(max(b*logf(x[i])/logf(2.0)+a,(double)0),(double)1)-y[i];
maxdev=max((float)fabs((float)diff),(float)maxdev);
temp_tex_buffer[i] = fog_table[i * 4];
temp_tex_buffer[i + 128] = fog_table[i * 4 + 1];
}
printf("FOG TABLE Curve match: maxdev: %.02f cents\n",maxdev*100);
fog_coefs[0]=a;
fog_coefs[1]=b;
//printf("%f\n",B*log(maxdev)/log(2.0)+A);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glTexImage2D(GL_TEXTURE_2D, 0, fog_image_format, 128, 2, 0, fog_image_format, GL_UNSIGNED_BYTE, temp_tex_buffer);
glCheck();
glActiveTexture(GL_TEXTURE0);
}
extern u16 kcode[4];
extern u8 rt[4],lt[4];
@ -1141,10 +1191,10 @@ static void DrawButton(float* xy, u32 state)
vtx.z=1;
float x=xy[0];
float y=xy[1];
float w=xy[2];
float h=xy[3];
float x=xy[0] * scale_x;
float y=xy[1] * scale_y;
float w=xy[2] * scale_x;
float h=xy[3] * scale_y;
vtx.col[0]=vtx.col[1]=vtx.col[2]=(0x7F-0x40*state/255)*vjoy_pos[13][0];
@ -1590,7 +1640,8 @@ bool RenderFrame()
dc_height=480;
}
float scale_x=1, scale_y=1;
scale_x = 1;
scale_y = 1;
float scissoring_scale_x = 1;
@ -1607,18 +1658,18 @@ bool RenderFrame()
scissoring_scale_x = 0.5f;
scale_x *= 0.5f;
}
if (SCALER_CTL.hscale)
{
scissoring_scale_x /= 2;
scale_x*=2;
}
}
if (SCALER_CTL.hscale)
{
scale_x*=2;
}
dc_width *= scale_x;
dc_height *= scale_y;
glUseProgram(gl.modvol_shader.program);
/*
float vnear=0;
@ -1681,21 +1732,10 @@ bool RenderFrame()
s32 fog_den_exp=(s8)fog_density[0];
ShaderUniforms.fog_den_float=fog_den_mant*powf(2.0f,fog_den_exp);
if (fog_needs_update)
{
fog_needs_update=false;
//Get the coefs for the fog curve
u8* fog_table=(u8*)FOG_TABLE;
float xvals[128];
float yvals[128];
for (int i=0;i<128;i++)
{
xvals[i]=powf(2.0f,i>>4)*(1+(i&15)/16.f);
yvals[i]=fog_table[i*4+1]/255.0f;
}
tryfit(xvals,yvals);
fog_needs_update = false;
UpdateFogTexture((u8 *)FOG_TABLE, GL_TEXTURE1, gl.fog_image_format);
}
glUseProgram(gl.modvol_shader.program);
@ -1784,6 +1824,7 @@ bool RenderFrame()
else
glClearColor(0,0,0,1.0f);
glDepthMask(GL_TRUE);
glClearDepthf(0.f); glCheck();
glStencilMask(0xFF); glCheck();
glClear(GL_COLOR_BUFFER_BIT|GL_STENCIL_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); glCheck();
@ -1822,13 +1863,28 @@ bool RenderFrame()
printf("SCI: %f, %f, %f, %f\n", offs_x+pvrrc.fb_X_CLIP.min/scale_x,(pvrrc.fb_Y_CLIP.min/scale_y)*dc2s_scale_h,(pvrrc.fb_X_CLIP.max-pvrrc.fb_X_CLIP.min+1)/scale_x*dc2s_scale_h,(pvrrc.fb_Y_CLIP.max-pvrrc.fb_Y_CLIP.min+1)/scale_y*dc2s_scale_h);
#endif
glScissor(offs_x+pvrrc.fb_X_CLIP.min/scale_x,(pvrrc.fb_Y_CLIP.min/scale_y)*dc2s_scale_h,(pvrrc.fb_X_CLIP.max-pvrrc.fb_X_CLIP.min+1)/scale_x*dc2s_scale_h,(pvrrc.fb_Y_CLIP.max-pvrrc.fb_Y_CLIP.min+1)/scale_y*dc2s_scale_h);
if (settings.rend.WideScreen && pvrrc.fb_X_CLIP.min==0 && ((pvrrc.fb_X_CLIP.max+1)/scale_x==640) && (pvrrc.fb_Y_CLIP.min==0) && ((pvrrc.fb_Y_CLIP.max+1)/scale_y==480 ) )
{
glDisable(GL_SCISSOR_TEST);
}
else
{
float width = (pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1) / scale_x;
float height = (pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1) / scale_y;
float min_x = pvrrc.fb_X_CLIP.min / scale_x;
float min_y = pvrrc.fb_Y_CLIP.min / scale_y;
if (!is_rtt)
{
// Add x offset for aspect ratio > 4/3
min_x = min_x * dc2s_scale_h + ds2s_offs_x;
// Invert y coordinates when rendering to screen
min_y = screen_height - (min_y + height) * dc2s_scale_h;
width *= dc2s_scale_h;
height *= dc2s_scale_h;
}
glScissor(min_x + 0.5f, min_y + 0.5f, width + 0.5f, height + 0.5f);
glEnable(GL_SCISSOR_TEST);
}
//restore scale_x

View File

@ -53,7 +53,7 @@ struct PipelineShader
GLuint scale,depth_scale;
GLuint pp_ClipTest,cp_AlphaTestValue;
GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY,sp_LOG_FOG_COEFS;
GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY;
//
u32 cp_AlphaTest; s32 pp_ClipTestMode;
@ -97,7 +97,11 @@ struct gl_ctx
#endif
} vbo;
const char *gl_version;
const char *glsl_version_header;
int gl_major;
bool is_gles;
GLuint fog_image_format;
//GLuint matrix;
};