rend: fix dithering kernel

Use the actual PVR2 dithering kernel (a standard 4x4 Bayer matrix), as
implemented on real hardware.
Fixes the screen melt effect of Doom64 (per-pixel only).
Issue #1939
This commit is contained in:
Flyinghead 2025-05-26 19:27:02 +02:00
parent b973453317
commit 27f19a5b54
17 changed files with 97 additions and 123 deletions

View File

@ -445,21 +445,19 @@ void DX11Renderer::setupPixelShaderConstants()
{
case 0: // 0555 KRGB 16 bit
case 3: // 1555 ARGB 16 bit
pixelConstants.ditherColorMax[0] = pixelConstants.ditherColorMax[1] = pixelConstants.ditherColorMax[2] = 31.f;
pixelConstants.ditherColorMax[3] = 255.f;
break;
pixelConstants.ditherDivisor[0] = pixelConstants.ditherDivisor[1] = pixelConstants.ditherDivisor[2] = 2.f;
break;
case 1: // 565 RGB 16 bit
pixelConstants.ditherColorMax[0] = pixelConstants.ditherColorMax[2] = 31.f;
pixelConstants.ditherColorMax[1] = 63.f;
pixelConstants.ditherColorMax[3] = 255.f;
pixelConstants.ditherDivisor[0] = pixelConstants.ditherDivisor[2] = 2.f;
pixelConstants.ditherDivisor[1] = 4.f;
break;
case 2: // 4444 ARGB 16 bit
pixelConstants.ditherColorMax[0] = pixelConstants.ditherColorMax[1]
= pixelConstants.ditherColorMax[2] = pixelConstants.ditherColorMax[3] = 15.f;
pixelConstants.ditherDivisor[0] = pixelConstants.ditherDivisor[1] = pixelConstants.ditherDivisor[2] = 1.f;
break;
default:
break;
}
pixelConstants.ditherDivisor[3] = 1.f;
}
D3D11_MAPPED_SUBRESOURCE mappedSubres;

View File

@ -73,7 +73,7 @@ protected:
float colorClampMax[4];
float fog_col_vert[4];
float fog_col_ram[4];
float ditherColorMax[4];
float ditherDivisor[4];
float fogDensity;
float shadowScale;
float alphaTestValue;

View File

@ -222,7 +222,7 @@ cbuffer constantBuffer : register(b0)
float4 colorClampMax;
float4 FOG_COL_VERT;
float4 FOG_COL_RAM;
float4 ditherColorMax;
float4 ditherDivisor;
float fogDensity;
float shadowScale;
float alphaTestValue;
@ -341,16 +341,14 @@ PSO main(in Pixel inpix)
#if DITHERING == 1
static const float ditherTable[16] = {
0.9375f, 0.1875f, 0.75f, 0.0f,
0.4375f, 0.6875f, 0.25f, 0.5f,
0.8125f, 0.0625f, 0.875f, 0.125f,
0.3125f, 0.5625f, 0.375f, 0.625f
5.0f, 13.0f, 7.0f, 15.0f,
9.0f, 1.0f, 11.0f, 3.0f,
6.0f, 14.0f, 4.0f, 12.0f,
10.0f, 2.0f, 8.0f, 0.0f
};
float r = ditherTable[int(inpix.pos.y % 4.0f) * 4 + int(inpix.pos.x % 4.0f)] + 0.03125f; // why is this bias needed??
// 31 for 5-bit color, 63 for 6 bits, 15 for 4 bits
color += r / ditherColorMax;
// avoid rounding
color = floor(color * 255.0f) / 255.0f;
float r = ditherTable[int(inpix.pos.y % 4.0f) * 4 + int(inpix.pos.x % 4.0f)];
float4 dv = float4(r, r, r, 1.0f) / ditherDivisor;
color = clamp(floor(color * 255.0f + dv) / 255.0f, 0.0f, 1.0f);
#endif
PSO pso;
#if DIV_POS_Z == 1

View File

@ -116,7 +116,7 @@ cbuffer constantBuffer : register(b0)
float4 colorClampMax;
float4 FOG_COL_VERT;
float4 FOG_COL_RAM;
float4 ditherColorMax;
float4 ditherDivisor;
float fogDensity;
float shadowScale;
float alphaTestValue;
@ -652,16 +652,14 @@ float4 resolveAlphaBlend(in float2 pos)
}
#if DITHERING == 1
static const float ditherTable[16] = {
0.9375f, 0.1875f, 0.75f, 0.0f,
0.4375f, 0.6875f, 0.25f, 0.5f,
0.8125f, 0.0625f, 0.875f, 0.125f,
0.3125f, 0.5625f, 0.375f, 0.625f
5.0f, 13.0f, 7.0f, 15.0f,
9.0f, 1.0f, 11.0f, 3.0f,
6.0f, 14.0f, 4.0f, 12.0f,
10.0f, 2.0f, 8.0f, 0.0f
};
float r = ditherTable[int(pos.y % 4.0f) * 4 + int(pos.x % 4.0f)] + 0.03125f; // why is this bias needed??
// 31 for 5-bit color, 63 for 6 bits, 15 for 4 bits
finalColor += r / ditherColorMax;
// avoid rounding
finalColor = floor(finalColor * 255.0f) / 255.0f;
float r = ditherTable[int(pos.y % 4.0f) * 4 + int(pos.x % 4.0f)];
float4 dv = float4(r, r, r, 1.0f) / ditherDivisor;
finalColor = clamp(floor(finalColor * 255.0f + dv) / 255.0f, 0.0f, 1.0f);
#endif
return finalColor;

View File

@ -1116,27 +1116,24 @@ bool D3DRenderer::Render()
dithering = config::EmulateFramebuffer && pvrrc.fb_W_CTRL.fb_dither && pvrrc.fb_W_CTRL.fb_packmode <= 3;
if (dithering)
{
float ditherColorMax[4];
float ditherDivisor[4] { 0.f, 0.f, 0.f, 1.f };
switch (pvrrc.fb_W_CTRL.fb_packmode)
{
case 0: // 0555 KRGB 16 bit
case 3: // 1555 ARGB 16 bit
ditherColorMax[0] = ditherColorMax[1] = ditherColorMax[2] = 31.f;
ditherColorMax[3] = 255.f;
ditherDivisor[0] = ditherDivisor[1] = ditherDivisor[2] = 2.f;
break;
case 1: // 565 RGB 16 bit
ditherColorMax[0] = ditherColorMax[2] = 31.f;
ditherColorMax[1] = 63.f;
ditherColorMax[3] = 255.f;
ditherDivisor[0] = ditherDivisor[2] = 2.f;
ditherDivisor[1] = 4.f;
break;
case 2: // 4444 ARGB 16 bit
ditherColorMax[0] = ditherColorMax[1]
= ditherColorMax[2] = ditherColorMax[3] = 15.f;
ditherDivisor[0] = ditherDivisor[1] = ditherDivisor[2] = 1.f;
break;
default:
break;
}
device->SetPixelShaderConstantF(8, ditherColorMax, 1);
device->SetPixelShaderConstantF(8, ditherDivisor, 1);
}
devCache.SetRenderState(D3DRS_ZENABLE, D3DZB_TRUE);

View File

@ -94,7 +94,7 @@ float4 clipTest : register(c4);
float4 trilinearAlpha : register(c5);
float4 colorClampMin : register(c6);
float4 colorClampMax : register(c7);
float4 ditherColorMax : register(c8);
float4 ditherDivisor : register(c8);
float4 textureSize : register(c9);
float fog_mode2(float w)
@ -265,16 +265,15 @@ PSO main(in pixel inpix)
#if DITHERING == 1
static const float ditherTable[16] = {
0.9375f, 0.1875f, 0.75f, 0.0f,
0.4375f, 0.6875f, 0.25f, 0.5f,
0.8125f, 0.0625f, 0.875f, 0.125f,
0.3125f, 0.5625f, 0.375f, 0.625f
5.0f, 13.0f, 7.0f, 15.0f,
9.0f, 1.0f, 11.0f, 3.0f,
6.0f, 14.0f, 4.0f, 12.0f,
10.0f, 2.0f, 8.0f, 0.0f
};
float r = ditherTable[int(inpix.pos.y % 4.0f) * 4 + int(inpix.pos.x % 4.0f)] + 0.03125f; // why is this bias needed??
// 31 for 5-bit color, 63 for 6 bits, 15 for 4 bits
color += r / ditherColorMax;
// avoid rounding
color = floor(color * 255.0f) / 255.0f;
float r = ditherTable[int(inpix.pos.y % 4.0f) * 4 + int(inpix.pos.x % 4.0f)];
float4 dv = float4(r, r, r, 1.0f) / ditherDivisor;
color = clamp(floor(color * 255.0f + dv) / 255.0f, 0.0f, 1.0f);
#endif
PSO pso;
#if DIV_POS_Z == 1

View File

@ -47,7 +47,7 @@ static const char *final_shader_source = R"(
layout(binding = 0) uniform sampler2D tex;
uniform float shade_scale_factor;
#if DITHERING == 1
uniform vec4 ditherColorMax;
uniform vec4 ditherDivisor;
#endif
out vec4 FragColor;
@ -183,16 +183,14 @@ vec4 resolveAlphaBlend(ivec2 coords) {
}
#if DITHERING == 1
float ditherTable[16] = float[](
0.9375, 0.1875, 0.75, 0.,
0.4375, 0.6875, 0.25, 0.5,
0.8125, 0.0625, 0.875, 0.125,
0.3125, 0.5625, 0.375, 0.625
5., 13., 7., 15.,
9., 1., 11., 3.,
6., 14., 4., 12.,
10., 2., 8., 0.
);
float r = ditherTable[int(mod(gl_FragCoord.y, 4.)) * 4 + int(mod(gl_FragCoord.x, 4.))];
// 31 for 5-bit color, 63 for 6 bits, 15 for 4 bits
finalColor += r / ditherColorMax;
// avoid rounding
finalColor = floor(finalColor * 255.) / 255.;
vec4 dv = vec4(r, r, r, 1.) / ditherDivisor;
finalColor = clamp(floor(finalColor * 255. + dv) / 255., 0., 1.);
#endif
return finalColor;
@ -589,21 +587,19 @@ void renderABuffer(bool lastPass)
{
case 0: // 0555 KRGB 16 bit
case 3: // 1555 ARGB 16 bit
gl4ShaderUniforms.ditherColorMax[0] = gl4ShaderUniforms.ditherColorMax[1] = gl4ShaderUniforms.ditherColorMax[2] = 31.f;
gl4ShaderUniforms.ditherColorMax[3] = 255.f;
gl4ShaderUniforms.ditherDivisor[0] = gl4ShaderUniforms.ditherDivisor[1] = gl4ShaderUniforms.ditherDivisor[2] = 2.f;
break;
case 1: // 565 RGB 16 bit
gl4ShaderUniforms.ditherColorMax[0] = gl4ShaderUniforms.ditherColorMax[2] = 31.f;
gl4ShaderUniforms.ditherColorMax[1] = 63.f;
gl4ShaderUniforms.ditherColorMax[3] = 255.f;
gl4ShaderUniforms.ditherDivisor[0] = gl4ShaderUniforms.ditherDivisor[2] = 2.f;
gl4ShaderUniforms.ditherDivisor[1] = 4.f;
break;
case 2: // 4444 ARGB 16 bit
gl4ShaderUniforms.ditherColorMax[0] = gl4ShaderUniforms.ditherColorMax[1]
= gl4ShaderUniforms.ditherColorMax[2] = gl4ShaderUniforms.ditherColorMax[3] = 15.f;
gl4ShaderUniforms.ditherDivisor[0] = gl4ShaderUniforms.ditherDivisor[1] = gl4ShaderUniforms.ditherDivisor[2] = 1.f;
break;
default:
break;
}
gl4ShaderUniforms.ditherDivisor[3] = 1.f;
gl4ShaderUniforms.Set(&g_abuffer_final_shader[1]);
}
else {

View File

@ -45,7 +45,7 @@ struct gl4PipelineShader
GLint fog_clamp_min, fog_clamp_max;
GLint ndcMat;
GLint palette_index;
GLint ditherColorMax;
GLint ditherDivisor;
// Naomi2
GLint mvMat;
@ -228,7 +228,7 @@ extern struct gl4ShaderUniforms_t
int height;
} base_clipping;
int palette_index;
float ditherColorMax[4];
float ditherDivisor[4];
void setUniformArray(GLint location, int v0, int v1)
{
@ -259,7 +259,7 @@ extern struct gl4ShaderUniforms_t
glUniform4fv(s->fog_clamp_max, 1, fog_clamp_max);
glUniformMatrix4fv(s->ndcMat, 1, GL_FALSE, &ndcMat[0][0]);
glUniform1i(s->palette_index, palette_index);
glUniform4fv(s->ditherColorMax, 1, ditherColorMax);
glUniform4fv(s->ditherDivisor, 1, ditherDivisor);
}
} gl4ShaderUniforms;

View File

@ -589,7 +589,7 @@ bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source
if (gu != -1)
glUniform1i(gu, 6); // GL_TEXTURE6
s->palette_index = glGetUniformLocation(s->program, "palette_index");
s->ditherColorMax = glGetUniformLocation(s->program, "ditherColorMax");
s->ditherDivisor = glGetUniformLocation(s->program, "ditherDivisor");
if (s->naomi2)
initN2Uniforms(s);

View File

@ -147,7 +147,7 @@ uniform lowp vec2 texSize;
#endif
#endif
#if DITHERING == 1
uniform lowp vec4 ditherColorMax;
uniform lowp vec4 ditherDivisor;
#endif
/* Vertex input*/
@ -360,16 +360,14 @@ void main()
#if DITHERING == 1
mediump float ditherTable[16] = float[](
0.9375, 0.1875, 0.75, 0.,
0.4375, 0.6875, 0.25, 0.5,
0.8125, 0.0625, 0.875, 0.125,
0.3125, 0.5625, 0.375, 0.625
5., 13., 7., 15.,
9., 1., 11., 3.,
6., 14., 4., 12.,
10., 2., 8., 0.
);
mediump float r = ditherTable[int(mod(gl_FragCoord.y, 4.)) * 4 + int(mod(gl_FragCoord.x, 4.))];
// 31 for 5-bit color, 63 for 6 bits, 15 for 4 bits
color += r / ditherColorMax;
// avoid rounding
color = floor(color * 255.) / 255.;
mediump vec4 dv = vec4(r, r, r, 1.) / ditherDivisor;
color = clamp(floor(color * 255. + dv) / 255., 0., 1.);
#endif
#endif
gl_FragColor = color;
@ -857,7 +855,7 @@ bool CompilePipelineShader(PipelineShader* s)
s->fog_clamp_max = -1;
}
s->ndcMat = glGetUniformLocation(s->program, "ndcMat");
s->ditherColorMax = glGetUniformLocation(s->program, "ditherColorMax");
s->ditherDivisor = glGetUniformLocation(s->program, "ditherDivisor");
s->texSize = glGetUniformLocation(s->program, "texSize");
if (s->naomi2)
@ -1153,21 +1151,19 @@ bool OpenGLRenderer::renderFrame(int width, int height)
{
case 0: // 0555 KRGB 16 bit
case 3: // 1555 ARGB 16 bit
ShaderUniforms.ditherColorMax[0] = ShaderUniforms.ditherColorMax[1] = ShaderUniforms.ditherColorMax[2] = 31.f;
ShaderUniforms.ditherColorMax[3] = 255.f;
ShaderUniforms.ditherDivisor[0] = ShaderUniforms.ditherDivisor[1] = ShaderUniforms.ditherDivisor[2] = 2.f;
break;
case 1: // 565 RGB 16 bit
ShaderUniforms.ditherColorMax[0] = ShaderUniforms.ditherColorMax[2] = 31.f;
ShaderUniforms.ditherColorMax[1] = 63.f;
ShaderUniforms.ditherColorMax[3] = 255.f;
ShaderUniforms.ditherDivisor[0] = ShaderUniforms.ditherDivisor[2] = 2.f;
ShaderUniforms.ditherDivisor[1] = 4.f;
break;
case 2: // 4444 ARGB 16 bit
ShaderUniforms.ditherColorMax[0] = ShaderUniforms.ditherColorMax[1]
= ShaderUniforms.ditherColorMax[2] = ShaderUniforms.ditherColorMax[3] = 15.f;
ShaderUniforms.ditherDivisor[0] = ShaderUniforms.ditherDivisor[1] = ShaderUniforms.ditherDivisor[2] = 1.f;
break;
default:
break;
}
ShaderUniforms.ditherDivisor[3] = 1.f;
}
else
{

View File

@ -60,7 +60,7 @@ struct PipelineShader
GLint fog_clamp_min, fog_clamp_max;
GLint ndcMat;
GLint palette_index;
GLint ditherColorMax;
GLint ditherDivisor;
GLint texSize;
// Naomi2
@ -411,7 +411,7 @@ extern struct ShaderUniforms_t
int height;
} base_clipping;
bool dithering;
float ditherColorMax[4];
float ditherDivisor[4];
void Set(const PipelineShader* s)
{
@ -438,8 +438,8 @@ extern struct ShaderUniforms_t
if (s->ndcMat != -1)
glUniformMatrix4fv(s->ndcMat, 1, GL_FALSE, &ndcMat[0][0]);
if (s->ditherColorMax != -1)
glUniform4fv(s->ditherColorMax, 1, ditherColorMax);
if (s->ditherDivisor != -1)
glUniform4fv(s->ditherDivisor, 1, ditherDivisor);
}
} ShaderUniforms;

View File

@ -379,21 +379,19 @@ bool Drawer::Draw(const Texture *fogTexture, const Texture *paletteTexture)
{
case 0: // 0555 KRGB 16 bit
case 3: // 1555 ARGB 16 bit
fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[1] = fragUniforms.ditherColorMax[2] = 31.f;
fragUniforms.ditherColorMax[3] = 255.f;
fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 2.f;
break;
case 1: // 565 RGB 16 bit
fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[2] = 31.f;
fragUniforms.ditherColorMax[1] = 63.f;
fragUniforms.ditherColorMax[3] = 255.f;
fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[2] = 2.f;
fragUniforms.ditherDivisor[1] = 4.f;
break;
case 2: // 4444 ARGB 16 bit
fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[1]
= fragUniforms.ditherColorMax[2] = fragUniforms.ditherColorMax[3] = 15.f;
fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 1.f;
break;
default:
break;
}
fragUniforms.ditherDivisor[3] = 1.f;
}
currentScissor = vk::Rect2D();

View File

@ -314,21 +314,19 @@ bool OITDrawer::Draw(const Texture *fogTexture, const Texture *paletteTexture)
{
case 0: // 0555 KRGB 16 bit
case 3: // 1555 ARGB 16 bit
fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[1] = fragUniforms.ditherColorMax[2] = 31.f;
fragUniforms.ditherColorMax[3] = 255.f;
fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 2.f;
break;
case 1: // 565 RGB 16 bit
fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[2] = 31.f;
fragUniforms.ditherColorMax[1] = 63.f;
fragUniforms.ditherColorMax[3] = 255.f;
fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[2] = 2.f;
fragUniforms.ditherDivisor[1] = 4.f;
break;
case 2: // 4444 ARGB 16 bit
fragUniforms.ditherColorMax[0] = fragUniforms.ditherColorMax[1]
= fragUniforms.ditherColorMax[2] = fragUniforms.ditherColorMax[3] = 15.f;
fragUniforms.ditherDivisor[0] = fragUniforms.ditherDivisor[1] = fragUniforms.ditherDivisor[2] = 1.f;
break;
default:
break;
}
fragUniforms.ditherDivisor[3] = 1.f;
}
currentScissor = vk::Rect2D();

View File

@ -45,7 +45,7 @@ public:
float colorClampMax[4];
float sp_FOG_COL_RAM[4]; // Only using 3 elements but easier for std140
float sp_FOG_COL_VERT[4]; // same comment
float ditherColorMax[4];
float ditherDivisor[4];
float cp_AlphaTestValue;
float sp_FOG_DENSITY;
float shade_scale_factor; // new for OIT

View File

@ -91,7 +91,7 @@ layout (std140, set = 0, binding = 1) uniform FragmentShaderUniforms
vec4 colorClampMax;
vec4 sp_FOG_COL_RAM;
vec4 sp_FOG_COL_VERT;
vec4 ditherColorMax;
vec4 ditherDivisor;
float cp_AlphaTestValue;
float sp_FOG_DENSITY;
float shade_scale_factor;
@ -528,16 +528,14 @@ vec4 resolveAlphaBlend(ivec2 coords) {
#if DITHERING == 1
float ditherTable[16] = float[](
0.9375, 0.1875, 0.75, 0.,
0.4375, 0.6875, 0.25, 0.5,
0.8125, 0.0625, 0.875, 0.125,
0.3125, 0.5625, 0.375, 0.625
5., 13., 7., 15.,
9., 1., 11., 3.,
6., 14., 4., 12.,
10., 2., 8., 0.
);
float r = ditherTable[int(mod(gl_FragCoord.y, 4.)) * 4 + int(mod(gl_FragCoord.x, 4.))];
// 31 for 5-bit color, 63 for 6 bits, 15 for 4 bits
finalColor += r / uniformBuffer.ditherColorMax;
// avoid rounding
finalColor = floor(finalColor * 255.) / 255.;
vec4 dv = vec4(r, r, r, 1.) / uniformBuffer.ditherDivisor;
finalColor = clamp(floor(finalColor * 255. + dv) / 255., 0., 1.);
#endif
return finalColor;

View File

@ -72,7 +72,7 @@ layout (std140, set = 0, binding = 1) uniform FragmentShaderUniforms
vec4 colorClampMax;
vec4 sp_FOG_COL_RAM;
vec4 sp_FOG_COL_VERT;
vec4 ditherColorMax;
vec4 ditherDivisor;
float cp_AlphaTestValue;
float sp_FOG_DENSITY;
} uniformBuffer;
@ -294,16 +294,14 @@ void main()
#if DITHERING == 1
float ditherTable[16] = float[](
0.9375, 0.1875, 0.75, 0.,
0.4375, 0.6875, 0.25, 0.5,
0.8125, 0.0625, 0.875, 0.125,
0.3125, 0.5625, 0.375, 0.625
5., 13., 7., 15.,
9., 1., 11., 3.,
6., 14., 4., 12.,
10., 2., 8., 0.
);
float r = ditherTable[int(mod(gl_FragCoord.y, 4.)) * 4 + int(mod(gl_FragCoord.x, 4.))];
// 31 for 5-bit color, 63 for 6 bits, 15 for 4 bits
color += r / uniformBuffer.ditherColorMax;
// avoid rounding
color = floor(color * 255.) / 255.;
vec4 dv = vec4(r, r, r, 1.) / uniformBuffer.ditherDivisor;
color = clamp(floor(color * 255. + dv) / 255., 0., 1.);
#endif
gl_FragColor = color;
}

View File

@ -84,7 +84,7 @@ struct FragmentShaderUniforms
float colorClampMax[4];
float sp_FOG_COL_RAM[4]; // Only using 3 elements but easier for std140
float sp_FOG_COL_VERT[4]; // same comment
float ditherColorMax[4];
float ditherDivisor[4];
float cp_AlphaTestValue;
float sp_FOG_DENSITY;
};