EFB2RAM: Apply copy filter as a float coefficient after sampling

Using 8-bit integer math here led to precision loss for depth copies,
which broke various effects in games, e.g. lens flare in MK:DD.

It's unlikely the console implements this as a floating-point multiply
(fixed-point perhaps), but since we have the float round trip in our
EFB2RAM shaders anyway, it's not going to make things any worse. If we
do rewrite our shaders to use integer math completely, then it might be
worth switching this conversion back to integers.

However, the range of the values (format) should be known, or we should
expand all values out to 24 bits first.
This commit is contained in:
Stenzek 2018-05-22 12:14:48 +10:00
parent 59be5da24c
commit f74dbc794c
10 changed files with 38 additions and 35 deletions

View File

@ -34,7 +34,7 @@ struct EFBEncodeParams
float gamma_rcp; float gamma_rcp;
float clamp_top; float clamp_top;
float clamp_bottom; float clamp_bottom;
s32 filter_coefficients[3]; float filter_coefficients[3];
u32 padding; u32 padding;
}; };
@ -169,4 +169,4 @@ ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyParams&
m_encoding_shaders.emplace(params, newShader); m_encoding_shaders.emplace(params, newShader);
return newShader; return newShader;
} }
} } // namespace DX11

View File

@ -276,7 +276,7 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
}; };
PixelConstants constants; PixelConstants constants;
for (size_t i = 0; i < filter_coefficients.size(); i++) for (size_t i = 0; i < filter_coefficients.size(); i++)
constants.filter_coefficients[i] = filter_coefficients[i] / 64.0f; constants.filter_coefficients[i] = filter_coefficients[i];
constants.gamma_rcp = 1.0f / gamma; constants.gamma_rcp = 1.0f / gamma;
constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
constants.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; constants.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
@ -315,4 +315,4 @@ TextureCache::GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderU
m_efb_to_tex_pixel_shaders.emplace(uid, shader); m_efb_to_tex_pixel_shaders.emplace(uid, shader);
return shader; return shader;
} }
} } // namespace DX11

View File

@ -558,12 +558,12 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
glUniform2f(shader.clamp_tb_uniform, glUniform2f(shader.clamp_tb_uniform,
clamp_bottom ? (1.0f - src_rect.bottom / static_cast<float>(EFB_HEIGHT)) : 0.0f, clamp_bottom ? (1.0f - src_rect.bottom / static_cast<float>(EFB_HEIGHT)) : 0.0f,
clamp_top ? (1.0f - src_rect.top / static_cast<float>(EFB_HEIGHT)) : 1.0f); clamp_top ? (1.0f - src_rect.top / static_cast<float>(EFB_HEIGHT)) : 1.0f);
glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0] / 64.0f, glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0], filter_coefficients[1],
filter_coefficients[1] / 64.0f, filter_coefficients[2] / 64.0f); filter_coefficients[2]);
ProgramShaderCache::BindVertexFormat(nullptr); ProgramShaderCache::BindVertexFormat(nullptr);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
g_renderer->RestoreAPIState(); g_renderer->RestoreAPIState();
} }
} } // namespace OGL

View File

@ -50,7 +50,7 @@ std::unique_ptr<AbstractStagingTexture> s_encoding_readback_texture;
const int renderBufferWidth = EFB_WIDTH * 4; const int renderBufferWidth = EFB_WIDTH * 4;
const int renderBufferHeight = 1024; const int renderBufferHeight = 1024;
} } // namespace
static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params) static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params)
{ {
@ -158,7 +158,7 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ
glUniform1f(texconv_shader.y_scale_uniform, y_scale); glUniform1f(texconv_shader.y_scale_uniform, y_scale);
glUniform1f(texconv_shader.gamma_rcp_uniform, 1.0f / gamma); glUniform1f(texconv_shader.gamma_rcp_uniform, 1.0f / gamma);
glUniform2f(texconv_shader.clamp_tb_uniform, clamp_top, clamp_bottom); glUniform2f(texconv_shader.clamp_tb_uniform, clamp_top, clamp_bottom);
glUniform3i(texconv_shader.filter_coefficients_uniform, filter_coefficients[0], glUniform3f(texconv_shader.filter_coefficients_uniform, filter_coefficients[0],
filter_coefficients[1], filter_coefficients[2]); filter_coefficients[1], filter_coefficients[2]);
const GLuint read_texture = params.depth ? const GLuint read_texture = params.depth ?
@ -171,6 +171,6 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ
g_renderer->RestoreAPIState(); g_renderer->RestoreAPIState();
} }
} // namespace } // namespace TextureConverter
} // namespace OGL } // namespace OGL

View File

@ -244,7 +244,7 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
}; };
PixelUniforms uniforms; PixelUniforms uniforms;
for (size_t i = 0; i < filter_coefficients.size(); i++) for (size_t i = 0; i < filter_coefficients.size(); i++)
uniforms.filter_coefficients[i] = filter_coefficients[i] / 64.0f; uniforms.filter_coefficients[i] = filter_coefficients[i];
uniforms.gamma_rcp = 1.0f / gamma; uniforms.gamma_rcp = 1.0f / gamma;
uniforms.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; uniforms.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
uniforms.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; uniforms.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;

View File

@ -41,10 +41,10 @@ struct EFBEncodeParams
float gamma_rcp; float gamma_rcp;
float clamp_top; float clamp_top;
float clamp_bottom; float clamp_bottom;
s32 filter_coefficients[3]; float filter_coefficients[3];
u32 padding; u32 padding;
}; };
} } // namespace
TextureConverter::TextureConverter() TextureConverter::TextureConverter()
{ {
} }

View File

@ -1506,10 +1506,14 @@ TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterC
{ {
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1 // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below. // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
return {static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]), return {
static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) + static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
static_cast<u32>(coefficients[4]), 64.0f,
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])}; static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<u32>(coefficients[4])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
64.0f};
} }
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients( TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients(

View File

@ -109,7 +109,7 @@ private:
public: public:
// Reduced version of the full coefficient array, reduced to a single value for each row. // Reduced version of the full coefficient array, reduced to a single value for each row.
using CopyFilterCoefficientArray = std::array<u32, 3>; using CopyFilterCoefficientArray = std::array<float, 3>;
struct TCacheEntry struct TCacheEntry
{ {

View File

@ -67,7 +67,7 @@ static void WriteHeader(char*& p, APIType ApiType)
WRITE(p, "uniform float y_scale;\n"); WRITE(p, "uniform float y_scale;\n");
WRITE(p, "uniform float gamma_rcp;\n"); WRITE(p, "uniform float gamma_rcp;\n");
WRITE(p, "uniform float2 clamp_tb;\n"); WRITE(p, "uniform float2 clamp_tb;\n");
WRITE(p, "uniform int3 filter_coefficients;\n"); WRITE(p, "uniform float3 filter_coefficients;\n");
WRITE(p, "#define samp0 samp9\n"); WRITE(p, "#define samp0 samp9\n");
WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n"); WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n");
WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
@ -79,7 +79,7 @@ static void WriteHeader(char*& p, APIType ApiType)
WRITE(p, " float y_scale;\n"); WRITE(p, " float y_scale;\n");
WRITE(p, " float gamma_rcp;\n"); WRITE(p, " float gamma_rcp;\n");
WRITE(p, " float2 clamp_tb;\n"); WRITE(p, " float2 clamp_tb;\n");
WRITE(p, " int3 filter_coefficients;\n"); WRITE(p, " float3 filter_coefficients;\n");
WRITE(p, "};\n"); WRITE(p, "};\n");
WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
@ -91,7 +91,7 @@ static void WriteHeader(char*& p, APIType ApiType)
WRITE(p, " float y_scale;\n"); WRITE(p, " float y_scale;\n");
WRITE(p, " float gamma_rcp;\n"); WRITE(p, " float gamma_rcp;\n");
WRITE(p, " float2 clamp_tb;\n"); WRITE(p, " float2 clamp_tb;\n");
WRITE(p, " int3 filter_coefficients;\n"); WRITE(p, " float3 filter_coefficients;\n");
WRITE(p, "};\n"); WRITE(p, "};\n");
WRITE(p, "sampler samp0 : register(s0);\n"); WRITE(p, "sampler samp0 : register(s0);\n");
WRITE(p, "Texture2DArray Tex0 : register(t0);\n"); WRITE(p, "Texture2DArray Tex0 : register(t0);\n");
@ -191,21 +191,18 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A
WRITE(p, " float4 next_row = "); WRITE(p, " float4 next_row = ");
WriteSampleOp(1); WriteSampleOp(1);
WRITE(p, ";\n"); WRITE(p, ";\n");
WRITE( WRITE(p, " return float4(min(prev_row.rgb * filter_coefficients[0] +\n"
p, " current_row.rgb * filter_coefficients[1] +\n"
" float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n" " next_row.rgb * filter_coefficients[2], \n"
" int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n" " float3(1, 1, 1)), current_row.a);\n");
" int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
" int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
WRITE(p, " return float4(col, current_row.a);\n");
} }
else else
{ {
WRITE(p, " float4 current_row = "); WRITE(p, " float4 current_row = ");
WriteSampleOp(0); WriteSampleOp(0);
WRITE(p, ";\n"); WRITE(p, ";\n");
WRITE(p, " return float4(clamp(int3(current_row.rgb * 255.0) * filter_coefficients[1], " WRITE(p, "return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
"int3(0, 0, 0), int3(255, 255, 255)), current_row.a);\n"); " current_row.a);\n");
} }
WRITE(p, "}\n"); WRITE(p, "}\n");
} }
@ -1422,4 +1419,4 @@ std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_form
return ss.str(); return ss.str();
} }
} // namespace } // namespace TextureConversionShaderTiled

View File

@ -97,15 +97,17 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n" out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
" float4 current_row = SampleEFB(uv0, 0.0f);\n" " float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 next_row = SampleEFB(uv0, 1.0f);\n" " float4 next_row = SampleEFB(uv0, 1.0f);\n"
" float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n" " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n" " current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], current_row.a);\n"); " next_row.rgb * filter_coefficients[2], \n"
" float3(1, 1, 1)), current_row.a);\n");
} }
else else
{ {
out.Write( out.Write(
" float4 current_row = SampleEFB(uv0, 0.0f);\n" " float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 texcol = float4(current_row.rgb * filter_coefficients[1], current_row.a);\n"); " float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
" current_row.a);\n");
} }
if (uid_data->is_depth_copy) if (uid_data->is_depth_copy)