Implement dual-source blending in shader

For GLES drivers that don't support dual-source blending but do expose
GL_EXT_shader_framebuffer_fetch or GL_ARM_shader_framebuffer_fetch, perform the
blend in the pixel shader instead, reading the current framebuffer value there.
This commit is contained in:
Jonathan Hamilton 2017-10-25 22:44:39 -07:00
parent 11976526d1
commit 29a9ed043b
12 changed files with 222 additions and 41 deletions

View File

@ -82,6 +82,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsBitfield = false;
g_Config.backend_info.bSupportsDynamicSamplerIndexing = false;
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
IDXGIFactory2* factory;
IDXGIAdapter* ad;

View File

@ -46,6 +46,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
g_Config.backend_info.bSupportsST3CTextures = false;
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
// aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear();

View File

@ -885,6 +885,24 @@ void ProgramShaderCache::CreateHeader()
}
}
std::string framebuffer_fetch_string;
switch (g_ogl_config.SupportedFramebufferFetch)
{
case ES_FB_FETCH_TYPE::FB_FETCH_EXT:
framebuffer_fetch_string = "#extension GL_EXT_shader_framebuffer_fetch: enable\n"
"#define FB_FETCH_VALUE ocol0\n"
"#define FRAGMENT_INOUT inout";
break;
case ES_FB_FETCH_TYPE::FB_FETCH_ARM:
framebuffer_fetch_string = "#extension GL_ARM_shader_framebuffer_fetch: enable\n"
"#define FB_FETCH_VALUE gl_LastFragColorARM\n"
"#define FRAGMENT_INOUT out";
break;
case ES_FB_FETCH_TYPE::FB_FETCH_NONE:
framebuffer_fetch_string = "";
break;
}
s_glsl_header = StringFromFormat(
"%s\n"
"%s\n" // ubo
@ -902,6 +920,7 @@ void ProgramShaderCache::CreateHeader()
"%s\n" // ES texture buffer
"%s\n" // ES dual source blend
"%s\n" // shader image load store
"%s\n" // shader framebuffer fetch
// Precision defines for GLSL ES
"%s\n"
@ -976,8 +995,8 @@ void ProgramShaderCache::CreateHeader()
((!is_glsles && v < GLSL_430) || (is_glsles && v < GLSLES_310)) ?
"#extension GL_ARB_shader_image_load_store : enable" :
"",
is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
is_glsles ? "precision highp sampler2DArray;" : "",
framebuffer_fetch_string.c_str(), is_glsles ? "precision highp float;" : "",
is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp sampler2DArray;" : "",
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
"precision highp usamplerBuffer;" :
"",

View File

@ -522,6 +522,21 @@ Renderer::Renderer()
// depth clamping.
g_Config.backend_info.bSupportsDepthClamp = false;
if (GLExtensions::Supports("GL_EXT_shader_framebuffer_fetch"))
{
g_ogl_config.SupportedFramebufferFetch = ES_FB_FETCH_TYPE::FB_FETCH_EXT;
}
else if (GLExtensions::Supports("GL_ARM_shader_framebuffer_fetch"))
{
g_ogl_config.SupportedFramebufferFetch = ES_FB_FETCH_TYPE::FB_FETCH_ARM;
}
else
{
g_ogl_config.SupportedFramebufferFetch = ES_FB_FETCH_TYPE::FB_FETCH_NONE;
}
g_Config.backend_info.bSupportsFramebufferFetch =
g_ogl_config.SupportedFramebufferFetch != ES_FB_FETCH_TYPE::FB_FETCH_NONE;
if (GLExtensions::Version() == 300)
{
g_ogl_config.eSupportedGLSLVersion = GLSLES_300;
@ -1275,44 +1290,54 @@ void Renderer::SetBlendingState(const BlendingState& state)
bool useDualSource =
state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha);
// Only use shader blend if we need to and we don't support dual-source blending directly
bool useShaderBlend = !useDualSource && state.usedualsrc && state.dstalpha &&
g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
const GLenum src_factors[8] = {
GL_ZERO,
GL_ONE,
GL_DST_COLOR,
GL_ONE_MINUS_DST_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
const GLenum dst_factors[8] = {
GL_ZERO,
GL_ONE,
GL_SRC_COLOR,
GL_ONE_MINUS_SRC_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
if (state.blendenable)
{
glEnable(GL_BLEND);
}
else
if (useShaderBlend)
{
glDisable(GL_BLEND);
}
else
{
const GLenum src_factors[8] = {
GL_ZERO,
GL_ONE,
GL_DST_COLOR,
GL_ONE_MINUS_DST_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
const GLenum dst_factors[8] = {
GL_ZERO,
GL_ONE,
GL_SRC_COLOR,
GL_ONE_MINUS_SRC_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA : (GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
// Always call glBlendEquationSeparate and glBlendFuncSeparate, even when
// GL_BLEND is disabled, as a workaround for some bugs (possibly graphics
// driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic
// Adventure 2 Battle: graphics crash when loading first Dark level"
GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
glBlendEquationSeparate(equation, equationAlpha);
glBlendFuncSeparate(src_factors[state.srcfactor], dst_factors[state.dstfactor],
src_factors[state.srcfactoralpha], dst_factors[state.dstfactoralpha]);
if (state.blendenable)
{
glEnable(GL_BLEND);
}
else
{
glDisable(GL_BLEND);
}
// Always call glBlendEquationSeparate and glBlendFuncSeparate, even when
// GL_BLEND is disabled, as a workaround for some bugs (possibly graphics
// driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic
// Adventure 2 Battle: graphics crash when loading first Dark level"
GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
glBlendEquationSeparate(equation, equationAlpha);
glBlendFuncSeparate(src_factors[state.srcfactor], dst_factors[state.dstfactor],
src_factors[state.srcfactoralpha], dst_factors[state.dstfactoralpha]);
}
const GLenum logic_op_codes[16] = {
GL_CLEAR, GL_AND, GL_AND_REVERSE, GL_COPY, GL_AND_INVERTED, GL_NOOP,

View File

@ -35,6 +35,13 @@ enum class ES_TEXBUF_TYPE
TEXBUF_EXT
};
// Identifies which framebuffer-fetch GLSL extension flavour, if any, is in use.
enum class ES_FB_FETCH_TYPE
{
  FB_FETCH_NONE = 0,  // No framebuffer-fetch extension selected
  FB_FETCH_EXT = 1,   // GL_EXT_shader_framebuffer_fetch
  FB_FETCH_ARM = 2    // GL_ARM_shader_framebuffer_fetch
};
// ogl-only config, so not in VideoConfig.h
struct VideoConfig
{
@ -59,6 +66,7 @@ struct VideoConfig
bool bSupportsAniso;
bool bSupportsBitfield;
bool bSupportsTextureSubImage;
ES_FB_FETCH_TYPE SupportedFramebufferFetch;
const char* gl_vendor;
const char* gl_renderer;

View File

@ -73,6 +73,7 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsCopyToVram = false;
g_Config.backend_info.bForceCopyToRam = true;
g_Config.backend_info.bSupportsFramebufferFetch = false;
// aamodes
g_Config.backend_info.AAModes = {1};

View File

@ -247,6 +247,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsReversedDepthRange = false; // No support yet due to driver bugs.
config->backend_info.bSupportsCopyToVram = true; // Assumed support.
config->backend_info.bForceCopyToRam = false;
config->backend_info.bSupportsFramebufferFetch = false;
}
void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list)

View File

@ -16,6 +16,7 @@
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/LightingShaderGen.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
@ -319,6 +320,21 @@ PixelShaderUid GetPixelShaderUid()
uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj;
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
BlendingState state = {};
state.Generate(bpmem);
if (state.usedualsrc && state.dstalpha && g_ActiveConfig.backend_info.bSupportsFramebufferFetch &&
!g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
uid_data->blend_enable = state.blendenable;
uid_data->blend_src_factor = state.srcfactor;
uid_data->blend_src_factor_alpha = state.srcfactoralpha;
uid_data->blend_dst_factor = state.dstfactor;
uid_data->blend_dst_factor_alpha = state.dstfactoralpha;
uid_data->blend_subtract = state.subtract;
uid_data->blend_subtract_alpha = state.subtractAlpha;
}
return out;
}
@ -447,6 +463,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
bool use_dual_source);
static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host_config,
const pixel_shader_uid_data* uid_data)
@ -519,6 +536,8 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
host_config.backend_dual_source_blend &&
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) ||
uid_data->useDstAlpha);
const bool use_shader_blend =
!use_dual_source && (uid_data->useDstAlpha && host_config.backend_shader_framebuffer_fetch);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
@ -535,6 +554,17 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
}
}
else if (use_shader_blend)
{
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
{
out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 ocol0;\n");
}
else
{
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 ocol0;\n");
}
}
else
{
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
@ -575,6 +605,11 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
out.Write("void main()\n{\n");
out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
if (use_shader_blend)
{
// Store off a copy of the initial fb value for blending
out.Write("\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n");
}
}
else // D3D
{
@ -710,7 +745,8 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
// testing result)
if (uid_data->Pretest == AlphaTest::UNDETERMINED ||
(uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest))
WriteAlphaTest(out, uid_data, ApiType, uid_data->per_pixel_depth, use_dual_source);
WriteAlphaTest(out, uid_data, ApiType, uid_data->per_pixel_depth,
use_dual_source || use_shader_blend);
if (uid_data->zfreeze)
{
@ -793,7 +829,11 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
WriteFog(out, uid_data);
// Write the color and alpha values to the framebuffer
WriteColor(out, ApiType, uid_data, use_dual_source);
// If using shader blend, we still use the separate alpha
WriteColor(out, ApiType, uid_data, use_dual_source || use_shader_blend);
if (use_shader_blend)
WriteBlend(out, uid_data);
if (uid_data->bounding_box)
{
@ -1358,3 +1398,79 @@ static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid
}
}
}
// Appends shader source that carries out the blend stage inside the pixel shader.
// The generated code combines the colour the shader produced (ocol0, with ocol1.a used for the
// source-alpha factors) with the destination value held in initial_ocol0, and stores the outcome
// back into ocol0.
//
// out      - shader text under construction; the blend code is appended to it.
// uid_data - supplies the blend_* fields that select the factors and equations.
static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
{
  // Blending disabled: the shader's own colour is passed straight through.
  if (!uid_data->blend_enable)
  {
    out.Write("\tfloat4 blend_result = ocol0;\n");
    out.Write("\tocol0 = blend_result;\n");
    return;
  }

  // Factor expression tables, indexed by the 3-bit factor values stored in the UID.
  // Every entry already ends in ';' so it can be dropped directly after an "= ".
  static const std::array<const char*, 8> src_rgb_exprs = {
      "float3(0,0,0);",                      // ZERO
      "float3(1,1,1);",                      // ONE
      "initial_ocol0.rgb;",                  // DSTCLR
      "float3(1,1,1) - initial_ocol0.rgb;",  // INVDSTCLR
      "ocol1.aaa;",                          // SRCALPHA
      "float3(1,1,1) - ocol1.aaa;",          // INVSRCALPHA
      "initial_ocol0.aaa;",                  // DSTALPHA
      "float3(1,1,1) - initial_ocol0.aaa;",  // INVDSTALPHA
  };
  static const std::array<const char*, 8> src_alpha_exprs = {
      "0.0;",                    // ZERO
      "1.0;",                    // ONE
      "initial_ocol0.a;",        // DSTCLR
      "1.0 - initial_ocol0.a;",  // INVDSTCLR
      "ocol1.a;",                // SRCALPHA
      "1.0 - ocol1.a;",          // INVSRCALPHA
      "initial_ocol0.a;",        // DSTALPHA
      "1.0 - initial_ocol0.a;",  // INVDSTALPHA
  };
  static const std::array<const char*, 8> dst_rgb_exprs = {
      "float3(0,0,0);",                      // ZERO
      "float3(1,1,1);",                      // ONE
      "ocol0.rgb;",                          // SRCCLR
      "float3(1,1,1) - ocol0.rgb;",          // INVSRCCLR
      "ocol1.aaa;",                          // SRCALPHA
      "float3(1,1,1) - ocol1.aaa;",          // INVSRCALPHA
      "initial_ocol0.aaa;",                  // DSTALPHA
      "float3(1,1,1) - initial_ocol0.aaa;",  // INVDSTALPHA
  };
  static const std::array<const char*, 8> dst_alpha_exprs = {
      "0.0;",                    // ZERO
      "1.0;",                    // ONE
      "ocol0.a;",                // SRCCLR
      "1.0 - ocol0.a;",          // INVSRCCLR
      "ocol1.a;",                // SRCALPHA
      "1.0 - ocol1.a;",          // INVSRCALPHA
      "initial_ocol0.a;",        // DSTALPHA
      "1.0 - initial_ocol0.a;",  // INVDSTALPHA
  };

  // Resolve the four factor expressions up front.
  const char* const src_rgb = src_rgb_exprs[uid_data->blend_src_factor];
  const char* const src_alpha = src_alpha_exprs[uid_data->blend_src_factor_alpha];
  const char* const dst_rgb = dst_rgb_exprs[uid_data->blend_dst_factor];
  const char* const dst_alpha = dst_alpha_exprs[uid_data->blend_dst_factor_alpha];

  // blend_src / blend_dst hold the *factors*, not the colours themselves.
  out.Write("\tfloat4 blend_src;\n");
  out.Write("\tblend_src.rgb = %s\n", src_rgb);
  out.Write("\tblend_src.a = %s\n", src_alpha);
  out.Write("\tfloat4 blend_dst;\n");
  out.Write("\tblend_dst.rgb = %s\n", dst_rgb);
  out.Write("\tblend_dst.a = %s\n", dst_alpha);

  out.Write("\tfloat4 blend_result;\n");

  // Colour equation: the subtract form is destination minus source.
  if (uid_data->blend_subtract)
  {
    out.Write("\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb "
              "- ocol0.rgb * blend_src.rgb;\n");
  }
  else
  {
    out.Write("\tblend_result.rgb = initial_ocol0.rgb * blend_dst.rgb "
              "+ ocol0.rgb * blend_src.rgb;\n");
  }

  // Alpha equation, selected independently of the colour equation.
  if (uid_data->blend_subtract_alpha)
  {
    out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n");
  }
  else
  {
    out.Write("\tblend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n");
  }

  out.Write("\tocol0 = blend_result;\n");
}

View File

@ -44,7 +44,13 @@ struct pixel_shader_uid_data
u32 rgba6_format : 1;
u32 dither : 1;
u32 uint_output : 1;
u32 pad : 15;
u32 blend_enable : 1; // Only used with shader_framebuffer_fetch blend
u32 blend_src_factor : 3; // Only used with shader_framebuffer_fetch blend
u32 blend_src_factor_alpha : 3; // Only used with shader_framebuffer_fetch blend
u32 blend_dst_factor : 3; // Only used with shader_framebuffer_fetch blend
u32 blend_dst_factor_alpha : 3; // Only used with shader_framebuffer_fetch blend
u32 blend_subtract : 1; // Only used with shader_framebuffer_fetch blend
u32 blend_subtract_alpha : 1; // Only used with shader_framebuffer_fetch blend
u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;

View File

@ -32,6 +32,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent()
bits.backend_bitfield = g_ActiveConfig.backend_info.bSupportsBitfield;
bits.backend_dynamic_sampler_indexing =
g_ActiveConfig.backend_info.bSupportsDynamicSamplerIndexing;
bits.backend_shader_framebuffer_fetch = g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
return bits;
}
@ -68,9 +69,9 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool
if (include_host_config)
{
// We're using 20 bits, so 5 hex characters.
// We're using 21 bits, so 6 hex characters.
ShaderHostConfig host_config = ShaderHostConfig::GetCurrent();
filename += StringFromFormat("-%05X", host_config.bits);
filename += StringFromFormat("-%06X", host_config.bits);
}
filename += ".cache";

View File

@ -178,7 +178,8 @@ union ShaderHostConfig
u32 backend_reversed_depth_range : 1;
u32 backend_bitfield : 1;
u32 backend_dynamic_sampler_indexing : 1;
u32 pad : 12;
u32 backend_shader_framebuffer_fetch : 1;
u32 pad : 11;
};
static ShaderHostConfig GetCurrent();

View File

@ -227,6 +227,7 @@ struct VideoConfig final
bool bSupportsBitfield; // Needed by UberShaders, so must stay in VideoCommon
bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon
bool bSupportsBPTCTextures;
bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES
} backend_info;
// Utility