Enable shader_framebuffer_fetch blend path on ubershaders
Tested on Linux with Intel Skylake integrated graphics and blend_func_extended force-disabled, as it's the only platform I have that both supports fb_fetch and doesn't crash with ubershaders.
This commit is contained in:
parent
8d68adcaf3
commit
ceb1f8c8cb
|
@ -153,9 +153,7 @@ static void BPWritten(const BPCmd& bp)
|
|||
|
||||
SetBlendMode();
|
||||
|
||||
// Dither
|
||||
if (bp.changes & 0x04)
|
||||
PixelShaderManager::SetBlendModeChanged();
|
||||
PixelShaderManager::SetBlendModeChanged();
|
||||
}
|
||||
return;
|
||||
case BPMEM_CONSTANTALPHA: // Set Destination Alpha
|
||||
|
|
|
@ -42,6 +42,14 @@ struct PixelShaderConstants
|
|||
std::array<uint4, 16> pack1; // .xy - combiners, .z - tevind, .w - iref
|
||||
std::array<uint4, 8> pack2; // .x - tevorder, .y - tevksel
|
||||
std::array<int4, 32> konst; // .rgba
|
||||
// The following are used in ubershaders when using shader_framebuffer_fetch blending
|
||||
u32 blend_enable;
|
||||
u32 blend_src_factor;
|
||||
u32 blend_src_factor_alpha;
|
||||
u32 blend_dst_factor;
|
||||
u32 blend_dst_factor_alpha;
|
||||
u32 blend_subtract;
|
||||
u32 blend_subtract_alpha;
|
||||
};
|
||||
|
||||
struct VertexShaderConstants
|
||||
|
|
|
@ -413,6 +413,13 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texg
|
|||
"\tuint4 bpmem_pack1[16];\n" // .xy - combiners, .z - tevind
|
||||
"\tuint4 bpmem_pack2[8];\n" // .x - tevorder, .y - tevksel
|
||||
"\tint4 konstLookup[32];\n"
|
||||
"\tbool blend_enable;\n"
|
||||
"\tuint blend_src_factor;\n"
|
||||
"\tuint blend_src_factor_alpha;\n"
|
||||
"\tuint blend_dst_factor;\n"
|
||||
"\tuint blend_dst_factor_alpha;\n"
|
||||
"\tbool blend_subtract;\n"
|
||||
"\tbool blend_subtract_alpha;\n"
|
||||
"};\n\n");
|
||||
out.Write("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n"
|
||||
"#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n"
|
||||
|
|
|
@ -473,6 +473,43 @@ void PixelShaderManager::SetBlendModeChanged()
|
|||
constants.dither = dither;
|
||||
dirty = true;
|
||||
}
|
||||
BlendingState state = {};
|
||||
state.Generate(bpmem);
|
||||
if (constants.blend_enable != state.blendenable)
|
||||
{
|
||||
constants.blend_enable = state.blendenable;
|
||||
dirty = true;
|
||||
}
|
||||
if (constants.blend_src_factor != state.srcfactor)
|
||||
{
|
||||
constants.blend_src_factor = state.srcfactor;
|
||||
dirty = true;
|
||||
}
|
||||
if (constants.blend_src_factor_alpha != state.srcfactoralpha)
|
||||
{
|
||||
constants.blend_src_factor_alpha = state.srcfactoralpha;
|
||||
dirty = true;
|
||||
}
|
||||
if (constants.blend_dst_factor != state.dstfactor)
|
||||
{
|
||||
constants.blend_dst_factor = state.dstfactor;
|
||||
dirty = true;
|
||||
}
|
||||
if (constants.blend_dst_factor_alpha != state.dstfactoralpha)
|
||||
{
|
||||
constants.blend_dst_factor_alpha = state.dstfactoralpha;
|
||||
dirty = true;
|
||||
}
|
||||
if (constants.blend_subtract != state.subtract)
|
||||
{
|
||||
constants.blend_subtract = state.subtract;
|
||||
dirty = true;
|
||||
}
|
||||
if (constants.blend_subtract_alpha != state.subtractAlpha)
|
||||
{
|
||||
constants.blend_subtract_alpha = state.subtractAlpha;
|
||||
dirty = true;
|
||||
}
|
||||
s_bDestAlphaDirty = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -47,6 +47,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
|
|||
const bool ssaa = host_config.ssaa;
|
||||
const bool stereo = host_config.stereo;
|
||||
const bool use_dual_source = host_config.backend_dual_source_blend;
|
||||
const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch;
|
||||
const bool early_depth = uid_data->early_depth != 0;
|
||||
const bool per_pixel_depth = uid_data->per_pixel_depth != 0;
|
||||
const bool bounding_box =
|
||||
|
@ -77,6 +78,21 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
|
|||
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
|
||||
}
|
||||
}
|
||||
else if (use_shader_blend)
|
||||
{
|
||||
// QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
|
||||
// intermediate value with multiple reads & modifications, so pull out the "real" output value
|
||||
// and use a temporary for calculations, then set the output value once at the end of the
|
||||
// shader
|
||||
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION))
|
||||
{
|
||||
out.Write("FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;\n");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
|
||||
|
@ -658,6 +674,13 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
|
|||
|
||||
out.Write("void main()\n{\n");
|
||||
out.Write(" float4 rawpos = gl_FragCoord;\n");
|
||||
if (use_shader_blend)
|
||||
{
|
||||
// Store off a copy of the initial fb value for blending
|
||||
out.Write(" float4 initial_ocol0 = FB_FETCH_VALUE;\n");
|
||||
out.Write(" float4 ocol0;\n");
|
||||
out.Write(" float4 ocol1;\n");
|
||||
}
|
||||
}
|
||||
else // D3D
|
||||
{
|
||||
|
@ -1203,7 +1226,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
|
|||
" ocol0.a = float(TevResult.a >> 2) / 63.0;\n"
|
||||
" \n");
|
||||
|
||||
if (use_dual_source)
|
||||
if (use_dual_source || use_shader_blend)
|
||||
{
|
||||
out.Write(" // Dest alpha override (dual source blending)\n"
|
||||
" // Colors will be blended against the alpha from ocol1 and\n"
|
||||
|
@ -1228,6 +1251,99 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
|
|||
out.Write(" }\n");
|
||||
}
|
||||
|
||||
if (use_shader_blend)
|
||||
{
|
||||
static const std::array<const char*, 8> blendSrcFactor = {
|
||||
"float3(0,0,0);", // ZERO
|
||||
"float3(1,1,1);", // ONE
|
||||
"initial_ocol0.rgb;", // DSTCLR
|
||||
"float3(1,1,1) - initial_ocol0.rgb;", // INVDSTCLR
|
||||
"ocol1.aaa;", // SRCALPHA
|
||||
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
|
||||
"initial_ocol0.aaa;", // DSTALPHA
|
||||
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
|
||||
};
|
||||
static const std::array<const char*, 8> blendSrcFactorAlpha = {
|
||||
"0.0;", // ZERO
|
||||
"1.0;", // ONE
|
||||
"initial_ocol0.a;", // DSTCLR
|
||||
"1.0 - initial_ocol0.a;", // INVDSTCLR
|
||||
"ocol1.a;", // SRCALPHA
|
||||
"1.0 - ocol1.a;", // INVSRCALPHA
|
||||
"initial_ocol0.a;", // DSTALPHA
|
||||
"1.0 - initial_ocol0.a;", // INVDSTALPHA
|
||||
};
|
||||
static const std::array<const char*, 8> blendDstFactor = {
|
||||
"float3(0,0,0);", // ZERO
|
||||
"float3(1,1,1);", // ONE
|
||||
"ocol0.rgb;", // SRCCLR
|
||||
"float3(1,1,1) - ocol0.rgb;", // INVSRCCLR
|
||||
"ocol1.aaa;", // SRCALPHA
|
||||
"float3(1,1,1) - ocol1.aaa;", // INVSRCALPHA
|
||||
"initial_ocol0.aaa;", // DSTALPHA
|
||||
"float3(1,1,1) - initial_ocol0.aaa;", // INVDSTALPHA
|
||||
};
|
||||
static const std::array<const char*, 8> blendDstFactorAlpha = {
|
||||
"0.0;", // ZERO
|
||||
"1.0;", // ONE
|
||||
"ocol0.a;", // SRCCLR
|
||||
"1.0 - ocol0.a;", // INVSRCCLR
|
||||
"ocol1.a;", // SRCALPHA
|
||||
"1.0 - ocol1.a;", // INVSRCALPHA
|
||||
"initial_ocol0.a;", // DSTALPHA
|
||||
"1.0 - initial_ocol0.a;", // INVDSTALPHA
|
||||
};
|
||||
|
||||
out.Write(" if (blend_enable) {\n"
|
||||
" float4 blend_src;\n"
|
||||
" switch (blend_src_factor) {\n");
|
||||
for (unsigned i = 0; i < blendSrcFactor.size(); i++)
|
||||
{
|
||||
out.Write(" case %uu: blend_src.rgb = %s; break;\n", i, blendSrcFactor[i]);
|
||||
}
|
||||
|
||||
out.Write(" }\n"
|
||||
" switch (blend_src_factor_alpha) {\n");
|
||||
for (unsigned i = 0; i < blendSrcFactorAlpha.size(); i++)
|
||||
{
|
||||
out.Write(" case %uu: blend_src.a = %s; break;\n", i, blendSrcFactorAlpha[i]);
|
||||
}
|
||||
|
||||
out.Write(" }\n"
|
||||
" float4 blend_dst;\n"
|
||||
" switch (blend_dst_factor) {\n");
|
||||
for (unsigned i = 0; i < blendDstFactor.size(); i++)
|
||||
{
|
||||
out.Write(" case %uu: blend_dst.rgb = %s; break;\n", i, blendDstFactor[i]);
|
||||
}
|
||||
out.Write(" }\n"
|
||||
" switch (blend_dst_factor_alpha) {\n");
|
||||
for (unsigned i = 0; i < blendDstFactorAlpha.size(); i++)
|
||||
{
|
||||
out.Write(" case %uu: blend_dst.a = %s; break;\n", i, blendDstFactorAlpha[i]);
|
||||
}
|
||||
|
||||
out.Write(
|
||||
" }\n"
|
||||
" float4 blend_result;\n"
|
||||
" if (blend_subtract)\n"
|
||||
" blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;\n"
|
||||
" else\n"
|
||||
" blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * "
|
||||
"blend_src.rgb;\n");
|
||||
|
||||
out.Write(" if (blend_subtract_alpha)\n"
|
||||
" blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;\n"
|
||||
" else\n"
|
||||
" blend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;\n");
|
||||
|
||||
out.Write(" real_ocol0 = blend_result;\n");
|
||||
|
||||
out.Write(" } else {\n"
|
||||
" real_ocol0 = ocol0;\n"
|
||||
" }\n");
|
||||
}
|
||||
|
||||
out.Write("}\n"
|
||||
"\n"
|
||||
"int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {\n"
|
||||
|
|
Loading…
Reference in New Issue