Merge pull request #10747 from tellowkrinkle/LateUIDFixup

Add a post-cache shader UID fixup pass
This commit is contained in:
JMC47 2022-07-17 00:43:16 -04:00 committed by GitHub
commit 70b0b03c3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 319 additions and 229 deletions

View File

@ -1168,55 +1168,43 @@ void Renderer::ApplyBlendingState(const BlendingState state)
if (m_current_blend_state == state) if (m_current_blend_state == state)
return; return;
bool useDualSource = bool useDualSource = state.usedualsrc;
state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha);
// Only use shader blend if we need to and we don't support dual-source blending directly
bool useShaderBlend = !useDualSource && state.usedualsrc && state.dstalpha &&
g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
if (useShaderBlend) const GLenum src_factors[8] = {GL_ZERO,
{ GL_ONE,
glDisable(GL_BLEND); GL_DST_COLOR,
} GL_ONE_MINUS_DST_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA :
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
const GLenum dst_factors[8] = {GL_ZERO,
GL_ONE,
GL_SRC_COLOR,
GL_ONE_MINUS_SRC_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA :
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
if (state.blendenable)
glEnable(GL_BLEND);
else else
{ glDisable(GL_BLEND);
const GLenum src_factors[8] = {GL_ZERO,
GL_ONE,
GL_DST_COLOR,
GL_ONE_MINUS_DST_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA :
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
const GLenum dst_factors[8] = {GL_ZERO,
GL_ONE,
GL_SRC_COLOR,
GL_ONE_MINUS_SRC_COLOR,
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA :
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA};
if (state.blendenable) // Always call glBlendEquationSeparate and glBlendFuncSeparate, even when
glEnable(GL_BLEND); // GL_BLEND is disabled, as a workaround for some bugs (possibly graphics
else // driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic
glDisable(GL_BLEND); // Adventure 2 Battle: graphics crash when loading first Dark level"
GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
// Always call glBlendEquationSeparate and glBlendFuncSeparate, even when GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
// GL_BLEND is disabled, as a workaround for some bugs (possibly graphics glBlendEquationSeparate(equation, equationAlpha);
// driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic glBlendFuncSeparate(src_factors[u32(state.srcfactor.Value())],
// Adventure 2 Battle: graphics crash when loading first Dark level" dst_factors[u32(state.dstfactor.Value())],
GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD; src_factors[u32(state.srcfactoralpha.Value())],
GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD; dst_factors[u32(state.dstfactoralpha.Value())]);
glBlendEquationSeparate(equation, equationAlpha);
glBlendFuncSeparate(src_factors[u32(state.srcfactor.Value())],
dst_factors[u32(state.dstfactor.Value())],
src_factors[u32(state.srcfactoralpha.Value())],
dst_factors[u32(state.dstfactoralpha.Value())]);
}
const GLenum logic_op_codes[16] = { const GLenum logic_op_codes[16] = {
GL_CLEAR, GL_AND, GL_AND_REVERSE, GL_COPY, GL_AND_INVERTED, GL_NOOP, GL_CLEAR, GL_AND, GL_AND_REVERSE, GL_COPY, GL_AND_INVERTED, GL_NOOP,

View File

@ -153,7 +153,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi)
s32 z = (s32)std::clamp<float>(ZSlope.GetValue(x, y), 0.0f, 16777215.0f); s32 z = (s32)std::clamp<float>(ZSlope.GetValue(x, y), 0.0f, 16777215.0f);
if (bpmem.UseEarlyDepthTest()) if (bpmem.GetEmulatedZ() == EmulatedZ::Early)
{ {
// TODO: Test if perf regs are incremented even if test is disabled // TODO: Test if perf regs are incremented even if test is disabled
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC); EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC);

View File

@ -840,7 +840,7 @@ void Tev::Draw()
output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8; output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8;
} }
if (bpmem.UseLateDepthTest()) if (bpmem.GetEmulatedZ() == EmulatedZ::Late)
{ {
// TODO: Check against hw if these values get incremented even if depth testing is disabled // TODO: Check against hw if these values get incremented even if depth testing is disabled
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT); EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT);

View File

@ -137,60 +137,48 @@ GetVulkanAttachmentBlendState(const BlendingState& state, AbstractPipelineUsage
{ {
VkPipelineColorBlendAttachmentState vk_state = {}; VkPipelineColorBlendAttachmentState vk_state = {};
bool use_dual_source = bool use_dual_source = state.usedualsrc;
state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha);
bool use_shader_blend = !use_dual_source && state.usedualsrc && state.dstalpha &&
g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
if (use_shader_blend || (usage == AbstractPipelineUsage::GX && vk_state.blendEnable = static_cast<VkBool32>(state.blendenable);
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z))) vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD;
vk_state.alphaBlendOp = state.subtractAlpha ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD;
if (use_dual_source)
{ {
vk_state.blendEnable = VK_FALSE; static constexpr std::array<VkBlendFactor, 8> src_factors = {
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
static constexpr std::array<VkBlendFactor, 8> dst_factors = {
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())];
vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())];
vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())];
vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())];
} }
else else
{ {
vk_state.blendEnable = static_cast<VkBool32>(state.blendenable); static constexpr std::array<VkBlendFactor, 8> src_factors = {
vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR,
vk_state.alphaBlendOp = state.subtractAlpha ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
if (use_dual_source) static constexpr std::array<VkBlendFactor, 8> dst_factors = {
{ {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR,
static constexpr std::array<VkBlendFactor, 8> src_factors = { VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC_ALPHA,
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
static constexpr std::array<VkBlendFactor, 8> dst_factors = {
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())]; vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())];
vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())]; vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())];
vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())]; vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())];
vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())]; vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())];
}
else
{
static constexpr std::array<VkBlendFactor, 8> src_factors = {
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
static constexpr std::array<VkBlendFactor, 8> dst_factors = {
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())];
vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())];
vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())];
vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())];
}
} }
if (state.colorupdate) if (state.colorupdate)

View File

@ -371,13 +371,6 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_REVERSED_DEPTH_RANGE)) if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_REVERSED_DEPTH_RANGE))
config->backend_info.bSupportsReversedDepthRange = false; config->backend_info.bSupportsReversedDepthRange = false;
// Calling discard when early depth test is enabled can break on some Apple Silicon GPU drivers.
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z))
{
// We will use shader blending, so disable hardware dual source blending.
config->backend_info.bSupportsDualSourceBlend = false;
}
// Dynamic sampler indexing locks up Intel GPUs on MoltenVK/Metal // Dynamic sampler indexing locks up Intel GPUs on MoltenVK/Metal
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING)) if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING))
config->backend_info.bSupportsDynamicSamplerIndexing = false; config->backend_info.bSupportsDynamicSamplerIndexing = false;

View File

@ -2338,6 +2338,16 @@ struct BPCmd
int newvalue; int newvalue;
}; };
// Describes how the depth test is emulated for a draw. BPMemory::GetEmulatedZ() only ever
// produces Disabled, Early or Late; the remaining values are assigned later while shader
// UIDs are built and fixed up. The value is stored in the 3-bit `ztest` field of
// pixel_shader_uid_data, so every enumerator must fit in 3 bits (<= 7).
enum class EmulatedZ : u32
{
// Depth testing is off (zmode.testenable is clear).
Disabled = 0,
// Depth test enabled with zcontrol.early_ztest set.
Early = 1,
// Depth test enabled with zcontrol.early_ztest clear.
Late = 2,
// Early depth test for which the generated pixel shader emits FORCE_EARLY_Z. Selected by
// GetPixelShaderUid() when fast depth calculation is on or the alpha test result is
// undetermined, and zfreeze is off.
ForcedEarly = 3,
// Substitute for ForcedEarly chosen by the pipeline UID fixup when framebuffer fetch is
// supported and either discard with early Z is broken or early Z is unsupported. The alpha
// test then outputs the fetched framebuffer color instead of discarding.
EarlyWithFBFetch = 4,
// Early depth test using the "ZCOMPLOC hack": WriteAlphaTest() skips its usual
// failure handling for this value.
// NOTE(review): presumably this makes the alpha test always pass; the code that assigns
// this value is not visible in this hunk - confirm.
EarlyWithZComplocHack = 5,
};
struct BPMemory struct BPMemory
{ {
GenMode genMode; GenMode genMode;
@ -2405,8 +2415,15 @@ struct BPMemory
u32 bpMask; // 0xFE u32 bpMask; // 0xFE
u32 unknown18; // ff u32 unknown18; // ff
bool UseEarlyDepthTest() const { return zcontrol.early_ztest && zmode.testenable; } EmulatedZ GetEmulatedZ() const
bool UseLateDepthTest() const { return !zcontrol.early_ztest && zmode.testenable; } {
if (!zmode.testenable)
return EmulatedZ::Disabled;
if (zcontrol.early_ztest)
return EmulatedZ::Early;
else
return EmulatedZ::Late;
}
}; };
#pragma pack() #pragma pack()

View File

@ -237,7 +237,8 @@ enum Bug
// crash. Sometimes this happens in the kernel mode part of the driver, resulting in a BSOD. // crash. Sometimes this happens in the kernel mode part of the driver, resulting in a BSOD.
// These shaders are also particularly problematic on macOS's Intel drivers. On OpenGL, they can // These shaders are also particularly problematic on macOS's Intel drivers. On OpenGL, they can
// cause depth issues. On Metal, they can cause the driver to not write a primitive to the depth // cause depth issues. On Metal, they can cause the driver to not write a primitive to the depth
// buffer whenever a fragment is discarded. Disable dual-source blending support on these drivers. // buffer if dual source blending is output in the shader but not subsequently used in blending.
// Compile separate shaders for DSB on vs off for these drivers.
BUG_BROKEN_DUAL_SOURCE_BLENDING, BUG_BROKEN_DUAL_SOURCE_BLENDING,
// BUG: ImgTec GLSL shader compiler fails when negating the input to a bitwise operation // BUG: ImgTec GLSL shader compiler fails when negating the input to a bitwise operation

View File

@ -19,7 +19,7 @@ namespace VideoCommon
// As pipelines encompass both shader UIDs and render states, changes to either of these should // As pipelines encompass both shader UIDs and render states, changes to either of these should
// also increment the pipeline UID version. Incrementing the UID version will cause all UID // also increment the pipeline UID version. Incrementing the UID version will cause all UID
// caches to be invalidated. // caches to be invalidated.
constexpr u32 GX_PIPELINE_UID_VERSION = 4; // Last changed in PR 10215 constexpr u32 GX_PIPELINE_UID_VERSION = 5; // Last changed in PR 10747
struct GXPipelineUid struct GXPipelineUid
{ {

View File

@ -167,9 +167,6 @@ constexpr Common::EnumMap<const char*, TevOutput::Color2> tev_a_output_table{
"c2.a", "c2.a",
}; };
// FIXME: Some of the video card's capabilities (BBox support, EarlyZ support, dstAlpha support)
// leak into this UID; This is really unhelpful if these UIDs ever move from one machine to
// another.
PixelShaderUid GetPixelShaderUid() PixelShaderUid GetPixelShaderUid()
{ {
PixelShaderUid out; PixelShaderUid out;
@ -189,20 +186,25 @@ PixelShaderUid GetPixelShaderUid()
u32 numStages = uid_data->genMode_numtevstages + 1; u32 numStages = uid_data->genMode_numtevstages + 1;
const bool forced_early_z = uid_data->Pretest = bpmem.alpha_test.TestResult();
bpmem.UseEarlyDepthTest() && uid_data->ztest = bpmem.GetEmulatedZ();
if (uid_data->ztest == EmulatedZ::Early &&
(g_ActiveConfig.bFastDepthCalc || (g_ActiveConfig.bFastDepthCalc ||
bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined) bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined)
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel. // We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon. // This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
&& !(bpmem.zmode.testenable && bpmem.genMode.zfreeze); && !bpmem.genMode.zfreeze)
{
uid_data->ztest = EmulatedZ::ForcedEarly;
}
const bool forced_early_z = uid_data->ztest == EmulatedZ::ForcedEarly;
const bool per_pixel_depth = const bool per_pixel_depth =
(bpmem.ztex2.op != ZTexOp::Disabled && bpmem.UseLateDepthTest()) || (bpmem.ztex2.op != ZTexOp::Disabled && uid_data->ztest == EmulatedZ::Late) ||
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) ||
(bpmem.zmode.testenable && bpmem.genMode.zfreeze); (bpmem.zmode.testenable && bpmem.genMode.zfreeze);
uid_data->per_pixel_depth = per_pixel_depth; uid_data->per_pixel_depth = per_pixel_depth;
uid_data->forced_early_z = forced_early_z;
if (g_ActiveConfig.bEnablePixelLighting) if (g_ActiveConfig.bEnablePixelLighting)
{ {
@ -285,59 +287,24 @@ PixelShaderUid GetPixelShaderUid()
sizeof(*uid_data) : sizeof(*uid_data) :
MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]); MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]);
uid_data->Pretest = bpmem.alpha_test.TestResult();
uid_data->late_ztest = bpmem.UseLateDepthTest();
// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
// (in this case we need to write a depth value if depth test passes regardless of the alpha // (in this case we need to write a depth value if depth test passes regardless of the alpha
// testing result) // testing result)
if (uid_data->Pretest == AlphaTestResult::Undetermined || if (uid_data->Pretest == AlphaTestResult::Undetermined ||
(uid_data->Pretest == AlphaTestResult::Fail && uid_data->late_ztest)) (uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late))
{ {
uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0; uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0;
uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1; uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1;
uid_data->alpha_test_logic = bpmem.alpha_test.logic; uid_data->alpha_test_logic = bpmem.alpha_test.logic;
// ZCOMPLOC HACK:
// The only way to emulate alpha test + early-z is to force early-z in the shader.
// As this isn't available on all drivers and as we can't emulate this feature otherwise,
// we are only able to choose which one we want to respect more.
// Tests seem to have proven that writing depth even when the alpha test fails is more
// important than a reliable alpha test, so we just force the alpha test to always succeed.
// At least this seems to be less buggy.
uid_data->alpha_test_use_zcomploc_hack =
bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable &&
!g_ActiveConfig.backend_info.bSupportsEarlyZ && !bpmem.genMode.zfreeze;
} }
uid_data->zfreeze = bpmem.genMode.zfreeze; uid_data->zfreeze = bpmem.genMode.zfreeze;
uid_data->ztex_op = bpmem.ztex2.op; uid_data->ztex_op = bpmem.ztex2.op;
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj; uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj;
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
BlendingState state = {};
state.Generate(bpmem);
if (((state.usedualsrc && state.dstalpha) ||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)) &&
g_ActiveConfig.backend_info.bSupportsFramebufferFetch &&
!g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
uid_data->blend_enable = state.blendenable;
uid_data->blend_src_factor = state.srcfactor;
uid_data->blend_src_factor_alpha = state.srcfactoralpha;
uid_data->blend_dst_factor = state.dstfactor;
uid_data->blend_dst_factor_alpha = state.dstfactoralpha;
uid_data->blend_subtract = state.subtract;
uid_data->blend_subtract_alpha = state.subtractAlpha;
}
uid_data->logic_op_enable = state.logicopenable;
uid_data->logic_op_mode = u32(state.logicmode.Value());
return out; return out;
} }
@ -798,7 +765,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) " out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
"sampleTexture(texmap, samp[texmap], uv, layer)\n"); "sampleTexture(texmap, samp[texmap], uv, layer)\n");
if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ) if (uid_data->ztest == EmulatedZ::ForcedEarly)
{ {
// Zcomploc (aka early_ztest) is a way to control whether depth test is done before // Zcomploc (aka early_ztest) is a way to control whether depth test is done before
// or after texturing and alpha test. PC graphics APIs used to provide no way to emulate // or after texturing and alpha test. PC graphics APIs used to provide no way to emulate
@ -837,28 +804,15 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
out.Write("FORCE_EARLY_Z; \n"); out.Write("FORCE_EARLY_Z; \n");
} }
// Only use dual-source blending when required on drivers that don't support it very well. const bool use_framebuffer_fetch = uid_data->blend_enable || uid_data->logic_op_enable ||
const bool use_dual_source = uid_data->ztest == EmulatedZ::EarlyWithFBFetch;
host_config.backend_dual_source_blend &&
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) ||
uid_data->useDstAlpha);
const bool use_shader_blend =
!use_dual_source &&
(uid_data->useDstAlpha ||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)) &&
host_config.backend_shader_framebuffer_fetch;
const bool use_shader_logic_op = !host_config.backend_logic_op && uid_data->logic_op_enable &&
host_config.backend_shader_framebuffer_fetch;
const bool use_framebuffer_fetch =
use_shader_blend || use_shader_logic_op ||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z);
#ifdef __APPLE__ #ifdef __APPLE__
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
// if we want to use it. // if we want to use it.
if (api_type == APIType::Vulkan) if (api_type == APIType::Vulkan)
{ {
if (use_dual_source) if (!uid_data->no_dual_src)
{ {
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
@ -891,7 +845,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
uid_data->uint_output ? "uvec4" : "vec4", uid_data->uint_output ? "uvec4" : "vec4",
use_framebuffer_fetch ? "real_ocol0" : "ocol0"); use_framebuffer_fetch ? "real_ocol0" : "ocol0");
if (use_dual_source) if (!uid_data->no_dual_src)
{ {
out.Write("{} out {} ocol1;\n", out.Write("{} out {} ocol1;\n",
has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(1)" : has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(1)" :
@ -960,7 +914,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
out.Write("\tfloat4 ocol0;\n"); out.Write("\tfloat4 ocol0;\n");
} }
if (use_shader_blend) if (uid_data->blend_enable)
{ {
out.Write("\tfloat4 ocol1;\n"); out.Write("\tfloat4 ocol1;\n");
} }
@ -1086,10 +1040,10 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
// (in this case we need to write a depth value if depth test passes regardless of the alpha // (in this case we need to write a depth value if depth test passes regardless of the alpha
// testing result) // testing result)
if (uid_data->Pretest == AlphaTestResult::Undetermined || if (uid_data->Pretest == AlphaTestResult::Undetermined ||
(uid_data->Pretest == AlphaTestResult::Fail && uid_data->late_ztest)) (uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late))
{ {
WriteAlphaTest(out, uid_data, api_type, uid_data->per_pixel_depth, WriteAlphaTest(out, uid_data, api_type, uid_data->per_pixel_depth,
use_dual_source || use_shader_blend); !uid_data->no_dual_src || uid_data->blend_enable);
} }
// This situation is important for Mario Kart Wii's menus (they will render incorrectly if the // This situation is important for Mario Kart Wii's menus (they will render incorrectly if the
@ -1144,7 +1098,10 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
const bool skip_ztexture = !uid_data->per_pixel_depth && uid_data->fog_fsel == FogType::Off; const bool skip_ztexture = !uid_data->per_pixel_depth && uid_data->fog_fsel == FogType::Off;
// Note: z-textures are not written to depth buffer if early depth test is used // Note: z-textures are not written to depth buffer if early depth test is used
if (uid_data->per_pixel_depth && uid_data->early_ztest) const bool early_ztest = uid_data->ztest == EmulatedZ::Early ||
uid_data->ztest == EmulatedZ::EarlyWithFBFetch ||
uid_data->ztest == EmulatedZ::EarlyWithZComplocHack;
if (uid_data->per_pixel_depth && early_ztest)
{ {
if (!host_config.backend_reversed_depth_range) if (!host_config.backend_reversed_depth_range)
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
@ -1165,7 +1122,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
out.Write("\tzCoord = zCoord & 0xFFFFFF;\n"); out.Write("\tzCoord = zCoord & 0xFFFFFF;\n");
} }
if (uid_data->per_pixel_depth && uid_data->late_ztest) if (uid_data->per_pixel_depth && uid_data->ztest == EmulatedZ::Late)
{ {
if (!host_config.backend_reversed_depth_range) if (!host_config.backend_reversed_depth_range)
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
@ -1184,14 +1141,14 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
WriteFog(out, uid_data); WriteFog(out, uid_data);
if (use_shader_logic_op) if (uid_data->logic_op_enable)
WriteLogicOp(out, uid_data); WriteLogicOp(out, uid_data);
// Write the color and alpha values to the framebuffer // Write the color and alpha values to the framebuffer
// If using shader blend, we still use the separate alpha // If using shader blend, we still use the separate alpha
WriteColor(out, api_type, uid_data, use_dual_source || use_shader_blend); WriteColor(out, api_type, uid_data, !uid_data->no_dual_src || uid_data->blend_enable);
if (use_shader_blend) if (uid_data->blend_enable)
WriteBlend(out, uid_data); WriteBlend(out, uid_data);
else if (use_framebuffer_fetch) else if (use_framebuffer_fetch)
out.Write("\treal_ocol0 = ocol0;\n"); out.Write("\treal_ocol0 = ocol0;\n");
@ -1728,11 +1685,10 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
} }
// ZCOMPLOC HACK: // ZCOMPLOC HACK:
if (!uid_data->alpha_test_use_zcomploc_hack) if (uid_data->ztest != EmulatedZ::EarlyWithZComplocHack)
{ {
#ifdef __APPLE__ #ifdef __APPLE__
if (uid_data->forced_early_z && if (uid_data->ztest == EmulatedZ::EarlyWithFBFetch)
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z))
{ {
// Instead of using discard, fetch the framebuffer's color value and use it as the output // Instead of using discard, fetch the framebuffer's color value and use it as the output
// for this fragment. // for this fragment.

View File

@ -12,6 +12,7 @@ enum class AlphaTestOp : u32;
enum class AlphaTestResult; enum class AlphaTestResult;
enum class CompareMode : u32; enum class CompareMode : u32;
enum class DstBlendFactor : u32; enum class DstBlendFactor : u32;
enum class EmulatedZ : u32;
enum class FogProjection : u32; enum class FogProjection : u32;
enum class FogType : u32; enum class FogType : u32;
enum class KonstSel : u32; enum class KonstSel : u32;
@ -28,6 +29,7 @@ struct pixel_shader_uid_data
u32 NumValues() const { return num_values; } u32 NumValues() const { return num_values; }
u32 pad0 : 4; u32 pad0 : 4;
u32 useDstAlpha : 1; u32 useDstAlpha : 1;
u32 no_dual_src : 1;
AlphaTestResult Pretest : 2; AlphaTestResult Pretest : 2;
u32 nIndirectStagesUsed : 4; u32 nIndirectStagesUsed : 4;
u32 genMode_numtexgens : 4; u32 genMode_numtexgens : 4;
@ -36,16 +38,13 @@ struct pixel_shader_uid_data
CompareMode alpha_test_comp0 : 3; CompareMode alpha_test_comp0 : 3;
CompareMode alpha_test_comp1 : 3; CompareMode alpha_test_comp1 : 3;
AlphaTestOp alpha_test_logic : 2; AlphaTestOp alpha_test_logic : 2;
u32 alpha_test_use_zcomploc_hack : 1;
FogProjection fog_proj : 1; FogProjection fog_proj : 1;
FogType fog_fsel : 3; FogType fog_fsel : 3;
u32 fog_RangeBaseEnabled : 1; u32 fog_RangeBaseEnabled : 1;
ZTexOp ztex_op : 2; ZTexOp ztex_op : 2;
u32 per_pixel_depth : 1; u32 per_pixel_depth : 1;
u32 forced_early_z : 1; EmulatedZ ztest : 3;
u32 early_ztest : 1;
u32 late_ztest : 1;
u32 bounding_box : 1; u32 bounding_box : 1;
u32 zfreeze : 1; u32 zfreeze : 1;
u32 numColorChans : 2; u32 numColorChans : 2;

View File

@ -448,7 +448,7 @@ void PixelShaderManager::SetGenModeChanged()
void PixelShaderManager::SetZModeControl() void PixelShaderManager::SetZModeControl()
{ {
u32 late_ztest = bpmem.UseLateDepthTest(); u32 late_ztest = bpmem.GetEmulatedZ() == EmulatedZ::Late;
u32 rgba6_format = u32 rgba6_format =
(bpmem.zcontrol.pixel_format == PixelFormat::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor) ? (bpmem.zcontrol.pixel_format == PixelFormat::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor) ?
1 : 1 :

View File

@ -25,6 +25,34 @@ void DepthState::Generate(const BPMemory& bp)
func = bp.zmode.func.Value(); func = bp.zmode.func.Value();
} }
// Returns true when the given source blend factor is one of the source-alpha factors
// (SrcAlpha or InvSrcAlpha); every other factor yields false.
static bool IsDualSrc(SrcBlendFactor factor)
{
  switch (factor)
  {
  case SrcBlendFactor::SrcAlpha:
  case SrcBlendFactor::InvSrcAlpha:
    return true;
  default:
    return false;
  }
}
// Returns true when the given destination blend factor reads the source color or the
// source alpha (SrcClr, InvSrcClr, SrcAlpha, InvSrcAlpha); every other factor yields false.
static bool IsDualSrc(DstBlendFactor factor)
{
  const bool reads_src_color =
      factor == DstBlendFactor::SrcClr || factor == DstBlendFactor::InvSrcClr;
  const bool reads_src_alpha =
      factor == DstBlendFactor::SrcAlpha || factor == DstBlendFactor::InvSrcAlpha;
  return reads_src_color || reads_src_alpha;
}
// Reports whether this blend state actually needs dual-source blending: blending must be
// enabled, usedualsrc must be set, and at least one of the four color/alpha blend factors
// must be classified as dual-source by IsDualSrc().
bool BlendingState::RequiresDualSrc() const
{
  // Without blending enabled and dual-source requested, the factors are irrelevant.
  if (!(blendenable && usedualsrc))
    return false;

  const bool src_uses_dual = IsDualSrc(srcfactor) || IsDualSrc(srcfactoralpha);
  const bool dst_uses_dual = IsDualSrc(dstfactor) || IsDualSrc(dstfactoralpha);
  return src_uses_dual || dst_uses_dual;
}
// If the framebuffer format has no alpha channel, it is assumed to be // If the framebuffer format has no alpha channel, it is assumed to be
// ONE on blending. As the backends may emulate this framebuffer // ONE on blending. As the backends may emulate this framebuffer
// configuration with an alpha channel, we just drop all references // configuration with an alpha channel, we just drop all references
@ -92,12 +120,12 @@ void BlendingState::Generate(const BPMemory& bp)
// Start with everything disabled. // Start with everything disabled.
hex = 0; hex = 0;
bool target_has_alpha = bp.zcontrol.pixel_format == PixelFormat::RGBA6_Z24; const bool target_has_alpha = bp.zcontrol.pixel_format == PixelFormat::RGBA6_Z24;
bool alpha_test_may_succeed = bp.alpha_test.TestResult() != AlphaTestResult::Fail; const bool alpha_test_may_succeed = bp.alpha_test.TestResult() != AlphaTestResult::Fail;
colorupdate = bp.blendmode.colorupdate && alpha_test_may_succeed; colorupdate = bp.blendmode.colorupdate && alpha_test_may_succeed;
alphaupdate = bp.blendmode.alphaupdate && target_has_alpha && alpha_test_may_succeed; alphaupdate = bp.blendmode.alphaupdate && target_has_alpha && alpha_test_may_succeed;
dstalpha = bp.dstalpha.enable && alphaupdate; const bool dstalpha = bp.dstalpha.enable && alphaupdate;
usedualsrc = true; usedualsrc = true;
// The subtract bit has the highest priority // The subtract bit has the highest priority

View File

@ -130,7 +130,6 @@ union BlendingState
BitField<0, 1, u32> blendenable; BitField<0, 1, u32> blendenable;
BitField<1, 1, u32> logicopenable; BitField<1, 1, u32> logicopenable;
BitField<2, 1, u32> dstalpha;
BitField<3, 1, u32> colorupdate; BitField<3, 1, u32> colorupdate;
BitField<4, 1, u32> alphaupdate; BitField<4, 1, u32> alphaupdate;
BitField<5, 1, u32> subtract; BitField<5, 1, u32> subtract;
@ -142,6 +141,8 @@ union BlendingState
BitField<17, 3, SrcBlendFactor> srcfactoralpha; BitField<17, 3, SrcBlendFactor> srcfactoralpha;
BitField<20, 4, LogicOp> logicmode; BitField<20, 4, LogicOp> logicmode;
bool RequiresDualSrc() const;
u32 hex; u32 hex;
}; };

View File

@ -10,6 +10,7 @@
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
#include "Core/ConfigManager.h" #include "Core/ConfigManager.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/FramebufferShaderGen.h" #include "VideoCommon/FramebufferShaderGen.h"
#include "VideoCommon/RenderBase.h" #include "VideoCommon/RenderBase.h"
@ -612,8 +613,95 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig(
return config; return config;
} }
std::optional<AbstractPipelineConfig> ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config) /// Edits the UID based on driver bugs and other special configurations
/// Works on a byte-wise copy of `in` (the argument itself is never modified) and returns that
/// copy with its pixel shader UID data and blending state adjusted according to the backend
/// capabilities in g_ActiveConfig.backend_info and the driver bugs reported by DriverDetails.
/// The steps below are order-dependent: later steps look at values written by earlier ones.
static GXPipelineUid ApplyDriverBugs(const GXPipelineUid& in)
{ {
GXPipelineUid out;
// Byte-wise copy so that padding bytes come along too (presumably because UIDs are
// compared/hashed as raw bytes -- confirm against the UID types).
memcpy(&out, &in, sizeof(out)); // copy padding
pixel_shader_uid_data* ps = out.ps_uid.GetUidData();
BlendingState& blend = out.blending_state;
if (ps->ztest == EmulatedZ::ForcedEarly && !out.depth_state.updateenable)
{
// No need to force early depth test if you're not writing z
ps->ztest = EmulatedZ::Early;
}
// True when the backend lacks dual-source blending but offers framebuffer fetch, or when the
// driver's dual-source blending is flagged as broken.
const bool benefits_from_ps_dual_source_off =
(!g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
g_ActiveConfig.backend_info.bSupportsFramebufferFetch) ||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING);
if (benefits_from_ps_dual_source_off && !blend.RequiresDualSrc())
{
// Only use dual-source blending when required on drivers that don't support it very well.
ps->no_dual_src = true;
blend.usedualsrc = false;
}
if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
{
// Set when blending / logic op has to be performed inside the pixel shader instead of by the
// fixed-function blend state.
bool fbfetch_blend = false;
// Forced early Z that the backend cannot provide (or where discard is broken together with
// early Z) is switched to the framebuffer-fetch variant; if blending is enabled it is then
// moved into the shader as well.
if ((DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z) ||
!g_ActiveConfig.backend_info.bSupportsEarlyZ) &&
ps->ztest == EmulatedZ::ForcedEarly)
{
ps->ztest = EmulatedZ::EarlyWithFBFetch;
fbfetch_blend |= static_cast<bool>(out.blending_state.blendenable);
ps->no_dual_src = true;
}
// Logic ops or dual-source blending that the backend does not support also force shader blend.
fbfetch_blend |= blend.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp;
fbfetch_blend |= blend.usedualsrc && !g_ActiveConfig.backend_info.bSupportsDualSourceBlend;
if (fbfetch_blend)
{
ps->no_dual_src = true;
if (blend.logicopenable)
{
// Transfer the logic op into the shader UID and turn it off in the blend state.
ps->logic_op_enable = true;
ps->logic_op_mode = static_cast<u32>(blend.logicmode.Value());
blend.logicopenable = false;
}
if (blend.blendenable)
{
// Transfer the blend equation into the shader UID and turn blending off in the blend state.
ps->blend_enable = true;
ps->blend_src_factor = blend.srcfactor;
ps->blend_src_factor_alpha = blend.srcfactoralpha;
ps->blend_dst_factor = blend.dstfactor;
ps->blend_dst_factor_alpha = blend.dstfactoralpha;
ps->blend_subtract = blend.subtract;
ps->blend_subtract_alpha = blend.subtractAlpha;
blend.blendenable = false;
}
}
}
// force dual src off if we can't support it
if (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
ps->no_dual_src = true;
blend.usedualsrc = false;
}
// When framebuffer fetch is supported, ForcedEarly without early-Z support has already been
// rewritten to EarlyWithFBFetch above, so this branch is only reachable without framebuffer fetch.
if (ps->ztest == EmulatedZ::ForcedEarly && !g_ActiveConfig.backend_info.bSupportsEarlyZ)
{
// These things should be false
ASSERT(!ps->zfreeze);
// ZCOMPLOC HACK:
// The only way to emulate alpha test + early-z is to force early-z in the shader.
// As this isn't available on all drivers and as we can't emulate this feature otherwise,
// we are only able to choose which one we want to respect more.
// Tests seem to have proven that writing depth even when the alpha test fails is more
// important than a reliable alpha test, so we just force the alpha test to always succeed.
// At least this seems to be less buggy.
ps->ztest = EmulatedZ::EarlyWithZComplocHack;
}
return out;
}
std::optional<AbstractPipelineConfig>
ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config_in)
{
GXPipelineUid config = ApplyDriverBugs(config_in);
const AbstractShader* vs; const AbstractShader* vs;
auto vs_iter = m_vs_cache.shader_map.find(config.vs_uid); auto vs_iter = m_vs_cache.shader_map.find(config.vs_uid);
if (vs_iter != m_vs_cache.shader_map.end() && !vs_iter->second.pending) if (vs_iter != m_vs_cache.shader_map.end() && !vs_iter->second.pending)
@ -650,9 +738,33 @@ std::optional<AbstractPipelineConfig> ShaderCache::GetGXPipelineConfig(const GXP
config.depth_state, config.blending_state); config.depth_state, config.blending_state);
} }
std::optional<AbstractPipelineConfig> /// Edits the UID based on driver bugs and other special configurations
ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config) static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in)
{ {
// Ubershader variant: returns an adjusted copy of `in`; the argument itself is not modified.
GXUberPipelineUid out;
// Byte-wise copy so that padding bytes are carried over as well.
memcpy(&out, &in, sizeof(out)); // Copy padding
if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
{
// Always blend in shader: clear the whole blending state, then restore only the
// colorupdate/alphaupdate bits from the input, and mark the shader as not using dual source.
out.blending_state.hex = 0;
out.blending_state.colorupdate = in.blending_state.colorupdate.Value();
out.blending_state.alphaupdate = in.blending_state.alphaupdate.Value();
out.ps_uid.GetUidData()->no_dual_src = true;
}
// Without framebuffer fetch: drop dual-source blending when the backend lacks it, or when the
// driver's implementation is flagged as broken and this blending state does not require it.
else if (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend ||
(DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) &&
!out.blending_state.RequiresDualSrc()))
{
out.blending_state.usedualsrc = false;
out.ps_uid.GetUidData()->no_dual_src = true;
}
return out;
}
std::optional<AbstractPipelineConfig>
ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config_in)
{
GXUberPipelineUid config = ApplyDriverBugs(config_in);
const AbstractShader* vs; const AbstractShader* vs;
auto vs_iter = m_uber_vs_cache.shader_map.find(config.vs_uid); auto vs_iter = m_uber_vs_cache.shader_map.find(config.vs_uid);
if (vs_iter != m_uber_vs_cache.shader_map.end() && !vs_iter->second.pending) if (vs_iter != m_uber_vs_cache.shader_map.end() && !vs_iter->second.pending)
@ -981,12 +1093,14 @@ void ShaderCache::QueuePipelineCompile(const GXPipelineUid& uid, u32 priority)
{ {
stages_ready = true; stages_ready = true;
auto vs_it = shader_cache->m_vs_cache.shader_map.find(uid.vs_uid); GXPipelineUid actual_uid = ApplyDriverBugs(uid);
auto vs_it = shader_cache->m_vs_cache.shader_map.find(actual_uid.vs_uid);
stages_ready &= vs_it != shader_cache->m_vs_cache.shader_map.end() && !vs_it->second.pending; stages_ready &= vs_it != shader_cache->m_vs_cache.shader_map.end() && !vs_it->second.pending;
if (vs_it == shader_cache->m_vs_cache.shader_map.end()) if (vs_it == shader_cache->m_vs_cache.shader_map.end())
shader_cache->QueueVertexShaderCompile(uid.vs_uid, priority); shader_cache->QueueVertexShaderCompile(actual_uid.vs_uid, priority);
PixelShaderUid ps_uid = uid.ps_uid; PixelShaderUid ps_uid = actual_uid.ps_uid;
ClearUnusedPixelShaderUidBits(shader_cache->m_api_type, shader_cache->m_host_config, &ps_uid); ClearUnusedPixelShaderUidBits(shader_cache->m_api_type, shader_cache->m_host_config, &ps_uid);
auto ps_it = shader_cache->m_ps_cache.shader_map.find(ps_uid); auto ps_it = shader_cache->m_ps_cache.shader_map.find(ps_uid);
@ -1051,13 +1165,15 @@ void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid, u32 pri
{ {
stages_ready = true; stages_ready = true;
auto vs_it = shader_cache->m_uber_vs_cache.shader_map.find(uid.vs_uid); GXUberPipelineUid actual_uid = ApplyDriverBugs(uid);
auto vs_it = shader_cache->m_uber_vs_cache.shader_map.find(actual_uid.vs_uid);
stages_ready &= stages_ready &=
vs_it != shader_cache->m_uber_vs_cache.shader_map.end() && !vs_it->second.pending; vs_it != shader_cache->m_uber_vs_cache.shader_map.end() && !vs_it->second.pending;
if (vs_it == shader_cache->m_uber_vs_cache.shader_map.end()) if (vs_it == shader_cache->m_uber_vs_cache.shader_map.end())
shader_cache->QueueVertexUberShaderCompile(uid.vs_uid, priority); shader_cache->QueueVertexUberShaderCompile(actual_uid.vs_uid, priority);
UberShader::PixelShaderUid ps_uid = uid.ps_uid; UberShader::PixelShaderUid ps_uid = actual_uid.ps_uid;
UberShader::ClearUnusedPixelShaderUidBits(shader_cache->m_api_type, UberShader::ClearUnusedPixelShaderUidBits(shader_cache->m_api_type,
shader_cache->m_host_config, &ps_uid); shader_cache->m_host_config, &ps_uid);

View File

@ -3,6 +3,8 @@
#include "VideoCommon/UberShaderPixel.h" #include "VideoCommon/UberShaderPixel.h"
#include "Common/Assert.h"
#include "VideoCommon/BPMemory.h" #include "VideoCommon/BPMemory.h"
#include "VideoCommon/DriverDetails.h" #include "VideoCommon/DriverDetails.h"
#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/NativeVertexFormat.h"
@ -21,12 +23,12 @@ PixelShaderUid GetPixelShaderUid()
pixel_ubershader_uid_data* const uid_data = out.GetUidData(); pixel_ubershader_uid_data* const uid_data = out.GetUidData();
uid_data->num_texgens = xfmem.numTexGen.numTexGens; uid_data->num_texgens = xfmem.numTexGen.numTexGens;
uid_data->early_depth = bpmem.UseEarlyDepthTest() && uid_data->early_depth = bpmem.GetEmulatedZ() == EmulatedZ::Early &&
(g_ActiveConfig.bFastDepthCalc || (g_ActiveConfig.bFastDepthCalc ||
bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined) && bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined) &&
!(bpmem.zmode.testenable && bpmem.genMode.zfreeze); !(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
uid_data->per_pixel_depth = uid_data->per_pixel_depth =
(bpmem.ztex2.op != ZTexOp::Disabled && bpmem.UseLateDepthTest()) || (bpmem.ztex2.op != ZTexOp::Disabled && bpmem.GetEmulatedZ() == EmulatedZ::Late) ||
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !uid_data->early_depth) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !uid_data->early_depth) ||
(bpmem.zmode.testenable && bpmem.genMode.zfreeze); (bpmem.zmode.testenable && bpmem.genMode.zfreeze);
uid_data->uint_output = bpmem.blendmode.UseLogicOp(); uid_data->uint_output = bpmem.blendmode.UseLogicOp();
@ -39,6 +41,13 @@ void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& hos
{ {
pixel_ubershader_uid_data* const uid_data = uid->GetUidData(); pixel_ubershader_uid_data* const uid_data = uid->GetUidData();
// With fbfetch, ubershaders always blend using that and don't use dual src
if (host_config.backend_shader_framebuffer_fetch || !host_config.backend_dual_source_blend)
uid_data->no_dual_src = 1;
// Dual source is always enabled in the shader if this bug is not present
else if (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING))
uid_data->no_dual_src = 0;
// OpenGL and Vulkan convert implicitly normalized color outputs to their uint representation. // OpenGL and Vulkan convert implicitly normalized color outputs to their uint representation.
// Therefore, it is not necessary to use a uint output on these backends. We also disable the // Therefore, it is not necessary to use a uint output on these backends. We also disable the
// uint output when logic op is not supported (i.e. driver/device does not support D3D11.1). // uint output when logic op is not supported (i.e. driver/device does not support D3D11.1).
@ -53,19 +62,17 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
const bool msaa = host_config.msaa; const bool msaa = host_config.msaa;
const bool ssaa = host_config.ssaa; const bool ssaa = host_config.ssaa;
const bool stereo = host_config.stereo; const bool stereo = host_config.stereo;
const bool use_dual_source = host_config.backend_dual_source_blend; const bool use_framebuffer_fetch = host_config.backend_shader_framebuffer_fetch;
const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch; const bool use_dual_source = host_config.backend_dual_source_blend && !uid_data->no_dual_src;
const bool use_shader_logic_op =
!host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch;
const bool use_framebuffer_fetch =
use_shader_blend || use_shader_logic_op ||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z);
const bool early_depth = uid_data->early_depth != 0; const bool early_depth = uid_data->early_depth != 0;
const bool per_pixel_depth = uid_data->per_pixel_depth != 0; const bool per_pixel_depth = uid_data->per_pixel_depth != 0;
const bool bounding_box = host_config.bounding_box; const bool bounding_box = host_config.bounding_box;
const u32 numTexgen = uid_data->num_texgens; const u32 numTexgen = uid_data->num_texgens;
ShaderCode out; ShaderCode out;
ASSERT_MSG(VIDEO, !(use_dual_source && use_framebuffer_fetch),
"If you're using framebuffer fetch, you shouldn't need dual source blend!");
out.Write("// {}\n", *uid_data); out.Write("// {}\n", *uid_data);
WriteBitfieldExtractHeader(out, api_type, host_config); WriteBitfieldExtractHeader(out, api_type, host_config);
WritePixelShaderCommonHeader(out, api_type, host_config, bounding_box); WritePixelShaderCommonHeader(out, api_type, host_config, bounding_box);
@ -79,9 +86,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
{ {
if (use_dual_source) if (use_dual_source)
{ {
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n" out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n"
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n", "FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
} }
else else
{ {
@ -520,12 +526,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
// intermediate value with multiple reads & modifications, so we pull out the "real" output // intermediate value with multiple reads & modifications, so we pull out the "real" output
// value above and use a temporary for calculations, then set the output value once at the // value above and use a temporary for calculations, then set the output value once at the
// end of the shader. // end of the shader.
out.Write(" float4 ocol0;\n"); out.Write(" float4 ocol0;\n"
} " float4 ocol1;\n");
if (use_shader_blend)
{
out.Write(" float4 ocol1;\n");
} }
if (host_config.backend_geometry_shaders && stereo) if (host_config.backend_geometry_shaders && stereo)
@ -943,8 +945,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
{ {
// Instead of using discard, fetch the framebuffer's color value and use it as the output // Instead of using discard, fetch the framebuffer's color value and use it as the output
// for this fragment. // for this fragment.
out.Write(" #define discard_fragment {{ {} = float4(initial_ocol0.xyz, 1.0); return; }}\n", out.Write(
use_shader_blend ? "real_ocol0" : "ocol0"); " #define discard_fragment {{ real_ocol0 = float4(initial_ocol0.xyz, 1.0); return; }}\n");
} }
else else
{ {
@ -1055,7 +1057,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" }}\n" " }}\n"
"\n"); "\n");
if (use_shader_logic_op) if (use_framebuffer_fetch)
{ {
static constexpr std::array<const char*, 16> logic_op_mode{ static constexpr std::array<const char*, 16> logic_op_mode{
"int4(0, 0, 0, 0)", // CLEAR "int4(0, 0, 0, 0)", // CLEAR
@ -1113,7 +1115,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" ocol0.a = float(TevResult.a >> 2) / 63.0;\n" " ocol0.a = float(TevResult.a >> 2) / 63.0;\n"
" \n"); " \n");
if (use_dual_source || use_shader_blend) if (use_dual_source || use_framebuffer_fetch)
{ {
out.Write(" // Dest alpha override (dual source blending)\n" out.Write(" // Dest alpha override (dual source blending)\n"
" // Colors will be blended against the alpha from ocol1 and\n" " // Colors will be blended against the alpha from ocol1 and\n"
@ -1129,7 +1131,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" }}\n"); " }}\n");
} }
if (use_shader_blend) if (use_framebuffer_fetch)
{ {
using Common::EnumMap; using Common::EnumMap;
@ -1208,10 +1210,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" real_ocol0 = ocol0;\n" " real_ocol0 = ocol0;\n"
" }}\n"); " }}\n");
} }
else if (use_framebuffer_fetch)
{
out.Write(" real_ocol0 = ocol0;\n");
}
out.Write("}}\n" out.Write("}}\n"
"\n" "\n"
@ -1274,7 +1272,11 @@ void EnumeratePixelShaderUids(const std::function<void(const PixelShaderUid&)>&
for (u32 uint_output = 0; uint_output < 2; uint_output++) for (u32 uint_output = 0; uint_output < 2; uint_output++)
{ {
puid->uint_output = uint_output; puid->uint_output = uint_output;
callback(uid); for (u32 no_dual_src = 0; no_dual_src < 2; no_dual_src++)
{
puid->no_dual_src = no_dual_src;
callback(uid);
}
} }
} }
} }

View File

@ -18,6 +18,7 @@ struct pixel_ubershader_uid_data
u32 early_depth : 1; u32 early_depth : 1;
u32 per_pixel_depth : 1; u32 per_pixel_depth : 1;
u32 uint_output : 1; u32 uint_output : 1;
u32 no_dual_src : 1;
u32 NumValues() const { return sizeof(pixel_ubershader_uid_data); } u32 NumValues() const { return sizeof(pixel_ubershader_uid_data); }
}; };
@ -42,9 +43,9 @@ struct fmt::formatter<UberShader::pixel_ubershader_uid_data>
// Writes a one-line human-readable summary of the ubershader UID to the format context: the
// texgen count followed by one suffix for each flag that is set.
template <typename FormatContext> template <typename FormatContext>
auto format(const UberShader::pixel_ubershader_uid_data& uid, FormatContext& ctx) const auto format(const UberShader::pixel_ubershader_uid_data& uid, FormatContext& ctx) const
{ {
return fmt::format_to(ctx.out(), "Pixel UberShader for {} texgens{}{}{}", uid.num_texgens, return fmt::format_to(
uid.early_depth ? ", early-depth" : "", ctx.out(), "Pixel UberShader for {} texgens{}{}{}{}", uid.num_texgens,
uid.per_pixel_depth ? ", per-pixel depth" : "", uid.early_depth ? ", early-depth" : "", uid.per_pixel_depth ? ", per-pixel depth" : "",
uid.uint_output ? ", uint output" : ""); uid.uint_output ? ", uint output" : "", uid.no_dual_src ? ", no dual-source blending" : "");
} }
}; };