Merge pull request #10747 from tellowkrinkle/LateUIDFixup
Add a post-cache shader UID fixup pass
This commit is contained in:
commit
70b0b03c3c
|
@ -1168,55 +1168,43 @@ void Renderer::ApplyBlendingState(const BlendingState state)
|
|||
if (m_current_blend_state == state)
|
||||
return;
|
||||
|
||||
bool useDualSource =
|
||||
state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
|
||||
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha);
|
||||
// Only use shader blend if we need to and we don't support dual-source blending directly
|
||||
bool useShaderBlend = !useDualSource && state.usedualsrc && state.dstalpha &&
|
||||
g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
|
||||
bool useDualSource = state.usedualsrc;
|
||||
|
||||
if (useShaderBlend)
|
||||
{
|
||||
glDisable(GL_BLEND);
|
||||
}
|
||||
const GLenum src_factors[8] = {GL_ZERO,
|
||||
GL_ONE,
|
||||
GL_DST_COLOR,
|
||||
GL_ONE_MINUS_DST_COLOR,
|
||||
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
|
||||
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA :
|
||||
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
|
||||
GL_DST_ALPHA,
|
||||
GL_ONE_MINUS_DST_ALPHA};
|
||||
const GLenum dst_factors[8] = {GL_ZERO,
|
||||
GL_ONE,
|
||||
GL_SRC_COLOR,
|
||||
GL_ONE_MINUS_SRC_COLOR,
|
||||
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
|
||||
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA :
|
||||
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
|
||||
GL_DST_ALPHA,
|
||||
GL_ONE_MINUS_DST_ALPHA};
|
||||
|
||||
if (state.blendenable)
|
||||
glEnable(GL_BLEND);
|
||||
else
|
||||
{
|
||||
const GLenum src_factors[8] = {GL_ZERO,
|
||||
GL_ONE,
|
||||
GL_DST_COLOR,
|
||||
GL_ONE_MINUS_DST_COLOR,
|
||||
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
|
||||
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA :
|
||||
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
|
||||
GL_DST_ALPHA,
|
||||
GL_ONE_MINUS_DST_ALPHA};
|
||||
const GLenum dst_factors[8] = {GL_ZERO,
|
||||
GL_ONE,
|
||||
GL_SRC_COLOR,
|
||||
GL_ONE_MINUS_SRC_COLOR,
|
||||
useDualSource ? GL_SRC1_ALPHA : (GLenum)GL_SRC_ALPHA,
|
||||
useDualSource ? GL_ONE_MINUS_SRC1_ALPHA :
|
||||
(GLenum)GL_ONE_MINUS_SRC_ALPHA,
|
||||
GL_DST_ALPHA,
|
||||
GL_ONE_MINUS_DST_ALPHA};
|
||||
glDisable(GL_BLEND);
|
||||
|
||||
if (state.blendenable)
|
||||
glEnable(GL_BLEND);
|
||||
else
|
||||
glDisable(GL_BLEND);
|
||||
|
||||
// Always call glBlendEquationSeparate and glBlendFuncSeparate, even when
|
||||
// GL_BLEND is disabled, as a workaround for some bugs (possibly graphics
|
||||
// driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic
|
||||
// Adventure 2 Battle: graphics crash when loading first Dark level"
|
||||
GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
|
||||
GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
|
||||
glBlendEquationSeparate(equation, equationAlpha);
|
||||
glBlendFuncSeparate(src_factors[u32(state.srcfactor.Value())],
|
||||
dst_factors[u32(state.dstfactor.Value())],
|
||||
src_factors[u32(state.srcfactoralpha.Value())],
|
||||
dst_factors[u32(state.dstfactoralpha.Value())]);
|
||||
}
|
||||
// Always call glBlendEquationSeparate and glBlendFuncSeparate, even when
|
||||
// GL_BLEND is disabled, as a workaround for some bugs (possibly graphics
|
||||
// driver issues?). See https://bugs.dolphin-emu.org/issues/10120 : "Sonic
|
||||
// Adventure 2 Battle: graphics crash when loading first Dark level"
|
||||
GLenum equation = state.subtract ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
|
||||
GLenum equationAlpha = state.subtractAlpha ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD;
|
||||
glBlendEquationSeparate(equation, equationAlpha);
|
||||
glBlendFuncSeparate(src_factors[u32(state.srcfactor.Value())],
|
||||
dst_factors[u32(state.dstfactor.Value())],
|
||||
src_factors[u32(state.srcfactoralpha.Value())],
|
||||
dst_factors[u32(state.dstfactoralpha.Value())]);
|
||||
|
||||
const GLenum logic_op_codes[16] = {
|
||||
GL_CLEAR, GL_AND, GL_AND_REVERSE, GL_COPY, GL_AND_INVERTED, GL_NOOP,
|
||||
|
|
|
@ -153,7 +153,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi)
|
|||
|
||||
s32 z = (s32)std::clamp<float>(ZSlope.GetValue(x, y), 0.0f, 16777215.0f);
|
||||
|
||||
if (bpmem.UseEarlyDepthTest())
|
||||
if (bpmem.GetEmulatedZ() == EmulatedZ::Early)
|
||||
{
|
||||
// TODO: Test if perf regs are incremented even if test is disabled
|
||||
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC);
|
||||
|
|
|
@ -840,7 +840,7 @@ void Tev::Draw()
|
|||
output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8;
|
||||
}
|
||||
|
||||
if (bpmem.UseLateDepthTest())
|
||||
if (bpmem.GetEmulatedZ() == EmulatedZ::Late)
|
||||
{
|
||||
// TODO: Check against hw if these values get incremented even if depth testing is disabled
|
||||
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT);
|
||||
|
|
|
@ -137,60 +137,48 @@ GetVulkanAttachmentBlendState(const BlendingState& state, AbstractPipelineUsage
|
|||
{
|
||||
VkPipelineColorBlendAttachmentState vk_state = {};
|
||||
|
||||
bool use_dual_source =
|
||||
state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
|
||||
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha);
|
||||
bool use_shader_blend = !use_dual_source && state.usedualsrc && state.dstalpha &&
|
||||
g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
|
||||
bool use_dual_source = state.usedualsrc;
|
||||
|
||||
if (use_shader_blend || (usage == AbstractPipelineUsage::GX &&
|
||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)))
|
||||
vk_state.blendEnable = static_cast<VkBool32>(state.blendenable);
|
||||
vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD;
|
||||
vk_state.alphaBlendOp = state.subtractAlpha ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD;
|
||||
|
||||
if (use_dual_source)
|
||||
{
|
||||
vk_state.blendEnable = VK_FALSE;
|
||||
static constexpr std::array<VkBlendFactor, 8> src_factors = {
|
||||
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
|
||||
static constexpr std::array<VkBlendFactor, 8> dst_factors = {
|
||||
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
|
||||
|
||||
vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())];
|
||||
vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())];
|
||||
vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())];
|
||||
vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())];
|
||||
}
|
||||
else
|
||||
{
|
||||
vk_state.blendEnable = static_cast<VkBool32>(state.blendenable);
|
||||
vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD;
|
||||
vk_state.alphaBlendOp = state.subtractAlpha ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD;
|
||||
static constexpr std::array<VkBlendFactor, 8> src_factors = {
|
||||
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
|
||||
|
||||
if (use_dual_source)
|
||||
{
|
||||
static constexpr std::array<VkBlendFactor, 8> src_factors = {
|
||||
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
|
||||
static constexpr std::array<VkBlendFactor, 8> dst_factors = {
|
||||
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
|
||||
static constexpr std::array<VkBlendFactor, 8> dst_factors = {
|
||||
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
|
||||
|
||||
vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())];
|
||||
vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())];
|
||||
vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())];
|
||||
vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())];
|
||||
}
|
||||
else
|
||||
{
|
||||
static constexpr std::array<VkBlendFactor, 8> src_factors = {
|
||||
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
|
||||
|
||||
static constexpr std::array<VkBlendFactor, 8> dst_factors = {
|
||||
{VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}};
|
||||
|
||||
vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())];
|
||||
vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())];
|
||||
vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())];
|
||||
vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())];
|
||||
}
|
||||
vk_state.srcColorBlendFactor = src_factors[u32(state.srcfactor.Value())];
|
||||
vk_state.srcAlphaBlendFactor = src_factors[u32(state.srcfactoralpha.Value())];
|
||||
vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())];
|
||||
vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())];
|
||||
}
|
||||
|
||||
if (state.colorupdate)
|
||||
|
|
|
@ -371,13 +371,6 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD
|
|||
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_REVERSED_DEPTH_RANGE))
|
||||
config->backend_info.bSupportsReversedDepthRange = false;
|
||||
|
||||
// Calling discard when early depth test is enabled can break on some Apple Silicon GPU drivers.
|
||||
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z))
|
||||
{
|
||||
// We will use shader blending, so disable hardware dual source blending.
|
||||
config->backend_info.bSupportsDualSourceBlend = false;
|
||||
}
|
||||
|
||||
// Dynamic sampler indexing locks up Intel GPUs on MoltenVK/Metal
|
||||
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING))
|
||||
config->backend_info.bSupportsDynamicSamplerIndexing = false;
|
||||
|
|
|
@ -2338,6 +2338,16 @@ struct BPCmd
|
|||
int newvalue;
|
||||
};
|
||||
|
||||
enum class EmulatedZ : u32
|
||||
{
|
||||
Disabled = 0,
|
||||
Early = 1,
|
||||
Late = 2,
|
||||
ForcedEarly = 3,
|
||||
EarlyWithFBFetch = 4,
|
||||
EarlyWithZComplocHack = 5,
|
||||
};
|
||||
|
||||
struct BPMemory
|
||||
{
|
||||
GenMode genMode;
|
||||
|
@ -2405,8 +2415,15 @@ struct BPMemory
|
|||
u32 bpMask; // 0xFE
|
||||
u32 unknown18; // ff
|
||||
|
||||
bool UseEarlyDepthTest() const { return zcontrol.early_ztest && zmode.testenable; }
|
||||
bool UseLateDepthTest() const { return !zcontrol.early_ztest && zmode.testenable; }
|
||||
// Classifies the depth test currently programmed in these registers.
// Returns Disabled when zmode.testenable is clear; otherwise Early or Late depending on
// whether zcontrol.early_ztest is set.
EmulatedZ GetEmulatedZ() const
{
  if (!zmode.testenable)
    return EmulatedZ::Disabled;

  return zcontrol.early_ztest ? EmulatedZ::Early : EmulatedZ::Late;
}
|
||||
};
|
||||
|
||||
#pragma pack()
|
||||
|
|
|
@ -237,7 +237,8 @@ enum Bug
|
|||
// crash. Sometimes this happens in the kernel mode part of the driver, resulting in a BSOD.
|
||||
// These shaders are also particularly problematic on macOS's Intel drivers. On OpenGL, they can
|
||||
// cause depth issues. On Metal, they can cause the driver to not write a primitive to the depth
|
||||
// buffer whenever a fragment is discarded. Disable dual-source blending support on these drivers.
|
||||
// buffer if dual source blending is output in the shader but not subsequently used in blending.
|
||||
// Compile separate shaders for DSB on vs off for these drivers.
|
||||
BUG_BROKEN_DUAL_SOURCE_BLENDING,
|
||||
|
||||
// BUG: ImgTec GLSL shader compiler fails when negating the input to a bitwise operation
|
||||
|
|
|
@ -19,7 +19,7 @@ namespace VideoCommon
|
|||
// As pipelines encompass both shader UIDs and render states, changes to either of these should
|
||||
// also increment the pipeline UID version. Incrementing the UID version will cause all UID
|
||||
// caches to be invalidated.
|
||||
constexpr u32 GX_PIPELINE_UID_VERSION = 4; // Last changed in PR 10215
|
||||
constexpr u32 GX_PIPELINE_UID_VERSION = 5; // Last changed in PR 10747
|
||||
|
||||
struct GXPipelineUid
|
||||
{
|
||||
|
|
|
@ -167,9 +167,6 @@ constexpr Common::EnumMap<const char*, TevOutput::Color2> tev_a_output_table{
|
|||
"c2.a",
|
||||
};
|
||||
|
||||
// FIXME: Some of the video card's capabilities (BBox support, EarlyZ support, dstAlpha support)
|
||||
// leak into this UID; This is really unhelpful if these UIDs ever move from one machine to
|
||||
// another.
|
||||
PixelShaderUid GetPixelShaderUid()
|
||||
{
|
||||
PixelShaderUid out;
|
||||
|
@ -189,20 +186,25 @@ PixelShaderUid GetPixelShaderUid()
|
|||
|
||||
u32 numStages = uid_data->genMode_numtevstages + 1;
|
||||
|
||||
const bool forced_early_z =
|
||||
bpmem.UseEarlyDepthTest() &&
|
||||
uid_data->Pretest = bpmem.alpha_test.TestResult();
|
||||
uid_data->ztest = bpmem.GetEmulatedZ();
|
||||
if (uid_data->ztest == EmulatedZ::Early &&
|
||||
(g_ActiveConfig.bFastDepthCalc ||
|
||||
bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined)
|
||||
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
|
||||
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
|
||||
&& !(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
||||
&& !bpmem.genMode.zfreeze)
|
||||
{
|
||||
uid_data->ztest = EmulatedZ::ForcedEarly;
|
||||
}
|
||||
|
||||
const bool forced_early_z = uid_data->ztest == EmulatedZ::ForcedEarly;
|
||||
const bool per_pixel_depth =
|
||||
(bpmem.ztex2.op != ZTexOp::Disabled && bpmem.UseLateDepthTest()) ||
|
||||
(bpmem.ztex2.op != ZTexOp::Disabled && uid_data->ztest == EmulatedZ::Late) ||
|
||||
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) ||
|
||||
(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
||||
|
||||
uid_data->per_pixel_depth = per_pixel_depth;
|
||||
uid_data->forced_early_z = forced_early_z;
|
||||
|
||||
if (g_ActiveConfig.bEnablePixelLighting)
|
||||
{
|
||||
|
@ -285,59 +287,24 @@ PixelShaderUid GetPixelShaderUid()
|
|||
sizeof(*uid_data) :
|
||||
MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]);
|
||||
|
||||
uid_data->Pretest = bpmem.alpha_test.TestResult();
|
||||
uid_data->late_ztest = bpmem.UseLateDepthTest();
|
||||
|
||||
// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
|
||||
// (in this case we need to write a depth value if depth test passes regardless of the alpha
|
||||
// testing result)
|
||||
if (uid_data->Pretest == AlphaTestResult::Undetermined ||
|
||||
(uid_data->Pretest == AlphaTestResult::Fail && uid_data->late_ztest))
|
||||
(uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late))
|
||||
{
|
||||
uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0;
|
||||
uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1;
|
||||
uid_data->alpha_test_logic = bpmem.alpha_test.logic;
|
||||
|
||||
// ZCOMPLOC HACK:
|
||||
// The only way to emulate alpha test + early-z is to force early-z in the shader.
|
||||
// As this isn't available on all drivers and as we can't emulate this feature otherwise,
|
||||
// we are only able to choose which one we want to respect more.
|
||||
// Tests seem to have proven that writing depth even when the alpha test fails is more
|
||||
// important than a reliable alpha test, so we just force the alpha test to always succeed.
|
||||
// At least this seems to be less buggy.
|
||||
uid_data->alpha_test_use_zcomploc_hack =
|
||||
bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable &&
|
||||
!g_ActiveConfig.backend_info.bSupportsEarlyZ && !bpmem.genMode.zfreeze;
|
||||
}
|
||||
|
||||
uid_data->zfreeze = bpmem.genMode.zfreeze;
|
||||
uid_data->ztex_op = bpmem.ztex2.op;
|
||||
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
|
||||
|
||||
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
|
||||
uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj;
|
||||
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
|
||||
|
||||
BlendingState state = {};
|
||||
state.Generate(bpmem);
|
||||
|
||||
if (((state.usedualsrc && state.dstalpha) ||
|
||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)) &&
|
||||
g_ActiveConfig.backend_info.bSupportsFramebufferFetch &&
|
||||
!g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
|
||||
{
|
||||
uid_data->blend_enable = state.blendenable;
|
||||
uid_data->blend_src_factor = state.srcfactor;
|
||||
uid_data->blend_src_factor_alpha = state.srcfactoralpha;
|
||||
uid_data->blend_dst_factor = state.dstfactor;
|
||||
uid_data->blend_dst_factor_alpha = state.dstfactoralpha;
|
||||
uid_data->blend_subtract = state.subtract;
|
||||
uid_data->blend_subtract_alpha = state.subtractAlpha;
|
||||
}
|
||||
|
||||
uid_data->logic_op_enable = state.logicopenable;
|
||||
uid_data->logic_op_mode = u32(state.logicmode.Value());
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -798,7 +765,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||
out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
|
||||
"sampleTexture(texmap, samp[texmap], uv, layer)\n");
|
||||
|
||||
if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ)
|
||||
if (uid_data->ztest == EmulatedZ::ForcedEarly)
|
||||
{
|
||||
// Zcomploc (aka early_ztest) is a way to control whether depth test is done before
|
||||
// or after texturing and alpha test. PC graphics APIs used to provide no way to emulate
|
||||
|
@ -837,28 +804,15 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||
out.Write("FORCE_EARLY_Z; \n");
|
||||
}
|
||||
|
||||
// Only use dual-source blending when required on drivers that don't support it very well.
|
||||
const bool use_dual_source =
|
||||
host_config.backend_dual_source_blend &&
|
||||
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) ||
|
||||
uid_data->useDstAlpha);
|
||||
const bool use_shader_blend =
|
||||
!use_dual_source &&
|
||||
(uid_data->useDstAlpha ||
|
||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)) &&
|
||||
host_config.backend_shader_framebuffer_fetch;
|
||||
const bool use_shader_logic_op = !host_config.backend_logic_op && uid_data->logic_op_enable &&
|
||||
host_config.backend_shader_framebuffer_fetch;
|
||||
const bool use_framebuffer_fetch =
|
||||
use_shader_blend || use_shader_logic_op ||
|
||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z);
|
||||
const bool use_framebuffer_fetch = uid_data->blend_enable || uid_data->logic_op_enable ||
|
||||
uid_data->ztest == EmulatedZ::EarlyWithFBFetch;
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
|
||||
// if we want to use it.
|
||||
if (api_type == APIType::Vulkan)
|
||||
{
|
||||
if (use_dual_source)
|
||||
if (!uid_data->no_dual_src)
|
||||
{
|
||||
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
|
||||
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
|
||||
|
@ -891,7 +845,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||
uid_data->uint_output ? "uvec4" : "vec4",
|
||||
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
|
||||
|
||||
if (use_dual_source)
|
||||
if (!uid_data->no_dual_src)
|
||||
{
|
||||
out.Write("{} out {} ocol1;\n",
|
||||
has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(1)" :
|
||||
|
@ -960,7 +914,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||
out.Write("\tfloat4 ocol0;\n");
|
||||
}
|
||||
|
||||
if (use_shader_blend)
|
||||
if (uid_data->blend_enable)
|
||||
{
|
||||
out.Write("\tfloat4 ocol1;\n");
|
||||
}
|
||||
|
@ -1086,10 +1040,10 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||
// (in this case we need to write a depth value if depth test passes regardless of the alpha
|
||||
// testing result)
|
||||
if (uid_data->Pretest == AlphaTestResult::Undetermined ||
|
||||
(uid_data->Pretest == AlphaTestResult::Fail && uid_data->late_ztest))
|
||||
(uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late))
|
||||
{
|
||||
WriteAlphaTest(out, uid_data, api_type, uid_data->per_pixel_depth,
|
||||
use_dual_source || use_shader_blend);
|
||||
!uid_data->no_dual_src || uid_data->blend_enable);
|
||||
}
|
||||
|
||||
// This situation is important for Mario Kart Wii's menus (they will render incorrectly if the
|
||||
|
@ -1144,7 +1098,10 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||
const bool skip_ztexture = !uid_data->per_pixel_depth && uid_data->fog_fsel == FogType::Off;
|
||||
|
||||
// Note: z-textures are not written to depth buffer if early depth test is used
|
||||
if (uid_data->per_pixel_depth && uid_data->early_ztest)
|
||||
const bool early_ztest = uid_data->ztest == EmulatedZ::Early ||
|
||||
uid_data->ztest == EmulatedZ::EarlyWithFBFetch ||
|
||||
uid_data->ztest == EmulatedZ::EarlyWithZComplocHack;
|
||||
if (uid_data->per_pixel_depth && early_ztest)
|
||||
{
|
||||
if (!host_config.backend_reversed_depth_range)
|
||||
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
|
||||
|
@ -1165,7 +1122,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||
out.Write("\tzCoord = zCoord & 0xFFFFFF;\n");
|
||||
}
|
||||
|
||||
if (uid_data->per_pixel_depth && uid_data->late_ztest)
|
||||
if (uid_data->per_pixel_depth && uid_data->ztest == EmulatedZ::Late)
|
||||
{
|
||||
if (!host_config.backend_reversed_depth_range)
|
||||
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
|
||||
|
@ -1184,14 +1141,14 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||
|
||||
WriteFog(out, uid_data);
|
||||
|
||||
if (use_shader_logic_op)
|
||||
if (uid_data->logic_op_enable)
|
||||
WriteLogicOp(out, uid_data);
|
||||
|
||||
// Write the color and alpha values to the framebuffer
|
||||
// If using shader blend, we still use the separate alpha
|
||||
WriteColor(out, api_type, uid_data, use_dual_source || use_shader_blend);
|
||||
WriteColor(out, api_type, uid_data, !uid_data->no_dual_src || uid_data->blend_enable);
|
||||
|
||||
if (use_shader_blend)
|
||||
if (uid_data->blend_enable)
|
||||
WriteBlend(out, uid_data);
|
||||
else if (use_framebuffer_fetch)
|
||||
out.Write("\treal_ocol0 = ocol0;\n");
|
||||
|
@ -1728,11 +1685,10 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
|
|||
}
|
||||
|
||||
// ZCOMPLOC HACK:
|
||||
if (!uid_data->alpha_test_use_zcomploc_hack)
|
||||
if (uid_data->ztest != EmulatedZ::EarlyWithZComplocHack)
|
||||
{
|
||||
#ifdef __APPLE__
|
||||
if (uid_data->forced_early_z &&
|
||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z))
|
||||
if (uid_data->ztest == EmulatedZ::EarlyWithFBFetch)
|
||||
{
|
||||
// Instead of using discard, fetch the framebuffer's color value and use it as the output
|
||||
// for this fragment.
|
||||
|
|
|
@ -12,6 +12,7 @@ enum class AlphaTestOp : u32;
|
|||
enum class AlphaTestResult;
|
||||
enum class CompareMode : u32;
|
||||
enum class DstBlendFactor : u32;
|
||||
enum class EmulatedZ : u32;
|
||||
enum class FogProjection : u32;
|
||||
enum class FogType : u32;
|
||||
enum class KonstSel : u32;
|
||||
|
@ -28,6 +29,7 @@ struct pixel_shader_uid_data
|
|||
u32 NumValues() const { return num_values; }
|
||||
u32 pad0 : 4;
|
||||
u32 useDstAlpha : 1;
|
||||
u32 no_dual_src : 1;
|
||||
AlphaTestResult Pretest : 2;
|
||||
u32 nIndirectStagesUsed : 4;
|
||||
u32 genMode_numtexgens : 4;
|
||||
|
@ -36,16 +38,13 @@ struct pixel_shader_uid_data
|
|||
CompareMode alpha_test_comp0 : 3;
|
||||
CompareMode alpha_test_comp1 : 3;
|
||||
AlphaTestOp alpha_test_logic : 2;
|
||||
u32 alpha_test_use_zcomploc_hack : 1;
|
||||
FogProjection fog_proj : 1;
|
||||
|
||||
FogType fog_fsel : 3;
|
||||
u32 fog_RangeBaseEnabled : 1;
|
||||
ZTexOp ztex_op : 2;
|
||||
u32 per_pixel_depth : 1;
|
||||
u32 forced_early_z : 1;
|
||||
u32 early_ztest : 1;
|
||||
u32 late_ztest : 1;
|
||||
EmulatedZ ztest : 3;
|
||||
u32 bounding_box : 1;
|
||||
u32 zfreeze : 1;
|
||||
u32 numColorChans : 2;
|
||||
|
|
|
@ -448,7 +448,7 @@ void PixelShaderManager::SetGenModeChanged()
|
|||
|
||||
void PixelShaderManager::SetZModeControl()
|
||||
{
|
||||
u32 late_ztest = bpmem.UseLateDepthTest();
|
||||
u32 late_ztest = bpmem.GetEmulatedZ() == EmulatedZ::Late;
|
||||
u32 rgba6_format =
|
||||
(bpmem.zcontrol.pixel_format == PixelFormat::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor) ?
|
||||
1 :
|
||||
|
|
|
@ -25,6 +25,34 @@ void DepthState::Generate(const BPMemory& bp)
|
|||
func = bp.zmode.func.Value();
|
||||
}
|
||||
|
||||
// Reports whether a source blend factor is one of the source-alpha factors
// (SrcAlpha or InvSrcAlpha).
// NOTE(review): presumably these are the factors that read the shader's second colour output
// when dual-source blending is active - confirm against the backend factor tables.
static bool IsDualSrc(SrcBlendFactor factor)
{
  switch (factor)
  {
  case SrcBlendFactor::SrcAlpha:
  case SrcBlendFactor::InvSrcAlpha:
    return true;
  default:
    return false;
  }
}
|
||||
|
||||
// Reports whether a destination blend factor refers to the source colour or source alpha
// (SrcClr, InvSrcClr, SrcAlpha or InvSrcAlpha).
// NOTE(review): the colour factors are counted here as well as the alpha ones; the OpenGL and
// Vulkan factor tables only substitute SRC1 variants for the alpha entries - confirm that the
// broader match is intentional.
static bool IsDualSrc(DstBlendFactor factor)
{
  const bool reads_src_colour =
      factor == DstBlendFactor::SrcClr || factor == DstBlendFactor::InvSrcClr;
  const bool reads_src_alpha =
      factor == DstBlendFactor::SrcAlpha || factor == DstBlendFactor::InvSrcAlpha;
  return reads_src_colour || reads_src_alpha;
}
|
||||
|
||||
bool BlendingState::RequiresDualSrc() const
|
||||
{
|
||||
bool requires_dual_src = false;
|
||||
requires_dual_src |= IsDualSrc(srcfactor) || IsDualSrc(srcfactoralpha);
|
||||
requires_dual_src |= IsDualSrc(dstfactor) || IsDualSrc(dstfactoralpha);
|
||||
requires_dual_src &= blendenable && usedualsrc;
|
||||
return requires_dual_src;
|
||||
}
|
||||
|
||||
// If the framebuffer format has no alpha channel, it is assumed to be
|
||||
// ONE on blending. As the backends may emulate this framebuffer
|
||||
// configuration with an alpha channel, we just drop all references
|
||||
|
@ -92,12 +120,12 @@ void BlendingState::Generate(const BPMemory& bp)
|
|||
// Start with everything disabled.
|
||||
hex = 0;
|
||||
|
||||
bool target_has_alpha = bp.zcontrol.pixel_format == PixelFormat::RGBA6_Z24;
|
||||
bool alpha_test_may_succeed = bp.alpha_test.TestResult() != AlphaTestResult::Fail;
|
||||
const bool target_has_alpha = bp.zcontrol.pixel_format == PixelFormat::RGBA6_Z24;
|
||||
const bool alpha_test_may_succeed = bp.alpha_test.TestResult() != AlphaTestResult::Fail;
|
||||
|
||||
colorupdate = bp.blendmode.colorupdate && alpha_test_may_succeed;
|
||||
alphaupdate = bp.blendmode.alphaupdate && target_has_alpha && alpha_test_may_succeed;
|
||||
dstalpha = bp.dstalpha.enable && alphaupdate;
|
||||
const bool dstalpha = bp.dstalpha.enable && alphaupdate;
|
||||
usedualsrc = true;
|
||||
|
||||
// The subtract bit has the highest priority
|
||||
|
|
|
@ -130,7 +130,6 @@ union BlendingState
|
|||
|
||||
BitField<0, 1, u32> blendenable;
|
||||
BitField<1, 1, u32> logicopenable;
|
||||
BitField<2, 1, u32> dstalpha;
|
||||
BitField<3, 1, u32> colorupdate;
|
||||
BitField<4, 1, u32> alphaupdate;
|
||||
BitField<5, 1, u32> subtract;
|
||||
|
@ -142,6 +141,8 @@ union BlendingState
|
|||
BitField<17, 3, SrcBlendFactor> srcfactoralpha;
|
||||
BitField<20, 4, LogicOp> logicmode;
|
||||
|
||||
bool RequiresDualSrc() const;
|
||||
|
||||
u32 hex;
|
||||
};
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "Common/MsgHandler.h"
|
||||
#include "Core/ConfigManager.h"
|
||||
|
||||
#include "VideoCommon/DriverDetails.h"
|
||||
#include "VideoCommon/FramebufferManager.h"
|
||||
#include "VideoCommon/FramebufferShaderGen.h"
|
||||
#include "VideoCommon/RenderBase.h"
|
||||
|
@ -612,8 +613,95 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig(
|
|||
return config;
|
||||
}
|
||||
|
||||
std::optional<AbstractPipelineConfig> ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config)
|
||||
/// Edits the UID based on driver bugs and other special configurations
|
||||
static GXPipelineUid ApplyDriverBugs(const GXPipelineUid& in)
|
||||
{
|
||||
GXPipelineUid out;
|
||||
memcpy(&out, &in, sizeof(out)); // copy padding
|
||||
pixel_shader_uid_data* ps = out.ps_uid.GetUidData();
|
||||
BlendingState& blend = out.blending_state;
|
||||
|
||||
if (ps->ztest == EmulatedZ::ForcedEarly && !out.depth_state.updateenable)
|
||||
{
|
||||
// No need to force early depth test if you're not writing z
|
||||
ps->ztest = EmulatedZ::Early;
|
||||
}
|
||||
|
||||
const bool benefits_from_ps_dual_source_off =
|
||||
(!g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
|
||||
g_ActiveConfig.backend_info.bSupportsFramebufferFetch) ||
|
||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING);
|
||||
if (benefits_from_ps_dual_source_off && !blend.RequiresDualSrc())
|
||||
{
|
||||
// Only use dual-source blending when required on drivers that don't support it very well.
|
||||
ps->no_dual_src = true;
|
||||
blend.usedualsrc = false;
|
||||
}
|
||||
|
||||
if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
|
||||
{
|
||||
bool fbfetch_blend = false;
|
||||
if ((DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z) ||
|
||||
!g_ActiveConfig.backend_info.bSupportsEarlyZ) &&
|
||||
ps->ztest == EmulatedZ::ForcedEarly)
|
||||
{
|
||||
ps->ztest = EmulatedZ::EarlyWithFBFetch;
|
||||
fbfetch_blend |= static_cast<bool>(out.blending_state.blendenable);
|
||||
ps->no_dual_src = true;
|
||||
}
|
||||
fbfetch_blend |= blend.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp;
|
||||
fbfetch_blend |= blend.usedualsrc && !g_ActiveConfig.backend_info.bSupportsDualSourceBlend;
|
||||
if (fbfetch_blend)
|
||||
{
|
||||
ps->no_dual_src = true;
|
||||
if (blend.logicopenable)
|
||||
{
|
||||
ps->logic_op_enable = true;
|
||||
ps->logic_op_mode = static_cast<u32>(blend.logicmode.Value());
|
||||
blend.logicopenable = false;
|
||||
}
|
||||
if (blend.blendenable)
|
||||
{
|
||||
ps->blend_enable = true;
|
||||
ps->blend_src_factor = blend.srcfactor;
|
||||
ps->blend_src_factor_alpha = blend.srcfactoralpha;
|
||||
ps->blend_dst_factor = blend.dstfactor;
|
||||
ps->blend_dst_factor_alpha = blend.dstfactoralpha;
|
||||
ps->blend_subtract = blend.subtract;
|
||||
ps->blend_subtract_alpha = blend.subtractAlpha;
|
||||
blend.blendenable = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// force dual src off if we can't support it
|
||||
if (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
|
||||
{
|
||||
ps->no_dual_src = true;
|
||||
blend.usedualsrc = false;
|
||||
}
|
||||
|
||||
if (ps->ztest == EmulatedZ::ForcedEarly && !g_ActiveConfig.backend_info.bSupportsEarlyZ)
|
||||
{
|
||||
// These things should be false
|
||||
ASSERT(!ps->zfreeze);
|
||||
// ZCOMPLOC HACK:
|
||||
// The only way to emulate alpha test + early-z is to force early-z in the shader.
|
||||
// As this isn't available on all drivers and as we can't emulate this feature otherwise,
|
||||
// we are only able to choose which one we want to respect more.
|
||||
// Tests seem to have proven that writing depth even when the alpha test fails is more
|
||||
// important than a reliable alpha test, so we just force the alpha test to always succeed.
|
||||
// At least this seems to be less buggy.
|
||||
ps->ztest = EmulatedZ::EarlyWithZComplocHack;
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::optional<AbstractPipelineConfig>
|
||||
ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config_in)
|
||||
{
|
||||
GXPipelineUid config = ApplyDriverBugs(config_in);
|
||||
const AbstractShader* vs;
|
||||
auto vs_iter = m_vs_cache.shader_map.find(config.vs_uid);
|
||||
if (vs_iter != m_vs_cache.shader_map.end() && !vs_iter->second.pending)
|
||||
|
@ -650,9 +738,33 @@ std::optional<AbstractPipelineConfig> ShaderCache::GetGXPipelineConfig(const GXP
|
|||
config.depth_state, config.blending_state);
|
||||
}
|
||||
|
||||
std::optional<AbstractPipelineConfig>
|
||||
ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config)
|
||||
/// Edits the UID based on driver bugs and other special configurations
|
||||
static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in)
|
||||
{
|
||||
GXUberPipelineUid out;
|
||||
memcpy(&out, &in, sizeof(out)); // Copy padding
|
||||
if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
|
||||
{
|
||||
// Always blend in shader
|
||||
out.blending_state.hex = 0;
|
||||
out.blending_state.colorupdate = in.blending_state.colorupdate.Value();
|
||||
out.blending_state.alphaupdate = in.blending_state.alphaupdate.Value();
|
||||
out.ps_uid.GetUidData()->no_dual_src = true;
|
||||
}
|
||||
else if (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend ||
|
||||
(DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) &&
|
||||
!out.blending_state.RequiresDualSrc()))
|
||||
{
|
||||
out.blending_state.usedualsrc = false;
|
||||
out.ps_uid.GetUidData()->no_dual_src = true;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
std::optional<AbstractPipelineConfig>
|
||||
ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config_in)
|
||||
{
|
||||
GXUberPipelineUid config = ApplyDriverBugs(config_in);
|
||||
const AbstractShader* vs;
|
||||
auto vs_iter = m_uber_vs_cache.shader_map.find(config.vs_uid);
|
||||
if (vs_iter != m_uber_vs_cache.shader_map.end() && !vs_iter->second.pending)
|
||||
|
@ -981,12 +1093,14 @@ void ShaderCache::QueuePipelineCompile(const GXPipelineUid& uid, u32 priority)
|
|||
{
|
||||
stages_ready = true;
|
||||
|
||||
auto vs_it = shader_cache->m_vs_cache.shader_map.find(uid.vs_uid);
|
||||
GXPipelineUid actual_uid = ApplyDriverBugs(uid);
|
||||
|
||||
auto vs_it = shader_cache->m_vs_cache.shader_map.find(actual_uid.vs_uid);
|
||||
stages_ready &= vs_it != shader_cache->m_vs_cache.shader_map.end() && !vs_it->second.pending;
|
||||
if (vs_it == shader_cache->m_vs_cache.shader_map.end())
|
||||
shader_cache->QueueVertexShaderCompile(uid.vs_uid, priority);
|
||||
shader_cache->QueueVertexShaderCompile(actual_uid.vs_uid, priority);
|
||||
|
||||
PixelShaderUid ps_uid = uid.ps_uid;
|
||||
PixelShaderUid ps_uid = actual_uid.ps_uid;
|
||||
ClearUnusedPixelShaderUidBits(shader_cache->m_api_type, shader_cache->m_host_config, &ps_uid);
|
||||
|
||||
auto ps_it = shader_cache->m_ps_cache.shader_map.find(ps_uid);
|
||||
|
@ -1051,13 +1165,15 @@ void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid, u32 pri
|
|||
{
|
||||
stages_ready = true;
|
||||
|
||||
auto vs_it = shader_cache->m_uber_vs_cache.shader_map.find(uid.vs_uid);
|
||||
GXUberPipelineUid actual_uid = ApplyDriverBugs(uid);
|
||||
|
||||
auto vs_it = shader_cache->m_uber_vs_cache.shader_map.find(actual_uid.vs_uid);
|
||||
stages_ready &=
|
||||
vs_it != shader_cache->m_uber_vs_cache.shader_map.end() && !vs_it->second.pending;
|
||||
if (vs_it == shader_cache->m_uber_vs_cache.shader_map.end())
|
||||
shader_cache->QueueVertexUberShaderCompile(uid.vs_uid, priority);
|
||||
shader_cache->QueueVertexUberShaderCompile(actual_uid.vs_uid, priority);
|
||||
|
||||
UberShader::PixelShaderUid ps_uid = uid.ps_uid;
|
||||
UberShader::PixelShaderUid ps_uid = actual_uid.ps_uid;
|
||||
UberShader::ClearUnusedPixelShaderUidBits(shader_cache->m_api_type,
|
||||
shader_cache->m_host_config, &ps_uid);
|
||||
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include "VideoCommon/UberShaderPixel.h"
|
||||
|
||||
#include "Common/Assert.h"
|
||||
|
||||
#include "VideoCommon/BPMemory.h"
|
||||
#include "VideoCommon/DriverDetails.h"
|
||||
#include "VideoCommon/NativeVertexFormat.h"
|
||||
|
@ -21,12 +23,12 @@ PixelShaderUid GetPixelShaderUid()
|
|||
|
||||
pixel_ubershader_uid_data* const uid_data = out.GetUidData();
|
||||
uid_data->num_texgens = xfmem.numTexGen.numTexGens;
|
||||
uid_data->early_depth = bpmem.UseEarlyDepthTest() &&
|
||||
uid_data->early_depth = bpmem.GetEmulatedZ() == EmulatedZ::Early &&
|
||||
(g_ActiveConfig.bFastDepthCalc ||
|
||||
bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined) &&
|
||||
!(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
||||
uid_data->per_pixel_depth =
|
||||
(bpmem.ztex2.op != ZTexOp::Disabled && bpmem.UseLateDepthTest()) ||
|
||||
(bpmem.ztex2.op != ZTexOp::Disabled && bpmem.GetEmulatedZ() == EmulatedZ::Late) ||
|
||||
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !uid_data->early_depth) ||
|
||||
(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
||||
uid_data->uint_output = bpmem.blendmode.UseLogicOp();
|
||||
|
@ -39,6 +41,13 @@ void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& hos
|
|||
{
|
||||
pixel_ubershader_uid_data* const uid_data = uid->GetUidData();
|
||||
|
||||
// With fbfetch, ubershaders always blend using that and don't use dual src
|
||||
if (host_config.backend_shader_framebuffer_fetch || !host_config.backend_dual_source_blend)
|
||||
uid_data->no_dual_src = 1;
|
||||
// Dual source is always enabled in the shader if this bug is not present
|
||||
else if (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING))
|
||||
uid_data->no_dual_src = 0;
|
||||
|
||||
// OpenGL and Vulkan convert implicitly normalized color outputs to their uint representation.
|
||||
// Therefore, it is not necessary to use a uint output on these backends. We also disable the
|
||||
// uint output when logic op is not supported (i.e. driver/device does not support D3D11.1).
|
||||
|
@ -53,19 +62,17 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
|
|||
const bool msaa = host_config.msaa;
|
||||
const bool ssaa = host_config.ssaa;
|
||||
const bool stereo = host_config.stereo;
|
||||
const bool use_dual_source = host_config.backend_dual_source_blend;
|
||||
const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch;
|
||||
const bool use_shader_logic_op =
|
||||
!host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch;
|
||||
const bool use_framebuffer_fetch =
|
||||
use_shader_blend || use_shader_logic_op ||
|
||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z);
|
||||
const bool use_framebuffer_fetch = host_config.backend_shader_framebuffer_fetch;
|
||||
const bool use_dual_source = host_config.backend_dual_source_blend && !uid_data->no_dual_src;
|
||||
const bool early_depth = uid_data->early_depth != 0;
|
||||
const bool per_pixel_depth = uid_data->per_pixel_depth != 0;
|
||||
const bool bounding_box = host_config.bounding_box;
|
||||
const u32 numTexgen = uid_data->num_texgens;
|
||||
ShaderCode out;
|
||||
|
||||
ASSERT_MSG(VIDEO, !(use_dual_source && use_framebuffer_fetch),
|
||||
"If you're using framebuffer fetch, you shouldn't need dual source blend!");
|
||||
|
||||
out.Write("// {}\n", *uid_data);
|
||||
WriteBitfieldExtractHeader(out, api_type, host_config);
|
||||
WritePixelShaderCommonHeader(out, api_type, host_config, bounding_box);
|
||||
|
@ -79,9 +86,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
|
|||
{
|
||||
if (use_dual_source)
|
||||
{
|
||||
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
|
||||
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
|
||||
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
|
||||
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n"
|
||||
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -520,12 +526,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
|
|||
// intermediate value with multiple reads & modifications, so we pull out the "real" output
|
||||
// value above and use a temporary for calculations, then set the output value once at the
|
||||
// end of the shader.
|
||||
out.Write(" float4 ocol0;\n");
|
||||
}
|
||||
|
||||
if (use_shader_blend)
|
||||
{
|
||||
out.Write(" float4 ocol1;\n");
|
||||
out.Write(" float4 ocol0;\n"
|
||||
" float4 ocol1;\n");
|
||||
}
|
||||
|
||||
if (host_config.backend_geometry_shaders && stereo)
|
||||
|
@ -943,8 +945,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
|
|||
{
|
||||
// Instead of using discard, fetch the framebuffer's color value and use it as the output
|
||||
// for this fragment.
|
||||
out.Write(" #define discard_fragment {{ {} = float4(initial_ocol0.xyz, 1.0); return; }}\n",
|
||||
use_shader_blend ? "real_ocol0" : "ocol0");
|
||||
out.Write(
|
||||
" #define discard_fragment {{ real_ocol0 = float4(initial_ocol0.xyz, 1.0); return; }}\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1055,7 +1057,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
|
|||
" }}\n"
|
||||
"\n");
|
||||
|
||||
if (use_shader_logic_op)
|
||||
if (use_framebuffer_fetch)
|
||||
{
|
||||
static constexpr std::array<const char*, 16> logic_op_mode{
|
||||
"int4(0, 0, 0, 0)", // CLEAR
|
||||
|
@ -1113,7 +1115,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
|
|||
" ocol0.a = float(TevResult.a >> 2) / 63.0;\n"
|
||||
" \n");
|
||||
|
||||
if (use_dual_source || use_shader_blend)
|
||||
if (use_dual_source || use_framebuffer_fetch)
|
||||
{
|
||||
out.Write(" // Dest alpha override (dual source blending)\n"
|
||||
" // Colors will be blended against the alpha from ocol1 and\n"
|
||||
|
@ -1129,7 +1131,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
|
|||
" }}\n");
|
||||
}
|
||||
|
||||
if (use_shader_blend)
|
||||
if (use_framebuffer_fetch)
|
||||
{
|
||||
using Common::EnumMap;
|
||||
|
||||
|
@ -1208,10 +1210,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
|
|||
" real_ocol0 = ocol0;\n"
|
||||
" }}\n");
|
||||
}
|
||||
else if (use_framebuffer_fetch)
|
||||
{
|
||||
out.Write(" real_ocol0 = ocol0;\n");
|
||||
}
|
||||
|
||||
out.Write("}}\n"
|
||||
"\n"
|
||||
|
@ -1274,7 +1272,11 @@ void EnumeratePixelShaderUids(const std::function<void(const PixelShaderUid&)>&
|
|||
for (u32 uint_output = 0; uint_output < 2; uint_output++)
|
||||
{
|
||||
puid->uint_output = uint_output;
|
||||
callback(uid);
|
||||
for (u32 no_dual_src = 0; no_dual_src < 2; no_dual_src++)
|
||||
{
|
||||
puid->no_dual_src = no_dual_src;
|
||||
callback(uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ struct pixel_ubershader_uid_data
|
|||
u32 early_depth : 1;
|
||||
u32 per_pixel_depth : 1;
|
||||
u32 uint_output : 1;
|
||||
u32 no_dual_src : 1;
|
||||
|
||||
u32 NumValues() const { return sizeof(pixel_ubershader_uid_data); }
|
||||
};
|
||||
|
@ -42,9 +43,9 @@ struct fmt::formatter<UberShader::pixel_ubershader_uid_data>
|
|||
template <typename FormatContext>
|
||||
auto format(const UberShader::pixel_ubershader_uid_data& uid, FormatContext& ctx) const
|
||||
{
|
||||
return fmt::format_to(ctx.out(), "Pixel UberShader for {} texgens{}{}{}", uid.num_texgens,
|
||||
uid.early_depth ? ", early-depth" : "",
|
||||
uid.per_pixel_depth ? ", per-pixel depth" : "",
|
||||
uid.uint_output ? ", uint output" : "");
|
||||
return fmt::format_to(
|
||||
ctx.out(), "Pixel UberShader for {} texgens{}{}{}{}", uid.num_texgens,
|
||||
uid.early_depth ? ", early-depth" : "", uid.per_pixel_depth ? ", per-pixel depth" : "",
|
||||
uid.uint_output ? ", uint output" : "", uid.no_dual_src ? ", no dual-source blending" : "");
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue