Merge pull request #10864 from TellowKrinkle/BetterLogicBlend

VideoCommon: Better logic op invert approximation
This commit is contained in:
OatmealDome 2023-01-31 01:55:55 -05:00 committed by GitHub
commit 0f037a1af8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 139 additions and 33 deletions

View File

@ -744,6 +744,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
bool per_pixel_depth, bool use_dual_source);
static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data);
static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
bool use_dual_source);
static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
@ -1148,6 +1149,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
if (uid_data->logic_op_enable)
WriteLogicOp(out, uid_data);
else if (uid_data->emulate_logic_op_with_blend)
WriteLogicOpBlend(out, uid_data);
// Write the color and alpha values to the framebuffer
// If using shader blend, we still use the separate alpha
@ -1803,6 +1806,29 @@ static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data)
out.Write("\tprev = ({}) & 0xff;\n", logic_op_mode[uid_data->logic_op_mode]);
}
// Emits the shader-side half of the blend-based logic op approximation.
//
// Reads uid_data->logic_op_mode and, for the modes handled below, writes a
// statement that overwrites or inverts `prev` in the generated shader. Every
// other mode (including Copy) emits nothing.
static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
{
  const auto mode = static_cast<LogicOp>(uid_data->logic_op_mode);

  if (mode == LogicOp::Clear || mode == LogicOp::NoOp)
  {
    // Force the shader output to all zeroes.
    out.Write("\tprev = int4(0, 0, 0, 0);\n");
  }
  else if (mode == LogicOp::CopyInverted)
  {
    // Flip the low 8 bits of each component of the shader output.
    out.Write("\tprev ^= 255;\n");
  }
  else if (mode == LogicOp::Set || mode == LogicOp::Invert)
  {
    // Force the shader output to all 255. For Invert this works in cooperation
    // with the blend state.
    out.Write("\tprev = int4(255, 255, 255, 255);\n");
  }
  // Copy and all remaining modes: nothing to emit here.
}
static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
bool use_dual_source)
{

View File

@ -58,8 +58,9 @@ struct pixel_shader_uid_data
DstBlendFactor blend_dst_factor_alpha : 3; // Only used with shader_framebuffer_fetch blend
u32 blend_subtract : 1; // Only used with shader_framebuffer_fetch blend
u32 blend_subtract_alpha : 1; // Only used with shader_framebuffer_fetch blend
u32 emulate_logic_op_with_blend : 1; // Only used with logic op blend emulation
u32 logic_op_enable : 1; // Only used with shader_framebuffer_fetch logic ops
u32 logic_op_mode : 4; // Only used with shader_framebuffer_fetch logic ops
u32 logic_op_mode : 4; // Only used with shader_framebuffer_fetch logic ops and blend emulation
u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;

View File

@ -185,6 +185,7 @@ void BlendingState::ApproximateLogicOpWithBlending()
{
struct LogicOpApproximation
{
bool blendEnable;
bool subtract;
SrcBlendFactor srcfactor;
DstBlendFactor dstfactor;
@ -193,31 +194,69 @@ void BlendingState::ApproximateLogicOpWithBlending()
// but INVSRCCLR and INVDSTCLR were also aliased and were mixed.
// Thus, NOR, EQUIV, INVERT, COPY_INVERTED, and OR_INVERTED duplicate(d) other values.
static constexpr std::array<LogicOpApproximation, 16> approximations = {{
{false, SrcBlendFactor::Zero, DstBlendFactor::Zero}, // CLEAR
{false, SrcBlendFactor::DstClr, DstBlendFactor::Zero}, // AND
{true, SrcBlendFactor::One, DstBlendFactor::InvSrcClr}, // AND_REVERSE
{false, SrcBlendFactor::One, DstBlendFactor::Zero}, // COPY
{true, SrcBlendFactor::DstClr, DstBlendFactor::One}, // AND_INVERTED
{false, SrcBlendFactor::Zero, DstBlendFactor::One}, // NOOP
{false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr}, // XOR
{false, SrcBlendFactor::InvDstClr, DstBlendFactor::One}, // OR
{false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr}, // NOR
{false, SrcBlendFactor::InvDstClr, DstBlendFactor::Zero}, // EQUIV
{false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr}, // INVERT
{false, SrcBlendFactor::One, DstBlendFactor::InvDstAlpha}, // OR_REVERSE
{false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr}, // COPY_INVERTED
{false, SrcBlendFactor::InvDstClr, DstBlendFactor::One}, // OR_INVERTED
{false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr}, // NAND
{false, SrcBlendFactor::One, DstBlendFactor::One}, // SET
// clang-format off
{false, false, SrcBlendFactor::One, DstBlendFactor::Zero}, // CLEAR (Shader outputs 0)
{true, false, SrcBlendFactor::DstClr, DstBlendFactor::Zero}, // AND
{true, true, SrcBlendFactor::One, DstBlendFactor::InvSrcClr}, // AND_REVERSE
{false, false, SrcBlendFactor::One, DstBlendFactor::Zero}, // COPY
{true, true, SrcBlendFactor::DstClr, DstBlendFactor::One}, // AND_INVERTED
{true, false, SrcBlendFactor::Zero, DstBlendFactor::One}, // NOOP
{true, false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr}, // XOR
{true, false, SrcBlendFactor::InvDstClr, DstBlendFactor::One}, // OR
{true, false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr}, // NOR
{true, false, SrcBlendFactor::InvDstClr, DstBlendFactor::Zero}, // EQUIV
{true, false, SrcBlendFactor::InvDstClr, DstBlendFactor::Zero}, // INVERT (Shader outputs 255)
{true, false, SrcBlendFactor::One, DstBlendFactor::InvDstAlpha}, // OR_REVERSE
{false, false, SrcBlendFactor::One, DstBlendFactor::Zero}, // COPY_INVERTED (Shader inverts)
{true, false, SrcBlendFactor::InvDstClr, DstBlendFactor::One}, // OR_INVERTED
{true, false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr}, // NAND
{false, false, SrcBlendFactor::One, DstBlendFactor::Zero}, // SET (Shader outputs 255)
// clang-format on
}};
logicopenable = false;
blendenable = true;
subtract = approximations[u32(logicmode.Value())].subtract;
srcfactor = approximations[u32(logicmode.Value())].srcfactor;
srcfactoralpha = approximations[u32(logicmode.Value())].srcfactor;
dstfactor = approximations[u32(logicmode.Value())].dstfactor;
dstfactoralpha = approximations[u32(logicmode.Value())].dstfactor;
usedualsrc = false;
const LogicOpApproximation& approximation = approximations[static_cast<u32>(logicmode.Value())];
if (approximation.blendEnable)
{
blendenable = true;
subtract = approximation.subtract;
srcfactor = approximation.srcfactor;
srcfactoralpha = approximation.srcfactor;
dstfactor = approximation.dstfactor;
dstfactoralpha = approximation.dstfactor;
}
}
bool BlendingState::LogicOpApproximationIsExact()
{
switch (logicmode.Value())
{
case LogicOp::Clear:
case LogicOp::Set:
case LogicOp::NoOp:
case LogicOp::Invert:
case LogicOp::CopyInverted:
case LogicOp::Copy:
return true;
default:
return false;
}
}
bool BlendingState::LogicOpApproximationWantsShaderHelp()
{
switch (logicmode.Value())
{
case LogicOp::Clear:
case LogicOp::Set:
case LogicOp::NoOp:
case LogicOp::Invert:
case LogicOp::CopyInverted:
return true;
default:
return false;
}
}
void SamplerState::Generate(const BPMemory& bp, u32 index)

View File

@ -109,6 +109,8 @@ union BlendingState
// HACK: Replaces logical operations with blend operations.
// Will not be bit-correct, and in some cases not even remotely in the same ballpark.
void ApproximateLogicOpWithBlending();
bool LogicOpApproximationIsExact();
bool LogicOpApproximationWantsShaderHelp();
BlendingState() = default;
BlendingState(const BlendingState&) = default;

View File

@ -601,16 +601,6 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig(
config.depth_state = depth_state;
config.blending_state = blending_state;
config.framebuffer_state = g_framebuffer_manager->GetEFBFramebufferState();
// We can use framebuffer fetch to emulate logic ops in the fragment shader.
if (config.blending_state.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp &&
!g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
{
WARN_LOG_FMT(VIDEO,
"Approximating logic op with blending, this will produce incorrect rendering.");
config.blending_state.ApproximateLogicOpWithBlending();
}
return config;
}
@ -628,6 +618,22 @@ static GXPipelineUid ApplyDriverBugs(const GXPipelineUid& in)
ps->ztest = EmulatedZ::Early;
}
// If framebuffer fetch is available, we can emulate logic ops in the fragment shader
// and don't need the below blend approximation
if (blend.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp &&
!g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
{
if (!blend.LogicOpApproximationIsExact())
WARN_LOG_FMT(VIDEO,
"Approximating logic op with blending, this will produce incorrect rendering.");
if (blend.LogicOpApproximationWantsShaderHelp())
{
ps->emulate_logic_op_with_blend = true;
ps->logic_op_mode = static_cast<u32>(blend.logicmode.Value());
}
blend.ApproximateLogicOpWithBlending();
}
const bool benefits_from_ps_dual_source_off =
(!g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
g_ActiveConfig.backend_info.bSupportsFramebufferFetch) ||
@ -775,6 +781,18 @@ static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in)
memcpy(&out, &in, sizeof(out)); // Copy padding
if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader)
out.vertex_format = nullptr;
// If framebuffer fetch is available, we can emulate logic ops in the fragment shader
// and don't need the below blend approximation
if (out.blending_state.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp &&
!g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
{
if (!out.blending_state.LogicOpApproximationIsExact())
WARN_LOG_FMT(VIDEO,
"Approximating logic op with blending, this will produce incorrect rendering.");
out.blending_state.ApproximateLogicOpWithBlending();
}
if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
{
// Always blend in shader

View File

@ -1092,6 +1092,26 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" TevResult &= 0xff;\n"
" }}\n");
}
else if (!host_config.backend_logic_op)
{
out.Write(" // Helpers for logic op blending approximations\n"
" if (logic_op_enable) {{\n"
" switch (logic_op_mode) {{\n");
out.Write(" case {}: // Clear\n", static_cast<u32>(LogicOp::Clear));
out.Write(" TevResult = int4(0, 0, 0, 0);\n"
" break;\n");
out.Write(" case {}: // Copy Inverted\n", static_cast<u32>(LogicOp::CopyInverted));
out.Write(" TevResult ^= 0xff;\n"
" break;\n");
out.Write(" case {}: // Set\n", static_cast<u32>(LogicOp::Set));
out.Write(" case {}: // Invert\n", static_cast<u32>(LogicOp::Invert));
out.Write(" TevResult = int4(255, 255, 255, 255);\n"
" break;\n");
out.Write(" default:\n"
" break;\n"
" }}\n"
" }}\n");
}
// Some backends require that the shader outputs be uint when writing to a uint render target for
// logic op.