GPU/HW: Use dual-source blend to split alpha and mask

This commit is contained in:
Connor McLaughlin 2019-11-24 23:23:33 +10:00
parent 9d6d00480c
commit bc5a247a4b
5 changed files with 62 additions and 14 deletions

View File

@ -153,6 +153,7 @@ protected:
u32 m_resolution_scale = 1;
u32 m_max_resolution_scale = 1;
bool m_true_color = false;
bool m_supports_dual_source_blend = false;
BatchConfig m_batch = {};
BatchUBOData m_batch_ubo_data = {};

View File

@ -144,6 +144,8 @@ void GPU_HW_D3D11::SetCapabilities()
m_max_resolution_scale = max_texture_scale;
Log_InfoPrintf("Maximum resolution scale is %u", m_max_resolution_scale);
m_supports_dual_source_blend = true;
}
bool GPU_HW_D3D11::CreateFramebuffer()
@ -237,7 +239,8 @@ bool GPU_HW_D3D11::CreateBatchInputLayout()
{"ATTR", 3, DXGI_FORMAT_R32_SINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
// we need a vertex shader...
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color);
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color,
m_supports_dual_source_blend);
ComPtr<ID3DBlob> vs_bytecode = D3D11::ShaderCompiler::CompileShader(
D3D11::ShaderCompiler::Type::Vertex, m_device->GetFeatureLevel(), shadergen.GenerateBatchVertexShader(true), false);
if (!vs_bytecode)
@ -295,7 +298,7 @@ bool GPU_HW_D3D11::CreateStateObjects()
{
bl_desc.RenderTarget[0].BlendEnable = TRUE;
bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE;
bl_desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC_ALPHA;
bl_desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC1_ALPHA;
bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
bl_desc.RenderTarget[0].BlendOp =
@ -315,7 +318,8 @@ bool GPU_HW_D3D11::CreateStateObjects()
bool GPU_HW_D3D11::CompileShaders()
{
const bool debug = true;
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color);
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color,
m_supports_dual_source_blend);
m_screen_quad_vertex_shader = D3D11::ShaderCompiler::CompileAndCreateVertexShader(
m_device.Get(), shadergen.GenerateScreenQuadVertexShader(), debug);

View File

@ -144,6 +144,12 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display)
{
Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower.");
}
int max_dual_source_draw_buffers = 0;
glGetIntegerv(GL_MAX_DUAL_SOURCE_DRAW_BUFFERS, &max_dual_source_draw_buffers);
m_supports_dual_source_blend = (max_dual_source_draw_buffers > 0);
if (!m_supports_dual_source_blend)
Log_WarningPrintf("Dual-source blending is not supported, this may break some mask effects.");
}
void GPU_HW_OpenGL::CreateFramebuffer()
@ -256,7 +262,8 @@ void GPU_HW_OpenGL::CreateTextureBuffer()
bool GPU_HW_OpenGL::CompilePrograms()
{
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color);
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color,
m_supports_dual_source_blend);
for (u32 render_mode = 0; render_mode < 4; render_mode++)
{
@ -310,7 +317,17 @@ bool GPU_HW_OpenGL::CompilePrograms()
return false;
if (!m_is_gles)
prog.BindFragData(0, "o_col0");
{
if (m_supports_dual_source_blend)
{
prog.BindFragDataIndexed(0, "o_col0");
prog.BindFragDataIndexed(1, "o_col1");
}
else
{
prog.BindFragData(0, "o_col0");
}
}
if (!prog.Link())
return false;
@ -381,7 +398,7 @@ void GPU_HW_OpenGL::SetDrawState(BatchRenderMode render_mode)
glBlendEquationSeparate(
m_batch.transparency_mode == TransparencyMode::BackgroundMinusForeground ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD,
GL_FUNC_ADD);
glBlendFuncSeparate(GL_ONE, GL_SRC_ALPHA, GL_ONE, GL_ZERO);
glBlendFuncSeparate(GL_ONE, m_supports_dual_source_blend ? GL_SRC1_ALPHA : GL_SRC_ALPHA, GL_ONE, GL_ZERO);
}
if (m_drawing_area_changed)

View File

@ -1,9 +1,10 @@
#include "gpu_hw_shadergen.h"
#include <glad.h>
GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color)
GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color,
bool supports_dual_source_blend)
: m_render_api(render_api), m_resolution_scale(resolution_scale), m_true_color(true_color),
m_glsl(render_api != HostDisplay::RenderAPI::D3D11)
m_glsl(render_api != HostDisplay::RenderAPI::D3D11), m_supports_dual_source_blend(supports_dual_source_blend)
{
}
@ -342,6 +343,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
const GPU::TextureMode actual_texture_mode = texture_mode & ~GPU::TextureMode::RawTextureBit;
const bool raw_texture = (texture_mode & GPU::TextureMode::RawTextureBit) == GPU::TextureMode::RawTextureBit;
const bool textured = (texture_mode != GPU::TextureMode::Disabled);
const bool use_dual_source =
m_supports_dual_source_blend && transparency != GPU_HW::BatchRenderMode::TransparencyDisabled;
std::stringstream ss;
WriteHeader(ss);
@ -357,6 +360,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "RAW_TEXTURE", raw_texture);
DefineMacro(ss, "DITHERING", dithering);
DefineMacro(ss, "TRUE_COLOR", m_true_color);
DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source);
WriteCommonFunctions(ss);
WriteBatchUniformBuffer(ss);
@ -451,11 +455,11 @@ int4 SampleFromVRAM(int4 texpage, float2 coord)
if (textured)
{
DeclareFragmentEntryPoint(ss, 1, 1, {"nointerpolation in int4 v_texpage"}, true, false);
DeclareFragmentEntryPoint(ss, 1, 1, {"nointerpolation in int4 v_texpage"}, true, use_dual_source);
}
else
{
DeclareFragmentEntryPoint(ss, 1, 0, {}, true, false);
DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source);
}
ss << R"(
@ -500,6 +504,9 @@ int4 SampleFromVRAM(int4 texpage, float2 coord)
icolor = TruncateTo15Bit(icolor);
#endif
// Compute output alpha (mask bit)
float output_alpha = float(semitransparent);
// Normalize
float3 color = float3(icolor) / float3(255.0, 255.0, 255.0);
@ -510,17 +517,34 @@ int4 SampleFromVRAM(int4 texpage, float2 coord)
#if TRANSPARENCY_ONLY_OPAQUE
discard;
#endif
o_col0 = float4(color * u_src_alpha_factor, u_dst_alpha_factor);
#if USE_DUAL_SOURCE
o_col0 = float4(color * u_src_alpha_factor, output_alpha);
o_col1 = float4(0.0, 0.0, 0.0, u_dst_alpha_factor);
#else
o_col0 = float4(color * u_src_alpha_factor, u_dst_alpha_factor);
#endif
}
else
{
#if TRANSPARENCY_ONLY_TRANSPARENCY
discard;
#endif
o_col0 = float4(color, 0.0);
#if USE_DUAL_SOURCE
o_col0 = float4(color, output_alpha);
o_col1 = float4(0.0, 0.0, 0.0, 0.0);
#else
o_col0 = float4(color, 0.0);
#endif
}
#else
o_col0 = float4(color, 0.0);
// Non-transparency won't enable blending so we can write the mask here regardless.
o_col0 = float4(color, output_alpha);
#if USE_DUAL_SOURCE
o_col1 = float4(0.0, 0.0, 0.0, 0.0);
#endif
#endif
}
)";

View File

@ -7,7 +7,8 @@
class GPU_HW_ShaderGen
{
public:
GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color);
// Constructs a shader generator for the given render API, resolution scale and colour mode.
// supports_dual_source_blend is stored in m_supports_dual_source_blend; the parameter name
// is spelled to match the constructor definition.
GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color,
bool supports_dual_source_blend);
~GPU_HW_ShaderGen();
std::string GenerateBatchVertexShader(bool textured);
@ -26,6 +27,7 @@ public:
bool m_true_color;
bool m_glsl;
bool m_glsl_es;
bool m_supports_dual_source_blend;
private:
void WriteHeader(std::stringstream& ss);