D3D / VideoCommon: generate HLSL from SPIRV

This commit is contained in:
iwubcode 2022-05-04 00:41:34 -05:00
parent 3790c99a7d
commit 5dd2704416
20 changed files with 780 additions and 1121 deletions

View File

@ -106,7 +106,8 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl)
const AttributeFormat* format = &vtx_decl.position;
if (format->enable)
{
m_elems[m_num_elems].SemanticName = "POSITION";
m_elems[m_num_elems].SemanticName = "TEXCOORD";
m_elems[m_num_elems].SemanticIndex = SHADER_POSITION_ATTRIB;
m_elems[m_num_elems].AlignedByteOffset = format->offset;
m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer);
m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
@ -115,12 +116,11 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl)
for (int i = 0; i < 3; i++)
{
static constexpr std::array<const char*, 3> NAMES = {"NORMAL", "TANGENT", "BINORMAL"};
format = &vtx_decl.normals[i];
if (format->enable)
{
m_elems[m_num_elems].SemanticName = NAMES[i];
m_elems[m_num_elems].SemanticIndex = 0;
m_elems[m_num_elems].SemanticName = "TEXCOORD";
m_elems[m_num_elems].SemanticIndex = SHADER_NORMAL_ATTRIB + i;
m_elems[m_num_elems].AlignedByteOffset = format->offset;
m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer);
m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
@ -133,8 +133,8 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl)
format = &vtx_decl.colors[i];
if (format->enable)
{
m_elems[m_num_elems].SemanticName = "COLOR";
m_elems[m_num_elems].SemanticIndex = i;
m_elems[m_num_elems].SemanticName = "TEXCOORD";
m_elems[m_num_elems].SemanticIndex = SHADER_COLOR0_ATTRIB + i;
m_elems[m_num_elems].AlignedByteOffset = format->offset;
m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer);
m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
@ -148,7 +148,7 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl)
if (format->enable)
{
m_elems[m_num_elems].SemanticName = "TEXCOORD";
m_elems[m_num_elems].SemanticIndex = i;
m_elems[m_num_elems].SemanticIndex = SHADER_TEXTURE0_ATTRIB + i;
m_elems[m_num_elems].AlignedByteOffset = format->offset;
m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer);
m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
@ -159,7 +159,8 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl)
format = &vtx_decl.posmtx;
if (format->enable)
{
m_elems[m_num_elems].SemanticName = "BLENDINDICES";
m_elems[m_num_elems].SemanticName = "TEXCOORD";
m_elems[m_num_elems].SemanticIndex = SHADER_POSMTX_ATTRIB;
m_elems[m_num_elems].AlignedByteOffset = format->offset;
m_elems[m_num_elems].Format = VarToD3D(format->type, format->components, format->integer);
m_elems[m_num_elems].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;

View File

@ -76,6 +76,7 @@ void StateManager::Apply()
if (m_current.vertexConstants != m_pending.vertexConstants)
{
D3D::context->VSSetConstantBuffers(0, 1, &m_pending.vertexConstants);
D3D::context->VSSetConstantBuffers(1, 1, &m_pending.vertexConstants);
m_current.vertexConstants = m_pending.vertexConstants;
}

View File

@ -578,6 +578,8 @@ bool Renderer::ApplyState()
{
cmdlist->SetGraphicsRootConstantBufferView(ROOT_PARAMETER_VS_CBV,
m_state.constant_buffers[1]);
cmdlist->SetGraphicsRootConstantBufferView(ROOT_PARAMETER_VS_CBV2,
m_state.constant_buffers[1]);
if (g_ActiveConfig.bEnablePixelLighting)
{

View File

@ -323,7 +323,7 @@ bool DXContext::CreateRootSignatures()
bool DXContext::CreateGXRootSignature()
{
// GX:
// - 3 constant buffers (bindings 0-2), 0/1 visible in PS, 1 visible in VS, 2 visible in GS.
// - 3 constant buffers (bindings 0-2), 0/1 visible in PS, 2 visible in VS, 1 visible in GS.
// - 8 textures (visible in PS).
// - 8 samplers (visible in PS).
// - 1 UAV (visible in PS).
@ -341,6 +341,8 @@ bool DXContext::CreateGXRootSignature()
param_count++;
SetRootParamCBV(&params[param_count], 0, D3D12_SHADER_VISIBILITY_VERTEX);
param_count++;
SetRootParamCBV(&params[param_count], 1, D3D12_SHADER_VISIBILITY_VERTEX);
param_count++;
SetRootParamCBV(&params[param_count], 0, D3D12_SHADER_VISIBILITY_GEOMETRY);
param_count++;

View File

@ -25,6 +25,7 @@ enum ROOT_PARAMETER
ROOT_PARAMETER_PS_SRV,
ROOT_PARAMETER_PS_SAMPLERS,
ROOT_PARAMETER_VS_CBV,
ROOT_PARAMETER_VS_CBV2,
ROOT_PARAMETER_GS_CBV,
ROOT_PARAMETER_PS_UAV_OR_CBV2,
ROOT_PARAMETER_PS_CBV2, // ROOT_PARAMETER_PS_UAV_OR_CBV2 if bbox is not enabled

View File

@ -83,7 +83,7 @@ void DXVertexFormat::MapAttributes()
if (m_decl.position.enable)
{
AddAttribute(
"POSITION", 0, 0,
"TEXCOORD", SHADER_POSITION_ATTRIB, 0,
VarToDXGIFormat(m_decl.position.type, m_decl.position.components, m_decl.position.integer),
m_decl.position.offset);
}
@ -92,8 +92,7 @@ void DXVertexFormat::MapAttributes()
{
if (m_decl.normals[i].enable)
{
static constexpr std::array<const char*, 3> NAMES = {"NORMAL", "TANGENT", "BINORMAL"};
AddAttribute(NAMES[i], 0, 0,
AddAttribute("TEXCOORD", SHADER_NORMAL_ATTRIB + i, 0,
VarToDXGIFormat(m_decl.normals[i].type, m_decl.normals[i].components,
m_decl.normals[i].integer),
m_decl.normals[i].offset);
@ -104,7 +103,7 @@ void DXVertexFormat::MapAttributes()
{
if (m_decl.colors[i].enable)
{
AddAttribute("COLOR", i, 0,
AddAttribute("TEXCOORD", SHADER_COLOR0_ATTRIB + i, 0,
VarToDXGIFormat(m_decl.colors[i].type, m_decl.colors[i].components,
m_decl.colors[i].integer),
m_decl.colors[i].offset);
@ -115,7 +114,7 @@ void DXVertexFormat::MapAttributes()
{
if (m_decl.texcoords[i].enable)
{
AddAttribute("TEXCOORD", i, 0,
AddAttribute("TEXCOORD", SHADER_TEXTURE0_ATTRIB + i, 0,
VarToDXGIFormat(m_decl.texcoords[i].type, m_decl.texcoords[i].components,
m_decl.texcoords[i].integer),
m_decl.texcoords[i].offset);
@ -125,7 +124,7 @@ void DXVertexFormat::MapAttributes()
if (m_decl.posmtx.enable)
{
AddAttribute(
"BLENDINDICES", 0, 0,
"TEXCOORD", SHADER_POSMTX_ATTRIB, 0,
VarToDXGIFormat(m_decl.posmtx.type, m_decl.posmtx.components, m_decl.posmtx.integer),
m_decl.posmtx.offset);
}

View File

@ -11,6 +11,7 @@ target_link_libraries(videod3dcommon
PUBLIC
common
videocommon
spirv_cross
)
if(MSVC)

View File

@ -4,7 +4,13 @@
#include "VideoBackends/D3DCommon/Shader.h"
#include <fstream>
#include <optional>
#include <string_view>
#include <fmt/format.h>
#include <wrl/client.h>
#include "disassemble.h"
#include "spirv_hlsl.hpp"
#include "Common/Assert.h"
#include "Common/FileUtil.h"
@ -14,9 +20,141 @@
#include "Common/StringUtil.h"
#include "Common/Version.h"
#include "VideoCommon/Spirv.h"
#include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoConfig.h"
namespace
{
// GLSL preamble that GetSpirv() prepends to vertex and pixel shader source
// before it is compiled to SPIR-V. It pins the GLSL version, defines the
// binding/location macros used by the generated shaders, aliases HLSL-style
// vector type and intrinsic names (float4, lerp, frac, ...) to their GLSL
// equivalents, and defines API_D3D.
//
// Regarding the UBO bind points, we subtract one from the binding index because
// the OpenGL backend requires UBO #0 for non-block uniforms (at least on NV).
// This allows us to share the same shaders but use bind point #0 in the D3D
// backends. None of the specific shaders use UBOs, instead they use push
// constants, so when/if the GL backend moves to uniform blocks completely this
// subtraction can be removed.
//
// NOTE(review): SSBO_BINDING adds 2 to the requested index; presumably this
// keeps SSBO 0 (the bounding box buffer) on the same slot the hand-written
// HLSL used for it - confirm against the backend's UAV binding.
constexpr std::string_view SHADER_HEADER = R"(
// Target GLSL 4.5.
#version 450 core
#define ATTRIBUTE_LOCATION(x) layout(location = x)
#define FRAGMENT_OUTPUT_LOCATION(x) layout(location = x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y) layout(location = x, index = y)
#define UBO_BINDING(packing, x) layout(packing, binding = (x - 1))
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
#define SSBO_BINDING(x) layout(binding = (x + 2))
#define VARYING_LOCATION(x) layout(location = x)
#define FORCE_EARLY_Z layout(early_fragment_tests) in
// hlsl to glsl function translation
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix
#define API_D3D 1
)";
// GLSL preamble that GetSpirv() prepends to compute shader source before it is
// compiled to SPIR-V. Compared to SHADER_HEADER it additionally defines
// IMAGE_BINDING and leaves out the attribute, varying, fragment-output, SSBO
// and early-Z macros; the UBO/sampler/texel-buffer bindings and the
// HLSL-to-GLSL name aliases are the same.
constexpr std::string_view COMPUTE_SHADER_HEADER = R"(
// Target GLSL 4.5.
#version 450 core
// All resources are packed into one descriptor set for compute.
#define UBO_BINDING(packing, x) layout(packing, binding = (x - 1))
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)
// hlsl to glsl function translation
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix
#define API_D3D 1
)";
// Translates a SPIR-V module into HLSL source text via SPIRV-Cross.
//
// The HLSL shader model is derived from the device feature level:
// 10_0 selects 40, 10_1 selects 41, and every other level selects 50.
//
// NOTE(review): this function never produces std::nullopt itself; how
// SPIRV-Cross reports a failed translation is not visible from here.
std::optional<std::string> GetHLSLFromSPIRV(SPIRV::CodeVector spv, D3D_FEATURE_LEVEL feature_level)
{
  spirv_cross::CompilerHLSL::Options hlsl_options;
  if (feature_level == D3D_FEATURE_LEVEL_10_0)
    hlsl_options.shader_model = 40;
  else if (feature_level == D3D_FEATURE_LEVEL_10_1)
    hlsl_options.shader_model = 41;
  else
    hlsl_options.shader_model = 50;

  // The compiler takes ownership of the SPIR-V words.
  spirv_cross::CompilerHLSL cross_compiler(std::move(spv));
  cross_compiler.set_hlsl_options(hlsl_options);
  return cross_compiler.compile();
}
// Compiles shader source for the given stage to SPIR-V.
//
// The stage-appropriate GLSL preamble (SHADER_HEADER for vertex/pixel,
// COMPUTE_SHADER_HEADER for compute) is prepended to `source` before it is
// handed to the matching SPIRV::Compile*Shader function, whose result is
// returned as-is. Geometry shaders, and any stage value not handled below,
// yield std::nullopt.
std::optional<SPIRV::CodeVector> GetSpirv(ShaderStage stage, std::string_view source)
{
  // Spirv cross does not currently support hlsl geometry shaders
  if (stage == ShaderStage::Geometry)
    return std::nullopt;

  if (stage == ShaderStage::Vertex)
  {
    const auto glsl = fmt::format("{}{}", SHADER_HEADER, source);
    return SPIRV::CompileVertexShader(glsl);
  }

  if (stage == ShaderStage::Pixel)
  {
    const auto glsl = fmt::format("{}{}", SHADER_HEADER, source);
    return SPIRV::CompileFragmentShader(glsl);
  }

  if (stage == ShaderStage::Compute)
  {
    const auto glsl = fmt::format("{}{}", COMPUTE_SHADER_HEADER, source);
    return SPIRV::CompileComputeShader(glsl);
  }

  return std::nullopt;
}
// Produces the HLSL text that is passed to the D3D compiler for `stage`.
//
// Geometry shader source is returned unchanged, since it is not routed through
// SPIR-V. Every other stage is first compiled to SPIR-V and then translated to
// HLSL for the requested feature level.
//
// Returns std::nullopt when SPIR-V generation fails.
std::optional<std::string> GetHLSL(D3D_FEATURE_LEVEL feature_level, ShaderStage stage,
                                   std::string_view source)
{
  if (stage == ShaderStage::Geometry)
    return std::string{source};

  // Intentionally not const: std::move applied to the contents of a const
  // optional selects the copy constructor, which would duplicate the whole
  // SPIR-V code vector instead of handing it over.
  auto spirv = GetSpirv(stage, source);
  if (!spirv)
    return std::nullopt;

  return GetHLSLFromSPIRV(std::move(*spirv), feature_level);
}
} // namespace
namespace D3DCommon
{
Shader::Shader(ShaderStage stage, BinaryData bytecode)
@ -95,6 +233,10 @@ static const char* GetCompileTarget(D3D_FEATURE_LEVEL feature_level, ShaderStage
std::optional<Shader::BinaryData> Shader::CompileShader(D3D_FEATURE_LEVEL feature_level,
ShaderStage stage, std::string_view source)
{
const auto hlsl = GetHLSL(feature_level, stage, source);
if (!hlsl)
return std::nullopt;
static constexpr D3D_SHADER_MACRO macros[] = {{"API_D3D", "1"}, {nullptr, nullptr}};
const UINT flags = g_ActiveConfig.bEnableValidationLayer ?
(D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION) :
@ -103,7 +245,7 @@ std::optional<Shader::BinaryData> Shader::CompileShader(D3D_FEATURE_LEVEL featur
Microsoft::WRL::ComPtr<ID3DBlob> code;
Microsoft::WRL::ComPtr<ID3DBlob> errors;
HRESULT hr = d3d_compile(source.data(), source.size(), nullptr, macros, nullptr, "main", target,
HRESULT hr = d3d_compile(hlsl->data(), hlsl->size(), nullptr, macros, nullptr, "main", target,
flags, 0, &code, &errors);
if (FAILED(hr))
{
@ -111,12 +253,20 @@ std::optional<Shader::BinaryData> Shader::CompileShader(D3D_FEATURE_LEVEL featur
std::string filename = VideoBackendBase::BadShaderFilename(target, num_failures++);
std::ofstream file;
File::OpenFStream(file, filename, std::ios_base::out);
file.write(source.data(), source.size());
file.write(hlsl->data(), hlsl->size());
file << "\n";
file.write(static_cast<const char*>(errors->GetBufferPointer()), errors->GetBufferSize());
file << "\n";
file << "Dolphin Version: " + Common::GetScmRevStr() + "\n";
file << "Video Backend: " + g_video_backend->GetDisplayName();
if (const auto spirv = GetSpirv(stage, source))
{
file << "\nOriginal Source: \n";
file << source << std::endl;
file << "SPIRV: \n";
spv::Disassemble(file, *spirv);
}
file.close();
PanicAlertFmt("Failed to compile {}: {}\nDebug info ({}):\n{}", filename, Common::HRWrap(hr),

View File

@ -25,10 +25,7 @@ APIType GetAPIType()
void EmitUniformBufferDeclaration(ShaderCode& code)
{
if (GetAPIType() == APIType::D3D)
code.Write("cbuffer PSBlock : register(b0)\n");
else
code.Write("UBO_BINDING(std140, 1) uniform PSBlock\n");
code.Write("UBO_BINDING(std140, 1) uniform PSBlock\n");
}
void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
@ -37,17 +34,6 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
switch (GetAPIType())
{
case APIType::D3D:
{
const char* array_type = multisampled ? "Texture2DMSArray<float4>" : "Texture2DArray<float4>";
for (u32 i = start; i < end; i++)
{
code.Write("{} tex{} : register(t{});\n", array_type, i, i);
code.Write("SamplerState samp{} : register(s{});\n", i, i);
}
}
break;
case APIType::OpenGL:
case APIType::Vulkan:
{
@ -69,9 +55,6 @@ void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
code.Write("tex{}.Sample(samp{}, {})", n, n, coords);
break;
case APIType::OpenGL:
case APIType::Vulkan:
code.Write("texture(samp{}, {})", n, coords);
@ -89,9 +72,6 @@ void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
code.Write("tex{}.Load({})", n, coords);
break;
case APIType::OpenGL:
case APIType::Vulkan:
code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords);
@ -109,23 +89,6 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col
switch (GetAPIType())
{
case APIType::D3D:
{
code.Write("void main(");
for (u32 i = 0; i < num_tex_inputs; i++)
code.Write("in float3 rawtex{} : TEXCOORD{}, ", i, i);
for (u32 i = 0; i < num_color_inputs; i++)
code.Write("in float4 rawcolor{} : COLOR{}, ", i, i);
if (position_input)
code.Write("in float4 rawpos : POSITION, ");
code.Write("{}", extra_inputs);
for (u32 i = 0; i < num_tex_outputs; i++)
code.Write("out float3 v_tex{} : TEXCOORD{}, ", i, i);
for (u32 i = 0; i < num_color_outputs; i++)
code.Write("out float4 v_col{} : COLOR{}, ", i, i);
code.Write("out float4 opos : SV_Position)\n");
}
break;
case APIType::OpenGL:
case APIType::Vulkan:
{
@ -175,18 +138,6 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo
switch (GetAPIType())
{
case APIType::D3D:
{
code.Write("void main(");
for (u32 i = 0; i < num_tex_inputs; i++)
code.Write("in float3 v_tex{} : TEXCOORD{}, ", i, i);
for (u32 i = 0; i < num_color_inputs; i++)
code.Write("in float4 v_col{} : COLOR{}, ", i, i);
if (emit_frag_coord)
code.Write("in float4 frag_coord : SV_Position, ");
code.Write("{}out {} ocol0 : SV_Target)\n", extra_vars, output_type);
}
break;
case APIType::OpenGL:
case APIType::Vulkan:
{
@ -225,8 +176,8 @@ std::string GenerateScreenQuadVertexShader()
{
ShaderCode code;
EmitVertexMainDeclaration(code, 0, 0, false, 1, 0,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID\n");
"#define id gl_VertexID\n");
code.Write(
"{{\n"
" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"
@ -251,7 +202,7 @@ std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors)
for (u32 i = 0; i < num_tex; i++)
code.Write(" float3 tex{} : TEXCOORD{};\n", i, i);
for (u32 i = 0; i < num_colors; i++)
code.Write(" float4 color{} : COLOR{};\n", i, i);
code.Write(" float4 color{} : TEXCOORD{};\n", i, i + num_tex);
code.Write(" float4 position : SV_Position;\n"
"}};\n");
@ -260,7 +211,7 @@ std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors)
for (u32 i = 0; i < num_tex; i++)
code.Write(" float3 tex{} : TEXCOORD{};\n", i, i);
for (u32 i = 0; i < num_colors; i++)
code.Write(" float4 color{} : COLOR{};\n", i, i);
code.Write(" float4 color{} : TEXCOORD{};\n", i, i + num_tex);
code.Write(" float4 position : SV_Position;\n"
" uint slice : SV_RenderTargetArrayIndex;\n"
"}};\n\n");
@ -343,8 +294,8 @@ std::string GenerateTextureCopyVertexShader()
"}};\n\n");
EmitVertexMainDeclaration(code, 0, 0, false, 1, 0,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID");
"#define id gl_VertexID");
code.Write("{{\n"
" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"
@ -386,25 +337,15 @@ std::string GenerateResolveDepthPixelShader(u32 samples)
{
ShaderCode code;
EmitSamplerDeclarations(code, 0, 1, true);
EmitPixelMainDeclaration(code, 1, 0, "float",
GetAPIType() == APIType::D3D ? "in float4 ipos : SV_Position, " : "");
EmitPixelMainDeclaration(code, 1, 0, "float", "");
code.Write("{{\n"
" int layer = int(v_tex0.z);\n");
if (GetAPIType() == APIType::D3D)
code.Write(" int3 coords = int3(int2(ipos.xy), layer);\n");
else
code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
// Take the minimum of all depth samples.
if (GetAPIType() == APIType::D3D)
code.Write(" ocol0 = tex0.Load(coords, 0).r;\n");
else
code.Write(" ocol0 = texelFetch(samp0, coords, 0).r;\n");
code.Write(" ocol0 = texelFetch(samp0, coords, 0).r;\n");
code.Write(" for (int i = 1; i < {}; i++)\n", samples);
if (GetAPIType() == APIType::D3D)
code.Write(" ocol0 = min(ocol0, tex0.Load(coords, i).r);\n");
else
code.Write(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n");
code.Write(" ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n");
code.Write("}}\n");
return code.GetBuffer();
@ -420,8 +361,8 @@ std::string GenerateClearVertexShader()
"}};\n");
EmitVertexMainDeclaration(code, 0, 0, false, 0, 1,
GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " :
"#define id gl_VertexID\n");
"#define id gl_VertexID\n");
code.Write(
"{{\n"
" float2 coord = float2(float((id << 1) & 2), float(id & 2));\n"
@ -459,45 +400,29 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp
{
ShaderCode code;
EmitSamplerDeclarations(code, 0, 1, samples > 1);
EmitPixelMainDeclaration(
code, 1, 0, "float4",
GetAPIType() == APIType::D3D ?
(g_ActiveConfig.bSSAA ?
"in float4 ipos : SV_Position, in uint isample : SV_SampleIndex, " :
"in float4 ipos : SV_Position, ") :
"");
EmitPixelMainDeclaration(code, 1, 0, "float4",
"");
code.Write("{{\n"
" int layer = int(v_tex0.z);\n");
if (GetAPIType() == APIType::D3D)
code.Write(" int3 coords = int3(int2(ipos.xy), layer);\n");
else
code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
code.Write(" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n");
if (samples == 1)
{
// No MSAA at all.
if (GetAPIType() == APIType::D3D)
code.Write(" float4 val = tex0.Load(int4(coords, 0));\n");
else
code.Write(" float4 val = texelFetch(samp0, coords, 0);\n");
code.Write(" float4 val = texelFetch(samp0, coords, 0);\n");
}
else if (g_ActiveConfig.bSSAA)
{
// Sample shading, shader runs once per sample
if (GetAPIType() == APIType::D3D)
code.Write(" float4 val = tex0.Load(coords, isample);");
else
code.Write(" float4 val = texelFetch(samp0, coords, gl_SampleID);");
code.Write(" float4 val = texelFetch(samp0, coords, gl_SampleID);");
}
else
{
// MSAA without sample shading, average out all samples.
code.Write(" float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
code.Write(" for (int i = 0; i < {}; i++)\n", samples);
if (GetAPIType() == APIType::D3D)
code.Write(" val += tex0.Load(coords, i);\n");
else
code.Write(" val += texelFetch(samp0, coords, i);\n");
code.Write(" val += texelFetch(samp0, coords, i);\n");
code.Write(" val /= float({});\n", samples);
}
@ -689,13 +614,12 @@ std::string GenerateEFBRestorePixelShader()
{
ShaderCode code;
EmitSamplerDeclarations(code, 0, 2, false);
EmitPixelMainDeclaration(code, 1, 0, "float4",
GetAPIType() == APIType::D3D ? "out float depth : SV_Depth, " : "");
EmitPixelMainDeclaration(code, 1, 0, "float4", "");
code.Write("{{\n"
" ocol0 = ");
EmitSampleTexture(code, 0, "v_tex0");
code.Write(";\n");
code.Write(" {} = ", GetAPIType() == APIType::D3D ? "depth" : "gl_FragDepth");
code.Write(" gl_FragDepth = ");
EmitSampleTexture(code, 1, "v_tex0");
code.Write(".r;\n"
"}}\n");

View File

@ -103,7 +103,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
"}};\n");
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "",
ShaderStage::Geometry);
out.Write("}};\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
@ -113,12 +114,14 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
out.Write("VARYING_LOCATION(0) in VertexData {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, true));
GetInterpolationQualifier(msaa, ssaa, true, true),
ShaderStage::Geometry);
out.Write("}} vs[{}];\n", vertex_in);
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false));
GetInterpolationQualifier(msaa, ssaa, true, false),
ShaderStage::Geometry);
if (stereo)
out.Write("\tflat int layer;\n");
@ -134,6 +137,7 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
if (stereo)
out.Write("\tuint layer : SV_RenderTargetArrayIndex;\n");
out.Write("\tfloat4 posout : SV_Position;\n");
out.Write("}};\n");
@ -344,6 +348,7 @@ static void EmitVertex(ShaderCode& out, const ShaderHostConfig& host_config,
else
{
out.Write("\tps.o = {};\n", vertex);
out.Write("\tps.posout = {}.pos;\n", vertex);
}
if (stereo)

View File

@ -379,23 +379,10 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
"int3 iround(float3 x) {{ return int3(round(x)); }}\n"
"int4 iround(float4 x) {{ return int4(round(x)); }}\n\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n");
}
else // D3D
{
// Declare samplers
out.Write("SamplerState samp[8] : register(s0);\n"
"\n"
"Texture2DArray tex[8] : register(t0);\n");
}
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n");
out.Write("\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n");
else
out.Write("cbuffer PSBlock : register(b0) {{\n");
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n");
out.Write("\tint4 " I_COLORS "[4];\n"
"\tint4 " I_KCOLORS "[4];\n"
@ -445,10 +432,7 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
{
out.Write("{}", s_lighting_struct);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
else
out.Write("cbuffer VSBlock : register(b1) {{\n");
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
out.Write("{}", s_shader_uniforms);
out.Write("}};\n");
@ -456,18 +440,9 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
if (bounding_box)
{
if (api_type == APIType::D3D)
{
out.Write("globallycoherent RWBuffer<int> bbox_data : register(u2);\n"
"#define atomicMin InterlockedMin\n"
"#define atomicMax InterlockedMax");
}
else
{
out.Write("SSBO_BINDING(0) buffer BBox {{\n"
" int bbox_data[4];\n"
"}};");
}
out.Write("SSBO_BINDING(0) buffer BBox {{\n"
" int bbox_data[4];\n"
"}};");
out.Write(R"(
#define bbox_left bbox_data[0]
@ -535,24 +510,12 @@ void UpdateBoundingBox(float2 rawpos) {{
if (host_config.manual_texture_sampling)
{
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write(R"(
out.Write(R"(
int4 readTexture(in sampler2DArray tex, uint u, uint v, int layer, int lod) {{
return iround(texelFetch(tex, int3(u, v, layer), lod) * 255.0);
}}
int4 readTextureLinear(in sampler2DArray tex, uint2 uv1, uint2 uv2, int layer, int lod, int2 frac_uv) {{)");
}
else if (api_type == APIType::D3D)
{
out.Write(R"(
int4 readTexture(in Texture2DArray tex, uint u, uint v, int layer, int lod) {{
return iround(tex.Load(int4(u, v, layer, lod)) * 255.0);
}}
int4 readTextureLinear(in Texture2DArray tex, uint2 uv1, uint2 uv2, int layer, int lod, int2 frac_uv) {{)");
}
out.Write(R"(
int4 result =
@ -621,41 +584,26 @@ uint WrapCoord(int coord, uint wrap, int size) {{
}
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write("\nint4 sampleTexture(uint texmap, in sampler2DArray tex, int2 uv, int layer) {{\n");
}
else if (api_type == APIType::D3D)
{
out.Write("\nint4 sampleTexture(uint texmap, in Texture2DArray tex, in SamplerState tex_samp, "
"int2 uv, int layer) {{\n");
}
out.Write("\nint4 sampleTexture(uint texmap, in sampler2DArray tex, int2 uv, int layer) {{\n");
if (!host_config.manual_texture_sampling)
{
out.Write(" float size_s = float(" I_TEXDIMS "[texmap].x * 128);\n"
" float size_t = float(" I_TEXDIMS "[texmap].y * 128);\n"
" float3 coords = float3(float(uv.x) / size_s, float(uv.y) / size_t, layer);\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
if (!host_config.backend_sampler_lod_bias)
{
if (!host_config.backend_sampler_lod_bias)
{
out.Write(" uint texmode0 = samp_texmode0(texmap);\n"
" float lod_bias = float({}) / 256.0f;\n"
" return iround(255.0 * texture(tex, coords, lod_bias));\n",
BitfieldExtract<&SamplerState::TM0::lod_bias>("texmode0"));
}
else
{
out.Write(" return iround(255.0 * texture(tex, coords));\n");
}
out.Write(" uint texmode0 = samp_texmode0(texmap);\n"
" float lod_bias = float({}) / 256.0f;\n"
" return iround(255.0 * texture(tex, coords, lod_bias));\n",
BitfieldExtract<&SamplerState::TM0::lod_bias>("texmode0"));
}
else
{
out.Write(" return iround(255.0 * texture(tex, coords));\n");
}
out.Write("}}\n");
}
else if (api_type == APIType::D3D)
{
out.Write(" return iround(255.0 * tex.Sample(tex_samp, coords));\n}}\n");
}
out.Write("}}\n");
}
else
{
@ -694,31 +642,20 @@ uint WrapCoord(int coord, uint wrap, int size) {{
int native_size_t = )" I_TEXDIMS R"([texmap].y;
)");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write(R"(
out.Write(R"(
int3 size = textureSize(tex, 0);
int size_s = size.x;
int size_t = size.y;
)");
if (g_ActiveConfig.backend_info.bSupportsTextureQueryLevels)
{
out.Write(" int number_of_levels = textureQueryLevels(tex);\n");
}
else
{
out.Write(" int number_of_levels = 256; // textureQueryLevels is not supported\n");
ERROR_LOG_FMT(VIDEO, "textureQueryLevels is not supported! Odd graphical results may "
"occur if custom textures are in use!");
}
}
else if (api_type == APIType::D3D)
if (g_ActiveConfig.backend_info.bSupportsTextureQueryLevels)
{
ASSERT(g_ActiveConfig.backend_info.bSupportsTextureQueryLevels);
out.Write(R"(
int size_s, size_t, layers, number_of_levels;
tex.GetDimensions(0, size_s, size_t, layers, number_of_levels);
)");
out.Write(" int number_of_levels = textureQueryLevels(tex);\n");
}
else
{
out.Write(" int number_of_levels = 256; // textureQueryLevels is not supported\n");
ERROR_LOG_FMT(VIDEO, "textureQueryLevels is not supported! Odd graphical results may "
"occur if custom textures are in use!");
}
out.Write(R"(
@ -737,34 +674,23 @@ uint WrapCoord(int coord, uint wrap, int size) {{
)");
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
if (g_ActiveConfig.backend_info.bSupportsCoarseDerivatives)
{
if (g_ActiveConfig.backend_info.bSupportsCoarseDerivatives)
{
// The software renderer uses the equivalent of coarse derivatives, so use them here for
// consistency. This hasn't been hardware tested.
// Note that bSupportsCoarseDerivatives being false only means dFdxCoarse and dFdxFine don't
// exist. The GPU may still implement dFdx using coarse derivatives; we just don't have the
// ability to specifically require it.
out.Write(R"(
// The software renderer uses the equivalent of coarse derivatives, so use them here for
// consistency. This hasn't been hardware tested.
// Note that bSupportsCoarseDerivatives being false only means dFdxCoarse and dFdxFine don't
// exist. The GPU may still implement dFdx using coarse derivatives; we just don't have the
// ability to specifically require it.
out.Write(R"(
float2 uv_delta_x = abs(dFdxCoarse(float2(uv)));
float2 uv_delta_y = abs(dFdyCoarse(float2(uv)));
)");
}
else
{
out.Write(R"(
}
else
{
out.Write(R"(
float2 uv_delta_x = abs(dFdx(float2(uv)));
float2 uv_delta_y = abs(dFdy(float2(uv)));
)");
}
}
else if (api_type == APIType::D3D)
{
ASSERT(g_ActiveConfig.backend_info.bSupportsCoarseDerivatives);
out.Write(R"(
float2 uv_delta_x = abs(ddx_coarse(float2(uv)));
float2 uv_delta_y = abs(ddy_coarse(float2(uv)));
)");
}
@ -869,16 +795,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
WriteBitfieldExtractHeader(out, api_type, host_config);
WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
"sampleTexture(texmap, samp[texmap], uv, layer)\n");
}
else if (api_type == APIType::D3D)
{
out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
"sampleTexture(texmap, tex[texmap], samp[texmap], uv, layer)\n");
}
out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
"sampleTexture(texmap, samp[texmap], uv, layer)\n");
if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ)
{
@ -915,16 +833,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
// all of the
// ARB_image_load_store extension yet.
// D3D11 also has a way to force the driver to enable early-z, so we're fine here.
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
// This is a #define which signals whatever early-z method the driver supports.
out.Write("FORCE_EARLY_Z; \n");
}
else
{
out.Write("[earlydepthstencil]\n");
}
// This is a #define which signals whatever early-z method the driver supports.
out.Write("FORCE_EARLY_Z; \n");
}
// Only use dual-source blending when required on drivers that don't support it very well.
@ -943,166 +853,119 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
use_shader_blend || use_shader_logic_op ||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
#ifdef __APPLE__
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
// if we want to use it.
if (api_type == APIType::Vulkan)
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
// if we want to use it.
if (api_type == APIType::Vulkan)
{
if (use_dual_source)
{
if (use_dual_source)
{
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
}
else
{
// Metal doesn't support a single unified variable for both input and output,
// so when using framebuffer fetch, we declare the input separately below.
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n",
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
}
if (use_framebuffer_fetch)
{
// Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross.
out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n");
}
}
else
#endif
{
bool has_broken_decoration =
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION);
out.Write("{} {} vec4 {};\n",
has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" :
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)",
use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out",
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
if (use_dual_source)
{
out.Write("{} out vec4 ocol1;\n", has_broken_decoration ?
"FRAGMENT_OUTPUT_LOCATION(1)" :
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)");
}
}
if (uid_data->per_pixel_depth)
out.Write("#define depth gl_FragDepth\n");
if (host_config.backend_geometry_shaders)
{
out.Write("VARYING_LOCATION(0) in VertexData {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->genMode_numtexgens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, true));
if (stereo)
out.Write("\tflat int layer;\n");
out.Write("}};\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i)
{
out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
// Metal doesn't support a single unified variable for both input and output,
// so when using framebuffer fetch, we declare the input separately below.
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n",
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
}
out.Write("void main()\n{{\n");
out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
if (use_framebuffer_fetch)
{
// Store off a copy of the initial framebuffer value.
//
// If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the
// framebuffer), we read from real_ocol0.
out.Write("#ifdef FB_FETCH_VALUE\n"
"\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n"
"#else\n"
"\tfloat4 initial_ocol0 = real_ocol0;\n"
"#endif\n");
// QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
// intermediate value with multiple reads & modifications, so we pull out the "real" output
// value above and use a temporary for calculations, then set the output value once at the
// end of the shader.
out.Write("\tfloat4 ocol0;\n");
}
if (use_shader_blend)
{
out.Write("\tfloat4 ocol1;\n");
// Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross.
out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n");
}
}
else // D3D
else
#endif
{
out.Write("void main(\n");
if (uid_data->uint_output)
{
out.Write(" out uint4 ocol0 : SV_Target,\n");
}
else
{
out.Write(" out float4 ocol0 : SV_Target0,\n"
" out float4 ocol1 : SV_Target1,\n");
}
out.Write("{}"
" in float4 rawpos : SV_Position,\n",
uid_data->per_pixel_depth ? " out float depth : SV_Depth,\n" : "");
bool has_broken_decoration =
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION);
out.Write(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa));
out.Write(" in {} float4 colors_1 : COLOR1\n", GetInterpolationQualifier(msaa, ssaa));
out.Write("{} {} {} {};\n",
has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" :
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)",
use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out",
uid_data->uint_output ? "uvec4" : "vec4",
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
// compute window position if needed because binding semantic WPOS is not widely supported
if (use_dual_source)
{
out.Write("{} out {} ocol1;\n",
has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(1)" :
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)",
uid_data->uint_output ? "uvec4" : "vec4");
}
}
if (uid_data->per_pixel_depth)
out.Write("#define depth gl_FragDepth\n");
if (host_config.backend_geometry_shaders)
{
out.Write("VARYING_LOCATION(0) in VertexData {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->genMode_numtexgens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, true), ShaderStage::Pixel);
if (stereo)
out.Write("\tflat int layer;\n");
out.Write("}};\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < uid_data->genMode_numtexgens; ++i)
{
out.Write(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i,
i);
out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.Write(",\n in {} float4 clipPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
uid_data->genMode_numtexgens);
out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.Write(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
uid_data->genMode_numtexgens + 1);
out.Write(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
uid_data->genMode_numtexgens + 2);
out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (host_config.backend_geometry_shaders)
{
out.Write(",\n in float clipDist0 : SV_ClipDistance0\n"
",\n in float clipDist1 : SV_ClipDistance1\n");
}
if (stereo)
out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n");
out.Write(" ) {{\n");
}
out.Write("void main()\n{{\n");
out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
if (use_framebuffer_fetch)
{
// Store off a copy of the initial framebuffer value.
//
// If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the
// framebuffer), we read from real_ocol0.
out.Write("#ifdef FB_FETCH_VALUE\n"
"\tfloat4 initial_ocol0 = FB_FETCH_VALUE;\n"
"#else\n"
"\tfloat4 initial_ocol0 = real_ocol0;\n"
"#endif\n");
// QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
// intermediate value with multiple reads & modifications, so we pull out the "real" output
// value above and use a temporary for calculations, then set the output value once at the
// end of the shader.
out.Write("\tfloat4 ocol0;\n");
}
if (use_shader_blend)
{
out.Write("\tfloat4 ocol1;\n");
}
if (!stereo)
out.Write("\tint layer = 0;\n");

View File

@ -441,10 +441,7 @@ std::string PostProcessing::GetUniformBufferHeader() const
{
std::ostringstream ss;
u32 unused_counter = 1;
if (g_ActiveConfig.backend_info.api_type == APIType::D3D)
ss << "cbuffer PSBlock : register(b0) {\n";
else
ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n";
ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n";
// Builtin uniforms
ss << " float4 resolution;\n";
@ -499,42 +496,20 @@ std::string PostProcessing::GetHeader() const
{
std::ostringstream ss;
ss << GetUniformBufferHeader();
if (g_ActiveConfig.backend_info.api_type == APIType::D3D)
ss << "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n";
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
ss << "Texture2DArray samp0 : register(t0);\n";
ss << "SamplerState samp0_ss : register(s0);\n";
ss << "VARYING_LOCATION(0) in VertexData {\n";
ss << " float3 v_tex0;\n";
ss << "};\n";
}
else
{
ss << "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n";
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
ss << "VARYING_LOCATION(0) in VertexData {\n";
ss << " float3 v_tex0;\n";
ss << "};\n";
}
else
{
ss << "VARYING_LOCATION(0) in float3 v_tex0;\n";
}
ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n";
ss << "VARYING_LOCATION(0) in float3 v_tex0;\n";
}
// Rename main, since we need to set up globals
if (g_ActiveConfig.backend_info.api_type == APIType::D3D)
{
ss << R"(
#define main real_main
static float3 v_tex0;
static float4 ocol0;
// Wrappers for sampling functions.
#define texture(sampler, coords) sampler.Sample(sampler##_ss, coords)
#define textureOffset(sampler, coords, offset) sampler.Sample(sampler##_ss, coords, offset)
)";
}
ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n";
ss << R"(
float4 Sample() { return texture(samp0, v_tex0); }
@ -591,22 +566,7 @@ void SetOutput(float4 color)
// Returns the footer text for the post-processing shader source.
// NOTE(review): presumably appended after the shader body - the call site is not visible here.
std::string PostProcessing::GetFooter() const
{
// D3D: emit the real entry point. It drops the `main` macro, copies the interpolated
// input into v_tex0, runs real_main() and forwards ocol0 to the render target.
if (g_ActiveConfig.backend_info.api_type == APIType::D3D)
{
return R"(
#undef main
void main(in float3 v_tex0_ : TEXCOORD0, out float4 ocol0_ : SV_Target)
{
v_tex0 = v_tex0_;
real_main();
ocol0_ = ocol0;
})";
}
else
{
// No footer is needed for the other APIs.
return {};
}
return {};
}
bool PostProcessing::CompileVertexShader()
@ -614,28 +574,20 @@ bool PostProcessing::CompileVertexShader()
std::ostringstream ss;
ss << GetUniformBufferHeader();
if (g_ActiveConfig.backend_info.api_type == APIType::D3D)
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
ss << "void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n";
ss << " out float4 opos : SV_Position) {\n";
ss << "VARYING_LOCATION(0) out VertexData {\n";
ss << " float3 v_tex0;\n";
ss << "};\n";
}
else
{
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
ss << "VARYING_LOCATION(0) out VertexData {\n";
ss << " float3 v_tex0;\n";
ss << "};\n";
}
else
{
ss << "VARYING_LOCATION(0) out float3 v_tex0;\n";
}
ss << "#define id gl_VertexID\n";
ss << "#define opos gl_Position\n";
ss << "void main() {\n";
ss << "VARYING_LOCATION(0) out float3 v_tex0;\n";
}
ss << "#define id gl_VertexID\n";
ss << "#define opos gl_Position\n";
ss << "void main() {\n";
ss << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n";
ss << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n";
ss << " v_tex0 = float3(src_rect.xy + (src_rect.zw * v_tex0.xy), float(src_layer));\n";

View File

@ -93,20 +93,7 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool
// Emits the shader-side definition of dolphin_isnan() into `out`.
void WriteIsNanHeader(ShaderCode& out, APIType api_type)
{
if (api_type == APIType::D3D)
{
// HLSL variant: a real function that tests the float's bit pattern (see the
// explanation embedded in the emitted shader text below).
out.Write("bool dolphin_isnan(float f) {{\n"
" // Workaround for the HLSL compiler deciding that isnan can never be true and\n"
" // optimising away the call, even though the value can actually be NaN\n"
" // Just look for the bit pattern that indicates NaN instead\n"
" return (asint(f) & 0x7FFFFFFF) > 0x7F800000;\n"
"}}\n\n");
// If isfinite is needed, (asint(f) & 0x7F800000) != 0x7F800000 can be used
}
else
{
// Otherwise dolphin_isnan is just an alias for the built-in isnan().
out.Write("#define dolphin_isnan(f) isnan(f)\n");
}
out.Write("#define dolphin_isnan(f) isnan(f)\n");
}
void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type,
@ -135,14 +122,15 @@ void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type,
static void DefineOutputMember(ShaderCode& object, APIType api_type, std::string_view qualifier,
std::string_view type, std::string_view name, int var_index,
std::string_view semantic = {}, int semantic_index = -1)
ShaderStage stage, std::string_view semantic = {},
int semantic_index = -1)
{
object.Write("\t{} {} {}", qualifier, type, name);
if (var_index != -1)
object.Write("{}", var_index);
if (api_type == APIType::D3D && !semantic.empty())
if (api_type == APIType::D3D && !semantic.empty() && stage == ShaderStage::Geometry)
{
if (semantic_index != -1)
object.Write(" : {}{}", semantic, semantic_index);
@ -154,30 +142,83 @@ static void DefineOutputMember(ShaderCode& object, APIType api_type, std::string
}
// Writes one member declaration per vertex shader output into `object`, each prefixed
// with `qualifier`: pos, colors_0, colors_1 and tex0..tex{texgens-1}, plus clipPos when
// fast depth calculation is disabled, Normal/WorldPos when per-pixel lighting is enabled,
// and clipDist0/clipDist1 when the backend has geometry shaders.
void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens,
const ShaderHostConfig& host_config, std::string_view qualifier)
const ShaderHostConfig& host_config, std::string_view qualifier,
ShaderStage stage)
{
DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "SV_Position");
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, "COLOR", 0);
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, "COLOR", 1);
for (unsigned int i = 0; i < texgens; ++i)
DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, "TEXCOORD", i);
if (!host_config.fast_depth_calc)
DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, "TEXCOORD", texgens);
if (host_config.per_pixel_lighting)
// SPIRV-Cross names all semantics as "TEXCOORD".
// Unfortunately, geometry shaders (which also use this function)
// aren't supported. The output semantic name needs to match
// up with the input semantic name for both the next stage (pixel shader)
// and the previous stage (vertex shader), so
// we need to handle geometry in a special way...
if (api_type == APIType::D3D && stage == ShaderStage::Geometry)
{
DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, "TEXCOORD",
texgens + 1);
DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, "TEXCOORD",
texgens + 2);
DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, stage, "TEXCOORD", 0);
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, stage, "TEXCOORD", 1);
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, stage, "TEXCOORD", 2);
// TEXCOORD0-2 are taken by pos and the two colors above, so the remaining members
// start at index 3; index_offset counts the slots consumed after that.
const unsigned int index_base = 3;
unsigned int index_offset = 0;
if (host_config.backend_geometry_shaders)
{
DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, stage, "TEXCOORD",
index_base + index_offset);
DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, stage, "TEXCOORD",
index_base + index_offset + 1);
index_offset += 2;
}
for (unsigned int i = 0; i < texgens; ++i)
{
DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, stage, "TEXCOORD",
index_base + index_offset + i);
}
index_offset += texgens;
if (!host_config.fast_depth_calc)
{
DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, stage, "TEXCOORD",
index_base + index_offset);
index_offset++;
}
if (host_config.per_pixel_lighting)
{
DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, stage, "TEXCOORD",
index_base + index_offset);
DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, stage, "TEXCOORD",
index_base + index_offset + 1);
index_offset += 2;
}
}
if (host_config.backend_geometry_shaders)
else
{
DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, "SV_ClipDistance", 0);
DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, "SV_ClipDistance", 1);
DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, stage, "SV_Position");
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, stage, "COLOR", 0);
DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, stage, "COLOR", 1);
if (host_config.backend_geometry_shaders)
{
DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 0, stage,
"SV_ClipDistance", 0);
DefineOutputMember(object, api_type, qualifier, "float", "clipDist", 1, stage,
"SV_ClipDistance", 1);
}
for (unsigned int i = 0; i < texgens; ++i)
DefineOutputMember(object, api_type, qualifier, "float3", "tex", i, stage, "TEXCOORD", i);
if (!host_config.fast_depth_calc)
DefineOutputMember(object, api_type, qualifier, "float4", "clipPos", -1, stage, "TEXCOORD",
texgens);
if (host_config.per_pixel_lighting)
{
DefineOutputMember(object, api_type, qualifier, "float3", "Normal", -1, stage, "TEXCOORD",
texgens + 1);
DefineOutputMember(object, api_type, qualifier, "float3", "WorldPos", -1, stage, "TEXCOORD",
texgens + 2);
}
}
}

View File

@ -18,6 +18,7 @@
#include "Common/StringUtil.h"
#include "Common/TypeUtils.h"
#include "VideoCommon/AbstractShader.h"
#include "VideoCommon/VideoCommon.h"
/**
@ -189,7 +190,8 @@ void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config);
void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens,
const ShaderHostConfig& host_config, std::string_view qualifier);
const ShaderHostConfig& host_config, std::string_view qualifier,
ShaderStage stage);
void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_view b, u32 texgens,
const ShaderHostConfig& host_config);
@ -220,57 +222,34 @@ void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable,
const Common::EnumMap<std::string_view, last_member>& values, int indent,
bool break_)
{
const bool make_switch = (ApiType == APIType::D3D);
// The second template argument is needed to avoid compile errors from ambiguity with multiple
// enums with the same number of members in GCC prior to 8. See https://godbolt.org/z/xcKaW1seW
// and https://godbolt.org/z/hz7Yqq1P5
using enum_type = decltype(last_member);
// {:{}} is used to indent by formatting an empty string with a variable width
if (make_switch)
{
out.Write("{:{}}switch ({}) {{\n", "", indent, variable);
for (u32 i = 0; i <= static_cast<u32>(last_member); i++)
// Generate a tree of if statements recursively
// std::function must be used because auto won't capture before initialization and thus can't be
// used recursively
std::function<void(u32, u32, u32)> BuildTree = [&](u32 cur_indent, u32 low, u32 high) {
// Each generated statement is for low <= x < high
if (high == low + 1)
{
const enum_type key = static_cast<enum_type>(i);
// Assumes existence of an EnumFormatter
out.Write("{:{}}case {:s}:\n", "", indent, key);
// Down to 1 case (low <= x < low + 1 means x == low)
const enum_type key = static_cast<enum_type>(low);
// Note that this indentation behaves poorly for multi-line code
if (!values[key].empty())
out.Write("{:{}} {}\n", "", indent, values[key]);
if (break_)
out.Write("{:{}} break;\n", "", indent);
out.Write("{:{}}{} // {}\n", "", cur_indent, values[key], key);
}
out.Write("{:{}}}}\n", "", indent);
}
else
{
// Generate a tree of if statements recursively
// std::function must be used because auto won't capture before initialization and thus can't be
// used recursively
std::function<void(u32, u32, u32)> BuildTree = [&](u32 cur_indent, u32 low, u32 high) {
// Each generated statement is for low <= x < high
if (high == low + 1)
{
// Down to 1 case (low <= x < low + 1 means x == low)
const enum_type key = static_cast<enum_type>(low);
// Note that this indentation behaves poorly for multi-line code
out.Write("{:{}}{} // {}\n", "", cur_indent, values[key], key);
}
else
{
u32 mid = low + ((high - low) / 2);
out.Write("{:{}}if ({} < {}u) {{\n", "", cur_indent, variable, mid);
BuildTree(cur_indent + 2, low, mid);
out.Write("{:{}}}} else {{\n", "", cur_indent);
BuildTree(cur_indent + 2, mid, high);
out.Write("{:{}}}}\n", "", cur_indent);
}
};
BuildTree(indent, 0, static_cast<u32>(last_member) + 1);
}
else
{
u32 mid = low + ((high - low) / 2);
out.Write("{:{}}if ({} < {}u) {{\n", "", cur_indent, variable, mid);
BuildTree(cur_indent + 2, low, mid);
out.Write("{:{}}}} else {{\n", "", cur_indent);
BuildTree(cur_indent + 2, mid, high);
out.Write("{:{}}}}\n", "", cur_indent);
}
};
BuildTree(indent, 0, static_cast<u32>(last_member) + 1);
}
// Constant variable names

View File

@ -56,48 +56,27 @@ u16 GetEncodedSampleCount(EFBCopyFormat format)
static void WriteHeader(ShaderCode& code, APIType api_type)
{
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
// left, top, of source rectangle within source texture
// width of the destination rectangle, scale_factor (1 or 2)
code.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" int4 position;\n"
" float y_scale;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float3 filter_coefficients;\n"
"}};\n");
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
// left, top, of source rectangle within source texture
// width of the destination rectangle, scale_factor (1 or 2)
code.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" int4 position;\n"
" float y_scale;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float3 filter_coefficients;\n"
code.Write("VARYING_LOCATION(0) in VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
code.Write("VARYING_LOCATION(0) in VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else
{
code.Write("VARYING_LOCATION(0) in float3 v_tex0;\n");
}
code.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
"FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n");
}
else // D3D
else
{
code.Write("cbuffer PSBlock : register(b0) {{\n"
" int4 position;\n"
" float y_scale;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float3 filter_coefficients;\n"
"}};\n"
"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n");
code.Write("VARYING_LOCATION(0) in float3 v_tex0;\n");
}
// D3D does not have roundEven(), only round(), which is specified "to the nearest integer".
// This differs from the roundEven() behavior, but to get consistency across drivers in OpenGL
// we need to use roundEven().
if (api_type == APIType::D3D)
code.Write("#define roundEven(x) round(x)\n");
code.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
"FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n");
// The alpha channel in the copy is set to 1 when the EFB format does not have an alpha channel.
code.Write("float4 RGBA8ToRGB8(float4 src)\n"
@ -149,10 +128,7 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
code.Write("(");
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
code.Write("texture(samp0, float3(");
else
code.Write("Tex0.Sample(samp0, float3(");
code.Write("texture(samp0, float3(");
code.Write("uv.x + float(xoffset) * pixel_size.x, ");
@ -211,23 +187,10 @@ static void WriteSwizzler(ShaderCode& code, const EFBCopyParams& params, EFBCopy
WriteHeader(code, api_type);
WriteSampleFunction(code, params, api_type);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
code.Write("void main()\n"
"{{\n"
" int2 sampleUv;\n"
" int2 uv1 = int2(gl_FragCoord.xy);\n");
}
else // D3D
{
code.Write("void main(\n"
" in float3 v_tex0 : TEXCOORD0,\n"
" in float4 rawpos : SV_Position,\n"
" out float4 ocol0 : SV_Target)\n"
"{{\n"
" int2 sampleUv;\n"
" int2 uv1 = int2(rawpos.xy);\n");
}
code.Write("void main()\n"
"{{\n"
" int2 sampleUv;\n"
" int2 uv1 = int2(gl_FragCoord.xy);\n");
const int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format);
const int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format);
@ -853,11 +816,7 @@ static const char decoding_shader_header[] = R"(
#define HAS_PALETTE 1
#endif
#ifdef API_D3D
cbuffer UBO : register(b0) {
#else
UBO_BINDING(std140, 1) uniform UBO {
#endif
uint2 u_dst_size;
uint2 u_src_size;
uint u_src_offset;
@ -865,37 +824,6 @@ UBO_BINDING(std140, 1) uniform UBO {
uint u_palette_offset;
};
#ifdef API_D3D
Buffer<uint4> s_input_buffer : register(t0);
#ifdef HAS_PALETTE
Buffer<uint4> s_palette_buffer : register(t1);
#endif
RWTexture2DArray<unorm float4> output_image : register(u0);
// Helpers for reading/writing.
#define texelFetch(buffer, pos) buffer.Load(pos)
#define imageStore(image, coords, value) image[coords] = value
#define GROUP_MEMORY_BARRIER_WITH_SYNC GroupMemoryBarrierWithGroupSync();
#define GROUP_SHARED groupshared
#define DEFINE_MAIN(lx, ly) \
[numthreads(lx, ly, 1)] \
void main(uint3 gl_WorkGroupID : SV_GroupId, \
uint3 gl_LocalInvocationID : SV_GroupThreadID, \
uint3 gl_GlobalInvocationID : SV_DispatchThreadID)
uint bitfieldExtract(uint val, int off, int size)
{
// This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n"
// Microsoft's HLSL compiler automatically optimises this to a bitfield extract instruction.
uint mask = uint((1 << size) - 1);
return uint(val >> off) & mask;
}
#else
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
#ifdef HAS_PALETTE
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
@ -909,8 +837,6 @@ IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;
layout(local_size_x = lx, local_size_y = ly) in; \
void main()
#endif
uint Swap16(uint v)
{
// Convert BE to LE.
@ -1498,48 +1424,29 @@ float4 DecodePixel(int val)
ss << "\n";
if (api_type == APIType::D3D)
{
ss << "Buffer<uint> tex0 : register(t0);\n";
ss << "Texture2DArray tex1 : register(t1);\n";
ss << "SamplerState samp1 : register(s1);\n";
ss << "cbuffer PSBlock : register(b0) {\n";
}
else
{
ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n";
ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n";
ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n";
}
ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n";
ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n";
ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n";
ss << " float multiplier;\n";
ss << " int texel_buffer_offset;\n";
ss << "};\n";
if (api_type == APIType::D3D)
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
ss << "void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target) {\n";
ss << " int src = int(round(tex1.Sample(samp1, v_tex0).r * multiplier));\n";
ss << " src = int(tex0.Load(src + texel_buffer_offset).r);\n";
ss << "VARYING_LOCATION(0) in VertexData {\n";
ss << " float3 v_tex0;\n";
ss << "};\n";
}
else
{
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
ss << "VARYING_LOCATION(0) in VertexData {\n";
ss << " float3 v_tex0;\n";
ss << "};\n";
}
else
{
ss << "VARYING_LOCATION(0) in float3 v_tex0;\n";
}
ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n";
ss << "void main() {\n";
ss << " float3 coords = v_tex0;\n";
ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n";
ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n";
ss << "VARYING_LOCATION(0) in float3 v_tex0;\n";
}
ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n";
ss << "void main() {\n";
ss << " float3 coords = v_tex0;\n";
ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n";
ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n";
ss << " src = ((src << 8) & 0xFF00) | (src >> 8);\n";
ss << " ocol0 = DecodePixel(src);\n";

View File

@ -29,26 +29,13 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
// Writes the PSBlock constant/uniform buffer declaration (src_offset, src_size,
// filter_coefficients, gamma_rcp, clamp_tb, pixel_height) into `out`. Both the vertex
// and the pixel shader generators in this file call this before emitting anything else.
static void WriteHeader(APIType api_type, ShaderCode& out)
{
if (api_type == APIType::D3D)
{
// HLSL spelling: cbuffer bound to register b0.
out.Write("cbuffer PSBlock : register(b0) {{\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
"}};\n\n");
}
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
// GLSL spelling: std140 uniform block at UBO binding 1.
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
"}};\n");
}
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
"}};\n");
}
ShaderCode GenerateVertexShader(APIType api_type)
@ -56,27 +43,19 @@ ShaderCode GenerateVertexShader(APIType api_type)
ShaderCode out;
WriteHeader(api_type, out);
if (api_type == APIType::D3D)
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.Write("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"
" out float4 opos : SV_Position) {{\n");
out.Write("VARYING_LOCATION(0) out VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
else
{
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.Write("VARYING_LOCATION(0) out VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else
{
out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n");
}
out.Write("#define id gl_VertexID\n"
"#define opos gl_Position\n"
"void main() {{\n");
out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n");
}
out.Write("#define id gl_VertexID\n"
"#define opos gl_Position\n"
"void main() {{\n");
out.Write(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n");
out.Write(
" opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n");
@ -98,38 +77,24 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
ShaderCode out;
WriteHeader(api_type, out);
if (api_type == APIType::D3D)
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
out.Write("float4 SampleEFB(float3 uv, float y_offset) {{\n"
" return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), {}));\n"
"}}\n",
mono_depth ? "0.0" : "uv.z");
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.Write("Texture2DArray tex0 : register(t0);\n"
"SamplerState samp0 : register(s0);\n"
"float4 SampleEFB(float3 uv, float y_offset) {{\n"
" return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), {}));\n"
"}}\n\n",
mono_depth ? "0.0" : "uv.z");
out.Write("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{{\n");
out.Write("VARYING_LOCATION(0) in VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
else
{
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
out.Write("float4 SampleEFB(float3 uv, float y_offset) {{\n"
" return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
"clamp_tb.x, clamp_tb.y), {}));\n"
"}}\n",
mono_depth ? "0.0" : "uv.z");
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.Write("VARYING_LOCATION(0) in VertexData {{\n"
" float3 v_tex0;\n"
"}};\n");
}
else
{
out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n");
}
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"
"void main()\n{{\n");
out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n");
}
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"
"void main()\n{{\n");
// The copy filter applies to both color and depth copies. This has been verified on hardware.
// The filter is only applied to the RGB channels, the alpha channel is left intact.

View File

@ -77,8 +77,7 @@ void WriteVertexLighting(ShaderCode& out, APIType api_type, std::string_view wor
std::string_view out_color_1_var)
{
out.Write("// Lighting\n");
out.Write("{}for (uint chan = 0u; chan < {}u; chan++) {{\n",
api_type == APIType::D3D ? "[loop] " : "", NUM_XF_COLOR_CHANNELS);
out.Write("for (uint chan = 0u; chan < {}u; chan++) {{\n", NUM_XF_COLOR_CHANNELS);
out.Write(" uint colorreg = xfmem_color(chan);\n"
" uint alphareg = xfmem_alpha(chan);\n"
" int4 mat = " I_MATERIALS "[chan + 2u]; \n"

View File

@ -72,93 +72,89 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
if (per_pixel_lighting)
WriteLightingFunction(out);
// Shader inputs/outputs in GLSL (HLSL is in main).
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
#ifdef __APPLE__
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
// if we want to use it.
if (api_type == APIType::Vulkan)
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
// if we want to use it.
if (api_type == APIType::Vulkan)
{
if (use_dual_source)
{
if (use_dual_source)
{
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
}
else
{
// Metal doesn't support a single unified variable for both input and output,
// so when using framebuffer fetch, we declare the input separately below.
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n",
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
}
if (use_framebuffer_fetch)
{
// Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross.
out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n");
}
}
else
#endif
{
bool has_broken_decoration =
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION);
out.Write("{} {} vec4 {};\n",
has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" :
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)",
use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out",
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
if (use_dual_source)
{
out.Write("{} out vec4 ocol1;\n", has_broken_decoration ?
"FRAGMENT_OUTPUT_LOCATION(1)" :
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)");
}
}
if (per_pixel_depth)
out.Write("#define depth gl_FragDepth\n");
if (host_config.backend_geometry_shaders)
{
out.Write("VARYING_LOCATION(0) in VertexData {{\n");
GenerateVSOutputMembers(out, api_type, numTexgen, host_config,
GetInterpolationQualifier(msaa, ssaa, true, true));
if (stereo)
out.Write(" flat int layer;\n");
out.Write("}};\n\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
// Metal doesn't support a single unified variable for both input and output,
// so when using framebuffer fetch, we declare the input separately below.
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 {};\n",
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
}
if (use_framebuffer_fetch)
{
// Subpass inputs will be converted to framebuffer fetch by SPIRV-Cross.
out.Write("INPUT_ATTACHMENT_BINDING(0, 0, 0) uniform subpassInput in_ocol0;\n");
}
}
else
#endif
{
bool has_broken_decoration =
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION);
out.Write("{} {} vec4 {};\n",
has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(0)" :
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0)",
use_framebuffer_fetch ? "FRAGMENT_INOUT" : "out",
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
if (use_dual_source)
{
out.Write("{} out vec4 ocol1;\n", has_broken_decoration ?
"FRAGMENT_OUTPUT_LOCATION(1)" :
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1)");
}
}
if (per_pixel_depth)
out.Write("#define depth gl_FragDepth\n");
if (host_config.backend_geometry_shaders)
{
out.Write("VARYING_LOCATION(0) in VertexData {{\n");
GenerateVSOutputMembers(out, api_type, numTexgen, host_config,
GetInterpolationQualifier(msaa, ssaa, true, true), ShaderStage::Pixel);
if (stereo)
out.Write(" flat int layer;\n");
out.Write("}};\n\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.Write("VARYING_LOCATION({}) {} in float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < numTexgen; ++i)
{
out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} in float4 colors_1;\n", counter++,
}
if (per_pixel_lighting)
{
out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < numTexgen; ++i)
{
out.Write("VARYING_LOCATION({}) {} in float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.Write("VARYING_LOCATION({}) {} in float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.Write("VARYING_LOCATION({}) {} in float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} in float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
}
@ -243,10 +239,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
// Doesn't look like DirectX supports this. Oh well, the code path is here just in case it
// supports this in the future.
out.Write("int4 sampleTextureWrapper(uint texmap, int2 uv, int layer) {{\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write(" return sampleTexture(texmap, samp[texmap], uv, layer);\n");
else if (api_type == APIType::D3D)
out.Write(" return sampleTexture(texmap, tex[texmap], samp[texmap], uv, layer);\n");
out.Write(" return sampleTexture(texmap, samp[texmap], uv, layer);\n");
out.Write("}}\n\n");
}
else
@ -259,15 +252,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" switch(sampler_num) {{\n");
for (int i = 0; i < 8; i++)
{
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
{
out.Write(" case {0}u: return sampleTexture({0}u, samp[{0}u], uv, layer);\n", i);
}
else if (api_type == APIType::D3D)
{
out.Write(" case {0}u: return sampleTexture({0}u, tex[{0}u], samp[{0}u], uv, layer);\n",
i);
}
out.Write(" case {0}u: return sampleTexture({0}u, samp[{0}u], uv, layer);\n", i);
}
out.Write(" }}\n"
"}}\n\n");
@ -522,82 +507,34 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
out.Write(")\n\n");
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
if (early_depth && host_config.backend_early_z)
out.Write("FORCE_EARLY_Z;\n");
out.Write("void main()\n{{\n");
out.Write(" float4 rawpos = gl_FragCoord;\n");
if (use_framebuffer_fetch)
{
if (early_depth && host_config.backend_early_z)
out.Write("FORCE_EARLY_Z;\n");
// Store off a copy of the initial framebuffer value.
//
// If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the
// framebuffer), we read from real_ocol0.
out.Write("#ifdef FB_FETCH_VALUE\n"
" float4 initial_ocol0 = FB_FETCH_VALUE;\n"
"#else\n"
" float4 initial_ocol0 = real_ocol0;\n"
"#endif\n");
out.Write("void main()\n{{\n");
out.Write(" float4 rawpos = gl_FragCoord;\n");
if (use_framebuffer_fetch)
{
// Store off a copy of the initial framebuffer value.
//
// If FB_FETCH_VALUE isn't defined (i.e. no special keyword for fetching from the
// framebuffer), we read from real_ocol0.
out.Write("#ifdef FB_FETCH_VALUE\n"
" float4 initial_ocol0 = FB_FETCH_VALUE;\n"
"#else\n"
" float4 initial_ocol0 = real_ocol0;\n"
"#endif\n");
// QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
// intermediate value with multiple reads & modifications, so we pull out the "real" output
// value above and use a temporary for calculations, then set the output value once at the
// end of the shader.
out.Write(" float4 ocol0;\n");
}
if (use_shader_blend)
{
out.Write(" float4 ocol1;\n");
}
// QComm's Adreno driver doesn't seem to like using the framebuffer_fetch value as an
// intermediate value with multiple reads & modifications, so we pull out the "real" output
// value above and use a temporary for calculations, then set the output value once at the
// end of the shader.
out.Write(" float4 ocol0;\n");
}
else // D3D
if (use_shader_blend)
{
if (early_depth && host_config.backend_early_z)
out.Write("[earlydepthstencil]\n");
out.Write("void main(\n");
if (uid_data->uint_output)
{
out.Write(" out uint4 ocol0 : SV_Target,\n");
}
else
{
out.Write(" out float4 ocol0 : SV_Target0,\n"
" out float4 ocol1 : SV_Target1,\n");
}
if (per_pixel_depth)
out.Write(" out float depth : SV_Depth,\n");
out.Write(" in float4 rawpos : SV_Position,\n");
out.Write(" in {} float4 colors_0 : COLOR0,\n", GetInterpolationQualifier(msaa, ssaa));
out.Write(" in {} float4 colors_1 : COLOR1", GetInterpolationQualifier(msaa, ssaa));
// compute window position if needed because binding semantic WPOS is not widely supported
for (u32 i = 0; i < numTexgen; ++i)
{
out.Write(",\n in {} float3 tex{} : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa), i,
i);
}
if (!host_config.fast_depth_calc)
{
out.Write("\n,\n in {} float4 clipPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
numTexgen);
}
if (per_pixel_lighting)
{
out.Write(",\n in {} float3 Normal : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
numTexgen + 1);
out.Write(",\n in {} float3 WorldPos : TEXCOORD{}", GetInterpolationQualifier(msaa, ssaa),
numTexgen + 2);
}
out.Write(",\n in float clipDist0 : SV_ClipDistance0\n"
",\n in float clipDist1 : SV_ClipDistance1\n");
if (stereo)
out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n");
out.Write("\n ) {{\n");
out.Write(" float4 ocol1;\n");
}
if (!stereo)
out.Write(" int layer = 0;\n");
@ -634,11 +571,6 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
BitfieldExtract<&GenMode::numtevstages>("bpmem_genmode"));
out.Write(" // Main tev loop\n");
if (api_type == APIType::D3D)
{
// Tell DirectX we don't want this loop unrolled (it crashes if it tries to)
out.Write(" [loop]\n");
}
out.Write(" for(uint stage = 0u; stage <= num_stages; stage++)\n"
" {{\n"

View File

@ -38,85 +38,65 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
out.Write("{}", s_lighting_struct);
// uniforms
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
else
out.Write("cbuffer VSBlock {{\n");
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
out.Write("{}", s_shader_uniforms);
out.Write("}};\n");
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "");
GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "", ShaderStage::Vertex);
out.Write("}};\n\n");
WriteIsNanHeader(out, api_type);
WriteBitfieldExtractHeader(out, api_type, host_config);
WriteLightingFunction(out);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i);
if (host_config.backend_geometry_shaders)
{
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i);
if (host_config.backend_geometry_shaders)
{
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, api_type, num_texgen, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false));
out.Write("}} vs;\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < num_texgen; ++i)
{
out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
out.Write("void main()\n{{\n");
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, api_type, num_texgen, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false),
ShaderStage::Vertex);
out.Write("}} vs;\n");
}
else // D3D
else
{
out.Write("VS_OUTPUT main(\n");
// inputs
out.Write(" float3 rawnormal : NORMAL,\n"
" float3 rawtangent : TANGENT,\n"
" float3 rawbinormal : BINORMAL,\n"
" float4 rawcolor0 : COLOR0,\n"
" float4 rawcolor1 : COLOR1,\n");
for (int i = 0; i < 8; ++i)
out.Write(" float3 rawtex{} : TEXCOORD{},\n", i, i);
out.Write(" uint posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {{\n");
// Let's set up attributes
u32 counter = 0;
out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < num_texgen; ++i)
{
out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
out.Write("void main()\n{{\n");
out.Write("VS_OUTPUT o;\n"
"\n");
@ -335,45 +315,38 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
"}}\n");
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
if (host_config.backend_geometry_shaders)
{
if (host_config.backend_geometry_shaders)
{
AssignVSOutputMembers(out, "vs", "o", num_texgen, host_config);
}
else
{
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (u32 i = 0; i < num_texgen; ++i)
out.Write("tex{}.xyz = o.tex{};\n", i, i);
if (!host_config.fast_depth_calc)
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{
out.Write("Normal = o.Normal;\n"
"WorldPos = o.WorldPos;\n");
}
out.Write("colors_0 = o.colors_0;\n"
"colors_1 = o.colors_1;\n");
}
if (host_config.backend_depth_clamp)
{
out.Write("gl_ClipDistance[0] = clipDist0;\n"
"gl_ClipDistance[1] = clipDist1;\n");
}
// Vulkan NDC space has Y pointing down (right-handed NDC space).
if (api_type == APIType::Vulkan)
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
else
out.Write("gl_Position = o.pos;\n");
AssignVSOutputMembers(out, "vs", "o", num_texgen, host_config);
}
else // D3D
else
{
out.Write("return o;\n");
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (u32 i = 0; i < num_texgen; ++i)
out.Write("tex{}.xyz = o.tex{};\n", i, i);
if (!host_config.fast_depth_calc)
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{
out.Write("Normal = o.Normal;\n"
"WorldPos = o.WorldPos;\n");
}
out.Write("colors_0 = o.colors_0;\n"
"colors_1 = o.colors_1;\n");
}
if (host_config.backend_depth_clamp)
{
out.Write("gl_ClipDistance[0] = clipDist0;\n"
"gl_ClipDistance[1] = clipDist1;\n");
}
// Vulkan NDC space has Y pointing down (right-handed NDC space).
if (api_type == APIType::Vulkan)
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
else
out.Write("gl_Position = o.pos;\n");
out.Write("}}\n");
return out;
@ -393,8 +366,7 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
}
else
{
out.Write("{}for (uint texgen = 0u; texgen < {}u; texgen++) {{\n",
api_type == APIType::D3D ? "[loop] " : "", num_texgen);
out.Write("for (uint texgen = 0u; texgen < {}u; texgen++) {{\n", num_texgen);
}
out.Write(" // Texcoord transforms\n");

View File

@ -86,110 +86,80 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("{}", s_lighting_struct);
// uniforms
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
else
out.Write("cbuffer VSBlock {{\n");
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {{\n");
out.Write("{}", s_shader_uniforms);
out.Write("}};\n");
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "",
ShaderStage::Vertex);
out.Write("}};\n\n");
WriteIsNanHeader(out, api_type);
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
if ((uid_data->components & VB_HAS_NORMAL) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB);
if ((uid_data->components & VB_HAS_TANGENT) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB);
if ((uid_data->components & VB_HAS_BINORMAL) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB);
if ((uid_data->components & VB_HAS_COL0) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
if ((uid_data->components & VB_HAS_COL1) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (u32 i = 0; i < 8; ++i)
{
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
if ((uid_data->components & VB_HAS_NORMAL) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB);
if ((uid_data->components & VB_HAS_TANGENT) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB);
if ((uid_data->components & VB_HAS_BINORMAL) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB);
const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
if ((uid_data->components & VB_HAS_COL0) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
if ((uid_data->components & VB_HAS_COL1) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (u32 i = 0; i < 8; ++i)
if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0)
{
const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0)
{
out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i,
has_texmtx != 0 ? 3 : 2, i);
}
out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i,
has_texmtx != 0 ? 3 : 2, i);
}
if (host_config.backend_geometry_shaders)
{
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false));
out.Write("}} vs;\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < uid_data->numTexGens; ++i)
{
out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
out.Write("void main()\n{{\n");
}
else // D3D
if (host_config.backend_geometry_shaders)
{
out.Write("VS_OUTPUT main(\n");
// inputs
if ((uid_data->components & VB_HAS_NORMAL) != 0)
out.Write(" float3 rawnormal : NORMAL,\n");
if ((uid_data->components & VB_HAS_TANGENT) != 0)
out.Write(" float3 rawtangent : TANGENT,\n");
if ((uid_data->components & VB_HAS_BINORMAL) != 0)
out.Write(" float3 rawbinormal : BINORMAL,\n");
if ((uid_data->components & VB_HAS_COL0) != 0)
out.Write(" float4 rawcolor0 : COLOR0,\n");
if ((uid_data->components & VB_HAS_COL1) != 0)
out.Write(" float4 rawcolor1 : COLOR1,\n");
for (u32 i = 0; i < 8; ++i)
{
const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0)
out.Write(" float{} rawtex{} : TEXCOORD{},\n", has_texmtx ? 3 : 2, i, i);
}
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
out.Write(" uint4 posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {{\n");
out.Write("VARYING_LOCATION(0) out VertexData {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config,
GetInterpolationQualifier(msaa, ssaa, true, false),
ShaderStage::Vertex);
out.Write("}} vs;\n");
}
else
{
// Let's set up attributes
u32 counter = 0;
out.Write("VARYING_LOCATION({}) {} out float4 colors_0;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float4 colors_1;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
for (u32 i = 0; i < uid_data->numTexGens; ++i)
{
out.Write("VARYING_LOCATION({}) {} out float3 tex{};\n", counter++,
GetInterpolationQualifier(msaa, ssaa), i);
}
if (!host_config.fast_depth_calc)
{
out.Write("VARYING_LOCATION({}) {} out float4 clipPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
if (per_pixel_lighting)
{
out.Write("VARYING_LOCATION({}) {} out float3 Normal;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
out.Write("VARYING_LOCATION({}) {} out float3 WorldPos;\n", counter++,
GetInterpolationQualifier(msaa, ssaa));
}
}
out.Write("void main()\n{{\n");
out.Write("VS_OUTPUT o;\n");
@ -548,45 +518,38 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
"}}\n");
}
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
if (host_config.backend_geometry_shaders)
{
if (host_config.backend_geometry_shaders)
{
AssignVSOutputMembers(out, "vs", "o", uid_data->numTexGens, host_config);
}
else
{
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (u32 i = 0; i < uid_data->numTexGens; ++i)
out.Write("tex{}.xyz = o.tex{};\n", i, i);
if (!host_config.fast_depth_calc)
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{
out.Write("Normal = o.Normal;\n"
"WorldPos = o.WorldPos;\n");
}
out.Write("colors_0 = o.colors_0;\n"
"colors_1 = o.colors_1;\n");
}
if (host_config.backend_depth_clamp)
{
out.Write("gl_ClipDistance[0] = clipDist0;\n"
"gl_ClipDistance[1] = clipDist1;\n");
}
// Vulkan NDC space has Y pointing down (right-handed NDC space).
if (api_type == APIType::Vulkan)
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
else
out.Write("gl_Position = o.pos;\n");
AssignVSOutputMembers(out, "vs", "o", uid_data->numTexGens, host_config);
}
else // D3D
else
{
out.Write("return o;\n");
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (u32 i = 0; i < uid_data->numTexGens; ++i)
out.Write("tex{}.xyz = o.tex{};\n", i, i);
if (!host_config.fast_depth_calc)
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{
out.Write("Normal = o.Normal;\n"
"WorldPos = o.WorldPos;\n");
}
out.Write("colors_0 = o.colors_0;\n"
"colors_1 = o.colors_1;\n");
}
if (host_config.backend_depth_clamp)
{
out.Write("gl_ClipDistance[0] = clipDist0;\n"
"gl_ClipDistance[1] = clipDist1;\n");
}
// Vulkan NDC space has Y pointing down (right-handed NDC space).
if (api_type == APIType::Vulkan)
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
else
out.Write("gl_Position = o.pos;\n");
out.Write("}}\n");
return out;