vulkan: only use per-pixel sorting when needed

This commit is contained in:
Flyinghead 2020-01-05 23:03:44 +01:00
parent f7655091b0
commit a6839f57e7
7 changed files with 62 additions and 142 deletions

View File

@ -64,9 +64,7 @@ vec4 resolveAlphaBlend(ivec2 coords) {
vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0));
vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer
float depth = 0.0;
bool do_depth_test = false;
for (int i = 0; i < num_frag; i++)
{
const Pixel pixel = pixels[pixel_list[i]];

View File

@ -22,7 +22,7 @@
#include "oit_drawer.h"
#include "hw/pvr/pvr_mem.h"
void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool autosort, int pass,
void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool autosort, Pass pass,
const PolyParam& poly, u32 first, u32 count)
{
vk::Rect2D scissorRect;
@ -82,7 +82,7 @@ void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool
cmdBuffer.drawIndexed(count, 1, first, 0, 0);
}
void OITDrawer::DrawList(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, int pass,
void OITDrawer::DrawList(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, Pass pass,
const List<PolyParam>& polys, u32 first, u32 last)
{
for (u32 i = first; i < last; i++)
@ -323,8 +323,8 @@ bool OITDrawer::Draw(const Texture *fogTexture)
vk::SubpassContents::eInline);
// Depth + stencil subpass
DrawList(cmdBuffer, ListType_Opaque, false, 0, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count);
DrawList(cmdBuffer, ListType_Punch_Through, false, 0, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count);
DrawList(cmdBuffer, ListType_Opaque, false, Pass::Depth, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count);
DrawList(cmdBuffer, ListType_Punch_Through, false, Pass::Depth, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count);
DrawModifierVolumes<false>(cmdBuffer, previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count);
@ -332,12 +332,17 @@ bool OITDrawer::Draw(const Texture *fogTexture)
cmdBuffer.nextSubpass(vk::SubpassContents::eInline);
// OP + PT
DrawList(cmdBuffer, ListType_Opaque, false, 1, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count);
DrawList(cmdBuffer, ListType_Punch_Through, false, 1, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count);
DrawList(cmdBuffer, ListType_Opaque, false, Pass::Color, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count);
DrawList(cmdBuffer, ListType_Punch_Through, false, Pass::Color, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count);
// TR
if (!oitBuffers->isFirstFrameAfterInit())
DrawList(cmdBuffer, ListType_Translucent, current_pass.autosort, 3, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count);
if (current_pass.autosort)
{
if (!oitBuffers->isFirstFrameAfterInit())
DrawList(cmdBuffer, ListType_Translucent, true, Pass::OIT, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count);
}
else
DrawList(cmdBuffer, ListType_Translucent, false, Pass::Color, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count);
// Final subpass
cmdBuffer.nextSubpass(vk::SubpassContents::eInline);
@ -350,7 +355,7 @@ bool OITDrawer::Draw(const Texture *fogTexture)
if (GetContext()->GetVendorID() != VENDOR_QUALCOMM) // Adreno bug
DrawModifierVolumes<true>(cmdBuffer, previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count);
vk::Pipeline pipeline = pipelineManager->GetFinalPipeline(current_pass.autosort);
vk::Pipeline pipeline = pipelineManager->GetFinalPipeline();
cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
quadBuffer->Bind(cmdBuffer);
quadBuffer->Draw(cmdBuffer);
@ -371,7 +376,7 @@ bool OITDrawer::Draw(const Texture *fogTexture)
cmdBuffer.bindIndexBuffer(mainBuffer, offsets.indexOffset, vk::IndexType::eUint32);
// Tr depth-only pass
DrawList(cmdBuffer, ListType_Translucent, current_pass.autosort, 0, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count);
DrawList(cmdBuffer, ListType_Translucent, current_pass.autosort, Pass::Depth, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count);
cmdBuffer.endRenderPass();
}

View File

@ -112,9 +112,9 @@ protected:
vk::CommandBuffer currentCommandBuffer;
private:
void DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, int pass,
void DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, Pass pass,
const PolyParam& poly, u32 first, u32 count);
void DrawList(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, int pass,
void DrawList(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, Pass pass,
const List<PolyParam>& polys, u32 first, u32 count);
template<bool Translucent>
void DrawModifierVolumes(const vk::CommandBuffer& cmdBuffer, int first, int count);

View File

@ -21,7 +21,7 @@
#include "oit_pipeline.h"
#include "../quad.h"
void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyParam& pp, int pass)
void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyParam& pp, Pass pass)
{
vk::PipelineVertexInputStateCreateInfo pipelineVertexInputStateCreateInfo = GetMainVertexInputStateCreateInfo();
@ -53,7 +53,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP
// Depth and stencil
vk::CompareOp depthOp;
if (pass == 1 && !pp.isp.ZWriteDis)
if (pass == Pass::Color && !pp.isp.ZWriteDis && listType != ListType_Translucent)
depthOp = vk::CompareOp::eEqual;
else if (listType == ListType_Punch_Through || autosort)
depthOp = vk::CompareOp::eGreaterOrEqual;
@ -61,7 +61,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP
depthOp = depthOps[pp.isp.DepthMode];
bool depthWriteEnable;
// FIXME temporary Intel driver bug workaround
if (pass != 0 && (GetContext()->GetVendorID() != VENDOR_INTEL || pass != 1))
if (pass != Pass::Depth && !((!autosort || GetContext()->GetVendorID() == VENDOR_INTEL) && pass == Pass::Color))
depthWriteEnable = false;
// Z Write Disable seems to be ignored for punch-through.
// Fixes Worms World Party, Bust-a-Move 4 and Re-Volt
@ -70,7 +70,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP
else
depthWriteEnable = !pp.isp.ZWriteDis;
bool shadowed = pass == 0 && (listType == ListType_Opaque || listType == ListType_Punch_Through);
bool shadowed = pass == Pass::Depth && (listType == ListType_Opaque || listType == ListType_Punch_Through);
vk::StencilOpState stencilOpState;
if (shadowed)
{
@ -96,7 +96,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP
// Color flags and blending
vk::PipelineColorBlendAttachmentState pipelineColorBlendAttachmentState;
// Apparently punch-through polys support blending, or at least some combinations
if ((listType == ListType_Punch_Through || pass > 0) && pass != 3)
if (listType == ListType_Punch_Through || pass == Pass::Color)
{
vk::ColorComponentFlags colorComponentFlags(vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA);
u32 src = pp.tsp.SrcInstr;
@ -156,7 +156,6 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP
//params.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through;
params.useAlpha = pp.tsp.UseAlpha;
params.pass = pass;
params.depthFunc = autosort ? 6 : pp.isp.DepthMode;
params.twoVolume = pp.tsp1.full != -1 || pp.tcw1.full != -1;
vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params);
@ -180,14 +179,14 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP
&pipelineDynamicStateCreateInfo, // pDynamicState
*pipelineLayout, // layout
renderPasses->GetRenderPass(true, true), // renderPass
pass == 0 ? (listType == ListType_Translucent ? 2 : 0) : 1 // subpass
pass == Pass::Depth ? (listType == ListType_Translucent ? 2 : 0) : 1 // subpass
);
pipelines[hash(listType, autosort, &pp, pass)] = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(),
graphicsPipelineCreateInfo);
}
void OITPipelineManager::CreateFinalPipeline(bool autosort)
void OITPipelineManager::CreateFinalPipeline()
{
vk::PipelineVertexInputStateCreateInfo pipelineVertexInputStateCreateInfo = GetQuadInputStateCreateInfo(false);
@ -245,7 +244,7 @@ void OITPipelineManager::CreateFinalPipeline(bool autosort)
vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates);
vk::ShaderModule vertex_module = shaderManager->GetFinalVertexShader();
vk::ShaderModule fragment_module = shaderManager->GetFinalShader(autosort);
vk::ShaderModule fragment_module = shaderManager->GetFinalShader();
vk::PipelineShaderStageCreateInfo stages[] = {
{ vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eVertex, vertex_module, "main" },
@ -270,10 +269,7 @@ void OITPipelineManager::CreateFinalPipeline(bool autosort)
2 // subpass
);
if (autosort)
finalAutosortPipeline = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo);
else
finalNosortPipeline = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo);
finalPipeline = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo);
}

View File

@ -250,7 +250,7 @@ public:
modVolPipelines.clear();
}
vk::Pipeline GetPipeline(u32 listType, bool autosort, const PolyParam& pp, int pass)
vk::Pipeline GetPipeline(u32 listType, bool autosort, const PolyParam& pp, Pass pass)
{
u32 pipehash = hash(listType, autosort, &pp, pass);
const auto &pipeline = pipelines.find(pipehash);
@ -282,12 +282,11 @@ public:
return *trModVolPipelines[pipehash];
}
vk::Pipeline GetFinalPipeline(bool autosort)
vk::Pipeline GetFinalPipeline()
{
vk::UniquePipeline& pipeline = autosort ? finalAutosortPipeline : finalNosortPipeline;
if (!pipeline)
CreateFinalPipeline(autosort);
return *pipeline;
if (!finalPipeline)
CreateFinalPipeline();
return *finalPipeline;
}
vk::Pipeline GetClearPipeline()
{
@ -306,7 +305,7 @@ private:
void CreateModVolPipeline(ModVolMode mode, int cullMode);
void CreateTrModVolPipeline(ModVolMode mode, int cullMode);
u32 hash(u32 listType, bool autosort, const PolyParam *pp, int pass) const
u32 hash(u32 listType, bool autosort, const PolyParam *pp, Pass pass) const
{
u32 hash = pp->pcw.Gouraud | (pp->pcw.Offset << 1) | (pp->pcw.Texture << 2) | (pp->pcw.Shadow << 3)
| (((pp->tileclip >> 28) == 3) << 4);
@ -361,15 +360,14 @@ private:
full ? vertexInputAttributeDescriptions : vertexInputLightAttributeDescriptions);
}
void CreatePipeline(u32 listType, bool autosort, const PolyParam& pp, int pass);
void CreateFinalPipeline(bool autosort);
void CreatePipeline(u32 listType, bool autosort, const PolyParam& pp, Pass pass);
void CreateFinalPipeline();
void CreateClearPipeline();
std::map<u32, vk::UniquePipeline> pipelines;
std::map<u32, vk::UniquePipeline> modVolPipelines;
std::map<u32, vk::UniquePipeline> trModVolPipelines;
vk::UniquePipeline finalAutosortPipeline;
vk::UniquePipeline finalNosortPipeline;
vk::UniquePipeline finalPipeline;
vk::UniquePipeline clearPipeline;
vk::UniquePipelineLayout pipelineLayout;

View File

@ -237,14 +237,17 @@ static const char OITFragmentShaderSource[] = R"(
#define pp_Offset %d
#define pp_FogCtrl %d
#define pp_TwoVolumes %d
#define pp_DepthFunc %d
#define pp_Gouraud %d
#define pp_BumpMap %d
#define ColorClamping %d
#define PASS %d
#define PI 3.1415926
#if PASS <= 1
#define PASS_DEPTH 0
#define PASS_COLOR 1
#define PASS_OIT 2
#if PASS == PASS_DEPTH || PASS == PASS_COLOR
layout (location = 0) out vec4 FragColor;
#define gl_FragColor FragColor
#endif
@ -287,10 +290,10 @@ layout (set = 1, binding = 1) uniform sampler2D tex1;
#endif
#endif
#if PASS == 1
#if PASS == PASS_COLOR
layout (input_attachment_index = 0, set = 0, binding = 4) uniform usubpassInput shadow_stencil;
#endif
#if PASS == 3
#if PASS == PASS_OIT
layout (input_attachment_index = 0, set = 0, binding = 5) uniform subpassInput DepthTex;
#endif
@ -330,30 +333,11 @@ void main()
{
setFragDepth();
#if PASS == 3
#if PASS == PASS_OIT
// Manual depth testing
highp float frontDepth = subpassLoad(DepthTex).r;
#if pp_DepthFunc == 0 // Never
if (gl_FragDepth < frontDepth)
discard;
#elif pp_DepthFunc == 1 // Less
if (gl_FragDepth >= frontDepth)
discard;
#elif pp_DepthFunc == 2 // Equal
if (gl_FragDepth != frontDepth)
discard;
#elif pp_DepthFunc == 3 // Less or equal
if (gl_FragDepth > frontDepth)
discard;
#elif pp_DepthFunc == 4 // Greater
if (gl_FragDepth <= frontDepth)
discard;
#elif pp_DepthFunc == 5 // Not equal
if (gl_FragDepth == frontDepth)
discard;
#elif pp_DepthFunc == 6 // Greater or equal
if (gl_FragDepth < frontDepth)
discard;
#endif
#endif
// Clip inside the box
@ -374,7 +358,7 @@ void main()
bool cur_ignore_tex_alpha = pushConstants.ignore_tex_alpha0 != 0;
int cur_shading_instr = pushConstants.shading_instr0;
int cur_fog_control = pushConstants.fog_control0;
#if PASS == 1
#if PASS == PASS_COLOR
uvec4 stencil = subpassLoad(shadow_stencil);
if (stencil.r == 0x81u) {
color = vtx_base1;
@ -456,7 +440,7 @@ void main()
#endif
}
#endif
#if PASS == 1 && pp_TwoVolumes == 0
#if PASS == PASS_COLOR && pp_TwoVolumes == 0
uvec4 stencil = subpassLoad(shadow_stencil);
if (stencil.r == 0x81u)
color.rgb *= uniformBuffer.shade_scale_factor;
@ -485,9 +469,9 @@ void main()
//color.rgb=vec3(gl_FragCoord.w * uniformBuffer.sp_FOG_DENSITY / 128.0);
#if PASS == 1
#if PASS == PASS_COLOR
FragColor = color;
#elif PASS > 1
#elif PASS == PASS_OIT
// Discard as many pixels as possible
switch (cur_blend_mode.y) // DST
{
@ -569,8 +553,6 @@ void main()
static const char OITFinalShaderSource[] =
"#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT
R"(
#define DEPTH_SORTED %d
layout (input_attachment_index = 0, set = 2, binding = 0) uniform subpassInput tex;
layout (location = 0) out vec4 FragColor;
@ -588,13 +570,9 @@ int fillAndSortFragmentArray(ivec2 coords)
const Pixel p = PixelBuffer.pixels[idx];
int j = count - 1;
Pixel jp = PixelBuffer.pixels[pixel_list[j]];
#if DEPTH_SORTED == 1
while (j >= 0
&& (jp.depth > p.depth
|| (jp.depth == p.depth && getPolyNumber(jp) > getPolyNumber(p))))
#else
while (j >= 0 && getPolyNumber(jp) > getPolyNumber(p))
#endif
{
pixel_list[j + 1] = pixel_list[j];
j--;
@ -614,56 +592,11 @@ vec4 resolveAlphaBlend(ivec2 coords) {
vec4 finalColor = subpassLoad(tex);
vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer
float depth = 0.0;
bool do_depth_test = false;
for (int i = 0; i < num_frag; i++)
{
const Pixel pixel = PixelBuffer.pixels[pixel_list[i]];
const PolyParam pp = TrPolyParam.tr_poly_params[getPolyNumber(pixel)];
#if DEPTH_SORTED != 1
const float frag_depth = pixel.depth;
if (do_depth_test)
{
switch (getDepthFunc(pp))
{
case 0: // Never
continue;
case 1: // Less
if (frag_depth >= depth)
continue;
break;
case 2: // Equal
if (frag_depth != depth)
continue;
break;
case 3: // Less or equal
if (frag_depth > depth)
continue;
break;
case 4: // Greater
if (frag_depth <= depth)
continue;
break;
case 5: // Not equal
if (frag_depth == depth)
continue;
break;
case 6: // Greater or equal
if (frag_depth < depth)
continue;
break;
case 7: // Always
break;
}
}
if (getDepthMask(pp))
{
depth = frag_depth;
do_depth_test = true;
}
#endif
bool area1 = false;
bool shadowed = false;
if (isShadowed(pixel))
@ -848,17 +781,15 @@ vk::UniqueShaderModule OITShaderManager::compileShader(const FragmentShaderParam
strcpy(buf, OITShaderHeader);
sprintf(buf + strlen(buf), OITFragmentShaderSource, (int)params.alphaTest, (int)params.insideClipTest, (int)params.useAlpha,
(int)params.texture, (int)params.ignoreTexAlpha, params.shaderInstr, (int)params.offset, params.fog,
(int)params.twoVolume, params.depthFunc, (int)params.gouraud, (int)params.bumpmap, (int)params.clamping, params.pass);
(int)params.twoVolume, (int)params.gouraud, (int)params.bumpmap, (int)params.clamping, (int)params.pass);
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, buf);
}
vk::UniqueShaderModule OITShaderManager::compileFinalShader(bool autosort)
vk::UniqueShaderModule OITShaderManager::compileFinalShader()
{
char buf[(sizeof(OITShaderHeader) + sizeof(OITFinalShaderSource)) * 2];
strcpy(buf, OITShaderHeader);
sprintf(buf + strlen(buf), OITFinalShaderSource, (int)autosort);
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, buf);
std::string source = OITShaderHeader;
source += OITFinalShaderSource;
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, source);
}
vk::UniqueShaderModule OITShaderManager::compileFinalVertexShader()
{

View File

@ -22,6 +22,8 @@
#include "../vulkan.h"
#include "../utils.h"
// Identifies which rendering pass a pipeline / fragment shader is built for.
// The numeric value of each enumerator is cast to int and substituted into the
// OIT fragment shader source as `#define PASS %d`, where it is compared against
// the PASS_DEPTH (0), PASS_COLOR (1) and PASS_OIT (2) macros. It is also packed
// into the shader parameter hash. The values are therefore spelled out
// explicitly and must stay in sync with those shader macros.
enum class Pass
{
	Depth = 0,	// matches PASS_DEPTH in the fragment shader source
	Color = 1,	// matches PASS_COLOR in the fragment shader source
	OIT = 2		// matches PASS_OIT in the fragment shader source
};
class OITShaderManager
{
public:
@ -47,8 +49,7 @@ public:
bool bumpmap;
bool clamping;
bool twoVolume;
int depthFunc;
int pass;
Pass pass;
u32 hash()
{
@ -56,7 +57,7 @@ public:
| ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5)
| ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10)
| ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)twoVolume << 13)
| (depthFunc << 14) | (pass << 17);
| ((int)pass << 14);
}
};
@ -81,20 +82,11 @@ public:
return *trModVolShaders[(size_t)mode];
}
vk::ShaderModule GetFinalShader(bool autosort)
vk::ShaderModule GetFinalShader()
{
if (autosort)
{
if (!finalAutosortShader)
finalAutosortShader = compileFinalShader(true);
return *finalAutosortShader;
}
else
{
if (!finalSortedShader)
finalSortedShader = compileFinalShader(false);
return *finalSortedShader;
}
if (!finalAutosortShader)
finalAutosortShader = compileFinalShader();
return *finalAutosortShader;
}
vk::ShaderModule GetFinalVertexShader()
{
@ -124,7 +116,7 @@ private:
vk::UniqueShaderModule compileModVolVertexShader();
vk::UniqueShaderModule compileModVolFragmentShader();
void compileTrModVolFragmentShader(ModVolMode mode);
vk::UniqueShaderModule compileFinalShader(bool autosort);
vk::UniqueShaderModule compileFinalShader();
vk::UniqueShaderModule compileFinalVertexShader();
vk::UniqueShaderModule compileClearShader();