diff --git a/core/rend/gl4/abuffer.cpp b/core/rend/gl4/abuffer.cpp index 3addc0e64..21783c004 100644 --- a/core/rend/gl4/abuffer.cpp +++ b/core/rend/gl4/abuffer.cpp @@ -64,9 +64,7 @@ vec4 resolveAlphaBlend(ivec2 coords) { vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0)); vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer - float depth = 0.0; - bool do_depth_test = false; for (int i = 0; i < num_frag; i++) { const Pixel pixel = pixels[pixel_list[i]]; diff --git a/core/rend/vulkan/oit/oit_drawer.cpp b/core/rend/vulkan/oit/oit_drawer.cpp index 86711991b..013397dfc 100644 --- a/core/rend/vulkan/oit/oit_drawer.cpp +++ b/core/rend/vulkan/oit/oit_drawer.cpp @@ -22,7 +22,7 @@ #include "oit_drawer.h" #include "hw/pvr/pvr_mem.h" -void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool autosort, int pass, +void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool autosort, Pass pass, const PolyParam& poly, u32 first, u32 count) { vk::Rect2D scissorRect; @@ -82,7 +82,7 @@ void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool cmdBuffer.drawIndexed(count, 1, first, 0, 0); } -void OITDrawer::DrawList(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, int pass, +void OITDrawer::DrawList(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, Pass pass, const List& polys, u32 first, u32 last) { for (u32 i = first; i < last; i++) @@ -323,8 +323,8 @@ bool OITDrawer::Draw(const Texture *fogTexture) vk::SubpassContents::eInline); // Depth + stencil subpass - DrawList(cmdBuffer, ListType_Opaque, false, 0, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count); - DrawList(cmdBuffer, ListType_Punch_Through, false, 0, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count); + DrawList(cmdBuffer, ListType_Opaque, false, Pass::Depth, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count); + DrawList(cmdBuffer, ListType_Punch_Through, false, Pass::Depth, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count); DrawModifierVolumes(cmdBuffer, previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); @@ -332,12 +332,17 @@ bool OITDrawer::Draw(const Texture *fogTexture) cmdBuffer.nextSubpass(vk::SubpassContents::eInline); // OP + PT - DrawList(cmdBuffer, ListType_Opaque, false, 1, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count); - DrawList(cmdBuffer, ListType_Punch_Through, false, 1, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count); + DrawList(cmdBuffer, ListType_Opaque, false, Pass::Color, pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count); + DrawList(cmdBuffer, ListType_Punch_Through, false, Pass::Color, pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count); // TR - if (!oitBuffers->isFirstFrameAfterInit()) - DrawList(cmdBuffer, ListType_Translucent, current_pass.autosort, 3, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + if (current_pass.autosort) + { + if (!oitBuffers->isFirstFrameAfterInit()) + DrawList(cmdBuffer, ListType_Translucent, true, Pass::OIT, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + } + else + DrawList(cmdBuffer, ListType_Translucent, false, Pass::Color, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); // Final subpass cmdBuffer.nextSubpass(vk::SubpassContents::eInline); @@ -350,7 +355,7 @@ bool OITDrawer::Draw(const Texture *fogTexture) if (GetContext()->GetVendorID() != VENDOR_QUALCOMM) // Adreno bug DrawModifierVolumes(cmdBuffer, previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count); - vk::Pipeline pipeline = pipelineManager->GetFinalPipeline(current_pass.autosort); + vk::Pipeline pipeline = pipelineManager->GetFinalPipeline(); cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); quadBuffer->Bind(cmdBuffer); quadBuffer->Draw(cmdBuffer); @@ -371,7 +376,7 @@ bool OITDrawer::Draw(const Texture *fogTexture) cmdBuffer.bindIndexBuffer(mainBuffer, offsets.indexOffset, vk::IndexType::eUint32); // Tr depth-only pass - DrawList(cmdBuffer, ListType_Translucent, current_pass.autosort, 0, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); + DrawList(cmdBuffer, ListType_Translucent, current_pass.autosort, Pass::Depth, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); cmdBuffer.endRenderPass(); } diff --git a/core/rend/vulkan/oit/oit_drawer.h b/core/rend/vulkan/oit/oit_drawer.h index 411477827..6b0a61c83 100644 --- a/core/rend/vulkan/oit/oit_drawer.h +++ b/core/rend/vulkan/oit/oit_drawer.h @@ -112,9 +112,9 @@ protected: vk::CommandBuffer currentCommandBuffer; private: - void DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, int pass, + void DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, Pass pass, const PolyParam& poly, u32 first, u32 count); - void DrawList(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, int pass, + void DrawList(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sortTriangles, Pass pass, const List& polys, u32 first, u32 count); template void DrawModifierVolumes(const vk::CommandBuffer& cmdBuffer, int first, int count); diff --git a/core/rend/vulkan/oit/oit_pipeline.cpp b/core/rend/vulkan/oit/oit_pipeline.cpp index 5c7791aad..7195d7901 100644 --- a/core/rend/vulkan/oit/oit_pipeline.cpp +++ b/core/rend/vulkan/oit/oit_pipeline.cpp @@ -21,7 +21,7 @@ #include "oit_pipeline.h" #include "../quad.h" -void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyParam& pp, int pass) +void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyParam& pp, Pass pass) { vk::PipelineVertexInputStateCreateInfo pipelineVertexInputStateCreateInfo = GetMainVertexInputStateCreateInfo(); @@ -53,7 +53,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP // Depth and stencil vk::CompareOp depthOp; - if (pass == 1 && !pp.isp.ZWriteDis) + if (pass == Pass::Color && !pp.isp.ZWriteDis && listType != ListType_Translucent) depthOp = vk::CompareOp::eEqual; else if (listType == ListType_Punch_Through || autosort) depthOp = vk::CompareOp::eGreaterOrEqual; @@ -61,7 +61,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP depthOp = depthOps[pp.isp.DepthMode]; bool depthWriteEnable; // FIXME temporary Intel driver bug workaround - if (pass != 0 && (GetContext()->GetVendorID() != VENDOR_INTEL || pass != 1)) + if (pass != Pass::Depth && !((!autosort || GetContext()->GetVendorID() == VENDOR_INTEL) && pass == Pass::Color)) depthWriteEnable = false; // Z Write Disable seems to be ignored for punch-through. // Fixes Worms World Party, Bust-a-Move 4 and Re-Volt @@ -70,7 +70,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP else depthWriteEnable = !pp.isp.ZWriteDis; - bool shadowed = pass == 0 && (listType == ListType_Opaque || listType == ListType_Punch_Through); + bool shadowed = pass == Pass::Depth && (listType == ListType_Opaque || listType == ListType_Punch_Through); vk::StencilOpState stencilOpState; if (shadowed) { @@ -96,7 +96,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP // Color flags and blending vk::PipelineColorBlendAttachmentState pipelineColorBlendAttachmentState; // Apparently punch-through polys support blending, or at least some combinations - if ((listType == ListType_Punch_Through || pass > 0) && pass != 3) + if (listType == ListType_Punch_Through || pass == Pass::Color) { vk::ColorComponentFlags colorComponentFlags(vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA); u32 src = pp.tsp.SrcInstr; @@ -156,7 +156,6 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP //params.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through; params.useAlpha = pp.tsp.UseAlpha; params.pass = pass; - params.depthFunc = autosort ? 6 : pp.isp.DepthMode; params.twoVolume = pp.tsp1.full != -1 || pp.tcw1.full != -1; vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params); @@ -180,14 +179,14 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP &pipelineDynamicStateCreateInfo, // pDynamicState *pipelineLayout, // layout renderPasses->GetRenderPass(true, true), // renderPass - pass == 0 ? (listType == ListType_Translucent ? 2 : 0) : 1 // subpass + pass == Pass::Depth ? (listType == ListType_Translucent ? 2 : 0) : 1 // subpass ); pipelines[hash(listType, autosort, &pp, pass)] = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo); } -void OITPipelineManager::CreateFinalPipeline(bool autosort) +void OITPipelineManager::CreateFinalPipeline() { vk::PipelineVertexInputStateCreateInfo pipelineVertexInputStateCreateInfo = GetQuadInputStateCreateInfo(false); @@ -245,7 +244,7 @@ void OITPipelineManager::CreateFinalPipeline(bool autosort) vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); vk::ShaderModule vertex_module = shaderManager->GetFinalVertexShader(); - vk::ShaderModule fragment_module = shaderManager->GetFinalShader(autosort); + vk::ShaderModule fragment_module = shaderManager->GetFinalShader(); vk::PipelineShaderStageCreateInfo stages[] = { { vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eVertex, vertex_module, "main" }, @@ -270,10 +269,7 @@ void OITPipelineManager::CreateFinalPipeline(bool autosort) 2 // subpass ); - if (autosort) - finalAutosortPipeline = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo); - else - finalNosortPipeline = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo); + finalPipeline = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo); } diff --git a/core/rend/vulkan/oit/oit_pipeline.h b/core/rend/vulkan/oit/oit_pipeline.h index 464467f51..289e98703 100644 --- a/core/rend/vulkan/oit/oit_pipeline.h +++ b/core/rend/vulkan/oit/oit_pipeline.h @@ -250,7 +250,7 @@ public: modVolPipelines.clear(); } - vk::Pipeline GetPipeline(u32 listType, bool autosort, const PolyParam& pp, int pass) + vk::Pipeline GetPipeline(u32 listType, bool autosort, const PolyParam& pp, Pass pass) { u32 pipehash = hash(listType, autosort, &pp, pass); const auto &pipeline = pipelines.find(pipehash); @@ -282,12 +282,11 @@ public: return *trModVolPipelines[pipehash]; } - vk::Pipeline GetFinalPipeline(bool autosort) + vk::Pipeline GetFinalPipeline() { - vk::UniquePipeline& pipeline = autosort ? finalAutosortPipeline : finalNosortPipeline; - if (!pipeline) - CreateFinalPipeline(autosort); - return *pipeline; + if (!finalPipeline) + CreateFinalPipeline(); + return *finalPipeline; } vk::Pipeline GetClearPipeline() { @@ -306,7 +305,7 @@ private: void CreateModVolPipeline(ModVolMode mode, int cullMode); void CreateTrModVolPipeline(ModVolMode mode, int cullMode); - u32 hash(u32 listType, bool autosort, const PolyParam *pp, int pass) const + u32 hash(u32 listType, bool autosort, const PolyParam *pp, Pass pass) const { u32 hash = pp->pcw.Gouraud | (pp->pcw.Offset << 1) | (pp->pcw.Texture << 2) | (pp->pcw.Shadow << 3) | (((pp->tileclip >> 28) == 3) << 4); @@ -361,15 +360,14 @@ private: full ? vertexInputAttributeDescriptions : vertexInputLightAttributeDescriptions); } - void CreatePipeline(u32 listType, bool autosort, const PolyParam& pp, int pass); - void CreateFinalPipeline(bool autosort); + void CreatePipeline(u32 listType, bool autosort, const PolyParam& pp, Pass pass); + void CreateFinalPipeline(); void CreateClearPipeline(); std::map pipelines; std::map modVolPipelines; std::map trModVolPipelines; - vk::UniquePipeline finalAutosortPipeline; - vk::UniquePipeline finalNosortPipeline; + vk::UniquePipeline finalPipeline; vk::UniquePipeline clearPipeline; vk::UniquePipelineLayout pipelineLayout; diff --git a/core/rend/vulkan/oit/oit_shaders.cpp b/core/rend/vulkan/oit/oit_shaders.cpp index 9c54b65ee..d2357d541 100644 --- a/core/rend/vulkan/oit/oit_shaders.cpp +++ b/core/rend/vulkan/oit/oit_shaders.cpp @@ -237,14 +237,17 @@ static const char OITFragmentShaderSource[] = R"( #define pp_Offset %d #define pp_FogCtrl %d #define pp_TwoVolumes %d -#define pp_DepthFunc %d #define pp_Gouraud %d #define pp_BumpMap %d #define ColorClamping %d #define PASS %d #define PI 3.1415926 -#if PASS <= 1 +#define PASS_DEPTH 0 +#define PASS_COLOR 1 +#define PASS_OIT 2 + +#if PASS == PASS_DEPTH || PASS == PASS_COLOR layout (location = 0) out vec4 FragColor; #define gl_FragColor FragColor #endif @@ -287,10 +290,10 @@ layout (set = 1, binding = 1) uniform sampler2D tex1; #endif #endif -#if PASS == 1 +#if PASS == PASS_COLOR layout (input_attachment_index = 0, set = 0, binding = 4) uniform usubpassInput shadow_stencil; #endif -#if PASS == 3 +#if PASS == PASS_OIT layout (input_attachment_index = 0, set = 0, binding = 5) uniform subpassInput DepthTex; #endif @@ -330,30 +333,11 @@ void main() { setFragDepth(); - #if PASS == 3 + #if PASS == PASS_OIT // Manual depth testing highp float frontDepth = subpassLoad(DepthTex).r; - #if pp_DepthFunc == 0 // Never + if (gl_FragDepth < frontDepth) discard; - #elif pp_DepthFunc == 1 // Less - if (gl_FragDepth >= frontDepth) - discard; - #elif pp_DepthFunc == 2 // Equal - if (gl_FragDepth != frontDepth) - discard; - #elif pp_DepthFunc == 3 // Less or equal - if (gl_FragDepth > frontDepth) - discard; - #elif pp_DepthFunc == 4 // Greater - if (gl_FragDepth <= frontDepth) - discard; - #elif pp_DepthFunc == 5 // Not equal - if (gl_FragDepth == frontDepth) - discard; - #elif pp_DepthFunc == 6 // Greater or equal - if (gl_FragDepth < frontDepth) - discard; - #endif #endif // Clip inside the box @@ -374,7 +358,7 @@ void main() bool cur_ignore_tex_alpha = pushConstants.ignore_tex_alpha0 != 0; int cur_shading_instr = pushConstants.shading_instr0; int cur_fog_control = pushConstants.fog_control0; - #if PASS == 1 + #if PASS == PASS_COLOR uvec4 stencil = subpassLoad(shadow_stencil); if (stencil.r == 0x81u) { color = vtx_base1; @@ -456,7 +440,7 @@ void main() #endif } #endif - #if PASS == 1 && pp_TwoVolumes == 0 + #if PASS == PASS_COLOR && pp_TwoVolumes == 0 uvec4 stencil = subpassLoad(shadow_stencil); if (stencil.r == 0x81u) color.rgb *= uniformBuffer.shade_scale_factor; @@ -485,9 +469,9 @@ void main() //color.rgb=vec3(gl_FragCoord.w * uniformBuffer.sp_FOG_DENSITY / 128.0); - #if PASS == 1 + #if PASS == PASS_COLOR FragColor = color; - #elif PASS > 1 + #elif PASS == PASS_OIT // Discard as many pixels as possible switch (cur_blend_mode.y) // DST { @@ -569,8 +553,6 @@ void main() static const char OITFinalShaderSource[] = "#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT R"( -#define DEPTH_SORTED %d - layout (input_attachment_index = 0, set = 2, binding = 0) uniform subpassInput tex; layout (location = 0) out vec4 FragColor; @@ -588,13 +570,9 @@ int fillAndSortFragmentArray(ivec2 coords) const Pixel p = PixelBuffer.pixels[idx]; int j = count - 1; Pixel jp = PixelBuffer.pixels[pixel_list[j]]; -#if DEPTH_SORTED == 1 while (j >= 0 && (jp.depth > p.depth || (jp.depth == p.depth && getPolyNumber(jp) > getPolyNumber(p)))) -#else - while (j >= 0 && getPolyNumber(jp) > getPolyNumber(p)) -#endif { pixel_list[j + 1] = pixel_list[j]; j--; @@ -614,56 +592,11 @@ vec4 resolveAlphaBlend(ivec2 coords) { vec4 finalColor = subpassLoad(tex); vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer - float depth = 0.0; - bool do_depth_test = false; for (int i = 0; i < num_frag; i++) { const Pixel pixel = PixelBuffer.pixels[pixel_list[i]]; const PolyParam pp = TrPolyParam.tr_poly_params[getPolyNumber(pixel)]; -#if DEPTH_SORTED != 1 - const float frag_depth = pixel.depth; - if (do_depth_test) - { - switch (getDepthFunc(pp)) - { - case 0: // Never - continue; - case 1: // Less - if (frag_depth >= depth) - continue; - break; - case 2: // Equal - if (frag_depth != depth) - continue; - break; - case 3: // Less or equal - if (frag_depth > depth) - continue; - break; - case 4: // Greater - if (frag_depth <= depth) - continue; - break; - case 5: // Not equal - if (frag_depth == depth) - continue; - break; - case 6: // Greater or equal - if (frag_depth < depth) - continue; - break; - case 7: // Always - break; - } - } - - if (getDepthMask(pp)) - { - depth = frag_depth; - do_depth_test = true; - } -#endif bool area1 = false; bool shadowed = false; if (isShadowed(pixel)) @@ -848,17 +781,15 @@ vk::UniqueShaderModule OITShaderManager::compileShader(const FragmentShaderParam strcpy(buf, OITShaderHeader); sprintf(buf + strlen(buf), OITFragmentShaderSource, (int)params.alphaTest, (int)params.insideClipTest, (int)params.useAlpha, (int)params.texture, (int)params.ignoreTexAlpha, params.shaderInstr, (int)params.offset, params.fog, - (int)params.twoVolume, params.depthFunc, (int)params.gouraud, (int)params.bumpmap, (int)params.clamping, params.pass); + (int)params.twoVolume, (int)params.gouraud, (int)params.bumpmap, (int)params.clamping, (int)params.pass); return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, buf); } -vk::UniqueShaderModule OITShaderManager::compileFinalShader(bool autosort) +vk::UniqueShaderModule OITShaderManager::compileFinalShader() { - char buf[(sizeof(OITShaderHeader) + sizeof(OITFinalShaderSource)) * 2]; - - strcpy(buf, OITShaderHeader); - sprintf(buf + strlen(buf), OITFinalShaderSource, (int)autosort); - return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, buf); + std::string source = OITShaderHeader; + source += OITFinalShaderSource; + return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, source); } vk::UniqueShaderModule OITShaderManager::compileFinalVertexShader() { diff --git a/core/rend/vulkan/oit/oit_shaders.h b/core/rend/vulkan/oit/oit_shaders.h index 1d560398f..3d7e9dca6 100644 --- a/core/rend/vulkan/oit/oit_shaders.h +++ b/core/rend/vulkan/oit/oit_shaders.h @@ -22,6 +22,8 @@ #include "../vulkan.h" #include "../utils.h" +enum class Pass { Depth, Color, OIT }; + class OITShaderManager { public: @@ -47,8 +49,7 @@ public: bool bumpmap; bool clamping; bool twoVolume; - int depthFunc; - int pass; + Pass pass; u32 hash() { @@ -56,7 +57,7 @@ public: | ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5) | ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10) | ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)twoVolume << 13) - | (depthFunc << 14) | (pass << 17); + | ((int)pass << 14); } }; @@ -81,20 +82,11 @@ public: return *trModVolShaders[(size_t)mode]; } - vk::ShaderModule GetFinalShader(bool autosort) + vk::ShaderModule GetFinalShader() { - if (autosort) - { - if (!finalAutosortShader) - finalAutosortShader = compileFinalShader(true); - return *finalAutosortShader; - } - else - { - if (!finalSortedShader) - finalSortedShader = compileFinalShader(false); - return *finalSortedShader; - } + if (!finalAutosortShader) + finalAutosortShader = compileFinalShader(); + return *finalAutosortShader; } vk::ShaderModule GetFinalVertexShader() { @@ -124,7 +116,7 @@ private: vk::UniqueShaderModule compileModVolVertexShader(); vk::UniqueShaderModule compileModVolFragmentShader(); void compileTrModVolFragmentShader(ModVolMode mode); - vk::UniqueShaderModule compileFinalShader(bool autosort); + vk::UniqueShaderModule compileFinalShader(); vk::UniqueShaderModule compileFinalVertexShader(); vk::UniqueShaderModule compileClearShader();