From 1015a8c09849cd07582e539abf877ad438b3beeb Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 12 Mar 2022 17:56:46 +0100 Subject: [PATCH] naomi2: vulkan support oit: use #define instead of functions in GLSL --- core/hw/pvr/ta_vtx.cpp | 8 +- core/rend/gl4/glsl.h | 85 ++---- core/rend/vulkan/buffer.cpp | 6 + core/rend/vulkan/buffer.h | 48 ++++ core/rend/vulkan/desc_set.h | 121 +++++++++ core/rend/vulkan/drawer.cpp | 126 ++++----- core/rend/vulkan/drawer.h | 141 ++++++++-- core/rend/vulkan/oit/oit_buffer.h | 39 +-- core/rend/vulkan/oit/oit_drawer.cpp | 176 ++++++------ core/rend/vulkan/oit/oit_drawer.h | 31 +-- core/rend/vulkan/oit/oit_pipeline.cpp | 18 +- core/rend/vulkan/oit/oit_pipeline.h | 230 +++++++++------- core/rend/vulkan/oit/oit_shaders.cpp | 176 ++++++++++-- core/rend/vulkan/oit/oit_shaders.h | 28 +- core/rend/vulkan/pipeline.cpp | 14 +- core/rend/vulkan/pipeline.h | 164 +++++++----- core/rend/vulkan/shaders.cpp | 370 +++++++++++++++++++++++++- core/rend/vulkan/shaders.h | 71 ++++- core/rend/vulkan/utils.h | 6 + core/rend/vulkan/vulkan_context.cpp | 6 +- core/rend/vulkan/vulkan_renderer.h | 2 + 21 files changed, 1337 insertions(+), 529 deletions(-) create mode 100644 core/rend/vulkan/desc_set.h diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 31230a0f4..2d4473e66 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -1563,8 +1563,8 @@ bool ta_parse_vdrc(TA_context* ctx) const bool mergeTranslucent = !config::PerStripSorting || config::RendererType == RenderType::OpenGL_OIT - || config::RendererType == RenderType::DirectX11_OIT; - // TODO || config::RendererType == RenderType::Vulkan_OIT + || config::RendererType == RenderType::DirectX11_OIT + || config::RendererType == RenderType::Vulkan_OIT; TA_context *childCtx = ctx; while (childCtx != nullptr) { @@ -1684,8 +1684,8 @@ bool ta_parse_naomi2(TA_context* ctx) int tr_count = 0; const bool mergeTranslucent = !config::PerStripSorting || config::RendererType == RenderType::OpenGL_OIT - || config::RendererType == RenderType::DirectX11_OIT; - // TODO || config::RendererType == RenderType::Vulkan_OIT + || config::RendererType == RenderType::DirectX11_OIT + || config::RendererType == RenderType::Vulkan_OIT; for (const RenderPass& pass : ctx->rend.render_passes) { make_index(&ctx->rend.global_param_op, op_count, pass.op_count, true, &ctx->rend); diff --git a/core/rend/gl4/glsl.h b/core/rend/gl4/glsl.h index 848444ce2..f85f2083c 100644 --- a/core/rend/gl4/glsl.h +++ b/core/rend/gl4/glsl.h @@ -49,93 +49,40 @@ struct PolyParam { \n\ int tsp1; \n\ }; \n\ \n\ -#define GET_TSP_FOR_AREA int tsp = area1 ? pp.tsp1 : pp.tsp_isp_pcw; \n\ +#define GET_TSP_FOR_AREA(pp, area1) ((area1) ? (pp).tsp1 : (pp).tsp_isp_pcw) \n\ \n\ -int getSrcBlendFunc(const PolyParam pp, bool area1) \n\ -{ \n\ - GET_TSP_FOR_AREA \n\ - return (tsp >> 29) & 7; \n\ -} \n\ +#define getSrcBlendFunc(pp, area1) ((GET_TSP_FOR_AREA(pp, area1) >> 29) & 7) \n\ \n\ -int getDstBlendFunc(const PolyParam pp, bool area1) \n\ -{ \n\ - GET_TSP_FOR_AREA \n\ - return (tsp >> 26) & 7; \n\ -} \n\ +#define getDstBlendFunc(pp, area1) ((GET_TSP_FOR_AREA(pp, area1) >> 26) & 7) \n\ \n\ -bool getSrcSelect(const PolyParam pp, bool area1) \n\ -{ \n\ - GET_TSP_FOR_AREA \n\ - return ((tsp >> 25) & 1) != 0; \n\ -} \n\ +#define getSrcSelect(pp, area1) (((GET_TSP_FOR_AREA(pp, area1) >> 25) & 1) != 0) \n\ \n\ -bool getDstSelect(const PolyParam pp, bool area1) \n\ -{ \n\ - GET_TSP_FOR_AREA \n\ - return ((tsp >> 24) & 1) != 0; \n\ -} \n\ +#define getDstSelect(pp, area1) (((GET_TSP_FOR_AREA(pp, area1) >> 24) & 1) != 0) \n\ \n\ -int getFogControl(const PolyParam pp, bool area1) \n\ -{ \n\ - GET_TSP_FOR_AREA \n\ - return (tsp >> 22) & 3; \n\ -} \n\ +#define getFogControl(pp, area1) ((GET_TSP_FOR_AREA(pp, area1) >> 22) & 3) \n\ \n\ -bool getUseAlpha(const PolyParam pp, bool area1) \n\ -{ \n\ - GET_TSP_FOR_AREA \n\ - return ((tsp >> 20) & 1) != 0; \n\ -} \n\ +#define getUseAlpha(pp, area1) (((GET_TSP_FOR_AREA(pp, area1) >> 20) & 1) != 0) \n\ \n\ -bool getIgnoreTexAlpha(const PolyParam pp, bool area1) \n\ -{ \n\ - GET_TSP_FOR_AREA \n\ - return ((tsp >> 19) & 1) != 0; \n\ -} \n\ +#define getIgnoreTexAlpha(pp, area1) (((GET_TSP_FOR_AREA(pp, area1) >> 19) & 1) != 0) \n\ \n\ -int getShadingInstruction(const PolyParam pp, bool area1) \n\ -{ \n\ - GET_TSP_FOR_AREA \n\ - return (tsp >> 6) & 3; \n\ -} \n\ +#define getShadingInstruction(pp, area1) ((GET_TSP_FOR_AREA(pp, area1) >> 6) & 3) \n\ \n\ -int getDepthFunc(const PolyParam pp) \n\ -{ \n\ - return (pp.tsp_isp_pcw >> 13) & 7; \n\ -} \n\ +#define getDepthFunc(pp) (((pp).tsp_isp_pcw >> 13) & 7) \n\ \n\ -bool getDepthMask(const PolyParam pp) \n\ -{ \n\ - return ((pp.tsp_isp_pcw >> 10) & 1) != 1; \n\ -} \n\ +#define getDepthMask(pp) ((((pp).tsp_isp_pcw >> 10) & 1) != 1) \n\ \n\ -bool getShadowEnable(const PolyParam pp) \n\ -{ \n\ - return (pp.tsp_isp_pcw & 1) != 0; \n\ -} \n\ +#define getShadowEnable(pp) (((pp).tsp_isp_pcw & 1) != 0) \n\ \n\ -uint getPolyNumber(const Pixel pixel) \n\ -{ \n\ - return (pixel.seq_num & 0x3FFFFFFFu) >> 18; \n\ -} \n\ +#define getPolyNumber(pixel) (((pixel).seq_num & 0x3FFFFFFFu) >> 18) \n\ \n\ -uint getPolyIndex(const Pixel pixel) \n\ -{ \n\ - return pixel.seq_num & 0x3FFFFFFFu; \n\ -} \n\ +#define getPolyIndex(pixel) ((pixel).seq_num & 0x3FFFFFFFu) \n\ \n\ #define SHADOW_STENCIL 0x40000000u \n\ #define SHADOW_ACC 0x80000000u \n\ \n\ -bool isShadowed(const Pixel pixel) \n\ -{ \n\ - return (pixel.seq_num & SHADOW_ACC) == SHADOW_ACC; \n\ -} \n\ +#define isShadowed(pixel) (((pixel).seq_num & SHADOW_ACC) == SHADOW_ACC) \n\ \n\ -bool isTwoVolumes(const PolyParam pp) \n\ -{ \n\ - return pp.tsp1 != -1; \n\ -} \n\ +#define isTwoVolumes(pp) ((pp).tsp1 != -1) \n\ \n\ uint packColors(vec4 v) \n\ { \n\ diff --git a/core/rend/vulkan/buffer.cpp b/core/rend/vulkan/buffer.cpp index 48d07074f..af052fbe0 100644 --- a/core/rend/vulkan/buffer.cpp +++ b/core/rend/vulkan/buffer.cpp @@ -38,3 +38,9 @@ BufferData::BufferData(vk::DeviceSize size, const vk::BufferUsageFlags& usage, c #endif allocation = context->GetAllocator().AllocateForBuffer(*buffer, allocInfo); } + +BufferPacker::BufferPacker() +{ + uniformAlignment = VulkanContext::Instance()->GetUniformBufferAlignment(); + storageAlignment = VulkanContext::Instance()->GetStorageBufferAlignment(); +} diff --git a/core/rend/vulkan/buffer.h b/core/rend/vulkan/buffer.h index 7897d17d7..aac074391 100644 --- a/core/rend/vulkan/buffer.h +++ b/core/rend/vulkan/buffer.h @@ -21,6 +21,7 @@ #pragma once #include "vulkan.h" #include "vmallocator.h" +#include "utils.h" struct BufferData { @@ -92,3 +93,50 @@ private: vk::BufferUsageFlags m_usage; vk::MemoryPropertyFlags m_propertyFlags; }; + +class BufferPacker +{ +public: + BufferPacker(); + + vk::DeviceSize addUniform(const void *p, size_t size) { + return add(p, size, uniformAlignment); + } + + vk::DeviceSize addStorage(const void *p, size_t size) { + return add(p, size, storageAlignment); + } + + vk::DeviceSize add(const void *p, size_t size, u32 alignment = 4) + { + u32 padding = align(offset, std::max(4u, alignment)); + if (padding != 0) + { + chunks.push_back(nullptr); + chunkSizes.push_back(padding); + offset += padding; + } + vk::DeviceSize start = offset; + chunks.push_back(p); + chunkSizes.push_back(size); + offset += size; + + return start; + } + + void upload(BufferData& bufferData, u32 bufOffset = 0) + { + bufferData.upload(chunks.size(), &chunkSizes[0], &chunks[0], bufOffset); + } + + vk::DeviceSize size() const { + return offset; + } + +private: + std::vector chunks; + std::vector chunkSizes; + vk::DeviceSize offset = 0; + vk::DeviceSize uniformAlignment; + vk::DeviceSize storageAlignment; +}; diff --git a/core/rend/vulkan/desc_set.h b/core/rend/vulkan/desc_set.h new file mode 100644 index 000000000..b71b226f9 --- /dev/null +++ b/core/rend/vulkan/desc_set.h @@ -0,0 +1,121 @@ +/* + Copyright 2022 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . +*/ +#pragma once +#include "vulkan_context.h" +#include + +template +class DescSetAlloc +{ +public: + void setLayout(vk::DescriptorSetLayout layout) { + this->layout = layout; + } + void setAllocChunk(int size) { + this->allocChunk = size; + } + + void nextFrame() + { + index = (index + 1) % Size; + for (auto& descset : descSetsInFlight[index]) + descSets.emplace_back(std::move(descset)); + descSetsInFlight[index].clear(); + } + + vk::DescriptorSet alloc() + { + if (descSets.empty()) + { + std::vector layouts(allocChunk, layout); + descSets = VulkanContext::Instance()->GetDevice().allocateDescriptorSetsUnique( + vk::DescriptorSetAllocateInfo(VulkanContext::Instance()->GetDescriptorPool(), (u32)layouts.size(), &layouts[0])); + } + descSetsInFlight[index].emplace_back(std::move(descSets.back())); + descSets.pop_back(); + return *descSetsInFlight[index].back(); + } + +private: + vk::DescriptorSetLayout layout; + std::vector descSets; + std::array, Size> descSetsInFlight; + int index = 0; + int allocChunk = 10; +}; + +class DynamicDescSetAlloc +{ +public: + DynamicDescSetAlloc() { + descSetsInFlight.resize(1); + } + + void setLayout(vk::DescriptorSetLayout layout) { + this->layout = layout; + } + void setAllocChunk(int size) { + this->allocChunk = size; + } + + void nextFrame() + { + unsigned swapChainSize = VulkanContext::Instance()->GetSwapChainSize(); + if (swapChainSize > descSetsInFlight.size()) + descSetsInFlight.resize(swapChainSize); + else + while (swapChainSize < descSetsInFlight.size()) + { + for (auto& descset : descSetsInFlight[descSetsInFlight.size() - 1]) + descSets.emplace_back(std::move(descset)); + descSetsInFlight.resize(descSetsInFlight.size() - 1); + } + + index = (index + 1) % descSetsInFlight.size(); + for (auto& descset : descSetsInFlight[index]) + descSets.emplace_back(std::move(descset)); + descSetsInFlight[index].clear(); + } + + vk::DescriptorSet alloc() + { + if (descSets.empty()) + { + std::vector layouts(allocChunk, layout); + descSets = VulkanContext::Instance()->GetDevice().allocateDescriptorSetsUnique( + vk::DescriptorSetAllocateInfo(VulkanContext::Instance()->GetDescriptorPool(), (u32)layouts.size(), &layouts[0])); + } + descSetsInFlight[index].emplace_back(std::move(descSets.back())); + descSets.pop_back(); + return *descSetsInFlight[index].back(); + } + + void term() + { + descSets.clear(); + descSetsInFlight.clear(); + } + +private: + vk::DescriptorSetLayout layout; + std::vector descSets; + std::vector> descSetsInFlight; + int index = 0; + int allocChunk = 10; +}; diff --git a/core/rend/vulkan/drawer.cpp b/core/rend/vulkan/drawer.cpp index bd4187a44..d8a694946 100644 --- a/core/rend/vulkan/drawer.cpp +++ b/core/rend/vulkan/drawer.cpp @@ -143,14 +143,32 @@ void Drawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sor cmdBuffer.pushConstants(pipelineManager->GetPipelineLayout(), vk::ShaderStageFlagBits::eFragment, 0, pushConstants); } - if (poly.pcw.Texture) - GetCurrentDescSet().SetTexture((Texture *)poly.texture, poly.tsp); - vk::Pipeline pipeline = pipelineManager->GetPipeline(listType, sortTriangles, poly, gpuPalette); cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); - if (poly.pcw.Texture) - GetCurrentDescSet().BindPerPolyDescriptorSets(cmdBuffer, (Texture *)poly.texture, poly.tsp); - + if (poly.pcw.Texture || poly.isNaomi2()) + { + vk::DeviceSize offset = 0; + u32 index = 0; + if (poly.isNaomi2()) + { + switch (listType) + { + case ListType_Opaque: + offset = offsets.naomi2OpaqueOffset; + index = &poly - pvrrc.global_param_op.head(); + break; + case ListType_Punch_Through: + offset = offsets.naomi2PunchThroughOffset; + index = &poly - pvrrc.global_param_pt.head(); + break; + case ListType_Translucent: + offset = offsets.naomi2TranslucentOffset; + index = &poly - pvrrc.global_param_tr.head(); + break; + } + } + descriptorSets.bindPerPolyDescriptorSets(cmdBuffer, poly, index, *GetMainBuffer(0)->buffer, offset, offsets.lightsOffset); + } cmdBuffer.drawIndexed(count, 1, first, 0, 0); } @@ -165,7 +183,7 @@ void Drawer::DrawSorted(const vk::CommandBuffer& cmdBuffer, const std::vectorisp.ZWriteDis) continue; - vk::Pipeline pipeline = pipelineManager->GetDepthPassPipeline(param.ppid->isp.CullMode); + vk::Pipeline pipeline = pipelineManager->GetDepthPassPipeline(param.ppid->isp.CullMode, param.ppid->isNaomi2()); cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); vk::Rect2D scissorRect; TileClipping tileClip = SetTileClip(param.ppid->tileclip, scissorRect); @@ -213,16 +231,19 @@ void Drawer::DrawModVols(const vk::CommandBuffer& cmdBuffer, int first, int coun mod_base = param.first; if (!param.isp.VolumeLast && mv_mode > 0) - pipeline = pipelineManager->GetModifierVolumePipeline(ModVolMode::Or, param.isp.CullMode); // OR'ing (open volume or quad) + pipeline = pipelineManager->GetModifierVolumePipeline(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); // OR'ing (open volume or quad) else - pipeline = pipelineManager->GetModifierVolumePipeline(ModVolMode::Xor, param.isp.CullMode); // XOR'ing (closed volume) + pipeline = pipelineManager->GetModifierVolumePipeline(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); // XOR'ing (closed volume) + cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); + descriptorSets.bindPerPolyDescriptorSets(cmdBuffer, param, first + cmv, *GetMainBuffer(0)->buffer, offsets.naomi2ModVolOffset); + cmdBuffer.draw(param.count * 3, 1, param.first * 3, 0); if (mv_mode == 1 || mv_mode == 2) { // Sum the area - pipeline = pipelineManager->GetModifierVolumePipeline(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode); + pipeline = pipelineManager->GetModifierVolumePipeline(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); cmdBuffer.draw((param.first + param.count - mod_base) * 3, 1, mod_base * 3, 0); mod_base = -1; @@ -234,65 +255,38 @@ void Drawer::DrawModVols(const vk::CommandBuffer& cmdBuffer, int first, int coun std::array pushConstants = { 1 - FPU_SHAD_SCALE.scale_factor / 256.f, 0, 0, 0, 0 }; cmdBuffer.pushConstants(pipelineManager->GetPipelineLayout(), vk::ShaderStageFlagBits::eFragment, 0, pushConstants); - pipeline = pipelineManager->GetModifierVolumePipeline(ModVolMode::Final, 0); + pipeline = pipelineManager->GetModifierVolumePipeline(ModVolMode::Final, 0, false); cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); cmdBuffer.drawIndexed(4, 1, 0, 0, 0); } void Drawer::UploadMainBuffer(const VertexShaderUniforms& vertexUniforms, const FragmentShaderUniforms& fragmentUniforms) { - // TODO Put this logic in an allocator - std::vector chunks; - std::vector chunkSizes; + BufferPacker packer; // Vertex - chunks.push_back(pvrrc.verts.head()); - chunkSizes.push_back(pvrrc.verts.bytes()); - - u32 padding = align(pvrrc.verts.bytes(), 4); - offsets.modVolOffset = pvrrc.verts.bytes() + padding; - chunks.push_back(nullptr); - chunkSizes.push_back(padding); - + packer.add(pvrrc.verts.head(), pvrrc.verts.bytes()); // Modifier Volumes - chunks.push_back(pvrrc.modtrig.head()); - chunkSizes.push_back(pvrrc.modtrig.bytes()); - padding = align(offsets.modVolOffset + pvrrc.modtrig.bytes(), 4); - offsets.indexOffset = offsets.modVolOffset + pvrrc.modtrig.bytes() + padding; - chunks.push_back(nullptr); - chunkSizes.push_back(padding); - + offsets.modVolOffset = packer.add(pvrrc.modtrig.head(), pvrrc.modtrig.bytes()); // Index - chunks.push_back(pvrrc.idx.head()); - chunkSizes.push_back(pvrrc.idx.bytes()); + offsets.indexOffset = packer.add(pvrrc.idx.head(), pvrrc.idx.bytes()); for (const std::vector& idx : sortedIndexes) - { if (!idx.empty()) - { - chunks.push_back(&idx[0]); - chunkSizes.push_back(idx.size() * sizeof(u32)); - } - } + packer.add(&idx[0], idx.size() * sizeof(u32)); // Uniform buffers - u32 indexSize = pvrrc.idx.bytes() + sortedIndexCount * sizeof(u32); - padding = align(offsets.indexOffset + indexSize, std::max(4, (int)GetContext()->GetUniformBufferAlignment())); - offsets.vertexUniformOffset = offsets.indexOffset + indexSize + padding; - chunks.push_back(nullptr); - chunkSizes.push_back(padding); + offsets.vertexUniformOffset = packer.addUniform(&vertexUniforms, sizeof(vertexUniforms)); + offsets.fragmentUniformOffset = packer.addUniform(&fragmentUniforms, sizeof(fragmentUniforms)); - chunks.push_back(&vertexUniforms); - chunkSizes.push_back(sizeof(vertexUniforms)); - padding = align(offsets.vertexUniformOffset + sizeof(VertexShaderUniforms), std::max(4, (int)GetContext()->GetUniformBufferAlignment())); - offsets.fragmentUniformOffset = offsets.vertexUniformOffset + sizeof(VertexShaderUniforms) + padding; - chunks.push_back(nullptr); - chunkSizes.push_back(padding); + std::vector n2uniforms; + std::vector n2lights; + if (settings.platform.isNaomi2()) + { + uploadNaomi2Uniforms(packer, offsets, n2uniforms, false); + offsets.lightsOffset = uploadNaomi2Lights(packer, n2lights); + } - chunks.push_back(&fragmentUniforms); - chunkSizes.push_back(sizeof(fragmentUniforms)); - u32 totalSize = (u32)(offsets.fragmentUniformOffset + sizeof(FragmentShaderUniforms)); - - BufferData *buffer = GetMainBuffer(totalSize); - buffer->upload(chunks.size(), &chunkSizes[0], &chunks[0]); + BufferData *buffer = GetMainBuffer(packer.size()); + packer.upload(*buffer); } bool Drawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) @@ -311,16 +305,27 @@ bool Drawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) setFirstProvokingVertex(pvrrc); + // Do per-poly sorting + RenderPass previous_pass = {}; + if (config::PerStripSorting) + for (int render_pass = 0; render_pass < pvrrc.render_passes.used(); render_pass++) + { + const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass]; + if (current_pass.autosort) + SortPParams(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); + previous_pass = current_pass; + } + // Upload vertex and index buffers VertexShaderUniforms vtxUniforms; - vtxUniforms.normal_matrix = matrices.GetNormalMatrix(); + vtxUniforms.ndcMat = matrices.GetNormalMatrix(); UploadMainBuffer(vtxUniforms, fragUniforms); // Update per-frame descriptor set and bind it - GetCurrentDescSet().UpdateUniforms(GetMainBuffer(0)->buffer.get(), (u32)offsets.vertexUniformOffset, (u32)offsets.fragmentUniformOffset, + descriptorSets.updateUniforms(GetMainBuffer(0)->buffer.get(), (u32)offsets.vertexUniformOffset, (u32)offsets.fragmentUniformOffset, fogTexture->GetImageView(), paletteTexture->GetImageView()); - GetCurrentDescSet().BindPerFrameDescriptorSets(cmdBuffer); + descriptorSets.bindPerFrameDescriptorSets(cmdBuffer); // Bind vertex and index buffers const vk::DeviceSize zeroOffset[] = { 0 }; @@ -332,7 +337,7 @@ bool Drawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) std::array pushConstants = { 0, 0, 0, 0, 0 }; cmdBuffer.pushConstants(pipelineManager->GetPipelineLayout(), vk::ShaderStageFlagBits::eFragment, 0, pushConstants); - RenderPass previous_pass = {}; + previous_pass = {}; for (int render_pass = 0; render_pass < pvrrc.render_passes.used(); render_pass++) { const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass]; @@ -348,14 +353,9 @@ bool Drawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) if (current_pass.autosort) { if (!config::PerStripSorting) - { DrawSorted(cmdBuffer, sortedPolys[render_pass], render_pass + 1 < pvrrc.render_passes.used()); - } else - { - SortPParams(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); DrawList(cmdBuffer, ListType_Translucent, true, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); - } } else DrawList(cmdBuffer, ListType_Translucent, false, pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count); diff --git a/core/rend/vulkan/drawer.h b/core/rend/vulkan/drawer.h index 9d84ba69b..a1afb2425 100644 --- a/core/rend/vulkan/drawer.h +++ b/core/rend/vulkan/drawer.h @@ -31,6 +31,7 @@ #include #include +#include class BaseDrawer { @@ -51,11 +52,6 @@ protected: } } - u32 align(vk::DeviceSize offset, u32 alignment) - { - return (u32)(alignment - (offset & (alignment - 1))); - } - template T MakeFragmentUniforms() { @@ -76,6 +72,117 @@ protected: return fragUniforms; } + template + void uploadNaomi2Uniforms(BufferPacker& packer, Offsets& offsets, std::vector& n2uniforms, bool trModVolIncluded) + { + size_t n2UniformSize = sizeof(N2VertexShaderUniforms) + align(sizeof(N2VertexShaderUniforms), GetContext()->GetUniformBufferAlignment()); + int items = pvrrc.global_param_op.used() + pvrrc.global_param_pt.used() + pvrrc.global_param_tr.used() + pvrrc.global_param_mvo.used(); + if (trModVolIncluded) + items += pvrrc.global_param_mvo_tr.used(); + n2uniforms.resize(items * n2UniformSize); + size_t bufIdx = 0; + auto addUniform = [&](const PolyParam& pp, int polyNumber) { + if (pp.isNaomi2()) + { + N2VertexShaderUniforms& uni = *(N2VertexShaderUniforms *)&n2uniforms[bufIdx]; + memcpy(glm::value_ptr(uni.mvMat), pp.mvMatrix, sizeof(uni.mvMat)); + memcpy(glm::value_ptr(uni.normalMat), pp.normalMatrix, sizeof(uni.normalMat)); + memcpy(glm::value_ptr(uni.projMat), pp.projMatrix, sizeof(uni.projMat)); + uni.bumpMapping = pp.pcw.Texture == 1 && pp.tcw.PixelFmt == PixelBumpMap; + uni.polyNumber = polyNumber; + for (size_t i = 0; i < 2; i++) + { + uni.envMapping[i] = pp.envMapping[i]; + uni.glossCoef[i] = pp.glossCoef[i]; + uni.constantColor[i] = pp.constantColor[i]; + uni.modelDiffuse[i] = pp.diffuseColor[i]; + uni.modelSpecular[i] = pp.specularColor[i]; + } + } + bufIdx += n2UniformSize; + }; + for (const PolyParam& pp : pvrrc.global_param_op) + addUniform(pp, 0); + size_t ptOffset = bufIdx; + for (const PolyParam& pp : pvrrc.global_param_pt) + addUniform(pp, 0); + size_t trOffset = bufIdx; + for (const PolyParam& pp : pvrrc.global_param_tr) + addUniform(pp, &pp - pvrrc.global_param_tr.head()); + size_t mvOffset = bufIdx; + for (const ModifierVolumeParam& mvp : pvrrc.global_param_mvo) + { + if (mvp.isNaomi2()) + { + N2VertexShaderUniforms& uni = *(N2VertexShaderUniforms *)&n2uniforms[bufIdx]; + memcpy(glm::value_ptr(uni.mvMat), mvp.mvMatrix, sizeof(uni.mvMat)); + memcpy(glm::value_ptr(uni.projMat), mvp.projMatrix, sizeof(uni.projMat)); + } + bufIdx += n2UniformSize; + } + size_t trMvOffset = bufIdx; + if (trModVolIncluded) + for (const ModifierVolumeParam& mvp : pvrrc.global_param_mvo_tr) + { + if (mvp.isNaomi2()) + { + N2VertexShaderUniforms& uni = *(N2VertexShaderUniforms *)&n2uniforms[bufIdx]; + memcpy(glm::value_ptr(uni.mvMat), mvp.mvMatrix, sizeof(uni.mvMat)); + memcpy(glm::value_ptr(uni.projMat), mvp.projMatrix, sizeof(uni.projMat)); + } + bufIdx += n2UniformSize; + } + offsets.naomi2OpaqueOffset = packer.addUniform(n2uniforms.data(), bufIdx); + offsets.naomi2PunchThroughOffset = offsets.naomi2OpaqueOffset + ptOffset; + offsets.naomi2TranslucentOffset = offsets.naomi2OpaqueOffset + trOffset; + offsets.naomi2ModVolOffset = offsets.naomi2OpaqueOffset + mvOffset; + offsets.naomi2TrModVolOffset = offsets.naomi2OpaqueOffset + trMvOffset; + } + + vk::DeviceSize uploadNaomi2Lights(BufferPacker& packer, std::vector& n2lights) + { + size_t n2LightSize = sizeof(VkN2LightConstants) + align(sizeof(VkN2LightConstants), GetContext()->GetUniformBufferAlignment()); + n2lights.resize(pvrrc.lightModels.used() * n2LightSize); + size_t bufIdx = 0; + for (const N2LightModel& lights : pvrrc.lightModels) + { + VkN2LightConstants& vkLights = *(VkN2LightConstants *)&n2lights[bufIdx]; + vkLights.lightCount = lights.lightCount; + for (int i = 0; i < lights.lightCount; i++) + { + VkN2Light& vkLight = vkLights.lights[i]; + const N2Light& light = lights.lights[i]; + memcpy(vkLight.color, light.color, sizeof(vkLight.color)); + memcpy(vkLight.direction, light.direction, sizeof(vkLight.direction)); + memcpy(vkLight.position, light.position, sizeof(vkLight.position)); + vkLight.parallel = light.parallel; + vkLight.routing = light.routing; + vkLight.dmode = light.dmode; + vkLight.smode = light.smode; + memcpy(vkLight.diffuse, light.diffuse, sizeof(vkLight.diffuse)); + memcpy(vkLight.specular, light.specular, sizeof(vkLight.specular)); + vkLight.attnDistA = light.attnDistA; + vkLight.attnDistB = light.attnDistB; + vkLight.attnAngleA = light.attnAngleA; + vkLight.attnAngleB = light.attnAngleB; + vkLight.distAttnMode = light.distAttnMode; + } + memcpy(vkLights.ambientBase, lights.ambientBase, sizeof(vkLights.ambientBase)); + memcpy(vkLights.ambientOffset, lights.ambientOffset, sizeof(vkLights.ambientOffset)); + for (int i = 0; i < 2; i++) + { + vkLights.ambientMaterialBase[i] = lights.ambientMaterialBase[i]; + vkLights.ambientMaterialOffset[i] = lights.ambientMaterialOffset[i]; + } + vkLights.useBaseOver = lights.useBaseOver; + vkLights.bumpId1 = lights.bumpId1; + vkLights.bumpId2 = lights.bumpId2; + + bufIdx += n2LightSize; + } + return packer.addUniform(n2lights.data(), bufIdx); + } + vk::Rect2D baseScissor; vk::Rect2D currentScissor; TransformMatrix matrices; @@ -95,7 +202,7 @@ protected: virtual vk::CommandBuffer BeginRenderPass() = 0; void NewImage() { - GetCurrentDescSet().Reset(); + descriptorSets.reset(); imageIndex = (imageIndex + 1) % GetSwapChainSize(); if (perStripSorting != config::PerStripSorting) { @@ -110,18 +217,10 @@ protected: this->pipelineManager = pipelineManager; this->samplerManager = samplerManager; - size_t size = GetSwapChainSize(); - if (descriptorSets.size() > size) - descriptorSets.resize(size); - else - while (descriptorSets.size() < size) - { - descriptorSets.emplace_back(); - descriptorSets.back().Init(samplerManager, pipelineManager->GetPipelineLayout(), pipelineManager->GetPerFrameDSLayout(), pipelineManager->GetPerPolyDSLayout()); - } + descriptorSets.init(samplerManager, pipelineManager->GetPipelineLayout(), pipelineManager->GetPerFrameDSLayout(), pipelineManager->GetPerPolyDSLayout()); } + int GetCurrentImage() const { return imageIndex; } - DescriptorSets& GetCurrentDescSet() { return descriptorSets[GetCurrentImage()]; } BufferData* GetMainBuffer(u32 size) { @@ -141,7 +240,7 @@ protected: vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eUniformBuffer)); } return mainBuffers[bufferIndex].get(); - }; + } vk::CommandBuffer currentCommandBuffer; SamplerManager *samplerManager = nullptr; @@ -161,8 +260,14 @@ private: vk::DeviceSize modVolOffset = 0; vk::DeviceSize vertexUniformOffset = 0; vk::DeviceSize fragmentUniformOffset = 0; + vk::DeviceSize naomi2OpaqueOffset = 0; + vk::DeviceSize naomi2PunchThroughOffset = 0; + vk::DeviceSize naomi2TranslucentOffset = 0; + vk::DeviceSize naomi2ModVolOffset = 0; + vk::DeviceSize naomi2TrModVolOffset = 0; + vk::DeviceSize lightsOffset = 0; } offsets; - std::vector descriptorSets; + DescriptorSets descriptorSets; std::vector> mainBuffers; PipelineManager *pipelineManager = nullptr; diff --git a/core/rend/vulkan/oit/oit_buffer.h b/core/rend/vulkan/oit/oit_buffer.h index 6f57b83af..dafd5d10a 100644 --- a/core/rend/vulkan/oit/oit_buffer.h +++ b/core/rend/vulkan/oit/oit_buffer.h @@ -30,18 +30,7 @@ public: void Init(int width, int height) { const VulkanContext *context = VulkanContext::Instance(); - if (!descSetLayout) - { - // Descriptor set and pipeline layout - vk::DescriptorSetLayoutBinding descSetLayoutBindings[] = { - { 0, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // pixel buffer - { 1, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // pixel counter - { 2, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // a-buffer pointers - }; - descSetLayout = context->GetDevice().createDescriptorSetLayoutUnique( - vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(descSetLayoutBindings), descSetLayoutBindings)); - } if (width <= maxWidth && height <= maxHeight) return; maxWidth = std::max(maxWidth, width); @@ -66,23 +55,19 @@ public: abufferPointer = std::unique_ptr(new BufferData(maxWidth * maxHeight * sizeof(int), vk::BufferUsageFlagBits::eStorageBuffer, vk::MemoryPropertyFlagBits::eDeviceLocal)); firstFrameAfterInit = true; - - if (!descSet) - descSet = std::move(context->GetDevice().allocateDescriptorSetsUnique( - vk::DescriptorSetAllocateInfo(context->GetDescriptorPool(), 1, &descSetLayout.get())).front()); - std::vector writeDescriptorSets; - vk::DescriptorBufferInfo pixelBufferInfo(*pixelBuffer->buffer, 0, VK_WHOLE_SIZE); - writeDescriptorSets.emplace_back(*descSet, 0, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &pixelBufferInfo, nullptr); - vk::DescriptorBufferInfo pixelCounterBufferInfo(*pixelCounter->buffer, 0, 4); - writeDescriptorSets.emplace_back(*descSet, 1, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &pixelCounterBufferInfo, nullptr); - vk::DescriptorBufferInfo abufferPointerInfo(*abufferPointer->buffer, 0, VK_WHOLE_SIZE); - writeDescriptorSets.emplace_back(*descSet, 2, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &abufferPointerInfo, nullptr); - context->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); } - void BindDescriptorSet(vk::CommandBuffer cmdBuffer, vk::PipelineLayout pipelineLayout, u32 firstSet) + void updateDescriptorSet(vk::DescriptorSet descSet, std::vector& writeDescSets) { - cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, firstSet, 1, &descSet.get(), 0, nullptr); + static vk::DescriptorBufferInfo pixelBufferInfo({}, 0, VK_WHOLE_SIZE); + pixelBufferInfo.buffer = *pixelBuffer->buffer; + writeDescSets.emplace_back(descSet, 7, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &pixelBufferInfo, nullptr); + static vk::DescriptorBufferInfo pixelCounterBufferInfo({}, 0, 4); + pixelCounterBufferInfo.buffer = *pixelCounter->buffer; + writeDescSets.emplace_back(descSet, 8, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &pixelCounterBufferInfo, nullptr); + static vk::DescriptorBufferInfo abufferPointerInfo({}, 0, VK_WHOLE_SIZE); + abufferPointerInfo.buffer = *abufferPointer->buffer; + writeDescSets.emplace_back(descSet, 9, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &abufferPointerInfo, nullptr); } void OnNewFrame(vk::CommandBuffer commandBuffer) @@ -104,13 +89,9 @@ public: abufferPointer.reset(); } - vk::DescriptorSetLayout GetDescriptorSetLayout() const { return *descSetLayout; } bool isFirstFrameAfterInit() const { return firstFrameAfterInit; } private: - vk::UniqueDescriptorSet descSet; - vk::UniqueDescriptorSetLayout descSetLayout; - std::unique_ptr pixelBuffer; std::unique_ptr pixelCounter; std::unique_ptr pixelCounterReset; diff --git a/core/rend/vulkan/oit/oit_drawer.cpp b/core/rend/vulkan/oit/oit_drawer.cpp index 5a5b83031..ab15a96fc 100644 --- a/core/rend/vulkan/oit/oit_drawer.cpp +++ b/core/rend/vulkan/oit/oit_drawer.cpp @@ -61,7 +61,6 @@ void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool }, { poly.tsp.SrcInstr, poly.tsp.DstInstr, 0, 0 }, trilinearAlpha, - listType == ListType_Translucent ? (int)(&poly - pvrrc.global_param_tr.head()) : 0, palette_index, }; if (twoVolumes) @@ -77,15 +76,42 @@ void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool pushConstants.ignore_tex_alpha1 = poly.tsp1.IgnoreTexA; } cmdBuffer.pushConstants(pipelineManager->GetPipelineLayout(), vk::ShaderStageFlagBits::eFragment, 0, pushConstants); + if (!poly.isNaomi2()) + { + OITDescriptorSets::VtxPushConstants vtxPushConstants = { + listType == ListType_Translucent ? (int)(&poly - pvrrc.global_param_tr.head()) : 0 + }; + cmdBuffer.pushConstants(pipelineManager->GetPipelineLayout(), vk::ShaderStageFlagBits::eVertex, + sizeof(OITDescriptorSets::PushConstants), vtxPushConstants); + } - bool needTexture = poly.pcw.Texture; - if (needTexture) - GetCurrentDescSet().SetTexture((Texture *)poly.texture, poly.tsp, (Texture *)poly.texture1, poly.tsp1); + if (poly.pcw.Texture == 1 || poly.isNaomi2()) + { + vk::DeviceSize offset = 0; + u32 polyNumber = 0; + if (poly.isNaomi2()) + { + switch (listType) + { + case ListType_Opaque: + offset = offsets.naomi2OpaqueOffset; + polyNumber = &poly - pvrrc.global_param_op.head(); + break; + case ListType_Punch_Through: + offset = offsets.naomi2PunchThroughOffset; + polyNumber = &poly - pvrrc.global_param_pt.head(); + break; + case ListType_Translucent: + offset = offsets.naomi2TranslucentOffset; + polyNumber = &poly - pvrrc.global_param_tr.head(); + break; + } + } + descriptorSets.bindPerPolyDescriptorSets(cmdBuffer, poly, polyNumber, *GetMainBuffer(0)->buffer, offset, offsets.lightsOffset); + } vk::Pipeline pipeline = pipelineManager->GetPipeline(listType, autosort, poly, pass, gpuPalette); cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); - if (needTexture) - GetCurrentDescSet().BindPerPolyDescriptorSets(cmdBuffer, (Texture *)poly.texture, poly.tsp, (Texture *)poly.texture1, poly.tsp1); cmdBuffer.drawIndexed(count, 1, first, 0, 0); } @@ -132,19 +158,23 @@ void OITDrawer::DrawModifierVolumes(const vk::CommandBuffer& cmdBuffer, int firs { // OR'ing (open volume or quad) if (Translucent) - pipeline = pipelineManager->GetTrModifierVolumePipeline(ModVolMode::Or, param.isp.CullMode); + pipeline = pipelineManager->GetTrModifierVolumePipeline(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); else - pipeline = pipelineManager->GetModifierVolumePipeline(ModVolMode::Or, param.isp.CullMode); + pipeline = pipelineManager->GetModifierVolumePipeline(ModVolMode::Or, param.isp.CullMode, param.isNaomi2()); } else { // XOR'ing (closed volume) if (Translucent) - pipeline = pipelineManager->GetTrModifierVolumePipeline(ModVolMode::Xor, param.isp.CullMode); + pipeline = pipelineManager->GetTrModifierVolumePipeline(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); else - pipeline = pipelineManager->GetModifierVolumePipeline(ModVolMode::Xor, param.isp.CullMode); + pipeline = pipelineManager->GetModifierVolumePipeline(ModVolMode::Xor, param.isp.CullMode, param.isNaomi2()); } cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); + + vk::DeviceSize uniformOffset = Translucent ? offsets.naomi2TrModVolOffset : offsets.naomi2ModVolOffset; + descriptorSets.bindPerPolyDescriptorSets(cmdBuffer, param, first + cmv, *GetMainBuffer(0)->buffer, uniformOffset); + cmdBuffer.draw(param.count * 3, 1, param.first * 3, 0); if (mv_mode == 1 || mv_mode == 2) @@ -155,10 +185,10 @@ void OITDrawer::DrawModifierVolumes(const vk::CommandBuffer& cmdBuffer, int firs vk::MemoryBarrier barrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead); cmdBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eFragmentShader, vk::PipelineStageFlagBits::eFragmentShader, vk::DependencyFlagBits::eByRegion, barrier, nullptr, nullptr); - pipeline = pipelineManager->GetTrModifierVolumePipeline(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode); + pipeline = pipelineManager->GetTrModifierVolumePipeline(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); } else - pipeline = pipelineManager->GetModifierVolumePipeline(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode); + pipeline = pipelineManager->GetModifierVolumePipeline(mv_mode == 1 ? ModVolMode::Inclusion : ModVolMode::Exclusion, param.isp.CullMode, param.isNaomi2()); cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); cmdBuffer.draw((param.first + param.count - mod_base) * 3, 1, mod_base * 3, 0); @@ -178,57 +208,19 @@ void OITDrawer::DrawModifierVolumes(const vk::CommandBuffer& cmdBuffer, int firs void OITDrawer::UploadMainBuffer(const OITDescriptorSets::VertexShaderUniforms& vertexUniforms, const OITDescriptorSets::FragmentShaderUniforms& fragmentUniforms) { - using VertexShaderUniforms = OITDescriptorSets::VertexShaderUniforms; - using FragmentShaderUniforms = OITDescriptorSets::FragmentShaderUniforms; - - // TODO Put this logic in an allocator - std::vector chunks; - std::vector chunkSizes; + BufferPacker packer; // Vertex - chunks.push_back(pvrrc.verts.head()); - chunkSizes.push_back(pvrrc.verts.bytes()); - - u32 padding = align(pvrrc.verts.bytes(), 4); - offsets.modVolOffset = pvrrc.verts.bytes() + padding; - chunks.push_back(nullptr); - chunkSizes.push_back(padding); - + packer.add(pvrrc.verts.head(), pvrrc.verts.bytes()); // Modifier Volumes - chunks.push_back(pvrrc.modtrig.head()); - chunkSizes.push_back(pvrrc.modtrig.bytes()); - padding = align(offsets.modVolOffset + pvrrc.modtrig.bytes(), 4); - offsets.indexOffset = offsets.modVolOffset + pvrrc.modtrig.bytes() + padding; - chunks.push_back(nullptr); - chunkSizes.push_back(padding); - + offsets.modVolOffset = packer.add(pvrrc.modtrig.head(), pvrrc.modtrig.bytes()); // Index - chunks.push_back(pvrrc.idx.head()); - chunkSizes.push_back(pvrrc.idx.bytes()); - + offsets.indexOffset = packer.add(pvrrc.idx.head(), pvrrc.idx.bytes()); // Uniform buffers - u32 indexSize = pvrrc.idx.bytes(); - padding = align(offsets.indexOffset + indexSize, std::max(4, (int)GetContext()->GetUniformBufferAlignment())); - offsets.vertexUniformOffset = offsets.indexOffset + indexSize + padding; - chunks.push_back(nullptr); - chunkSizes.push_back(padding); - - chunks.push_back(&vertexUniforms); - chunkSizes.push_back(sizeof(vertexUniforms)); - padding = align(offsets.vertexUniformOffset + sizeof(VertexShaderUniforms), std::max(4, (int)GetContext()->GetUniformBufferAlignment())); - offsets.fragmentUniformOffset = offsets.vertexUniformOffset + sizeof(VertexShaderUniforms) + padding; - chunks.push_back(nullptr); - chunkSizes.push_back(padding); - - chunks.push_back(&fragmentUniforms); - chunkSizes.push_back(sizeof(fragmentUniforms)); + offsets.vertexUniformOffset = packer.addUniform(&vertexUniforms, sizeof(vertexUniforms)); + offsets.fragmentUniformOffset = packer.addUniform(&fragmentUniforms, sizeof(fragmentUniforms)); // Translucent poly params - padding = align(offsets.fragmentUniformOffset + sizeof(FragmentShaderUniforms), std::max(4, (int)GetContext()->GetStorageBufferAlignment())); - offsets.polyParamsOffset = offsets.fragmentUniformOffset + sizeof(FragmentShaderUniforms) + padding; - chunks.push_back(nullptr); - chunkSizes.push_back(padding); - std::vector trPolyParams(pvrrc.global_param_tr.used() * 2); if (pvrrc.global_param_tr.used() == 0) trPolyParams.push_back(0); // makes the validation layers happy @@ -243,12 +235,18 @@ void OITDrawer::UploadMainBuffer(const OITDescriptorSets::VertexShaderUniforms& } } offsets.polyParamsSize = trPolyParams.size() * 4; - chunks.push_back(trPolyParams.data()); - chunkSizes.push_back((u32)offsets.polyParamsSize); - u32 totalSize = (u32)(offsets.polyParamsOffset + offsets.polyParamsSize); + offsets.polyParamsOffset = packer.addStorage(trPolyParams.data(), offsets.polyParamsSize); - BufferData *buffer = GetMainBuffer(totalSize); - buffer->upload(chunks.size(), &chunkSizes[0], &chunks[0]); + std::vector n2uniforms; + std::vector n2lights; + if (settings.platform.isNaomi2()) + { + uploadNaomi2Uniforms(packer, offsets, n2uniforms, true); + offsets.lightsOffset = uploadNaomi2Lights(packer, n2lights); + } + + BufferData *buffer = GetMainBuffer(packer.size()); + packer.upload(*buffer); } bool OITDrawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) @@ -264,7 +262,7 @@ bool OITDrawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) } OITDescriptorSets::VertexShaderUniforms vtxUniforms; - vtxUniforms.normal_matrix = matrices.GetNormalMatrix(); + vtxUniforms.ndcMat = matrices.GetNormalMatrix(); OITDescriptorSets::FragmentShaderUniforms fragUniforms = MakeFragmentUniforms(); fragUniforms.shade_scale_factor = FPU_SHAD_SCALE.scale_factor / 256.f; @@ -285,14 +283,13 @@ bool OITDrawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) // Update per-frame descriptor set and bind it const vk::Buffer mainBuffer = GetMainBuffer(0)->buffer.get(); - GetCurrentDescSet().UpdateUniforms(mainBuffer, (u32)offsets.vertexUniformOffset, (u32)offsets.fragmentUniformOffset, + descriptorSets.updateUniforms(mainBuffer, (u32)offsets.vertexUniformOffset, (u32)offsets.fragmentUniformOffset, fogTexture->GetImageView(), (u32)offsets.polyParamsOffset, (u32)offsets.polyParamsSize, depthAttachments[0]->GetStencilView(), - depthAttachments[0]->GetImageView(), paletteTexture->GetImageView()); - GetCurrentDescSet().BindPerFrameDescriptorSets(cmdBuffer); - GetCurrentDescSet().UpdateColorInputDescSet(0, colorAttachments[0]->GetImageView()); - GetCurrentDescSet().UpdateColorInputDescSet(1, colorAttachments[1]->GetImageView()); - oitBuffers->BindDescriptorSet(cmdBuffer, pipelineManager->GetPipelineLayout(), 3); + depthAttachments[0]->GetImageView(), paletteTexture->GetImageView(), oitBuffers); + descriptorSets.bindPerFrameDescriptorSets(cmdBuffer); + descriptorSets.updateColorInputDescSet(0, colorAttachments[0]->GetImageView()); + descriptorSets.updateColorInputDescSet(1, colorAttachments[1]->GetImageView()); // Bind vertex and index buffers const vk::DeviceSize zeroOffset[] = { 0 }; @@ -302,6 +299,9 @@ bool OITDrawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) // Make sure to push constants even if not used OITDescriptorSets::PushConstants pushConstants = { }; cmdBuffer.pushConstants(pipelineManager->GetPipelineLayout(), vk::ShaderStageFlagBits::eFragment, 0, pushConstants); + OITDescriptorSets::VtxPushConstants vtxPushConstants = { }; + cmdBuffer.pushConstants(pipelineManager->GetPipelineLayout(), vk::ShaderStageFlagBits::eVertex, + sizeof(pushConstants), vtxPushConstants); const std::array clear_colors = { pvrrc.isRTT ? vk::ClearColorValue(std::array{0.f, 0.f, 0.f, 1.f}) : getBorderColor(), @@ -370,7 +370,7 @@ bool OITDrawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) // Final subpass cmdBuffer.nextSubpass(vk::SubpassContents::eInline); - GetCurrentDescSet().BindColorInputDescSet(cmdBuffer, (pvrrc.render_passes.used() - 1 - render_pass) % 2); + descriptorSets.bindColorInputDescSet(cmdBuffer, (pvrrc.render_passes.used() - 1 - render_pass) % 2); if (initialPass && !pvrrc.isRTT && clearNeeded[GetCurrentImage()]) { @@ -381,38 +381,26 @@ bool OITDrawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) } SetScissor(cmdBuffer, baseScissor); - if (!oitBuffers->isFirstFrameAfterInit()) - { - // Tr modifier volumes - if (GetContext()->GetVendorID() != VulkanContext::VENDOR_QUALCOMM) // Adreno bug - DrawModifierVolumes(cmdBuffer, previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count); - - vk::Pipeline pipeline = pipelineManager->GetFinalPipeline(); - cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); - quadBuffer->Bind(cmdBuffer); - quadBuffer->Draw(cmdBuffer); - } - - // Clear - vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderRead, vk::AccessFlagBits::eShaderWrite); - cmdBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eFragmentShader, vk::PipelineStageFlagBits::eFragmentShader, - vk::DependencyFlagBits::eByRegion, 1, &memoryBarrier, 0, nullptr, 0, nullptr); - vk::Pipeline pipeline = pipelineManager->GetClearPipeline(); - cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); - quadBuffer->Bind(cmdBuffer); - quadBuffer->Draw(cmdBuffer); - if (oitBuffers->isFirstFrameAfterInit()) { // missing the transparent stuff on the first frame cuz I'm lazy + // Clear + cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipelineManager->GetClearPipeline()); + quadBuffer->Bind(cmdBuffer); + quadBuffer->Draw(cmdBuffer); + vk::MemoryBarrier memoryBarrier(vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eShaderRead); cmdBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eFragmentShader, vk::PipelineStageFlagBits::eFragmentShader, vk::DependencyFlagBits::eByRegion, 1, &memoryBarrier, 0, nullptr, 0, nullptr); - pipeline = pipelineManager->GetFinalPipeline(); - cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); - quadBuffer->Bind(cmdBuffer); - quadBuffer->Draw(cmdBuffer); } + // Tr modifier volumes + if (GetContext()->GetVendorID() != VulkanContext::VENDOR_QUALCOMM) // Adreno bug + DrawModifierVolumes(cmdBuffer, previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count); + + vk::Pipeline pipeline = pipelineManager->GetFinalPipeline(); + cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); + quadBuffer->Bind(cmdBuffer); + quadBuffer->Draw(cmdBuffer); if (!finalPass) { diff --git a/core/rend/vulkan/oit/oit_drawer.h b/core/rend/vulkan/oit/oit_drawer.h index 94d4698b6..7c708567d 100644 --- a/core/rend/vulkan/oit/oit_drawer.h +++ b/core/rend/vulkan/oit/oit_drawer.h @@ -52,18 +52,11 @@ protected: if (!quadBuffer) quadBuffer = std::unique_ptr(new QuadBuffer()); this->oitBuffers = oitBuffers; - if (descriptorSets.size() > GetContext()->GetSwapChainSize()) - descriptorSets.resize(GetContext()->GetSwapChainSize()); - else - while (descriptorSets.size() < GetContext()->GetSwapChainSize()) - { - descriptorSets.emplace_back(); - descriptorSets.back().Init(samplerManager, - pipelineManager->GetPipelineLayout(), - pipelineManager->GetPerFrameDSLayout(), - pipelineManager->GetPerPolyDSLayout(), - pipelineManager->GetColorInputDSLayout()); - } + descriptorSets.init(samplerManager, + pipelineManager->GetPipelineLayout(), + pipelineManager->GetPerFrameDSLayout(), + pipelineManager->GetPerPolyDSLayout(), + pipelineManager->GetColorInputDSLayout()); } void Term() { @@ -75,20 +68,18 @@ protected: depthAttachments[0].reset(); depthAttachments[1].reset(); mainBuffers.clear(); - descriptorSets.clear(); + descriptorSets.term(); } int GetCurrentImage() const { return imageIndex; } void NewImage() { - GetCurrentDescSet().Reset(); + descriptorSets.reset(); imageIndex = (imageIndex + 1) % GetContext()->GetSwapChainSize(); renderPass = 0; } - OITDescriptorSets& GetCurrentDescSet() { return descriptorSets[GetCurrentImage()]; } - BufferData* GetMainBuffer(u32 size) { u32 bufferIndex = imageIndex + renderPass * GetContext()->GetSwapChainSize(); @@ -138,6 +129,12 @@ private: vk::DeviceSize fragmentUniformOffset = 0; vk::DeviceSize polyParamsOffset = 0; vk::DeviceSize polyParamsSize = 0; + vk::DeviceSize naomi2OpaqueOffset = 0; + vk::DeviceSize naomi2PunchThroughOffset = 0; + vk::DeviceSize naomi2TranslucentOffset = 0; + vk::DeviceSize naomi2ModVolOffset = 0; + vk::DeviceSize naomi2TrModVolOffset = 0; + vk::DeviceSize lightsOffset = 0; } offsets; std::unique_ptr quadBuffer; @@ -152,7 +149,7 @@ private: bool needDepthTransition = false; int imageIndex = 0; int renderPass = 0; - std::vector descriptorSets; + OITDescriptorSets descriptorSets; std::vector> mainBuffers; }; diff --git a/core/rend/vulkan/oit/oit_pipeline.cpp b/core/rend/vulkan/oit/oit_pipeline.cpp index cffe8e295..89dcf8b04 100644 --- a/core/rend/vulkan/oit/oit_pipeline.cpp +++ b/core/rend/vulkan/oit/oit_pipeline.cpp @@ -139,7 +139,9 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetVertexShader(OITShaderManager::VertexShaderParams{ pp.pcw.Gouraud == 1 }); + bool twoVolume = pp.tsp1.full != (u32)-1 || pp.tcw1.full != (u32)-1; + vk::ShaderModule vertex_module = shaderManager->GetVertexShader( + OITShaderManager::VertexShaderParams{ pp.pcw.Gouraud == 1, pp.isNaomi2(), pass != Pass::Depth, twoVolume, pp.pcw.Texture == 1 }); OITShaderManager::FragmentShaderParams params = {}; params.alphaTest = listType == ListType_Punch_Through; params.bumpmap = pp.tcw.PixelFmt == PixelBumpMap; @@ -153,7 +155,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP params.texture = pp.pcw.Texture; params.useAlpha = pp.tsp.UseAlpha; params.pass = pass; - params.twoVolume = pp.tsp1.full != (u32)-1 || pp.tcw1.full != (u32)-1; + params.twoVolume = twoVolume; params.palette = gpuPalette; vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params); @@ -346,7 +348,7 @@ void OITPipelineManager::CreateClearPipeline() clearPipeline = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo); } -void OITPipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode) +void OITPipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode, bool naomi2) { verify(mode != ModVolMode::Final); @@ -438,7 +440,7 @@ void OITPipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode) vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(); + vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2); vk::ShaderModule fragment_module = shaderManager->GetModVolShader(); vk::PipelineShaderStageCreateInfo stages[] = { @@ -464,12 +466,12 @@ void OITPipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode) 0 // subpass ); - modVolPipelines[hash(mode, cullMode)] = + modVolPipelines[hash(mode, cullMode, naomi2)] = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo); } -void OITPipelineManager::CreateTrModVolPipeline(ModVolMode mode, int cullMode) +void OITPipelineManager::CreateTrModVolPipeline(ModVolMode mode, int cullMode, bool naomi2) { verify(mode != ModVolMode::Final); @@ -533,7 +535,7 @@ void OITPipelineManager::CreateTrModVolPipeline(ModVolMode mode, int cullMode) vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(); + vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2); vk::ShaderModule fragment_module = shaderManager->GetTrModVolShader(mode); vk::PipelineShaderStageCreateInfo stages[] = { @@ -559,7 +561,7 @@ void OITPipelineManager::CreateTrModVolPipeline(ModVolMode mode, int cullMode) 2 // subpass ); - trModVolPipelines[hash(mode, cullMode)] = + trModVolPipelines[hash(mode, cullMode, naomi2)] = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo); } diff --git a/core/rend/vulkan/oit/oit_pipeline.h b/core/rend/vulkan/oit/oit_pipeline.h index 995c549d4..5e2949d16 100644 --- a/core/rend/vulkan/oit/oit_pipeline.h +++ b/core/rend/vulkan/oit/oit_pipeline.h @@ -24,23 +24,17 @@ #include "oit_renderpass.h" #include "oit_buffer.h" #include "../texture.h" +#include "../desc_set.h" #include -#include class OITDescriptorSets { public: - OITDescriptorSets() = default; - OITDescriptorSets(OITDescriptorSets&&) = default; - OITDescriptorSets(const OITDescriptorSets&) = delete; - OITDescriptorSets& operator=(OITDescriptorSets&&) = default; - OITDescriptorSets& operator=(const OITDescriptorSets&) = delete; - // std140 alignment required struct VertexShaderUniforms { - glm::mat4 normal_matrix; + glm::mat4 ndcMat; }; // std140 alignment required @@ -62,9 +56,8 @@ public: glm::vec4 clipTest; glm::ivec4 blend_mode0; // Only using 2 elements but easier for std140 float trilinearAlpha; - int pp_Number; float palette_index; - int _pad; + int _pad[2]; // two volume mode glm::ivec4 blend_mode1; // Only using 2 elements but easier for std140 @@ -77,29 +70,31 @@ public: int ignore_tex_alpha0; int ignore_tex_alpha1; }; + static_assert(sizeof(PushConstants) == 96, "PushConstants size changed. Update vertex push constant layout(offset) in vertex shaders"); - void Init(SamplerManager* samplerManager, vk::PipelineLayout pipelineLayout, vk::DescriptorSetLayout perFrameLayout, + struct VtxPushConstants + { + int polyNumber; + }; + + void init(SamplerManager* samplerManager, vk::PipelineLayout pipelineLayout, vk::DescriptorSetLayout perFrameLayout, vk::DescriptorSetLayout perPolyLayout, vk::DescriptorSetLayout colorInputLayout) { this->samplerManager = samplerManager; this->pipelineLayout = pipelineLayout; - this->perFrameLayout = perFrameLayout; - this->perPolyLayout = perPolyLayout; - this->colorInputLayout = colorInputLayout; + + perFrameAlloc.setLayout(perFrameLayout); + perPolyAlloc.setLayout(perPolyLayout); + colorInputAlloc.setLayout(colorInputLayout); } + // FIXME way too many params - void UpdateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView, + void updateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView, u32 polyParamsOffset, u32 polyParamsSize, vk::ImageView stencilImageView, vk::ImageView depthImageView, - vk::ImageView paletteImageView) + vk::ImageView paletteImageView, OITBuffers *oitBuffers) { - if (perFrameDescSets.empty()) - { - perFrameDescSets = GetContext()->GetDevice().allocateDescriptorSetsUnique( - vk::DescriptorSetAllocateInfo(GetContext()->GetDescriptorPool(), 1, &perFrameLayout)); - } - perFrameDescSetsInFlight.emplace_back(std::move(perFrameDescSets.back())); - perFrameDescSets.pop_back(); - vk::DescriptorSet perFrameDescSet = *perFrameDescSetsInFlight.back(); + if (!perFrameDescSet) + perFrameDescSet = perFrameAlloc.alloc(); std::vector bufferInfos; bufferInfos.emplace_back(buffer, vertexUniformOffset, sizeof(VertexShaderUniforms)); @@ -140,92 +135,119 @@ public: writeDescriptorSets.emplace_back(perFrameDescSet, 4, 0, 1, vk::DescriptorType::eInputAttachment, &stencilImageInfo, nullptr, nullptr); vk::DescriptorImageInfo depthImageInfo(vk::Sampler(), depthImageView, vk::ImageLayout::eDepthStencilReadOnlyOptimal); writeDescriptorSets.emplace_back(perFrameDescSet, 5, 0, 1, vk::DescriptorType::eInputAttachment, &depthImageInfo, nullptr, nullptr); + oitBuffers->updateDescriptorSet(perFrameDescSet, writeDescriptorSets); - GetContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); + getContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); } - void UpdateColorInputDescSet(int index, vk::ImageView colorImageView) + void updateColorInputDescSet(int index, vk::ImageView colorImageView) { if (!colorInputDescSets[index]) - { - colorInputDescSets[index] = std::move(GetContext()->GetDevice().allocateDescriptorSetsUnique( - vk::DescriptorSetAllocateInfo(GetContext()->GetDescriptorPool(), 1, &colorInputLayout)).front()); - } + colorInputDescSets[index] = colorInputAlloc.alloc(); + vk::DescriptorImageInfo colorImageInfo(vk::Sampler(), colorImageView, vk::ImageLayout::eShaderReadOnlyOptimal); - vk::WriteDescriptorSet writeDescriptorSet(*colorInputDescSets[index], 0, 0, 1, vk::DescriptorType::eInputAttachment, &colorImageInfo, nullptr, nullptr); + vk::WriteDescriptorSet writeDescriptorSet(colorInputDescSets[index], 0, 0, 1, vk::DescriptorType::eInputAttachment, &colorImageInfo, nullptr, nullptr); - GetContext()->GetDevice().updateDescriptorSets(1, &writeDescriptorSet, 0, nullptr); + getContext()->GetDevice().updateDescriptorSets(1, &writeDescriptorSet, 0, nullptr); } - void SetTexture(Texture *texture0, TSP tsp0, Texture *texture1, TSP tsp1) + void bindPerPolyDescriptorSets(vk::CommandBuffer cmdBuffer, const PolyParam& poly, int polyNumber, vk::Buffer buffer, + vk::DeviceSize uniformOffset, vk::DeviceSize lightOffset) { - auto index = std::make_tuple(texture0, tsp0.full & SamplerManager::TSP_Mask, - texture1, tsp1.full & SamplerManager::TSP_Mask); - if (perPolyDescSetsInFlight.find(index) != perPolyDescSetsInFlight.end()) - return; - - if (perPolyDescSets.empty()) - { - std::vector layouts(10, perPolyLayout); - perPolyDescSets = GetContext()->GetDevice().allocateDescriptorSetsUnique( - vk::DescriptorSetAllocateInfo(GetContext()->GetDescriptorPool(), (u32)layouts.size(), &layouts[0])); - } - vk::DescriptorImageInfo imageInfo0(samplerManager->GetSampler(tsp0), texture0->GetReadOnlyImageView(), vk::ImageLayout::eShaderReadOnlyOptimal); - + vk::DescriptorSet perPolyDescSet = perPolyAlloc.alloc(); std::vector writeDescriptorSets; - writeDescriptorSets.emplace_back(*perPolyDescSets.back(), 0, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo0, nullptr, nullptr); - if (texture1 != nullptr) + vk::DescriptorImageInfo imageInfo0; + if (poly.texture != nullptr) { - vk::DescriptorImageInfo imageInfo1(samplerManager->GetSampler(tsp1), texture1->GetReadOnlyImageView(), vk::ImageLayout::eShaderReadOnlyOptimal); - - writeDescriptorSets.emplace_back(*perPolyDescSets.back(), 1, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo1, nullptr, nullptr); + imageInfo0 = vk::DescriptorImageInfo{ samplerManager->GetSampler(poly.tsp), ((Texture *)poly.texture)->GetReadOnlyImageView(), + vk::ImageLayout::eShaderReadOnlyOptimal }; + writeDescriptorSets.emplace_back(perPolyDescSet, 0, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo0, nullptr, nullptr); } - GetContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); - perPolyDescSetsInFlight[index] = std::move(perPolyDescSets.back()); - perPolyDescSets.pop_back(); + vk::DescriptorImageInfo imageInfo1; + if (poly.texture1 != nullptr) + { + imageInfo1 = vk::DescriptorImageInfo{ samplerManager->GetSampler(poly.tsp1), ((Texture *)poly.texture1)->GetReadOnlyImageView(), + vk::ImageLayout::eShaderReadOnlyOptimal }; + writeDescriptorSets.emplace_back(perPolyDescSet, 1, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo1, nullptr, nullptr); + } + + vk::DescriptorBufferInfo uniBufferInfo; + vk::DescriptorBufferInfo lightBufferInfo; + if (poly.isNaomi2()) + { + const vk::DeviceSize uniformAlignment = VulkanContext::Instance()->GetUniformBufferAlignment(); + size_t size = sizeof(N2VertexShaderUniforms) + align(sizeof(N2VertexShaderUniforms), uniformAlignment); + uniBufferInfo = vk::DescriptorBufferInfo{ buffer, uniformOffset + polyNumber * size, sizeof(N2VertexShaderUniforms) }; + writeDescriptorSets.emplace_back(perPolyDescSet, 2, 0, 1, vk::DescriptorType::eUniformBuffer, nullptr, &uniBufferInfo, nullptr); + + if (poly.lightModel != nullptr) + { + size = sizeof(VkN2LightConstants) + align(sizeof(VkN2LightConstants), uniformAlignment); + lightBufferInfo = vk::DescriptorBufferInfo{ buffer, lightOffset + (poly.lightModel - pvrrc.lightModels.head()) * size, sizeof(VkN2LightConstants) }; + writeDescriptorSets.emplace_back(perPolyDescSet, 3, 0, 1, vk::DescriptorType::eUniformBuffer, nullptr, &lightBufferInfo, nullptr); + } + // TODO no light + } + + getContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); + + cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 1, 1, &perPolyDescSet, 0, nullptr); } - void BindPerFrameDescriptorSets(vk::CommandBuffer cmdBuffer) + void bindPerPolyDescriptorSets(vk::CommandBuffer cmdBuffer, const ModifierVolumeParam& mvParam, int polyNumber, vk::Buffer buffer, vk::DeviceSize uniformOffset) { - cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 0, 1, &perFrameDescSetsInFlight.back().get(), 0, nullptr); + if (!mvParam.isNaomi2()) + return; + vk::DescriptorSet perPolyDescSet = perPolyAlloc.alloc(); + + const vk::DeviceSize uniformAlignment = VulkanContext::Instance()->GetUniformBufferAlignment(); + size_t size = sizeof(N2VertexShaderUniforms) + align(sizeof(N2VertexShaderUniforms), uniformAlignment); + vk::DescriptorBufferInfo uniBufferInfo{ buffer, uniformOffset + polyNumber * size, sizeof(N2VertexShaderUniforms) }; + vk::WriteDescriptorSet writeDescriptorSet(perPolyDescSet, 2, 0, 1, vk::DescriptorType::eUniformBuffer, nullptr, &uniBufferInfo, nullptr); + + getContext()->GetDevice().updateDescriptorSets(1, &writeDescriptorSet, 0, nullptr); + + cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 1, 1, &perPolyDescSet, 0, nullptr); } - void BindColorInputDescSet(vk::CommandBuffer cmdBuffer, int index) + void bindPerFrameDescriptorSets(vk::CommandBuffer cmdBuffer) { - cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 2, 1, &colorInputDescSets[index].get(), 0, nullptr); + cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 0, 1, &perFrameDescSet, 0, nullptr); } - void BindPerPolyDescriptorSets(vk::CommandBuffer cmdBuffer, Texture *texture0, TSP tsp0, Texture *texture1, TSP tsp1) + void bindColorInputDescSet(vk::CommandBuffer cmdBuffer, int index) { - auto index = std::make_tuple(texture0, tsp0.full & SamplerManager::TSP_Mask, texture1, tsp1.full & SamplerManager::TSP_Mask); - cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 1, 1, - &perPolyDescSetsInFlight[index].get(), 0, nullptr); + cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 2, 1, &colorInputDescSets[index], 0, nullptr); } - void Reset() + void reset() { - for (auto& pair : perPolyDescSetsInFlight) - perPolyDescSets.emplace_back(std::move(pair.second)); - perPolyDescSetsInFlight.clear(); - for (auto& descset : perFrameDescSetsInFlight) - perFrameDescSets.emplace_back(std::move(descset)); - perFrameDescSetsInFlight.clear(); + perFrameDescSet = vk::DescriptorSet{}; + colorInputDescSets[0] = vk::DescriptorSet{}; + colorInputDescSets[1] = vk::DescriptorSet{}; + perFrameAlloc.nextFrame(); + perPolyAlloc.nextFrame(); + colorInputAlloc.nextFrame(); + } + + void term() + { + perFrameAlloc.term(); + perPolyAlloc.term(); + colorInputAlloc.term(); } private: - VulkanContext *GetContext() const { return VulkanContext::Instance(); } + VulkanContext *getContext() const { return VulkanContext::Instance(); } - vk::DescriptorSetLayout perFrameLayout; - vk::DescriptorSetLayout perPolyLayout; - vk::DescriptorSetLayout colorInputLayout; vk::PipelineLayout pipelineLayout; - std::vector perFrameDescSets; - std::vector perFrameDescSetsInFlight; - std::array colorInputDescSets; - std::vector perPolyDescSets; - std::map, vk::UniqueDescriptorSet> perPolyDescSetsInFlight; + std::array colorInputDescSets; + DynamicDescSetAlloc perFrameAlloc; + DynamicDescSetAlloc perPolyAlloc; + DynamicDescSetAlloc colorInputAlloc; + vk::DescriptorSet perFrameDescSet = {}; SamplerManager* samplerManager; }; @@ -242,6 +264,7 @@ public: if (!perFrameLayout) { + vk::Device device = GetContext()->GetDevice(); // Descriptor set and pipeline layout vk::DescriptorSetLayoutBinding perFrameBindings[] = { { 0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex }, // vertex uniforms @@ -251,27 +274,37 @@ public: { 4, vk::DescriptorType::eInputAttachment, 1, vk::ShaderStageFlagBits::eFragment }, // stencil input attachment { 5, vk::DescriptorType::eInputAttachment, 1, vk::ShaderStageFlagBits::eFragment }, // depth input attachment { 6, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// palette texture + // OIT buffers + { 7, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // pixel buffer + { 8, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // pixel counter + { 9, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // a-buffer pointers }; - perFrameLayout = GetContext()->GetDevice().createDescriptorSetLayoutUnique( + perFrameLayout = device.createDescriptorSetLayoutUnique( vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perFrameBindings), perFrameBindings)); vk::DescriptorSetLayoutBinding colorInputBindings[] = { { 0, vk::DescriptorType::eInputAttachment, 1, vk::ShaderStageFlagBits::eFragment }, // color input attachment }; - colorInputLayout = GetContext()->GetDevice().createDescriptorSetLayoutUnique( + colorInputLayout = device.createDescriptorSetLayoutUnique( vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(colorInputBindings), colorInputBindings)); vk::DescriptorSetLayoutBinding perPolyBindings[] = { - { 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// texture 0 - { 1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// texture 1 (for 2-volume mode) + { 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment }, // texture 0 + { 1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment }, // texture 1 (for 2-volume mode) + { 2, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex }, // Naomi2 uniforms + { 3, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex }, // Naomi2 lights }; - perPolyLayout = GetContext()->GetDevice().createDescriptorSetLayoutUnique( + perPolyLayout = device.createDescriptorSetLayoutUnique( vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perPolyBindings), perPolyBindings)); - vk::PushConstantRange pushConstant(vk::ShaderStageFlagBits::eFragment, 0, sizeof(OITDescriptorSets::PushConstants)); - vk::DescriptorSetLayout layouts[] = { *perFrameLayout, *perPolyLayout, *colorInputLayout, oitBuffers->GetDescriptorSetLayout() }; - pipelineLayout = GetContext()->GetDevice().createPipelineLayoutUnique( - vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), ARRAY_SIZE(layouts), layouts, 1, &pushConstant)); + vk::PushConstantRange pushConstants[] = { + vk::PushConstantRange(vk::ShaderStageFlagBits::eFragment, 0, sizeof(OITDescriptorSets::PushConstants)), + vk::PushConstantRange(vk::ShaderStageFlagBits::eVertex, sizeof(OITDescriptorSets::PushConstants), sizeof(OITDescriptorSets::VtxPushConstants)), + }; + + vk::DescriptorSetLayout layouts[] = { *perFrameLayout, *perPolyLayout, *colorInputLayout }; + pipelineLayout = device.createPipelineLayoutUnique( + vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), ARRAY_SIZE(layouts), layouts, ARRAY_SIZE(pushConstants), pushConstants)); } pipelines.clear(); @@ -290,23 +323,23 @@ public: return *pipelines[pipehash]; } - vk::Pipeline GetModifierVolumePipeline(ModVolMode mode, int cullMode) + vk::Pipeline GetModifierVolumePipeline(ModVolMode mode, int cullMode, bool naomi2) { - u32 pipehash = hash(mode, cullMode); + u32 pipehash = hash(mode, cullMode, naomi2); const auto &pipeline = modVolPipelines.find(pipehash); if (pipeline != modVolPipelines.end()) return pipeline->second.get(); - CreateModVolPipeline(mode, cullMode); + CreateModVolPipeline(mode, cullMode, naomi2); return *modVolPipelines[pipehash]; } - vk::Pipeline GetTrModifierVolumePipeline(ModVolMode mode, int cullMode) + vk::Pipeline GetTrModifierVolumePipeline(ModVolMode mode, int cullMode, bool naomi2) { - u32 pipehash = hash(mode, cullMode); + u32 pipehash = hash(mode, cullMode, naomi2); const auto &pipeline = trModVolPipelines.find(pipehash); if (pipeline != trModVolPipelines.end()) return pipeline->second.get(); - CreateTrModVolPipeline(mode, cullMode); + CreateTrModVolPipeline(mode, cullMode, naomi2); return *trModVolPipelines[pipehash]; } @@ -330,8 +363,8 @@ public: vk::RenderPass GetRenderPass(bool initial, bool last) { return renderPasses->GetRenderPass(initial, last); } private: - void CreateModVolPipeline(ModVolMode mode, int cullMode); - void CreateTrModVolPipeline(ModVolMode mode, int cullMode); + void CreateModVolPipeline(ModVolMode mode, int cullMode, bool naomi2); + void CreateTrModVolPipeline(ModVolMode mode, int cullMode, bool naomi2); u32 hash(u32 listType, bool autosort, const PolyParam *pp, Pass pass, bool gpuPalette) const { @@ -351,13 +384,13 @@ private: | (pp->tsp.SrcInstr << 14) | (pp->tsp.DstInstr << 17); } hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | ((autosort ? 6 : pp->isp.DepthMode) << 23); - hash |= ((u32)gpuPalette << 26) | ((u32)pass << 27); + hash |= ((u32)gpuPalette << 26) | ((u32)pass << 27) | ((u32)pp->isNaomi2() << 29); return hash; } - u32 hash(ModVolMode mode, int cullMode) const + u32 hash(ModVolMode mode, int cullMode, bool naomi2) const { - return ((int)mode << 2) | cullMode; + return ((int)mode << 2) | cullMode | ((u32)naomi2 << 5); } vk::PipelineVertexInputStateCreateInfo GetMainVertexInputStateCreateInfo(bool full = true) const @@ -376,6 +409,7 @@ private: vk::VertexInputAttributeDescription(4, 0, vk::Format::eR8G8B8A8Uint, offsetof(Vertex, col1)), // base1 color vk::VertexInputAttributeDescription(5, 0, vk::Format::eR8G8B8A8Uint, offsetof(Vertex, spc1)), // offset1 color vk::VertexInputAttributeDescription(6, 0, vk::Format::eR32G32Sfloat, offsetof(Vertex, u1)), // tex1 coord + vk::VertexInputAttributeDescription(7, 0, vk::Format::eR32G32B32Sfloat, offsetof(Vertex, nx)), // naomi2 normal }; static const vk::VertexInputAttributeDescription vertexInputLightAttributeDescriptions[] = { diff --git a/core/rend/vulkan/oit/oit_shaders.cpp b/core/rend/vulkan/oit/oit_shaders.cpp index baa72eddc..c0c26044e 100644 --- a/core/rend/vulkan/oit/oit_shaders.cpp +++ b/core/rend/vulkan/oit/oit_shaders.cpp @@ -25,9 +25,14 @@ static const char OITVertexShaderSource[] = R"( layout (std140, set = 0, binding = 0) uniform VertexShaderUniforms { - mat4 normal_matrix; + mat4 ndcMat; } uniformBuffer; +layout (push_constant) uniform constants +{ + layout(offset = 96) int polyNumber; +} pushConstants; + layout (location = 0) in vec4 in_pos; layout (location = 1) in uvec4 in_base; layout (location = 2) in uvec4 in_offs; @@ -42,10 +47,11 @@ layout (location = 2) noperspective out highp vec3 vtx_uv; layout (location = 3) INTERPOLATION out highp vec4 vtx_base1; // New for OIT, only for OP/PT with 2-volume layout (location = 4) INTERPOLATION out highp vec4 vtx_offs1; layout (location = 5) noperspective out highp vec2 vtx_uv1; +layout (location = 6) flat out uint vtx_index; void main() { - vec4 vpos = uniformBuffer.normal_matrix * in_pos; + vec4 vpos = uniformBuffer.ndcMat * in_pos; vtx_base = vec4(in_base) / 255.0; vtx_offs = vec4(in_offs) / 255.0; vtx_uv = vec3(in_uv * vpos.z, vpos.z); @@ -58,7 +64,7 @@ void main() vtx_base1 *= vpos.z; vtx_offs1 *= vpos.z; #endif - // FIXME need pushConstants.pp_Number and gl_VertexID... + vtx_index = (uint(pushConstants.polyNumber) << 18) + uint(gl_VertexIndex); vpos.w = 1.0; vpos.z = 0.0; gl_Position = vpos; @@ -81,18 +87,18 @@ layout (std140, set = 0, binding = 1) uniform FragmentShaderUniforms uint viewportWidth; } uniformBuffer; -layout(set = 3, binding = 2) buffer abufferPointer_ { +layout(set = 0, binding = 9) buffer abufferPointer_ { uint pointers[]; } abufferPointer; -layout(set = 3, binding = 1) buffer PixelCounter_ { +layout(set = 0, binding = 8) buffer PixelCounter_ { uint buffer_index; } PixelCounter; )" OIT_POLY_PARAM R"( -layout (set = 3, binding = 0, std430) coherent restrict buffer PixelBuffer_ { +layout (set = 0, binding = 7, std430) coherent restrict buffer PixelBuffer_ { Pixel pixels[]; } PixelBuffer; @@ -138,7 +144,6 @@ layout (push_constant) uniform pushBlock vec4 clipTest; ivec4 blend_mode0; float trilinearAlpha; - int pp_Number; float palette_index; // two volume mode @@ -177,6 +182,7 @@ layout (location = 2) noperspective in highp vec3 vtx_uv; layout (location = 3) INTERPOLATION in highp vec4 vtx_base1; // new for OIT. Only if 2 vol layout (location = 4) INTERPOLATION in highp vec4 vtx_offs1; layout (location = 5) noperspective in highp vec2 vtx_uv1; +layout (location = 6) flat in uint vtx_index; #if pp_FogCtrl != 2 || pp_TwoVolumes == 1 layout (set = 0, binding = 2) uniform sampler2D fog_table; @@ -422,7 +428,7 @@ void main() Pixel pixel; pixel.color = packColors(clamp(color, vec4(0.0), vec4(1.0))); pixel.depth = vtx_uv.z; - pixel.seq_num = uint(pushConstants.pp_Number); + pixel.seq_num = vtx_index; pixel.next = atomicExchange(abufferPointer.pointers[coords.x + coords.y * uniformBuffer.viewportWidth], idx); PixelBuffer.pixels[idx] = pixel; @@ -460,20 +466,25 @@ int fillAndSortFragmentArray(ivec2 coords) idx = PixelBuffer.pixels[idx].next; for (; idx != EOL && count < MAX_PIXELS_PER_FRAGMENT; count++) { - const Pixel p = PixelBuffer.pixels[idx]; + float depth = PixelBuffer.pixels[idx].depth; + uint index = getPolyIndex(PixelBuffer.pixels[idx]); int j = count - 1; - Pixel jp = PixelBuffer.pixels[pixel_list[j]]; + float jdepth = PixelBuffer.pixels[pixel_list[j]].depth; + uint jindex = getPolyIndex(PixelBuffer.pixels[pixel_list[j]]); while (j >= 0 - && (jp.depth > p.depth - || (jp.depth == p.depth && getPolyIndex(jp) > getPolyIndex(p)))) + && (jdepth > depth + || (jdepth == depth && jindex > index))) { pixel_list[j + 1] = pixel_list[j]; j--; if (j >= 0) - jp = PixelBuffer.pixels[pixel_list[j]]; + { + jdepth = PixelBuffer.pixels[pixel_list[j]].depth; + jindex = getPolyIndex(PixelBuffer.pixels[pixel_list[j]]); + } } pixel_list[j + 1] = idx; - idx = p.next; + idx = PixelBuffer.pixels[idx].next; } return count; } @@ -490,7 +501,7 @@ vec4 resolveAlphaBlend(ivec2 coords) { for (int i = 0; i < num_frag; i++) { const Pixel pixel = PixelBuffer.pixels[pixel_list[i]]; - const PolyParam pp = TrPolyParam.tr_poly_params[getPolyIndex(pixel)]; + const PolyParam pp = TrPolyParam.tr_poly_params[getPolyNumber(pixel)]; bool area1 = false; bool shadowed = false; if (isShadowed(pixel)) @@ -587,6 +598,9 @@ void main(void) // Visualize the number of layers in use //FragColor = vec4(float(fillAndSortFragmentArray(coords)) / MAX_PIXELS_PER_FRAGMENT * 4, 0, 0, 1); FragColor = resolveAlphaBlend(coords); + + // Reset pointers + abufferPointer.pointers[coords.x + coords.y * uniformBuffer.viewportWidth] = EOL; } )"; @@ -618,7 +632,7 @@ void main() while (idx != EOL && list_len < MAX_PIXELS_PER_FRAGMENT) { const Pixel pixel = PixelBuffer.pixels[idx]; - const PolyParam pp = TrPolyParam.tr_poly_params[getPolyIndex(pixel)]; + const PolyParam pp = TrPolyParam.tr_poly_params[getPolyNumber(pixel)]; if (getShadowEnable(pp)) { #if MV_MODE == MV_XOR @@ -646,20 +660,135 @@ void main() static const char OITFinalVertexShaderSource[] = R"( layout (location = 0) in vec3 in_pos; +layout (push_constant) uniform pushBlock +{ + int polyNumber_not_used; +} pushConstants; + void main() { gl_Position = vec4(in_pos, 1.0); } )"; +static const char OITN2VertexShaderSource[] = R"( +layout (std140, set = 0, binding = 0) uniform VertexShaderUniforms +{ + mat4 ndcMat; +} uniformBuffer; + +layout (push_constant) uniform constants +{ + layout(offset = 96) int polyNumber_not_used; +} pushConstants; + +layout (location = 0) in vec4 in_pos; +layout (location = 1) in uvec4 in_base; +layout (location = 2) in uvec4 in_offs; +layout (location = 3) in mediump vec2 in_uv; +layout (location = 4) in uvec4 in_base1; +layout (location = 5) in uvec4 in_offs1; +layout (location = 6) in mediump vec2 in_uv1; +layout (location = 7) in vec3 in_normal; + +layout (location = 0) INTERPOLATION out highp vec4 vtx_base; +layout (location = 1) INTERPOLATION out highp vec4 vtx_offs; +layout (location = 2) noperspective out highp vec3 vtx_uv; +layout (location = 3) INTERPOLATION out highp vec4 vtx_base1; +layout (location = 4) INTERPOLATION out highp vec4 vtx_offs1; +layout (location = 5) noperspective out highp vec2 vtx_uv1; +layout (location = 6) flat out uint vtx_index; + +void wDivide(inout vec4 vpos) +{ + vpos = vec4(vpos.xy / vpos.w, 1.0 / vpos.w, 1.0); + vpos = uniformBuffer.ndcMat * vpos; +#if pp_Gouraud == 1 + vtx_base *= vpos.z; + vtx_offs *= vpos.z; +#if pp_TwoVolumes == 1 + vtx_base1 *= vpos.z; + vtx_offs1 *= vpos.z; +#endif +#endif + vtx_uv = vec3(vtx_uv.xy * vpos.z, vpos.z); +#if pp_TwoVolumes == 1 + vtx_uv1 *= vpos.z; +#endif + vpos.w = 1.0; + vpos.z = 0.0; +} + +void main() +{ + vec4 vpos = n2Uniform.mvMat * in_pos; + vtx_base = vec4(in_base) / 255.0; + vtx_offs = vec4(in_offs) / 255.0; + + #if LIGHT_ON == 1 + vec3 vnorm = normalize(mat3(n2Uniform.normalMat) * in_normal); + #endif + + #if pp_TwoVolumes == 1 + vtx_base1 = vec4(in_base1) / 255.0; + vtx_offs1 = vec4(in_offs1) / 255.0; + vtx_uv1 = in_uv1; + #if LIGHT_ON == 1 + // FIXME need offset0 and offset1 for bump maps + if (n2Uniform.bumpMapping == 1) + computeBumpMap(vtx_offs, vtx_offs1, vpos.xyz, in_normal, n2Uniform.normalMat); + else + { + computeColors(vtx_base1, vtx_offs1, 1, vpos.xyz, vnorm); + #if pp_Texture == 0 + vtx_base1 += vtx_offs1; + #endif + } + if (n2Uniform.envMapping[1] == 1) + computeEnvMap(vtx_uv1.xy, vpos.xyz, vnorm); + #endif + #endif + #if LIGHT_ON == 1 + if (n2Uniform.bumpMapping == 0) + { + computeColors(vtx_base, vtx_offs, 0, vpos.xyz, vnorm); + #if pp_Texture == 0 + vtx_base += vtx_offs; + #endif + } + #endif + + vtx_uv.xy = in_uv; + #if LIGHT_ON == 1 + if (n2Uniform.envMapping[0] == 1) + computeEnvMap(vtx_uv.xy, vpos.xyz, vnorm); + #endif + + vpos = n2Uniform.projMat * vpos; + wDivide(vpos); + vtx_index = (uint(n2Uniform.polyNumber) << 18) + uint(gl_VertexIndex); + + gl_Position = vpos; +} +)"; + extern const char ModVolVertexShaderSource[]; +extern const char N2ModVolVertexShaderSource[]; +extern const char N2LightShaderSource[]; vk::UniqueShaderModule OITShaderManager::compileShader(const VertexShaderParams& params) { VulkanSource src; src.addConstant("pp_Gouraud", (int)params.gouraud) - .addSource(GouraudSource) - .addSource(OITVertexShaderSource); + .addSource(GouraudSource); + if (params.naomi2) + src.addConstant("pp_TwoVolumes", (int)params.twoVolume) + .addConstant("LIGHT_ON", (int)params.lightOn) + .addConstant("pp_Texture", (int)params.texture) + .addSource(N2LightShaderSource) + .addSource(OITN2VertexShaderSource); + else + src.addSource(OITVertexShaderSource); return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex, src.generate()); } @@ -699,6 +828,7 @@ vk::UniqueShaderModule OITShaderManager::compileFinalVertexShader() { return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex, VulkanSource().addSource(OITFinalVertexShaderSource).generate()); } + vk::UniqueShaderModule OITShaderManager::compileClearShader() { VulkanSource src; @@ -706,9 +836,15 @@ vk::UniqueShaderModule OITShaderManager::compileClearShader() .addSource(OITClearShaderSource); return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate()); } -vk::UniqueShaderModule OITShaderManager::compileModVolVertexShader() + +vk::UniqueShaderModule OITShaderManager::compileModVolVertexShader(bool naomi2) { - return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex, VulkanSource().addSource(ModVolVertexShaderSource).generate()); + VulkanSource src; + if (naomi2) + src.addSource(N2ModVolVertexShaderSource); + else + src.addSource(ModVolVertexShaderSource); + return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex, src.generate()); } vk::UniqueShaderModule OITShaderManager::compileModVolFragmentShader() { diff --git a/core/rend/vulkan/oit/oit_shaders.h b/core/rend/vulkan/oit/oit_shaders.h index b26716cda..2a9a84616 100644 --- a/core/rend/vulkan/oit/oit_shaders.h +++ b/core/rend/vulkan/oit/oit_shaders.h @@ -30,8 +30,13 @@ public: struct VertexShaderParams { bool gouraud; + bool naomi2; + bool lightOn; + bool twoVolume; + bool texture; - u32 hash() { return (u32)gouraud; } + u32 hash() { return (u32)gouraud | ((u32)naomi2 << 1) | ((u32)lightOn << 2) + | ((u32)twoVolume << 3) | ((u32)texture << 4); } }; // alpha test, clip test, use alpha, texture, ignore alpha, shader instr, offset, fog, gouraud, bump, clamp @@ -64,11 +69,12 @@ public: vk::ShaderModule GetVertexShader(const VertexShaderParams& params) { return getShader(vertexShaders, params); } vk::ShaderModule GetFragmentShader(const FragmentShaderParams& params) { return getShader(fragmentShaders, params); } - vk::ShaderModule GetModVolVertexShader() + vk::ShaderModule GetModVolVertexShader(bool naomi2) { - if (!modVolVertexShader) - modVolVertexShader = compileModVolVertexShader(); - return *modVolVertexShader; + vk::UniqueShaderModule& shader = naomi2 ? n2ModVolVertexShader : modVolVertexShader; + if (!shader) + shader = compileModVolVertexShader(naomi2); + return *shader; } vk::ShaderModule GetModVolShader() { @@ -85,9 +91,9 @@ public: vk::ShaderModule GetFinalShader() { - if (!finalAutosortShader) - finalAutosortShader = compileFinalShader(); - return *finalAutosortShader; + if (!finalFragmentShader) + finalFragmentShader = compileFinalShader(); + return *finalFragmentShader; } vk::ShaderModule GetFinalVertexShader() { @@ -114,7 +120,7 @@ private: } vk::UniqueShaderModule compileShader(const VertexShaderParams& params); vk::UniqueShaderModule compileShader(const FragmentShaderParams& params); - vk::UniqueShaderModule compileModVolVertexShader(); + vk::UniqueShaderModule compileModVolVertexShader(bool naomi2); vk::UniqueShaderModule compileModVolFragmentShader(); void compileTrModVolFragmentShader(ModVolMode mode); vk::UniqueShaderModule compileFinalShader(); @@ -124,12 +130,12 @@ private: std::map vertexShaders; std::map fragmentShaders; vk::UniqueShaderModule modVolVertexShader; + vk::UniqueShaderModule n2ModVolVertexShader; vk::UniqueShaderModule modVolShader; std::vector trModVolShaders; vk::UniqueShaderModule finalVertexShader; - vk::UniqueShaderModule finalAutosortShader; - vk::UniqueShaderModule finalSortedShader; + vk::UniqueShaderModule finalFragmentShader; vk::UniqueShaderModule clearShader; }; diff --git a/core/rend/vulkan/pipeline.cpp b/core/rend/vulkan/pipeline.cpp index 72956cf58..cf8adf16b 100644 --- a/core/rend/vulkan/pipeline.cpp +++ b/core/rend/vulkan/pipeline.cpp @@ -23,7 +23,7 @@ #include "rend/osd.h" #include "quad.h" -void PipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode) +void PipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode, bool naomi2) { // Vertex input state vk::PipelineVertexInputStateCreateInfo pipelineVertexInputStateCreateInfo; @@ -138,7 +138,7 @@ void PipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode) vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(); + vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2); vk::ShaderModule fragment_module = shaderManager->GetModVolShader(); vk::PipelineShaderStageCreateInfo stages[] = { @@ -163,12 +163,12 @@ void PipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode) renderPass // renderPass ); - modVolPipelines[hash(mode, cullMode)] = + modVolPipelines[hash(mode, cullMode, naomi2)] = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo); } -void PipelineManager::CreateDepthPassPipeline(int cullMode) +void PipelineManager::CreateDepthPassPipeline(int cullMode, bool naomi2) { // Vertex input state vk::PipelineVertexInputStateCreateInfo pipelineVertexInputStateCreateInfo = GetMainVertexInputStateCreateInfo(false); @@ -241,7 +241,7 @@ void PipelineManager::CreateDepthPassPipeline(int cullMode) vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(); + vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2); vk::ShaderModule fragment_module = shaderManager->GetModVolShader(); vk::PipelineShaderStageCreateInfo stages[] = { @@ -266,7 +266,7 @@ void PipelineManager::CreateDepthPassPipeline(int cullMode) renderPass // renderPass ); - depthPassPipelines[cullMode] = + depthPassPipelines[hash(cullMode, naomi2)] = GetContext()->GetDevice().createGraphicsPipelineUnique(GetContext()->GetPipelineCache(), graphicsPipelineCreateInfo); } @@ -392,7 +392,7 @@ void PipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const Pol vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetVertexShader(VertexShaderParams{ pp.pcw.Gouraud == 1 }); + vk::ShaderModule vertex_module = shaderManager->GetVertexShader(VertexShaderParams{ pp.pcw.Gouraud == 1, pp.isNaomi2() }); FragmentShaderParams params = {}; params.alphaTest = listType == ListType_Punch_Through; params.bumpmap = pp.tcw.PixelFmt == PixelBumpMap; diff --git a/core/rend/vulkan/pipeline.h b/core/rend/vulkan/pipeline.h index 9cb6dabc5..f0ba26de7 100644 --- a/core/rend/vulkan/pipeline.h +++ b/core/rend/vulkan/pipeline.h @@ -24,35 +24,24 @@ #include "texture.h" #include "utils.h" #include "vulkan_context.h" +#include "desc_set.h" #include class DescriptorSets { public: - DescriptorSets() = default; - DescriptorSets(DescriptorSets &&) = default; - DescriptorSets(const DescriptorSets &) = delete; - DescriptorSets& operator=(DescriptorSets &&) = default; - DescriptorSets& operator=(const DescriptorSets &) = delete; - - void Init(SamplerManager* samplerManager, vk::PipelineLayout pipelineLayout, vk::DescriptorSetLayout perFrameLayout, vk::DescriptorSetLayout perPolyLayout) + void init(SamplerManager* samplerManager, vk::PipelineLayout pipelineLayout, vk::DescriptorSetLayout perFrameLayout, vk::DescriptorSetLayout perPolyLayout) { this->samplerManager = samplerManager; this->pipelineLayout = pipelineLayout; - this->perFrameLayout = perFrameLayout; - this->perPolyLayout = perPolyLayout; + perFrameAlloc.setLayout(perFrameLayout); + perPolyAlloc.setLayout(perPolyLayout); } - void UpdateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView, vk::ImageView paletteImageView) + void updateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView, vk::ImageView paletteImageView) { - if (perFrameDescSets.empty()) - { - perFrameDescSets = GetContext()->GetDevice().allocateDescriptorSetsUnique( - vk::DescriptorSetAllocateInfo(GetContext()->GetDescriptorPool(), 1, &perFrameLayout)); - } - perFrameDescSetsInFlight.emplace_back(std::move(perFrameDescSets.back())); - perFrameDescSets.pop_back(); - vk::DescriptorSet perFrameDescSet = *perFrameDescSetsInFlight.back(); + if (!perFrameDescSet) + perFrameDescSet = perFrameAlloc.alloc(); std::vector bufferInfos; bufferInfos.emplace_back(buffer, vertexUniformOffset, sizeof(VertexShaderUniforms)); @@ -83,64 +72,87 @@ public: imageInfo = { palSampler, paletteImageView, vk::ImageLayout::eShaderReadOnlyOptimal }; writeDescriptorSets.emplace_back(perFrameDescSet, 3, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr); } - GetContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); + getContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); } - void SetTexture(Texture *texture, TSP tsp) + void bindPerPolyDescriptorSets(vk::CommandBuffer cmdBuffer, const PolyParam& poly, int polyNumber, vk::Buffer buffer, + vk::DeviceSize uniformOffset, vk::DeviceSize lightOffset) { - auto& inFlight = perPolyDescSetsInFlight; - std::pair index = std::make_pair(texture, tsp.full & SamplerManager::TSP_Mask); - if (inFlight.find(index) != inFlight.end()) - return; - - if (perPolyDescSets.empty()) - { - std::vector layouts(10, perPolyLayout); - perPolyDescSets = GetContext()->GetDevice().allocateDescriptorSetsUnique( - vk::DescriptorSetAllocateInfo(GetContext()->GetDescriptorPool(), (u32)layouts.size(), &layouts[0])); - } - vk::DescriptorImageInfo imageInfo(samplerManager->GetSampler(tsp), texture->GetReadOnlyImageView(), vk::ImageLayout::eShaderReadOnlyOptimal); - + vk::DescriptorSet perPolyDescSet = perPolyAlloc.alloc(); std::vector writeDescriptorSets; - writeDescriptorSets.emplace_back(*perPolyDescSets.back(), 0, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr); - GetContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); - inFlight[index] = std::move(perPolyDescSets.back()); - perPolyDescSets.pop_back(); + vk::DescriptorImageInfo imageInfo; + if (poly.texture != nullptr) + { + imageInfo = vk::DescriptorImageInfo(samplerManager->GetSampler(poly.tsp), + ((Texture *)poly.texture)->GetReadOnlyImageView(), vk::ImageLayout::eShaderReadOnlyOptimal); + writeDescriptorSets.emplace_back(perPolyDescSet, 0, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr); + } + + vk::DescriptorBufferInfo uniBufferInfo; + vk::DescriptorBufferInfo lightBufferInfo; + if (poly.isNaomi2()) + { + const vk::DeviceSize uniformAlignment = VulkanContext::Instance()->GetUniformBufferAlignment(); + size_t size = sizeof(N2VertexShaderUniforms) + align(sizeof(N2VertexShaderUniforms), uniformAlignment); + uniBufferInfo = vk::DescriptorBufferInfo{ buffer, uniformOffset + polyNumber * size, sizeof(N2VertexShaderUniforms) }; + writeDescriptorSets.emplace_back(perPolyDescSet, 2, 0, 1, vk::DescriptorType::eUniformBuffer, nullptr, &uniBufferInfo, nullptr); + + if (poly.lightModel != nullptr) + { + size = sizeof(VkN2LightConstants) + align(sizeof(VkN2LightConstants), uniformAlignment); + lightBufferInfo = vk::DescriptorBufferInfo{ buffer, lightOffset + (poly.lightModel - pvrrc.lightModels.head()) * size, sizeof(VkN2LightConstants) }; + writeDescriptorSets.emplace_back(perPolyDescSet, 3, 0, 1, vk::DescriptorType::eUniformBuffer, nullptr, &lightBufferInfo, nullptr); + } + // TODO no light + } + + getContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); + cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 1, 1, &perPolyDescSet, 0, nullptr); } - void BindPerFrameDescriptorSets(vk::CommandBuffer cmdBuffer) + void bindPerPolyDescriptorSets(vk::CommandBuffer cmdBuffer, const ModifierVolumeParam& mvParam, int polyNumber, vk::Buffer buffer, + vk::DeviceSize uniformOffset) { - cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 0, 1, &perFrameDescSetsInFlight.back().get(), 0, nullptr); + if (!mvParam.isNaomi2()) + return; + vk::DescriptorSet perPolyDescSet = perPolyAlloc.alloc(); + + const vk::DeviceSize uniformAlignment = VulkanContext::Instance()->GetUniformBufferAlignment(); + size_t size = sizeof(N2VertexShaderUniforms) + align(sizeof(N2VertexShaderUniforms), uniformAlignment); + vk::DescriptorBufferInfo uniBufferInfo{ buffer, uniformOffset + polyNumber * size, sizeof(N2VertexShaderUniforms) }; + vk::WriteDescriptorSet writeDescriptorSet(perPolyDescSet, 2, 0, 1, vk::DescriptorType::eUniformBuffer, nullptr, &uniBufferInfo, nullptr); + + getContext()->GetDevice().updateDescriptorSets(1, &writeDescriptorSet, 0, nullptr); + + cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 1, 1, &perPolyDescSet, 0, nullptr); } - void BindPerPolyDescriptorSets(vk::CommandBuffer cmdBuffer, Texture *texture, TSP tsp) + void bindPerFrameDescriptorSets(vk::CommandBuffer cmdBuffer) { - cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 1, 1, - &perPolyDescSetsInFlight[std::make_pair(texture, tsp.full & SamplerManager::TSP_Mask)].get(), 0, nullptr); + cmdBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 0, 1, &perFrameDescSet, 0, nullptr); } - void Reset() + void reset() { - for (auto& pair : perPolyDescSetsInFlight) - perPolyDescSets.emplace_back(std::move(pair.second)); - perPolyDescSetsInFlight.clear(); - for (auto& descset : perFrameDescSetsInFlight) - perFrameDescSets.emplace_back(std::move(descset)); - perFrameDescSetsInFlight.clear(); + perFrameAlloc.nextFrame(); + perPolyAlloc.nextFrame(); + perFrameDescSet = vk::DescriptorSet{}; + } + + void term() + { + perFrameAlloc.term(); + perPolyAlloc.term(); } private: - VulkanContext *GetContext() const { return VulkanContext::Instance(); } + VulkanContext *getContext() const { return VulkanContext::Instance(); } - vk::DescriptorSetLayout perFrameLayout; - vk::DescriptorSetLayout perPolyLayout; vk::PipelineLayout pipelineLayout; - - std::vector perFrameDescSets; - std::vector perFrameDescSetsInFlight; - std::vector perPolyDescSets; - std::map, vk::UniqueDescriptorSet> perPolyDescSetsInFlight; + DynamicDescSetAlloc perFrameAlloc; + DynamicDescSetAlloc perPolyAlloc; + vk::DescriptorSet perFrameDescSet = {}; SamplerManager* samplerManager = nullptr; }; @@ -165,6 +177,8 @@ public: }; vk::DescriptorSetLayoutBinding perPolyBindings[] = { { 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// texture + { 2, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex }, // Naomi2 uniforms + { 3, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex }, // Naomi2 lights }; perFrameLayout = GetContext()->GetDevice().createDescriptorSetLayoutUnique( vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perFrameBindings), perFrameBindings)); @@ -195,25 +209,26 @@ public: return *pipelines[pipehash]; } - vk::Pipeline GetModifierVolumePipeline(ModVolMode mode, int cullMode) + vk::Pipeline GetModifierVolumePipeline(ModVolMode mode, int cullMode, bool naomi2) { - u32 pipehash = hash(mode, cullMode); + u32 pipehash = hash(mode, cullMode, naomi2); const auto &pipeline = modVolPipelines.find(pipehash); if (pipeline != modVolPipelines.end()) return pipeline->second.get(); - CreateModVolPipeline(mode, cullMode); + CreateModVolPipeline(mode, cullMode, naomi2); return *modVolPipelines[pipehash]; } - vk::Pipeline GetDepthPassPipeline(int cullMode) + vk::Pipeline GetDepthPassPipeline(int cullMode, bool naomi2) { - cullMode = std::max(std::min(cullMode, (int)depthPassPipelines.size() - 1), 0); - const auto &pipeline = depthPassPipelines[cullMode]; - if (!pipeline) - CreateDepthPassPipeline(cullMode); + u32 pipehash = hash(cullMode, naomi2); + const auto &pipeline = depthPassPipelines.find(pipehash); + if (pipeline != depthPassPipelines.end()) + return pipeline->second.get(); + CreateDepthPassPipeline(cullMode, naomi2); - return *pipeline; + return *depthPassPipelines[pipehash]; } void Reset() @@ -228,8 +243,8 @@ public: vk::RenderPass GetRenderPass() const { return renderPass; } private: - void CreateModVolPipeline(ModVolMode mode, int cullMode); - void CreateDepthPassPipeline(int cullMode); + void CreateModVolPipeline(ModVolMode mode, int cullMode, bool naomi2); + void CreateDepthPassPipeline(int cullMode, bool naomi2); u32 hash(u32 listType, bool sortTriangles, const PolyParam *pp, bool gpuPalette) const { @@ -241,13 +256,17 @@ private: | (pp->tsp.ColorClamp << 11) | ((config::Fog ? pp->tsp.FogCtrl : 2) << 12) | (pp->tsp.SrcInstr << 14) | (pp->tsp.DstInstr << 17); hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | (pp->isp.DepthMode << 23); - hash |= ((u32)sortTriangles << 26) | ((u32)gpuPalette << 27); + hash |= ((u32)sortTriangles << 26) | ((u32)gpuPalette << 27) | ((u32)pp->isNaomi2() << 28); return hash; } - u32 hash(ModVolMode mode, int cullMode) const + u32 hash(ModVolMode mode, int cullMode, bool naomi2) const { - return ((int)mode << 2) | cullMode; + return ((int)mode << 2) | cullMode | ((int)naomi2 << 5); + } + u32 hash(int cullMode, bool naomi2) const + { + return cullMode | ((int)naomi2 << 2); } vk::PipelineVertexInputStateCreateInfo GetMainVertexInputStateCreateInfo(bool full = true) const @@ -263,6 +282,7 @@ private: vk::VertexInputAttributeDescription(1, 0, vk::Format::eR8G8B8A8Uint, offsetof(Vertex, col)), // base color vk::VertexInputAttributeDescription(2, 0, vk::Format::eR8G8B8A8Uint, offsetof(Vertex, spc)), // offset color vk::VertexInputAttributeDescription(3, 0, vk::Format::eR32G32Sfloat, offsetof(Vertex, u)), // tex coord + vk::VertexInputAttributeDescription(4, 0, vk::Format::eR32G32B32Sfloat, offsetof(Vertex, nx)), // naomi2 normal }; static const vk::VertexInputAttributeDescription vertexInputLightAttributeDescriptions[] = { @@ -280,7 +300,7 @@ private: std::map pipelines; std::map modVolPipelines; - std::array depthPassPipelines; + std::map depthPassPipelines; vk::UniquePipelineLayout pipelineLayout; vk::UniqueDescriptorSetLayout perFrameLayout; diff --git a/core/rend/vulkan/shaders.cpp b/core/rend/vulkan/shaders.cpp index 22b54994f..db6d3c4fe 100644 --- a/core/rend/vulkan/shaders.cpp +++ b/core/rend/vulkan/shaders.cpp @@ -26,7 +26,7 @@ static const char VertexShaderSource[] = R"( layout (std140, set = 0, binding = 0) uniform VertexShaderUniforms { - mat4 normal_matrix; + mat4 ndcMat; } uniformBuffer; layout (location = 0) in vec4 in_pos; @@ -40,7 +40,7 @@ layout (location = 2) noperspective out highp vec3 vtx_uv; void main() { - vec4 vpos = uniformBuffer.normal_matrix * in_pos; + vec4 vpos = uniformBuffer.ndcMat * in_pos; vtx_base = vec4(in_base) / 255.0; vtx_offs = vec4(in_offs) / 255.0; vtx_uv = vec3(in_uv * vpos.z, vpos.z); @@ -226,7 +226,7 @@ void main() extern const char ModVolVertexShaderSource[] = R"( layout (std140, set = 0, binding = 0) uniform VertexShaderUniforms { - mat4 normal_matrix; + mat4 ndcMat; } uniformBuffer; layout (location = 0) in vec4 in_pos; @@ -234,7 +234,7 @@ layout (location = 0) noperspective out highp float depth; void main() { - vec4 vpos = uniformBuffer.normal_matrix * in_pos; + vec4 vpos = uniformBuffer.ndcMat * in_pos; depth = vpos.z; vpos.w = 1.0; vpos.z = 0.0; @@ -325,12 +325,363 @@ void main() } )"; +extern const char N2LightShaderSource[] = R"( + +layout (std140, set = 1, binding = 2) uniform N2VertexShaderUniforms +{ + mat4 mvMat; + mat4 normalMat; + mat4 projMat; + ivec2 envMapping; + int bumpMapping; + int polyNumber; + + vec2 glossCoef; + ivec2 constantColor; + ivec2 modelDiffuse; + ivec2 modelSpecular; +} n2Uniform; + +#define PI 3.1415926 + +#define LMODE_SINGLE_SIDED 0 +#define LMODE_DOUBLE_SIDED 1 +#define LMODE_DOUBLE_SIDED_WITH_TOLERANCE 2 +#define LMODE_SPECIAL_EFFECT 3 +#define LMODE_THIN_SURFACE 4 +#define LMODE_BUMP_MAP 5 + +#define ROUTING_BASEDIFF_BASESPEC_ADD 0 +#define ROUTING_BASEDIFF_OFFSSPEC_ADD 1 +#define ROUTING_OFFSDIFF_BASESPEC_ADD 2 +#define ROUTING_OFFSDIFF_OFFSSPEC_ADD 3 +#define ROUTING_ALPHADIFF_ADD 4 +#define ROUTING_ALPHAATTEN_ADD 5 +#define ROUTING_FOGDIFF_ADD 6 +#define ROUTING_FOGATTENUATION_ADD 7 +#define ROUTING_BASEDIFF_BASESPEC_SUB 8 +#define ROUTING_BASEDIFF_OFFSSPEC_SUB 9 +#define ROUTING_OFFSDIFF_BASESPEC_SUB 10 +#define ROUTING_OFFSDIFF_OFFSSPEC_SUB 11 +#define ROUTING_ALPHADIFF_SUB 12 +#define ROUTING_ALPHAATTEN_SUB 13 + +struct N2Light +{ + vec4 color; + vec4 direction; // For parallel/spot + vec4 position; // For spot/point + + int parallel; + int routing; + int dmode; + int smode; + + ivec2 diffuse; + ivec2 specular; + + float attnDistA; + float attnDistB; + float attnAngleA; // For spot + float attnAngleB; + + int distAttnMode; // For spot/point + int _pad1; + int _pad2; + int _pad3; +}; + +layout (std140, set = 1, binding = 3) uniform N2Lights +{ + N2Light lights[16]; + vec4 ambientBase[2]; + vec4 ambientOffset[2]; + ivec2 ambientMaterialBase; + ivec2 ambientMaterialOffset; + int lightCount; + int useBaseOver; + int bumpId0; + int bumpId1; +} n2Lights; + +void computeColors(inout vec4 baseCol, inout vec4 offsetCol, in int volIdx, in vec3 position, in vec3 normal) +{ + if (n2Uniform.constantColor[volIdx] == 1) + return; + vec3 diffuse = vec3(0.0); + vec3 specular = vec3(0.0); + float diffuseAlpha = 0.0; + float specularAlpha = 0.0; + vec3 reflectDir = reflect(normalize(position), normal); + const float BASE_FACTOR = 1.45; + + for (int i = 0; i < n2Lights.lightCount; i++) + { + vec3 lightDir; // direction to the light + vec3 lightColor = n2Lights.lights[i].color.rgb; + if (n2Lights.lights[i].parallel == 1) + { + lightDir = normalize(n2Lights.lights[i].direction.xyz); + } + else + { + lightDir = normalize(n2Lights.lights[i].position.xyz - position); + if (n2Lights.lights[i].attnDistA != 1.0 || n2Lights.lights[i].attnDistB != 0.0) + { + float distance = length(n2Lights.lights[i].position.xyz - position); + if (n2Lights.lights[i].distAttnMode == 0) + distance = 1.0 / distance; + lightColor *= clamp(n2Lights.lights[i].attnDistB * distance + n2Lights.lights[i].attnDistA, 0.0, 1.0); + } + if (n2Lights.lights[i].attnAngleA != 1.0 || n2Lights.lights[i].attnAngleB != 0.0) + { + vec3 spotDir = n2Lights.lights[i].direction.xyz; + float cosAngle = 1.0 - max(0.0, dot(lightDir, spotDir)); + lightColor *= clamp(cosAngle * n2Lights.lights[i].attnAngleB + n2Lights.lights[i].attnAngleA, 0.0, 1.0); + } + } + if (n2Lights.lights[i].diffuse[volIdx] == 1) + { + float factor = BASE_FACTOR; + if (n2Lights.lights[i].dmode == LMODE_SINGLE_SIDED) + factor *= max(dot(normal, lightDir), 0.0); + else if (n2Lights.lights[i].dmode == LMODE_DOUBLE_SIDED) + factor *= abs(dot(normal, lightDir)); + + if (n2Lights.lights[i].routing == ROUTING_ALPHADIFF_SUB) + diffuseAlpha -= lightColor.r * factor; + else if (n2Lights.lights[i].routing == ROUTING_BASEDIFF_BASESPEC_ADD || n2Lights.lights[i].routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) + diffuse += lightColor * factor; + if (n2Lights.lights[i].routing == ROUTING_OFFSDIFF_BASESPEC_ADD || n2Lights.lights[i].routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD) + specular += lightColor * factor; + } + if (n2Lights.lights[i].specular[volIdx] == 1) + { + float factor = BASE_FACTOR; + if (n2Lights.lights[i].smode == LMODE_SINGLE_SIDED) + factor *= clamp(pow(max(dot(lightDir, reflectDir), 0.0), n2Uniform.glossCoef[volIdx]), 0.0, 1.0); + else if (n2Lights.lights[i].smode == LMODE_DOUBLE_SIDED) + factor *= clamp(pow(abs(dot(lightDir, reflectDir)), n2Uniform.glossCoef[volIdx]), 0.0, 1.0); + + if (n2Lights.lights[i].routing == ROUTING_ALPHADIFF_SUB) + specularAlpha -= lightColor.r * factor; + else if (n2Lights.lights[i].routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD || n2Lights.lights[i].routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) + specular += lightColor * factor; + if (n2Lights.lights[i].routing == ROUTING_BASEDIFF_BASESPEC_ADD || n2Lights.lights[i].routing == ROUTING_OFFSDIFF_BASESPEC_ADD) + diffuse += lightColor * factor; + } + } + // ambient with material + if (n2Lights.ambientMaterialBase[volIdx] == 1) + diffuse += n2Lights.ambientBase[volIdx].rgb; + if (n2Lights.ambientMaterialOffset[volIdx] == 1) + specular += n2Lights.ambientOffset[volIdx].rgb; + + if (n2Uniform.modelDiffuse[volIdx] == 1) + baseCol.rgb *= diffuse; + if (n2Uniform.modelSpecular[volIdx] == 1) + offsetCol.rgb *= specular; + + // ambient w/o material + if (n2Lights.ambientMaterialBase[volIdx] == 0 && n2Uniform.modelDiffuse[volIdx] == 1) + baseCol.rgb += n2Lights.ambientBase[volIdx].rgb; + if (n2Lights.ambientMaterialOffset[volIdx] == 0 && n2Uniform.modelSpecular[volIdx] == 1) + offsetCol.rgb += n2Lights.ambientOffset[volIdx].rgb; + + baseCol.a += diffuseAlpha; + offsetCol.a += specularAlpha; + if (n2Lights.useBaseOver == 1) + { + vec4 overflow = max(baseCol - vec4(1.0), 0.0); + offsetCol += overflow; + } + baseCol = clamp(baseCol, 0.0, 1.0); + offsetCol = clamp(offsetCol, 0.0, 1.0); +} + +void computeEnvMap(inout vec2 uv, in vec3 position, in vec3 normal) +{ + // Spherical mapping + //vec3 r = reflect(normalize(position), normal); + //float m = 2.0 * sqrt(r.x * r.x + r.y * r.y + (r.z + 1.0) * (r.z + 1.0)); + //uv += r.xy / m + 0.5; + + // Cheap env mapping + uv += normal.xy / 2.0 + 0.5; + uv = clamp(uv, 0.0, 1.0); +} + +void computeBumpMap(inout vec4 color0, in vec4 color1, in vec3 position, in vec3 normal, in mat4 normalMat) +{ + // TODO + if (n2Lights.bumpId0 == -1) + return; + normal = normalize(normal); + vec3 tangent = color0.xyz; + if (tangent.x > 0.5) + tangent.x -= 1.0; + if (tangent.y > 0.5) + tangent.y -= 1.0; + if (tangent.z > 0.5) + tangent.z -= 1.0; + //tangent = normalize(normalMat * vec4(tangent, 0.0)).xyz; + tangent = normalize(tangent); + vec3 bitangent = color1.xyz; + if (bitangent.x > 0.5) + bitangent.x -= 1.0; + if (bitangent.y > 0.5) + bitangent.y -= 1.0; + if (bitangent.z > 0.5) + bitangent.z -= 1.0; + //bitangent = normalize(normalMat * vec4(bitangent, 0.0)).xyz; + bitangent = normalize(bitangent); + + float scaleDegree = color0.w; + float scaleOffset = color1.w; + + vec3 lightDir; // direction to the light + if (n2Lights.lights[n2Lights.bumpId0].parallel == 1) + lightDir = n2Lights.lights[n2Lights.bumpId0].direction.xyz; + else + lightDir = n2Lights.lights[n2Lights.bumpId0].position.xyz - position; + lightDir = normalize(lightDir * mat3(normalMat)); + + float n = dot(lightDir, normal); + float cosQ = dot(lightDir, tangent); + float sinQ = dot(lightDir, bitangent); + + float sinT = clamp(n, 0.0, 1.0); + float k1 = 1.0 - scaleDegree; + float k2 = scaleDegree * sinT; + float k3 = scaleDegree * sqrt(1.0 - sinT * sinT); // cos T + + float q = acos(cosQ); + if (sinQ < 0.0) + q = 2.0 * PI - q; + + color0.r = k2; + color0.g = k3; + color0.b = q / PI / 2.0; + color0.a = k1; +} +)"; + +static const char N2VertexShaderSource[] = R"( +layout (std140, set = 0, binding = 0) uniform VertexShaderUniforms +{ + mat4 ndcMat; +} uniformBuffer; + +layout (location = 0) in vec4 in_pos; +layout (location = 1) in uvec4 in_base; +layout (location = 2) in uvec4 in_offs; +layout (location = 3) in mediump vec2 in_uv; +layout (location = 4) in vec3 in_normal; + +layout (location = 0) INTERPOLATION out highp vec4 vtx_base; +layout (location = 1) INTERPOLATION out highp vec4 vtx_offs; +layout (location = 2) noperspective out highp vec3 vtx_uv; + +void wDivide(inout vec4 vpos) +{ + vpos = vec4(vpos.xy / vpos.w, 1.0 / vpos.w, 1.0); + vpos = uniformBuffer.ndcMat * vpos; +#if pp_Gouraud == 1 + vtx_base *= vpos.z; + vtx_offs *= vpos.z; +#endif + vtx_uv = vec3(vtx_uv.xy * vpos.z, vpos.z); + vpos.w = 1.0; + vpos.z = 0.0; +} + +void main() +{ + vec4 vpos = n2Uniform.mvMat * in_pos; + vtx_base = vec4(in_base) / 255.0; + vtx_offs = vec4(in_offs) / 255.0; + + vec3 vnorm = normalize(mat3(n2Uniform.normalMat) * in_normal); + + // TODO bump mapping + if (n2Uniform.bumpMapping == 0) + { + computeColors(vtx_base, vtx_offs, 0, vpos.xyz, vnorm); + #if pp_Texture == 0 + vtx_base += vtx_offs; + #endif + } + + vtx_uv.xy = in_uv; + if (n2Uniform.envMapping[0] == 1) + computeEnvMap(vtx_uv.xy, vpos.xyz, vnorm); + + vpos = n2Uniform.projMat * vpos; + wDivide(vpos); + + gl_Position = vpos; +} +)"; + +extern const char N2ModVolVertexShaderSource[] = R"( +layout (std140, set = 0, binding = 0) uniform VertexShaderUniforms +{ + mat4 ndcMat; +} uniformBuffer; + +layout (std140, set = 1, binding = 2) uniform N2VertexShaderUniforms +{ + mat4 mvMat; + mat4 normalMat; + mat4 projMat; + ivec2 envMapping; + int bumpMapping; + int polyNumber; + + vec2 glossCoef; + ivec2 constantColor; + ivec2 modelDiffuse; + ivec2 modelSpecular; +} n2Uniform; + +layout (location = 0) in vec4 in_pos; +layout (location = 0) noperspective out highp float depth; + +void wDivide(inout vec4 vpos) +{ + vpos = vec4(vpos.xy / vpos.w, 1.0 / vpos.w, 1.0); + vpos = uniformBuffer.ndcMat * vpos; + depth = vpos.z; + vpos.w = 1.0; + vpos.z = 0.0; +} + +void main() +{ + vec4 vpos = n2Uniform.mvMat * in_pos; + vpos = n2Uniform.projMat * vpos; + wDivide(vpos); + + gl_Position = vpos; +} +)"; + vk::UniqueShaderModule ShaderManager::compileShader(const VertexShaderParams& params) { VulkanSource src; - src.addConstant("pp_Gouraud", (int)params.gouraud) - .addSource(GouraudSource) - .addSource(VertexShaderSource); + if (!params.naomi2) + { + src.addConstant("pp_Gouraud", (int)params.gouraud) + .addSource(GouraudSource) + .addSource(VertexShaderSource); + } + else + { + src.addConstant("pp_Gouraud", (int)params.gouraud) + .addSource(GouraudSource) + .addSource(N2LightShaderSource) + .addSource(N2VertexShaderSource); + } return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex, src.generate()); } @@ -355,9 +706,10 @@ vk::UniqueShaderModule ShaderManager::compileShader(const FragmentShaderParams& return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate()); } -vk::UniqueShaderModule ShaderManager::compileModVolVertexShader() +vk::UniqueShaderModule ShaderManager::compileModVolVertexShader(bool naomi2) { - return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex, VulkanSource().addSource(ModVolVertexShaderSource).generate()); + return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex, + VulkanSource().addSource(naomi2 ? N2ModVolVertexShaderSource : ModVolVertexShaderSource).generate()); } vk::UniqueShaderModule ShaderManager::compileModVolFragmentShader() diff --git a/core/rend/vulkan/shaders.h b/core/rend/vulkan/shaders.h index 61223f7fb..d75ce37c9 100644 --- a/core/rend/vulkan/shaders.h +++ b/core/rend/vulkan/shaders.h @@ -28,8 +28,9 @@ struct VertexShaderParams { bool gouraud; + bool naomi2; - u32 hash() { return (u32)gouraud; } + u32 hash() { return (u32)gouraud | ((u32)naomi2 << 1); } }; // alpha test, clip test, use alpha, texture, ignore alpha, shader instr, offset, fog, gouraud, bump, clamp, trilinear @@ -62,7 +63,7 @@ struct FragmentShaderParams // std140 alignment required struct VertexShaderUniforms { - glm::mat4 normal_matrix; + glm::mat4 ndcMat; }; // std140 alignment required @@ -76,16 +77,71 @@ struct FragmentShaderUniforms float sp_FOG_DENSITY; }; +// std140 alignment required +struct N2VertexShaderUniforms +{ + glm::mat4 mvMat; + glm::mat4 normalMat; + glm::mat4 projMat; + int envMapping[2]; + int bumpMapping; + int polyNumber; + + float glossCoef[2]; + int constantColor[2]; + int modelDiffuse[2]; + int modelSpecular[2]; +}; + +// std140 alignment required +struct VkN2Light +{ + float color[4]; + float direction[4]; + float position[4]; + + int parallel; + int routing; + int dmode; + int smode; + + int diffuse[2]; + int specular[2]; + + float attnDistA; + float attnDistB; + float attnAngleA; + float attnAngleB; + + int distAttnMode; + int _pad[3]; +}; + +// std140 alignment required +struct VkN2LightConstants +{ + VkN2Light lights[16]; + float ambientBase[2][4]; + float ambientOffset[2][4]; + int ambientMaterialBase[2]; + int ambientMaterialOffset[2]; + int lightCount; + int useBaseOver; + int bumpId1; + int bumpId2; +}; + class ShaderManager { public: vk::ShaderModule GetVertexShader(const VertexShaderParams& params) { return getShader(vertexShaders, params); } vk::ShaderModule GetFragmentShader(const FragmentShaderParams& params) { return getShader(fragmentShaders, params); } - vk::ShaderModule GetModVolVertexShader() + vk::ShaderModule GetModVolVertexShader(bool naomi2) { - if (!modVolVertexShader) - modVolVertexShader = compileModVolVertexShader(); - return *modVolVertexShader; + vk::UniqueShaderModule& shader = naomi2 ? n2ModVolVertexShader : modVolVertexShader; + if (!shader) + shader = compileModVolVertexShader(naomi2); + return *shader; } vk::ShaderModule GetModVolShader() { @@ -148,7 +204,7 @@ private: } vk::UniqueShaderModule compileShader(const VertexShaderParams& params); vk::UniqueShaderModule compileShader(const FragmentShaderParams& params); - vk::UniqueShaderModule compileModVolVertexShader(); + vk::UniqueShaderModule compileModVolVertexShader(bool naomi2); vk::UniqueShaderModule compileModVolFragmentShader(); vk::UniqueShaderModule compileQuadVertexShader(bool rotate); vk::UniqueShaderModule compileQuadFragmentShader(bool ignoreTexAlpha); @@ -158,6 +214,7 @@ private: std::map vertexShaders; std::map fragmentShaders; vk::UniqueShaderModule modVolVertexShader; + vk::UniqueShaderModule n2ModVolVertexShader; vk::UniqueShaderModule modVolShader; vk::UniqueShaderModule quadVertexShader; vk::UniqueShaderModule quadRotateVertexShader; diff --git a/core/rend/vulkan/utils.h b/core/rend/vulkan/utils.h index 7c27ffb3c..4d563cdc9 100644 --- a/core/rend/vulkan/utils.h +++ b/core/rend/vulkan/utils.h @@ -96,3 +96,9 @@ public: static inline vk::ClearColorValue getBorderColor() { return vk::ClearColorValue(std::array{ VO_BORDER_COL.red(), VO_BORDER_COL.green(), VO_BORDER_COL.blue(), 1.f }); } + +static inline u32 align(vk::DeviceSize offset, u32 alignment) +{ + u32 pad = (u32)(offset & (alignment - 1)); + return pad == 0 ? 0 : alignment - pad; +} diff --git a/core/rend/vulkan/vulkan_context.cpp b/core/rend/vulkan/vulkan_context.cpp index a045a5e09..61e8e8d3a 100644 --- a/core/rend/vulkan/vulkan_context.cpp +++ b/core/rend/vulkan/vulkan_context.cpp @@ -431,19 +431,19 @@ bool VulkanContext::InitDevice() vk::DescriptorPoolSize pool_sizes[] = { { vk::DescriptorType::eSampler, 2 }, - { vk::DescriptorType::eCombinedImageSampler, 15000 }, + { vk::DescriptorType::eCombinedImageSampler, 40000 }, { vk::DescriptorType::eSampledImage, 2 }, { vk::DescriptorType::eStorageImage, 12 }, { vk::DescriptorType::eUniformTexelBuffer, 2 }, { vk::DescriptorType::eStorageTexelBuffer, 2 }, - { vk::DescriptorType::eUniformBuffer, 36 }, + { vk::DescriptorType::eUniformBuffer, 80000 }, { vk::DescriptorType::eStorageBuffer, 36 }, { vk::DescriptorType::eUniformBufferDynamic, 2 }, { vk::DescriptorType::eStorageBufferDynamic, 2 }, { vk::DescriptorType::eInputAttachment, 36 } }; descriptorPool = device->createDescriptorPoolUnique(vk::DescriptorPoolCreateInfo(vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, - 10000, ARRAY_SIZE(pool_sizes), pool_sizes)); + 40000, ARRAY_SIZE(pool_sizes), pool_sizes)); std::string cachePath = hostfs::getShaderCachePath("vulkan_pipeline.cache"); diff --git a/core/rend/vulkan/vulkan_renderer.h b/core/rend/vulkan/vulkan_renderer.h index fc9916afe..5242464c9 100644 --- a/core/rend/vulkan/vulkan_renderer.h +++ b/core/rend/vulkan/vulkan_renderer.h @@ -140,6 +140,8 @@ public: bool result; if (ctx->rend.isRenderFramebuffer) result = RenderFramebuffer(ctx); + else if (settings.platform.isNaomi2()) + result = ta_parse_naomi2(ctx); else result = ta_parse_vdrc(ctx);