diff --git a/Source/Core/VideoBackends/Metal/MTLGfx.mm b/Source/Core/VideoBackends/Metal/MTLGfx.mm index f0376ba3ed..acc79683ef 100644 --- a/Source/Core/VideoBackends/Metal/MTLGfx.mm +++ b/Source/Core/VideoBackends/Metal/MTLGfx.mm @@ -386,9 +386,11 @@ void Metal::Gfx::SetSamplerState(u32 index, const SamplerState& state) g_state_tracker->SetSampler(index, state); } -void Metal::Gfx::SetComputeImageTexture(u32, AbstractTexture* texture, bool read, bool write) +void Metal::Gfx::SetComputeImageTexture(u32 index, AbstractTexture* texture, bool read, bool write) { - g_state_tracker->SetComputeTexture(static_cast(texture)); + g_state_tracker->SetTexture(index + VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS, + texture ? static_cast(texture)->GetMTLTexture() : + nullptr); } void Metal::Gfx::UnbindTexture(const AbstractTexture* texture) diff --git a/Source/Core/VideoBackends/Metal/MTLPipeline.h b/Source/Core/VideoBackends/Metal/MTLPipeline.h index 0d7c6b6650..d8c59a1b5b 100644 --- a/Source/Core/VideoBackends/Metal/MTLPipeline.h +++ b/Source/Core/VideoBackends/Metal/MTLPipeline.h @@ -61,12 +61,14 @@ public: MRCOwned> pipeline); id GetComputePipeline() const { return m_compute_pipeline; } - bool UsesTexture(u32 index) const { return m_textures & (1 << index); } + u32 GetTextures() const { return m_textures; } + u32 GetSamplers() const { return m_samplers; } bool UsesBuffer(u32 index) const { return m_buffers & (1 << index); } private: MRCOwned> m_compute_pipeline; u32 m_textures = 0; + u32 m_samplers = 0; u32 m_buffers = 0; }; } // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLPipeline.mm b/Source/Core/VideoBackends/Metal/MTLPipeline.mm index cc99038859..7d43a1a6d7 100644 --- a/Source/Core/VideoBackends/Metal/MTLPipeline.mm +++ b/Source/Core/VideoBackends/Metal/MTLPipeline.mm @@ -67,5 +67,5 @@ Metal::ComputePipeline::ComputePipeline(ShaderStage stage, MTLComputePipelineRef MRCOwned> pipeline) : Shader(stage, std::move(msl), std::move(shader)), 
m_compute_pipeline(std::move(pipeline)) { - GetArguments([reflection arguments], &m_textures, nullptr, &m_buffers); + GetArguments([reflection arguments], &m_textures, &m_samplers, &m_buffers); } diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.h b/Source/Core/VideoBackends/Metal/MTLStateTracker.h index f3fa306dd7..08c9d03d56 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.h +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.h @@ -90,7 +90,6 @@ public: void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth); void SetTexture(u32 idx, id texture); void SetSampler(u32 idx, const SamplerState& sampler); - void SetComputeTexture(const Texture* texture); void InvalidateUniforms(bool vertex, bool geometry, bool fragment); void SetUtilityUniform(const void* buffer, size_t size); void SetTexelBuffer(id buffer, u32 offset0, u32 offset1); @@ -191,11 +190,18 @@ private: MRCOwned> m_dummy_texture; + // Compute has a set of samplers and a set of writable images + static constexpr u32 MAX_COMPUTE_TEXTURES = VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS * 2; + static constexpr u32 MAX_PIXEL_TEXTURES = VideoCommon::MAX_PIXEL_SHADER_SAMPLERS; + static constexpr u32 MAX_TEXTURES = std::max(MAX_PIXEL_TEXTURES, MAX_COMPUTE_TEXTURES); + static constexpr u32 MAX_SAMPLERS = + std::max(VideoCommon::MAX_PIXEL_SHADER_SAMPLERS, VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS); + // MARK: State - u8 m_dirty_textures; + u16 m_dirty_textures; u8 m_dirty_samplers; - static_assert(sizeof(m_dirty_textures) * 8 >= VideoCommon::MAX_PIXEL_SHADER_SAMPLERS, - "Make these bigger"); + static_assert(sizeof(m_dirty_textures) * 8 >= MAX_TEXTURES, "Make this bigger"); + static_assert(sizeof(m_dirty_samplers) * 8 >= MAX_SAMPLERS, "Make this bigger"); union Flags { struct @@ -206,7 +212,6 @@ private: bool has_gx_ps_uniform : 1; bool has_utility_vs_uniform : 1; bool has_utility_ps_uniform : 1; - bool has_compute_texture : 1; bool has_pipeline : 1; bool 
has_scissor : 1; bool has_viewport : 1; @@ -251,11 +256,11 @@ private: Util::Viewport viewport; const Pipeline* render_pipeline = nullptr; const ComputePipeline* compute_pipeline = nullptr; - std::array, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> textures = {}; - std::array, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> samplers = {}; - std::array sampler_min_lod; - std::array sampler_max_lod; - std::array sampler_states; + std::array, MAX_TEXTURES> textures = {}; + std::array, MAX_SAMPLERS> samplers = {}; + std::array sampler_min_lod; + std::array sampler_max_lod; + std::array sampler_states; const Texture* compute_texture = nullptr; std::unique_ptr utility_uniform; u32 utility_uniform_size = 0; diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm index 9a8b866e97..b67c4162b1 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm @@ -345,8 +345,8 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor) m_current.cull_mode = MTLCullModeNone; m_current.perf_query_group = static_cast(-1); m_flags.NewEncoder(); - m_dirty_samplers = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1; - m_dirty_textures = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1; + m_dirty_samplers = (1 << MAX_SAMPLERS) - 1; + m_dirty_textures = (1 << MAX_TEXTURES) - 1; CheckScissor(); CheckViewport(); ASSERT_MSG(VIDEO, m_current_render_encoder, "Failed to create render encoder!"); @@ -360,8 +360,8 @@ void Metal::StateTracker::BeginComputePass() if (m_manual_buffer_upload) [m_current_compute_encoder waitForFence:m_fence]; m_flags.NewEncoder(); - m_dirty_samplers = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1; - m_dirty_textures = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1; + m_dirty_samplers = (1 << MAX_SAMPLERS) - 1; + m_dirty_textures = (1 << MAX_TEXTURES) - 1; } void Metal::StateTracker::EndRenderPass() @@ -536,15 +536,6 @@ void 
Metal::StateTracker::SetSampler(u32 idx, const SamplerState& sampler) SetSamplerForce(idx, sampler); } -void Metal::StateTracker::SetComputeTexture(const Texture* texture) -{ - if (m_state.compute_texture != texture) - { - m_state.compute_texture = texture; - m_flags.has_compute_texture = false; - } -} - void Metal::StateTracker::UnbindTexture(id texture) { for (size_t i = 0; i < std::size(m_state.textures); ++i) @@ -897,10 +888,31 @@ void Metal::StateTracker::PrepareCompute() m_flags.has_pipeline = true; [enc setComputePipelineState:pipe->GetComputePipeline()]; } - if (!m_flags.has_compute_texture && pipe->UsesTexture(0)) + if (u32 dirty = m_dirty_textures & pipe->GetTextures()) { - m_flags.has_compute_texture = true; - [enc setTexture:m_state.compute_texture->GetMTLTexture() atIndex:0]; + m_dirty_textures &= ~pipe->GetTextures(); + // Since there are two sets of textures, it's likely there'll be a few in each + // Check each set separately to avoid doing too many unnecessary bindings + constexpr u32 lo_mask = (1 << VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS) - 1; + if (u32 lo = dirty & lo_mask) + { + NSRange range = RangeOfBits(lo); + [enc setTextures:&m_state.textures[range.location] withRange:range]; + } + if (u32 hi = dirty & ~lo_mask) + { + NSRange range = RangeOfBits(hi); + [enc setTextures:&m_state.textures[range.location] withRange:range]; + } + } + if (u32 dirty = m_dirty_samplers & pipe->GetSamplers()) + { + m_dirty_samplers &= ~pipe->GetSamplers(); + NSRange range = RangeOfBits(dirty); + [enc setSamplerStates:&m_state.samplers[range.location] + lodMinClamps:&m_state.sampler_min_lod[range.location] + lodMaxClamps:&m_state.sampler_max_lod[range.location] + withRange:range]; } // Compute and render can't happen at the same time, so just reuse one of the flags if (!m_flags.has_utility_vs_uniform && pipe->UsesBuffer(0)) diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index 174ea35b8a..c7a5c82e34 100644 --- 
a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -11,6 +11,7 @@ #include "Common/MsgHandler.h" +#include "VideoCommon/Constants.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/Spirv.h" @@ -365,10 +366,10 @@ static const std::string_view COMPUTE_SHADER_HEADER = R"( #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require -// All resources are packed into one descriptor set for compute. #define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1)) +#define SAMPLER_BINDING(x) layout(set = 1, binding = x) #define SSBO_BINDING(x) layout(std430, set = 2, binding = x) -#define IMAGE_BINDING(format, x) layout(format, set = 1, binding = x) +#define IMAGE_BINDING(format, x) layout(format, set = 3, binding = x) // hlsl to glsl function translation #define API_METAL 1 @@ -462,20 +463,13 @@ std::optional Metal::Util::TranslateShaderToMSL(ShaderStage stage, MakeResourceBinding(spv::ExecutionModelVertex, 2, 1, 0, 0, 0), // vs/ssbo MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo - MakeResourceBinding(spv::ExecutionModelFragment, 1, 0, 0, 0, 0), // ps/samp0 - MakeResourceBinding(spv::ExecutionModelFragment, 1, 1, 0, 1, 1), // ps/samp1 - MakeResourceBinding(spv::ExecutionModelFragment, 1, 2, 0, 2, 2), // ps/samp2 - MakeResourceBinding(spv::ExecutionModelFragment, 1, 3, 0, 3, 3), // ps/samp3 - MakeResourceBinding(spv::ExecutionModelFragment, 1, 4, 0, 4, 4), // ps/samp4 - MakeResourceBinding(spv::ExecutionModelFragment, 1, 5, 0, 5, 5), // ps/samp5 - MakeResourceBinding(spv::ExecutionModelFragment, 1, 6, 0, 6, 6), // ps/samp6 - MakeResourceBinding(spv::ExecutionModelFragment, 1, 7, 0, 7, 7), // ps/samp7 - MakeResourceBinding(spv::ExecutionModelFragment, 1, 8, 0, 8, 8), // ps/samp8 + // Dynamic list initialized below Fragment, 1, N, 0, 
N, N // ps/samp0-N MakeResourceBinding(spv::ExecutionModelFragment, 2, 0, 2, 0, 0), // ps/ssbo MakeResourceBinding(spv::ExecutionModelGLCompute, 0, 1, 0, 0, 0), // cs/ubo - MakeResourceBinding(spv::ExecutionModelGLCompute, 1, 0, 0, 0, 0), // cs/output_image + // Dynamic list initialized below GLCompute, 1, N, 0, N, N, // cs/samp0-N MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 0, 2, 0, 0), // cs/ssbo MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 1, 3, 0, 0), // cs/ssbo + // Dynamic list initialized below GLCompute, 3, N, 0, N, 0, // cs/img0-N }; spirv_cross::CompilerMSL::Options options; @@ -503,6 +497,29 @@ std::optional Metal::Util::TranslateShaderToMSL(ShaderStage stage, for (auto& binding : resource_bindings) compiler.add_msl_resource_binding(binding); + if (stage == ShaderStage::Pixel) + { + for (u32 i = 0; i < VideoCommon::MAX_PIXEL_SHADER_SAMPLERS; i++) // ps/samp0-N + { + compiler.add_msl_resource_binding( + MakeResourceBinding(spv::ExecutionModelFragment, 1, i, 0, i, i)); + } + } + else if (stage == ShaderStage::Compute) + { + u32 img = 0; + u32 smp = 0; + for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/samp0-N + { + compiler.add_msl_resource_binding( + MakeResourceBinding(spv::ExecutionModelGLCompute, 1, i, 0, img++, smp++)); + } + for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/img0-N + { + compiler.add_msl_resource_binding( + MakeResourceBinding(spv::ExecutionModelGLCompute, 3, i, 0, img++, 0)); + } + } std::string output(MSL_HEADER); std::string compiled = compiler.compile();