From 4d700ac01c2eac4ab759b786da37f12992a028ee Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sun, 7 Apr 2024 17:58:30 +0200 Subject: [PATCH] metal: create pipeline cache at startup --- src/video_core/CMakeLists.txt | 1 + .../renderer_metal/mtl_pipeline_cache.cpp | 314 ++++++++++++++++++ .../renderer_metal/mtl_pipeline_cache.h | 148 +++++++++ .../renderer_metal/mtl_rasterizer.cpp | 4 +- .../renderer_metal/mtl_rasterizer.h | 4 + 5 files changed, 470 insertions(+), 1 deletion(-) create mode 100644 src/video_core/renderer_metal/mtl_pipeline_cache.cpp create mode 100644 src/video_core/renderer_metal/mtl_pipeline_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c55210efae..e90be01e22 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -381,6 +381,7 @@ if (APPLE) renderer_metal/mtl_compute_pipeline.cpp renderer_metal/mtl_device.cpp renderer_metal/mtl_graphics_pipeline.cpp + renderer_metal/mtl_pipeline_cache.cpp renderer_metal/mtl_rasterizer.cpp renderer_metal/mtl_staging_buffer_pool.cpp renderer_metal/mtl_swap_chain.cpp diff --git a/src/video_core/renderer_metal/mtl_pipeline_cache.cpp b/src/video_core/renderer_metal/mtl_pipeline_cache.cpp new file mode 100644 index 0000000000..08fe66ce61 --- /dev/null +++ b/src/video_core/renderer_metal/mtl_pipeline_cache.cpp @@ -0,0 +1,314 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include +#include + +#include "common/bit_cast.h" +#include "common/cityhash.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" +#include "common/microprofile.h" +#include "common/thread_worker.h" +#include "core/core.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include 
"shader_recompiler/program_header.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_metal/mtl_compute_pipeline.h" +#include "video_core/renderer_metal/mtl_device.h" +#include "video_core/renderer_metal/mtl_pipeline_cache.h" +#include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" +#include "video_core/shader_notify.h" + +namespace Metal { + +namespace { +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::ConvertLegacyToGeneric; +using Shader::Maxwell::GenerateGeometryPassthrough; +using Shader::Maxwell::MergeDualVertexPrograms; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; +using VideoCommon::GraphicsEnvironment; + +// constexpr u32 CACHE_VERSION = 1; +// constexpr std::array METAL_CACHE_MAGIC_NUMBER{'s', 'u', 'y', 'u', 'm', 'l', 'c', 'h'}; + /* Returns a std::span over container.data() and container.size(). */ +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} + +} // Anonymous namespace + /* Hashes all sizeof(ComputePipelineCacheKey) bytes of this key with CityHash64. */ +size_t ComputePipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); + return static_cast(hash); +} + /* Equality is a memcmp over the whole object. */ +bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, sizeof *this) == 0; +} + /* Hashes the first Size() bytes of this key with CityHash64. */ +size_t GraphicsPipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); + return static_cast(hash); +} + /* Equality is a memcmp over the first Size() bytes of both keys. */ +bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, Size()) == 0; +} + /* Binds the references it is given and fills profile and host_info with hard-coded values (none are queried from the device yet, see the TODO in the body). */ +PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, + const Device& device_, CommandRecorder& command_recorder_, + BufferCache& buffer_cache_, TextureCache& texture_cache_, + 
VideoCore::ShaderNotify& shader_notify_) + : VideoCommon::ShaderCache{device_memory_}, device{device_}, + command_recorder{command_recorder_}, buffer_cache{buffer_cache_}, + texture_cache{texture_cache_}, shader_notify{shader_notify_} { + // TODO: query for some of these parameters + profile = Shader::Profile{ + .supported_spirv = 0x00010300U, // HACK + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = true, + .support_int16 = true, + .support_int64 = true, + .support_vertex_instance_id = false, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = false, + .support_viewport_index_layer_non_geometry = false, + .support_viewport_mask = false, + .support_typeless_image_loads = true, + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + .support_derivative_control = true, + .support_geometry_shader_passthrough = false, + .support_native_ndc = false, + .support_scaled_attributes = false, + .support_multi_viewport = false, + .support_geometry_streams = false, + + .warp_size_potentially_larger_than_guest = false, + + .lower_left_origin_mode = false, + .need_declared_frag_colors = false, + .need_gather_subpixel_offset = false, + + .has_broken_spirv_clamp = false, + .has_broken_spirv_position_input = false, + .has_broken_unsigned_image_offsets = false, + .has_broken_signed_operations = false, + .has_broken_fp16_float_controls = false, + .ignore_nan_fp_comparisons = false, + .has_broken_spirv_subgroup_mask_vector_extract_dynamic = false, + .has_broken_robust = false, 
+ .min_ssbo_alignment = 4, + .max_user_clip_distances = 8, + }; + + host_info = Shader::HostTranslateInfo{ + .support_float64 = false, + .support_float16 = true, + .support_int64 = false, + .needs_demote_reorder = false, + .support_snorm_render_buffer = true, + .support_viewport_index_layer = true, + .min_ssbo_alignment = 4, + .support_geometry_shader_passthrough = false, + .support_conditional_barrier = false, + }; +} + +PipelineCache::~PipelineCache() = default; + /* Returns nullptr when RefreshStages fails. Otherwise tries current_pipeline->Next(graphics_key) first and falls back to CurrentGraphicsPipelineSlowPath(). The result is always filtered through BuiltPipeline. */ +GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { + if (!RefreshStages(graphics_key.unique_hashes)) { + current_pipeline = nullptr; + return nullptr; + } + + if (current_pipeline) { + GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; + if (next) { + current_pipeline = next; + return BuiltPipeline(current_pipeline); + } + } + return CurrentGraphicsPipelineSlowPath(); +} + /* Returns the cached compute pipeline for the current shader hash, shared memory size and block dimensions, creating it on first use. Returns nullptr when ComputeShader() yields no shader. NOTE(review): a failed creation leaves a null entry in compute_cache, so later calls with the same key also return nullptr without retrying. */ +ComputePipeline* PipelineCache::CurrentComputePipeline() { + const ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; + } + const auto& qmd{kepler_compute->launch_description}; + const ComputePipelineCacheKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .threadgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); + } + pipeline = CreateComputePipeline(key, shader); + + return pipeline.get(); +} + /* Currently a no-op: none of the parameters are used. */ +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + // TODO: implement +} + /* Looks up graphics_key in graphics_cache, creating the pipeline on first use, and makes it the current pipeline. Returns nullptr when the cached entry is null. */ +GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() { + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (is_new) { + pipeline = CreateGraphicsPipeline(); + } + if (!pipeline) { + return nullptr; + } + current_pipeline = pipeline.get(); + + return 
BuiltPipeline(current_pipeline); +} + /* Returns pipeline when it is already built, or when the current draw has an index or vertex count of at most 6; otherwise returns nullptr. */ +GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) { + return pipeline; + } + + return nullptr; +} + /* Logs the key hash, then builds a GraphicsPipeline from a hard-coded Metal vertex and fragment shader pair and two default-constructed Shader::Info objects. pools and envs are not used in this body. Any std::exception is logged and turned into a nullptr return. NOTE(review): the two Shader::Info objects are allocated with new and no matching delete is visible here; confirm who owns them. */ +std::unique_ptr PipelineCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs) try { + auto hash = key.Hash(); + LOG_INFO(Render_Metal, "0x{:016x}", hash); + + // HACK: create hardcoded shaders + MTL::CompileOptions* compile_options = MTL::CompileOptions::alloc()->init(); + NS::Error* error = nullptr; + MTL::Library* library = device.GetDevice()->newLibrary(NS::String::string( + R"( + #include + using namespace metal; + + constant float2 texCoords[] = { + float2(0.0, -1.0), + float2(0.0, 1.0), + float2(2.0, 1.0), + }; + + struct VertexOut { + float4 position [[position]]; + float2 texCoord; + }; + + vertex VertexOut vertexMain(uint vid [[vertex_id]]) { + VertexOut out; + out.position = float4(texCoords[vid] * 2.0 - 1.0, 0.0, 1.0); + out.texCoord = texCoords[vid]; + + return out; + } + + fragment float4 fragmentMain(VertexOut in [[stage_in]]) { + return float4(in.texCoord, 0.0, 1.0); + } + )", + NS::ASCIIStringEncoding), + compile_options, &error); + if (error) { + LOG_ERROR(Render_Metal, "failed to create blit library: {}", + error->description()->cString(NS::ASCIIStringEncoding)); + } + /* NOTE(review): execution continues after the error above is logged and library is dereferenced below; confirm newLibrary cannot return null on failure. compile_options is not released anywhere in this function. The log text says blit library although this is the pipeline cache. */ + std::array functions; + + functions[0] = library->newFunction(NS::String::string("vertexMain", NS::ASCIIStringEncoding)); + functions[1] = + library->newFunction(NS::String::string("fragmentMain", NS::ASCIIStringEncoding)); + + // HACK: dummy info + std::array infos; + infos[0] = new Shader::Info{}; + infos[1] = new Shader::Info{}; + + return std::make_unique(device, command_recorder, key, buffer_cache, + texture_cache, &shader_notify, functions, 
infos); +} catch (const std::exception& e) { + LOG_ERROR(Render_Metal, "failed to create graphics pipeline: {}", e.what()); + return nullptr; +} + /* Gathers the graphics environments for graphics_key, releases the contents of main_pools and forwards to the three-argument overload. */ +std::unique_ptr PipelineCache::CreateGraphicsPipeline() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); + + main_pools.ReleaseContents(); + + return CreateGraphicsPipeline(main_pools, graphics_key, environments.Span()); +} + /* Builds a ComputeEnvironment from the current kepler_compute state, sets its cached size from shader->size_bytes, releases the contents of main_pools and forwards to the overload taking ShaderPools. */ +std::unique_ptr PipelineCache::CreateComputePipeline( + const ComputePipelineCacheKey& key, const ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()}; + const auto& qmd{kepler_compute->launch_description}; + ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + + return CreateComputePipeline(main_pools, key, env); +} + /* Not implemented yet: logs the key hash and then always throws std::runtime_error, which the function-try-block catches, logs and converts into a nullptr return. The make_unique statement after the throw is unreachable. pools and env are not used in this body. */ +std::unique_ptr PipelineCache::CreateComputePipeline( + ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env) try { + auto hash = key.Hash(); + LOG_INFO(Render_Metal, "0x{:016x}", hash); + + MTL::Function* function = nullptr; + // TODO: create compute function + + throw std::runtime_error("Compute shaders are not implemented"); + + return std::make_unique(device, &shader_notify, Shader::Info{}, function); +} catch (const std::exception& e) { + LOG_ERROR(Render_Metal, "failed to create compute pipeline: {}", e.what()); + return nullptr; +} + +} // namespace Metal diff --git a/src/video_core/renderer_metal/mtl_pipeline_cache.h b/src/video_core/renderer_metal/mtl_pipeline_cache.h new file mode 100644 index 0000000000..6118387362 --- /dev/null +++ b/src/video_core/renderer_metal/mtl_pipeline_cache.h @@ -0,0 +1,148 @@ +// SPDX-FileCopyrightText: Copyright 2024 suyu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "common/common_types.h" 
+#include "common/thread_worker.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/renderer_metal/mtl_buffer_cache.h" +#include "video_core/renderer_metal/mtl_compute_pipeline.h" +#include "video_core/renderer_metal/mtl_graphics_pipeline.h" +#include "video_core/renderer_metal/mtl_texture_cache.h" +#include "video_core/shader_cache.h" + +namespace Core { +class System; +} + +namespace Shader::IR { +struct Program; +} + +namespace VideoCore { +class ShaderNotify; +} + +namespace Metal { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + /* Key identifying one compute pipeline: shader hash, shared memory size and threadgroup size. Hash() and operator== are defined out of line; the static_asserts after the struct require unique object representations and trivial copyability. */ +struct ComputePipelineCacheKey { + u64 unique_hash; + u32 shared_memory_size; + std::array threadgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; + + bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +} // namespace Metal + +namespace std { + /* std::hash specialization that forwards to ComputePipelineCacheKey::Hash(). */ +template <> +struct hash { + size_t operator()(const Metal::ComputePipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace Metal { + +class ComputePipeline; +class Device; +class CommandRecorder; + +using VideoCommon::ShaderInfo; + /* Bundles three Shader::ObjectPool members (inst, block, flow_block); ReleaseContents() releases each of them. */ +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); + } + + Shader::ObjectPool inst{8192}; + Shader::ObjectPool block{32}; + Shader::ObjectPool flow_block{32}; +}; + /* Shader cache for the Metal renderer: hands out the GraphicsPipeline or ComputePipeline matching the current state and keeps created pipelines in unordered_maps. */ +class PipelineCache : 
public VideoCommon::ShaderCache { +public: + explicit PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, + CommandRecorder& command_recorder_, BufferCache& buffer_cache_, + TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_); + ~PipelineCache(); + + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); + + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + +private: + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; + + std::unique_ptr CreateGraphicsPipeline(); + + std::unique_ptr CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs); + + std::unique_ptr CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); + + std::unique_ptr CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env); + + const Device& device; + CommandRecorder& command_recorder; + BufferCache& buffer_cache; + TextureCache& texture_cache; + VideoCore::ShaderNotify& shader_notify; + /* Key used for graphics pipeline lookups, and the pipeline selected by the most recent lookup (null when none). */ + GraphicsPipelineCacheKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; + /* Pipelines created so far, keyed by their cache keys; an entry may hold a null pointer when creation failed. */ + std::unordered_map> compute_cache; + std::unordered_map> graphics_cache; + + ShaderPools main_pools; + + Shader::Profile profile; + Shader::HostTranslateInfo host_info; + + std::filesystem::path pipeline_cache_filename; +}; + +} // namespace Metal diff --git a/src/video_core/renderer_metal/mtl_rasterizer.cpp b/src/video_core/renderer_metal/mtl_rasterizer.cpp index 738cafd7c3..c65fb75ed4 100644 --- a/src/video_core/renderer_metal/mtl_rasterizer.cpp +++ b/src/video_core/renderer_metal/mtl_rasterizer.cpp @@ -35,7 +35,9 @@ RasterizerMetal::RasterizerMetal(Tegra::GPU& gpu_, buffer_cache_runtime(device, 
command_recorder, staging_buffer_pool), buffer_cache(device_memory, buffer_cache_runtime), texture_cache_runtime(device, command_recorder, staging_buffer_pool), - texture_cache(texture_cache_runtime, device_memory) {} + texture_cache(texture_cache_runtime, device_memory), + pipeline_cache(device_memory, device, command_recorder, buffer_cache, texture_cache, + gpu.ShaderNotify()) {} RasterizerMetal::~RasterizerMetal() = default; void RasterizerMetal::Draw(bool is_indexed, u32 instance_count) { diff --git a/src/video_core/renderer_metal/mtl_rasterizer.h b/src/video_core/renderer_metal/mtl_rasterizer.h index 29aabe3f56..4c50c8e190 100644 --- a/src/video_core/renderer_metal/mtl_rasterizer.h +++ b/src/video_core/renderer_metal/mtl_rasterizer.h @@ -9,6 +9,7 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_metal/mtl_buffer_cache.h" +#include "video_core/renderer_metal/mtl_pipeline_cache.h" #include "video_core/renderer_metal/mtl_texture_cache.h" namespace Core { @@ -110,6 +111,9 @@ private: // HACK: make the texture cache public so that renderer can access it public: TextureCache texture_cache; + +private: + PipelineCache pipeline_cache; }; } // namespace Metal