// Review notes. This preamble sits before the first "diff --git" header and is not part of any hunk.
//
// Purpose: make the Metal backend create shaders through the shared transpile path, and store the
// entry point name together with the MSL source in the shader "binary" blob.
//
// src/common/heap_array.h
//   - Adds an assign() overload that takes a std::span and forwards to assign(data.data(), data.size()).
//
// src/util/gpu_device.cpp -- GPUDevice::TranspileAndCreateShaderFromSource
//   - Fails early with "Entry point must be main." when entry_point is not exactly "main".
//   - Under __APPLE__, when target_language is MSL, calls CreateShaderFromSource with the entry point
//     formatted as "{}0" (so "main" becomes "main0"); the in-patch comment attributes the suffix to
//     the MSL converter. All other targets still pass entry_point through unchanged.
//   - NOTE(review): std::strcmp reads entry_point unconditionally; presumably callers never pass a
//     null pointer -- confirm.
//
// src/util/metal_device.h
//   - Moves the CreateShaderFromMSL declaration up next to LoadShaders(); the visible signature text
//     is the same before and after.
//
// src/util/metal_device.mm
//   - Adds MetalShaderBinaryHeader in an anonymous namespace: four u32 fields (entry point
//     offset/length, source offset/length), with a static_assert that its size is 16 bytes.
//   - MetalDevice::CreateShaderFromBinary: rejects blobs smaller than the header ("Invalid header."),
//     memcpy's the header into a local (per the comment, for alignment reasons), rejects any
//     offset + length that exceeds data.size() ("Out of range fields in header."), then passes the
//     embedded source and entry point to CreateShaderFromMSL instead of the hard-coded "main0".
//   - MetalDevice::CreateShaderFromSource: non-MSL input is delegated to
//     TranspileAndCreateShaderFromSource with GPUShaderLanguage::MSL and m_render_api_version; the
//     local GLSL -> SPIR-V -> MSL path and the dump_shaders block are removed. For MSL input, when
//     out_binary is non-null it is filled with header + entry point bytes + source bytes. The entry
//     point is written with strlen() bytes, i.e. without a terminating NUL.
//   - NOTE(review): the added "const std::span msl(...)" local is not referenced by any line that
//     remains after the patch (only removed '-' lines use msl) -- looks like an unused variable.
//   - NOTE(review): CreateShaderFromBinary copies the source into a std::string while the entry point
//     uses std::string_view; CreateShaderFromMSL is declared taking std::string_view for both, so the
//     copy may be unnecessary -- confirm whether it is intentional.
//   - NOTE(review): entry_point_length and source_length are narrowed to u32 with a cast and no range
//     check -- confirm inputs cannot reach 4 GiB.
//
// NOTE(review): in this copy of the patch the line breaks are collapsed and template argument lists
// appear to be missing (e.g. "std::unique_ptr GPUDevice::...", "static_cast(hdr.source_length)").
// It will probably not apply or compile as-is; regenerate it from the original commit before use.
diff --git a/src/common/heap_array.h b/src/common/heap_array.h index 14a7b8254..163f0183d 100644 --- a/src/common/heap_array.h +++ b/src/common/heap_array.h @@ -279,6 +279,8 @@ public: m_size = 0; } + void assign(const std::span data) { assign(data.data(), data.size()); } + void assign(const T* begin, const T* end) { const size_t size = reinterpret_cast(end) - reinterpret_cast(begin); diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index 8285d8ce4..13ee8644a 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -1780,6 +1780,13 @@ std::unique_ptr GPUDevice::TranspileAndCreateShaderFromSource( GPUShaderStage stage, GPUShaderLanguage source_language, std::string_view source, const char* entry_point, GPUShaderLanguage target_language, u32 target_version, DynamicHeapArray* out_binary, Error* error) { + // Currently, entry points must be "main". TODO: rename the entry point in the SPIR-V. + if (std::strcmp(entry_point, "main") != 0) + { + Error::SetStringView(error, "Entry point must be main."); + return {}; + } + // Disable optimization when targeting OpenGL GLSL, otherwise, the name-based linking will fail. const bool optimization = (!m_debug_device && target_language != GPUShaderLanguage::GLSL && target_language != GPUShaderLanguage::GLSLES); @@ -1827,7 +1834,14 @@ std::unique_ptr GPUDevice::TranspileAndCreateShaderFromSource( if (!TranslateVulkanSpvToLanguage(spv, stage, target_language, target_version, &dest_source, error)) return {}; - // TODO: MSL needs entry point suffixed. +#ifdef __APPLE__ + // MSL converter suffixes 0. 
 + if (target_language == GPUShaderLanguage::MSL) + { + return CreateShaderFromSource(stage, target_language, dest_source, + TinyString::from_format("{}0", entry_point).c_str(), out_binary, error); + } +#endif return CreateShaderFromSource(stage, target_language, dest_source, entry_point, out_binary, error); } diff --git a/src/util/metal_device.h b/src/util/metal_device.h index 9c5c0415c..0257eea9a 100644 --- a/src/util/metal_device.h +++ b/src/util/metal_device.h @@ -317,14 +317,12 @@ private: void SetFeatures(FeatureMask disabled_features); bool LoadShaders(); + std::unique_ptr CreateShaderFromMSL(GPUShaderStage stage, std::string_view source, + std::string_view entry_point, Error* error); id GetFunctionFromLibrary(id library, NSString* name); id CreateComputePipeline(id function, NSString* name); ClearPipelineConfig GetCurrentClearPipelineConfig() const; id GetClearDepthPipeline(const ClearPipelineConfig& config); - - std::unique_ptr CreateShaderFromMSL(GPUShaderStage stage, std::string_view source, - std::string_view entry_point, Error* error); - id GetDepthState(const GPUPipeline::DepthState& ds); void CreateCommandBuffer(); diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index d8a3626cd..99bae4ab2 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -24,6 +24,18 @@ Log_SetChannel(MetalDevice); // TODO: Disable hazard tracking and issue barriers explicitly. +// Used for shader "binaries". +namespace { +struct MetalShaderBinaryHeader +{ + u32 entry_point_offset; + u32 entry_point_length; + u32 source_offset; + u32 source_length; +}; +static_assert(sizeof(MetalShaderBinaryHeader) == 16); +} // namespace + // Looking across a range of GPUs, the optimal copy alignment for Vulkan drivers seems // to be between 1 (AMD/NV) and 64 (Intel). So, we'll go with 64 here. 
static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64; @@ -648,39 +660,55 @@ std::unique_ptr MetalDevice::CreateShaderFromMSL(GPUShaderStage stage std::unique_ptr MetalDevice::CreateShaderFromBinary(GPUShaderStage stage, std::span data, Error* error) { - const std::string_view str_data(reinterpret_cast(data.data()), data.size()); - return CreateShaderFromMSL(stage, str_data, "main0", error); + if (data.size() < sizeof(MetalShaderBinaryHeader)) + { + Error::SetStringView(error, "Invalid header."); + return {}; + } + + // Need to copy for alignment reasons. + MetalShaderBinaryHeader hdr; + std::memcpy(&hdr, data.data(), sizeof(hdr)); + if (static_cast(hdr.entry_point_offset) + static_cast(hdr.entry_point_length) > data.size() || + static_cast(hdr.source_offset) + static_cast(hdr.source_length) > data.size()) + { + Error::SetStringView(error, "Out of range fields in header."); + return {}; + } + + const std::string_view entry_point(reinterpret_cast(data.data() + hdr.entry_point_offset), + hdr.entry_point_length); + const std::string source(reinterpret_cast(data.data() + hdr.source_offset), hdr.source_length); + return CreateShaderFromMSL(stage, source, entry_point, error); } std::unique_ptr MetalDevice::CreateShaderFromSource(GPUShaderStage stage, GPUShaderLanguage language, std::string_view source, const char* entry_point, DynamicHeapArray* out_binary, Error* error) { - static constexpr bool dump_shaders = false; - - DynamicHeapArray spv; - if (!CompileGLSLShaderToVulkanSpv(stage, language, source, entry_point, !m_debug_device, false, &spv, error)) - return {}; - - std::string msl; - if (!TranslateVulkanSpvToLanguage(spv.cspan(), stage, GPUShaderLanguage::MSL, 230, &msl, error)) - return {}; - - if constexpr (dump_shaders) + if (language != GPUShaderLanguage::MSL) { - static unsigned s_next_id = 0; - ++s_next_id; - DumpShader(s_next_id, "_input", source); - DumpShader(s_next_id, "_msl", msl); + return TranspileAndCreateShaderFromSource(stage, language, source, 
entry_point, GPUShaderLanguage::MSL, + m_render_api_version, out_binary, error); } + // Source is the "binary" here, since Metal doesn't allow us to access the bytecode :( + const std::span msl(reinterpret_cast(source.data()), source.size()); if (out_binary) { - out_binary->resize(msl.size()); - std::memcpy(out_binary->data(), msl.data(), msl.size()); + MetalShaderBinaryHeader hdr; + hdr.entry_point_offset = sizeof(MetalShaderBinaryHeader); + hdr.entry_point_length = static_cast(std::strlen(entry_point)); + hdr.source_offset = hdr.entry_point_offset + hdr.entry_point_length; + hdr.source_length = static_cast(source.size()); + + out_binary->resize(sizeof(hdr) + hdr.entry_point_length + hdr.source_length); + std::memcpy(out_binary->data(), &hdr, sizeof(hdr)); + std::memcpy(&out_binary->data()[hdr.entry_point_offset], entry_point, hdr.entry_point_length); + std::memcpy(&out_binary->data()[hdr.source_offset], source.data(), hdr.source_length); } - return CreateShaderFromMSL(stage, msl, "main0", error); + return CreateShaderFromMSL(stage, source, entry_point, error); } MetalPipeline::MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode,