Add MultipartParser external

2023-12-10 20:21:12 -05:00 · 2023-12-10 20:21:12 -05:00 · e46d7500a1
parent 4ff5ff2772
commit e46d7500a1
6 changed files with 480 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -765,6 +765,8 @@ add_subdirectory(Externals/rangeset)

 add_subdirectory(Externals/FatFs)

+add_subdirectory(Externals/MultipartParser)
+
 if (USE_RETRO_ACHIEVEMENTS)
 	add_subdirectory(Externals/rcheevos)
 endif()
--- a/Externals/MultipartParser/CMakeLists.txt
+++ b/Externals/MultipartParser/CMakeLists.txt
@ -0,0 +1,2 @@
+add_library(MultipartParser INTERFACE IMPORTED GLOBAL)
+set_target_properties(MultipartParser PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR})
--- a/Externals/MultipartParser/LICENSE
+++ b/Externals/MultipartParser/LICENSE
@ -0,0 +1,19 @@
+Copyright (c) 2010 Hongli Lai
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- a/Externals/MultipartParser/MultipartParser.h
+++ b/Externals/MultipartParser/MultipartParser.h
@ -0,0 +1,419 @@
+#ifndef _MULTIPART_PARSER_H_
+#define _MULTIPART_PARSER_H_
+
+#include <cstring>
+#include <functional>
+#include <stdexcept>
+#include <string>
+#include <sys/types.h>
+
+class MultipartParser {
+public:
+  using Callback = std::function<void(const char *buffer, size_t start,
+                                      size_t end, void *userData)>;
+
+  std::string boundary;
+
+private:
+  static const char CR = 13;
+  static const char LF = 10;
+  static const char SPACE = 32;
+  static const char HYPHEN = 45;
+  static const char COLON = 58;
+  static const char SEMICOLON = 59;
+  static const size_t UNMARKED = (size_t)-1;
+
+  enum State {
+    ERROR,
+    START,
+    START_BOUNDARY,
+    HEADER_FIELD_START,
+    HEADER_FIELD,
+    HEADER_VALUE_START,
+    HEADER_VALUE,
+    HEADER_VALUE_ALMOST_DONE,
+    HEADERS_ALMOST_DONE,
+    PART_DATA_START,
+    PART_DATA,
+    PART_END,
+    END
+  };
+
+  enum Flags { PART_BOUNDARY = 1, LAST_BOUNDARY = 2 };
+
+  const char *boundaryData;
+  size_t boundarySize;
+  bool boundaryIndex[256];
+  char *lookbehind;
+  size_t lookbehindSize;
+  State state;
+  int flags;
+  size_t index;
+  size_t headerFieldMark;
+  size_t headerValueMark;
+  size_t partDataMark;
+  const char *errorReason;
+
+  void resetCallbacks() {
+    onPartBegin = NULL;
+    onHeaderField = NULL;
+    onHeaderValue = NULL;
+    onHeaderEnd = NULL;
+    onHeadersEnd = NULL;
+    onPartData = NULL;
+    onPartEnd = NULL;
+    onEnd = NULL;
+    userData = NULL;
+  }
+
+  void indexBoundary() {
+    const char *current;
+    const char *end = boundaryData + boundarySize;
+
+    memset(boundaryIndex, 0, sizeof(boundaryIndex));
+
+    for (current = boundaryData; current < end; current++) {
+      boundaryIndex[(unsigned char)*current] = true;
+    }
+  }
+
+  void callback(Callback cb, const char *buffer = NULL, size_t start = UNMARKED,
+                size_t end = UNMARKED, bool allowEmpty = false) {
+    if (start != UNMARKED && start == end && !allowEmpty) {
+      return;
+    }
+    if (cb != NULL) {
+      cb(buffer, start, end, userData);
+    }
+  }
+
+  void dataCallback(Callback cb, size_t &mark, const char *buffer, size_t i,
+                    size_t bufferLen, bool clear, bool allowEmpty = false) {
+    if (mark == UNMARKED) {
+      return;
+    }
+
+    if (!clear) {
+      callback(cb, buffer, mark, bufferLen, allowEmpty);
+      mark = 0;
+    } else {
+      callback(cb, buffer, mark, i, allowEmpty);
+      mark = UNMARKED;
+    }
+  }
+
+  char lower(char c) const { return c | 0x20; }
+
+  inline bool isBoundaryChar(char c) const {
+    return boundaryIndex[(unsigned char)c];
+  }
+
+  bool isHeaderFieldCharacter(char c) const {
+    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == HYPHEN;
+  }
+
+  void setError(const char *message) {
+    state = ERROR;
+    errorReason = message;
+  }
+
+  void processPartData(size_t &prevIndex, size_t &_index, const char *buffer,
+                       size_t len, size_t boundaryEnd, size_t &i, char c,
+                       State &_state, int &_flags) {
+    prevIndex = _index;
+
+    if (_index == 0) {
+      // boyer-moore derived algorithm to safely skip non-boundary data
+      while (i + boundarySize <= len) {
+        if (isBoundaryChar(buffer[i + boundaryEnd])) {
+          break;
+        }
+
+        i += boundarySize;
+      }
+      if (i == len) {
+        return;
+      }
+      c = buffer[i];
+    }
+
+    if (_index < boundarySize) {
+      if (boundary[_index] == c) {
+        if (_index == 0) {
+          dataCallback(onPartData, partDataMark, buffer, i, len, true);
+        }
+        _index++;
+      } else {
+        _index = 0;
+      }
+    } else if (_index == boundarySize) {
+      _index++;
+      if (c == CR) {
+        // CR = part boundary
+        _flags |= PART_BOUNDARY;
+      } else if (c == HYPHEN) {
+        // HYPHEN = end boundary
+        _flags |= LAST_BOUNDARY;
+      } else {
+        _index = 0;
+      }
+    } else if (_index - 1 == boundarySize) {
+      if (_flags & PART_BOUNDARY) {
+        _index = 0;
+        if (c == LF) {
+          // unset the PART_BOUNDARY flag
+          _flags &= ~PART_BOUNDARY;
+          callback(onPartEnd);
+          callback(onPartBegin);
+          _state = HEADER_FIELD_START;
+          return;
+        }
+      } else if (_flags & LAST_BOUNDARY) {
+        if (c == HYPHEN) {
+          callback(onPartEnd);
+          callback(onEnd);
+          _state = END;
+        } else {
+          _index = 0;
+        }
+      } else {
+        _index = 0;
+      }
+    } else if (_index - 2 == boundarySize) {
+      if (c == CR) {
+        _index++;
+      } else {
+        _index = 0;
+      }
+    } else if (_index - boundarySize == 3) {
+      _index = 0;
+      if (c == LF) {
+        callback(onPartEnd);
+        callback(onEnd);
+        _state = END;
+        return;
+      }
+    }
+
+    if (_index > 0) {
+      // when matching a possible boundary, keep a lookbehind reference
+      // in case it turns out to be a false lead
+      if (_index - 1 >= lookbehindSize) {
+        setError("Parser bug: index overflows lookbehind buffer. "
+                 "Please send bug report with input file attached.");
+        return;
+      }
+      lookbehind[_index - 1] = c;
+    } else if (prevIndex > 0) {
+      // if our boundary turned out to be rubbish, the captured lookbehind
+      // belongs to partData
+      callback(onPartData, lookbehind, 0, prevIndex);
+      prevIndex = 0;
+      partDataMark = i;
+
+      // reconsider the current character even so it interrupted the sequence
+      // it could be the beginning of a new sequence
+      i--;
+    }
+  }
+
+public:
+  Callback onPartBegin;
+  Callback onHeaderField;
+  Callback onHeaderValue;
+  Callback onHeaderEnd;
+  Callback onHeadersEnd;
+  Callback onPartData;
+  Callback onPartEnd;
+  Callback onEnd;
+  void *userData;
+
+  MultipartParser() {
+    lookbehind = NULL;
+    resetCallbacks();
+    reset();
+  }
+
+  MultipartParser(const std::string &boundary) {
+    lookbehind = NULL;
+    resetCallbacks();
+    setBoundary(boundary);
+  }
+
+  ~MultipartParser() { delete[] lookbehind; }
+
+  void reset() {
+    delete[] lookbehind;
+    state = ERROR;
+    boundary.clear();
+    boundaryData = boundary.c_str();
+    boundarySize = 0;
+    lookbehind = NULL;
+    lookbehindSize = 0;
+    flags = 0;
+    index = 0;
+    headerFieldMark = UNMARKED;
+    headerValueMark = UNMARKED;
+    partDataMark = UNMARKED;
+    errorReason = "Parser uninitialized.";
+  }
+
+  void setBoundary(const std::string &_boundary) {
+    reset();
+    this->boundary = "\r\n--" + _boundary;
+    boundaryData = this->boundary.c_str();
+    boundarySize = this->boundary.size();
+    indexBoundary();
+    lookbehind = new char[boundarySize + 8];
+    lookbehindSize = boundarySize + 8;
+    state = START;
+    errorReason = "No error.";
+  }
+
+  size_t feed(const char *buffer, size_t len) {
+    if (state == ERROR || len == 0) {
+      return 0;
+    }
+
+    State _state = this->state;
+    int _flags = this->flags;
+    size_t prevIndex = this->index;
+    size_t _index = this->index;
+    size_t boundaryEnd = boundarySize - 1;
+    size_t i;
+    char c, cl;
+
+    for (i = 0; i < len; i++) {
+      c = buffer[i];
+
+      switch (_state) {
+      case ERROR:
+        return i;
+      case START:
+        _index = 0;
+        _state = START_BOUNDARY;
+      case START_BOUNDARY:
+        if (_index == boundarySize - 2) {
+          if (c != CR) {
+            setError("Malformed. Expected CR after boundary.");
+            return i;
+          }
+          _index++;
+          break;
+        } else if (_index - 1 == boundarySize - 2) {
+          if (c != LF) {
+            setError("Malformed. Expected LF after boundary CR.");
+            return i;
+          }
+          _index = 0;
+          callback(onPartBegin);
+          _state = HEADER_FIELD_START;
+          break;
+        }
+        if (c != boundary[_index + 2]) {
+          setError(
+              "Malformed. Found different boundary data than the given one.");
+          return i;
+        }
+        _index++;
+        break;
+      case HEADER_FIELD_START:
+        _state = HEADER_FIELD;
+        headerFieldMark = i;
+        _index = 0;
+      case HEADER_FIELD:
+        if (c == CR) {
+          headerFieldMark = UNMARKED;
+          _state = HEADERS_ALMOST_DONE;
+          break;
+        }
+
+        _index++;
+        if (c == HYPHEN) {
+          break;
+        }
+
+        if (c == COLON) {
+          if (_index == 1) {
+            // empty header field
+            setError("Malformed first header name character.");
+            return i;
+          }
+          dataCallback(onHeaderField, headerFieldMark, buffer, i, len, true);
+          _state = HEADER_VALUE_START;
+          break;
+        }
+
+        cl = lower(c);
+        if (cl < 'a' || cl > 'z') {
+          setError("Malformed header name.");
+          return i;
+        }
+        break;
+      case HEADER_VALUE_START:
+        if (c == SPACE) {
+          break;
+        }
+
+        headerValueMark = i;
+        _state = HEADER_VALUE;
+      case HEADER_VALUE:
+        if (c == CR && buffer[i - 1] != SEMICOLON) {
+          dataCallback(onHeaderValue, headerValueMark, buffer, i, len, true,
+                       true);
+          callback(onHeaderEnd);
+          _state = HEADER_VALUE_ALMOST_DONE;
+        }
+        break;
+      case HEADER_VALUE_ALMOST_DONE:
+        if (c != LF) {
+          setError("Malformed header value: LF expected after CR");
+          return i;
+        }
+
+        _state = HEADER_FIELD_START;
+        break;
+      case HEADERS_ALMOST_DONE:
+        if (c != LF) {
+          setError("Malformed header ending: LF expected after CR");
+          return i;
+        }
+
+        callback(onHeadersEnd);
+        _state = PART_DATA_START;
+        break;
+      case PART_DATA_START:
+        _state = PART_DATA;
+        partDataMark = i;
+      case PART_DATA:
+        processPartData(prevIndex, _index, buffer, len, boundaryEnd, i, c,
+                        _state, _flags);
+        break;
+      case END:
+        break;
+      default:
+        return i;
+      }
+    }
+
+    dataCallback(onHeaderField, headerFieldMark, buffer, i, len, false);
+    dataCallback(onHeaderValue, headerValueMark, buffer, i, len, false);
+    dataCallback(onPartData, partDataMark, buffer, i, len, false);
+
+    this->index = _index;
+    this->state = _state;
+    this->flags = _flags;
+
+    return len;
+  }
+
+  bool succeeded() const { return state == END; }
+
+  bool hasError() const { return state == ERROR; }
+
+  bool stopped() const { return state == ERROR || state == END; }
+
+  const char *getErrorMessage() const { return errorReason; }
+};
+
+#endif /* _MULTIPART_PARSER_H_ */
--- a/Externals/MultipartParser/README.markdown
+++ b/Externals/MultipartParser/README.markdown
@ -0,0 +1,37 @@
+What is it?
+===========
+An simple, efficient parser for multipart MIME messages, based on
+[Formidable's](http://github.com/felixge/node-formidable) parser.
+
+Why?
+----
+MIME multipart messages are a total pain to parse because the grammar is
+so insane. Furthermore, the MIME specification is incredibly large. This
+has led to an army of equally large and complex MIME libraries. If you
+just want to parse a MIME multipart message without hassle then using all
+of those libraries are less than ideal. They all tend to handle the kitchen
+sink (e.g. they handling email parsing and all kinds of other stuff you don't
+need) or they depend on other libraries that you may not want (e.g. APR, glib)
+or they are under-documented or under-tested or just not efficient (e.g.
+buffering all data in memory; good luck parsing a 2 GB file upload). You can
+write your own parser but because the multipart grammar is so much of a pain
+it's very easy to make mistakes.
+
+Goals and highlights of this parser
+-----------------------------------
+
+* Multipart parsing, and only multipart parsing.
+* Event-driven API.
+* No dependencies on any external libraries, just straight C++ with STL.
+* Efficient. Nothing in the input is buffered except what's absolutely
+  necessary for parsing.
+* Only one level of multipart parsing. A multipart message part can itself
+  be a multipart message, but this parser doesn't attempt to provide a
+  complex API for handling nested multipart messages. Instead the developer
+  should just use another parser instance to parse nested messages.
+* No I/O is handled for you. This parser won't depend on any particular
+  I/O library or even any particular operating system's I/O API. It won't
+  block on I/O by itself, giving you full control over when (not) to block.
+  It won't save data to files by itself, giving you full control over what to
+  do with the parsed data.
+* Not thread-safe, but reentrant. No dependencies on any threading libraries.
--- a/Source/VSProps/Base.Dolphin.props
+++ b/Source/VSProps/Base.Dolphin.props
@ -14,6 +14,7 @@
      <AdditionalIncludeDirectories>$(ExternalsDir)OpenAL\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <AdditionalIncludeDirectories>$(ExternalsDir)expr\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <AdditionalIncludeDirectories>$(ExternalsDir)rangeset\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>$(ExternalsDir)MultipartParser;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <AdditionalIncludeDirectories>$(ExternalsDir)Vulkan-Headers\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <AdditionalIncludeDirectories>$(ExternalsDir)VulkanMemoryAllocator\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <AdditionalIncludeDirectories>$(ExternalsDir)WIL\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>