mirror of https://github.com/PCSX2/pcsx2.git
8376 lines
300 KiB
C++
8376 lines
300 KiB
C++
#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
|
|
#define _C4_YML_PARSE_ENGINE_DEF_HPP_
|
|
|
|
#include "c4/yml/parse_engine.hpp"
|
|
#include "c4/error.hpp"
|
|
#include "c4/charconv.hpp"
|
|
#include "c4/utf.hpp"
|
|
|
|
#include <ctype.h>
|
|
|
|
#include "c4/yml/detail/parser_dbg.hpp"
|
|
#include "c4/yml/filter_processor.hpp"
|
|
#ifdef RYML_DBG
|
|
#include <c4/dump.hpp>
|
|
#include "c4/yml/detail/print.hpp"
|
|
#endif
|
|
|
|
|
|
#if defined(RYML_WITH_TAB_TOKENS)
|
|
#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
|
|
#define _RYML_WITHOUT_TAB_TOKENS(...)
|
|
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
|
|
#else
|
|
#define _RYML_WITH_TAB_TOKENS(...)
|
|
#define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
|
|
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
|
|
#endif
|
|
|
|
|
|
// scaffold:
|
|
#define _c4dbgnextline() \
|
|
do { \
|
|
_c4dbgq("\n-----------"); \
|
|
_c4dbgt("handling line={}, offset={}B", \
|
|
m_evt_handler->m_curr->pos.line, \
|
|
m_evt_handler->m_curr->pos.offset); \
|
|
} while(0)
|
|
|
|
|
|
#if defined(_MSC_VER)
|
|
# pragma warning(push)
|
|
# pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
|
|
# pragma warning(disable: 4702/*unreachable code*/)
|
|
#elif defined(__clang__)
|
|
# pragma clang diagnostic push
|
|
# pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
|
|
# pragma clang diagnostic ignored "-Wformat-nonliteral"
|
|
# pragma clang diagnostic ignored "-Wold-style-cast"
|
|
#elif defined(__GNUC__)
|
|
# pragma GCC diagnostic push
|
|
# pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
|
|
# pragma GCC diagnostic ignored "-Wformat-nonliteral"
|
|
# pragma GCC diagnostic ignored "-Wold-style-cast"
|
|
# if __GNUC__ >= 7
|
|
# pragma GCC diagnostic ignored "-Wduplicated-branches"
|
|
# endif
|
|
#endif
|
|
|
|
// NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
|
|
|
|
namespace c4 {
|
|
namespace yml {
|
|
|
|
namespace { // NOLINT
|
|
|
|
/** Tell whether the block token at the start of @p s stands on its own.
 *
 * @p s must be non-empty and begin with '-', ':' or '?'. The token
 * stands on its own when it is the only character in @p s, or when it
 * is followed by a space (or by a tab when RYML_WITH_TAB_TOKENS is
 * defined). */
C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept
{
    RYML_ASSERT(s.len > 0);
    RYML_ASSERT(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
    if(s.len == 1)
        return true;
    const char following = s.str[1];
    return (following == ' ') _RYML_WITH_TAB_TOKENS( || (following == '\t'));
}
|
|
|
|
/** Tell whether @p s starts with the document-begin marker "---".
 *
 * @p s must begin with '-' and must not end with a newline character.
 * The marker only counts when it ends @p s or is followed by a space
 * (or by a tab when RYML_WITH_TAB_TOKENS is defined). */
inline bool _is_doc_begin_token(csubstr s)
{
    RYML_ASSERT(s.begins_with('-'));
    RYML_ASSERT(!s.ends_with("\n"));
    RYML_ASSERT(!s.ends_with("\r"));
    // the first character is covered by the assertion above
    if(s.len < 3 || s.str[1] != '-' || s.str[2] != '-')
        return false;
    if(s.len == 3)
        return true;
    const char after = s.str[3];
    return (after == ' ' _RYML_WITH_TAB_TOKENS(|| after == '\t'));
}
|
|
|
|
/** Tell whether @p s starts with the document-end marker "...".
 *
 * @p s must begin with '.' and must not end with a newline character.
 * The marker only counts when it ends @p s or is followed by a space
 * (or by a tab when RYML_WITH_TAB_TOKENS is defined). */
inline bool _is_doc_end_token(csubstr s)
{
    RYML_ASSERT(s.begins_with('.'));
    RYML_ASSERT(!s.ends_with("\n"));
    RYML_ASSERT(!s.ends_with("\r"));
    // the first character is covered by the assertion above
    if(s.len < 3 || s.str[1] != '.' || s.str[2] != '.')
        return false;
    if(s.len == 3)
        return true;
    const char after = s.str[3];
    return (after == ' ' _RYML_WITH_TAB_TOKENS(|| after == '\t'));
}
|
|
|
|
/** Tell whether @p s starts with a document marker: either "---" or
 * "...", ending @p s or followed by a space (or by a tab when
 * RYML_WITH_TAB_TOKENS is defined). Strings shorter than three
 * characters are never document markers. */
inline bool _is_doc_token(csubstr s) noexcept
{
    //
    // NOTE: the comparisons are deliberately written inline here
    // instead of delegating to _is_doc_begin_token() and
    // _is_doc_end_token(). Delegating was failing in some scenarios
    // under gcc -O2 (but not -O3, -O1 or -O0), likely because of
    // optimizer assumptions on the input string, and possibly caused
    // by UB around assignment to that string (the call site was in
    // _scan_block()). For more details see:
    //
    // https://github.com/biojppm/rapidyaml/issues/440
    //
    // The current version does not suffer from this problem, but it
    // may appear again.
    //
    // UPDATE. The problem did appear again in gcc12 and gcc13 with
    // -Os (but with no other optimization level, compiler or
    // version), because the assignment to s was being hoisted out of
    // the loop calling this function, so the s.len >= 3 check was
    // not entered when it should have been. Adding a
    // C4_DONT_OPTIMIZE(var) makes the problem go away.
    //
    if(s.len < 3)
        return false;
    switch(s.str[0])
    {
    case '-':
        //return _is_doc_begin_token(s); // this was failing with gcc -O2
        return (s.str[1] == '-' && s.str[2] == '-')
            && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
    case '.':
        //return _is_doc_end_token(s); // this was failing with gcc -O2
        return (s.str[1] == '.' && s.str[2] == '.')
            && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
    }
    return false;
}
|
|
|
|
/** Check whether @p s begins with one of the JSON literals "false",
 * "true" or "null".
 *
 * @p s must not be empty.
 * @return the length of the matched literal, or 0 when none matches */
inline size_t _is_special_json_scalar(csubstr s)
{
    RYML_ASSERT(s.len);
    const char first = s.str[0];
    if(first == 'f')
    {
        if(s.len >= 5 && s.begins_with("false"))
            return 5u;
    }
    else if(first == 't')
    {
        if(s.len >= 4 && s.begins_with("true"))
            return 4u;
    }
    else if(first == 'n')
    {
        if(s.len >= 4 && s.begins_with("null"))
            return 4u;
    }
    return 0u;
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
/** Tell how many extra characters a newline spans when it is one half
 * of a two-character sequence.
 *
 * @param nl the newline character that was found
 * @param following the character immediately after @p nl
 * @return 1 when the pair is "\n\r" or "\r\n", otherwise 0 */
C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
{
    const bool lf_then_cr = (nl == '\n') && (following == '\r');
    const bool cr_then_lf = (nl == '\r') && (following == '\n');
    return static_cast<size_t>(lf_then_cr || cr_then_lf);
}
|
|
|
|
//! look for the next newline chars, and jump to the right of those
|
|
inline substr from_next_line(substr rem)
|
|
{
|
|
size_t nlpos = rem.first_of("\r\n");
|
|
if(nlpos == csubstr::npos)
|
|
return {};
|
|
const char nl = rem[nlpos];
|
|
rem = rem.right_of(nlpos);
|
|
if(rem.empty())
|
|
return {};
|
|
if(_extend_from_combined_newline(nl, rem.front()))
|
|
rem = rem.sub(1);
|
|
return rem;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
/** Starting from the '\n' at position @p *i of @p r, advance @p *i over
 * the newlines and whitespace (' ', '\t', '\r') that follow.
 *
 * On return, @p *i is the position of the first character which is
 * neither a newline nor whitespace, or @p r.len if there is none.
 *
 * @return the number of '\n' characters found after the initial one */
inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
{
    RYML_ASSERT(r[*i] == '\n');
    size_t num_newlines = 0;
    size_t pos = *i + 1; // step over the initial newline
    while(pos < r.len)
    {
        const char c = r.str[pos];
        if(c == '\n')
            ++num_newlines;
        else if(c != ' ' && c != '\t' && c != '\r')
            break; // found content
        ++pos;
    }
    *i = pos;
    return num_newlines;
}
|
|
|
|
/** @p i is set to the first non whitespace character after the line
|
|
* @return the number of empty lines after the initial position */
|
|
inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
|
|
{
|
|
RYML_ASSERT(r[*i] == '\n');
|
|
size_t numnl_following = 0;
|
|
++(*i);
|
|
if(indentation == 0)
|
|
{
|
|
for( ; *i < r.len; ++(*i))
|
|
{
|
|
if(r.str[*i] == '\n')
|
|
++numnl_following;
|
|
// skip leading whitespace
|
|
else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
|
|
;
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for( ; *i < r.len; ++(*i))
|
|
{
|
|
if(r.str[*i] == '\n')
|
|
{
|
|
++numnl_following;
|
|
// skip the indentation after the newline
|
|
size_t stop = *i + indentation;
|
|
for( ; *i < r.len; ++(*i))
|
|
{
|
|
if(r.str[*i] != ' ' && r.str[*i] != '\r')
|
|
break;
|
|
RYML_ASSERT(*i < stop);
|
|
}
|
|
C4_UNUSED(stop);
|
|
}
|
|
// skip leading whitespace
|
|
else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
|
|
;
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
return numnl_following;
|
|
}
|
|
|
|
} // anon namespace
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
ParseEngine<EventHandler>::~ParseEngine()
|
|
{
|
|
_free();
|
|
_clr();
|
|
}
|
|
|
|
template<class EventHandler>
|
|
ParseEngine<EventHandler>::ParseEngine(EventHandler *evt_handler, ParserOptions opts)
|
|
: m_options(opts)
|
|
, m_file()
|
|
, m_buf()
|
|
, m_evt_handler(evt_handler)
|
|
, m_pending_anchors()
|
|
, m_pending_tags()
|
|
, m_was_inside_qmrk(false)
|
|
, m_doc_empty(false)
|
|
, m_prev_colon(npos)
|
|
, m_encoding(NOBOM)
|
|
, m_newline_offsets()
|
|
, m_newline_offsets_size(0)
|
|
, m_newline_offsets_capacity(0)
|
|
, m_newline_offsets_buf()
|
|
{
|
|
RYML_CHECK(evt_handler);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
ParseEngine<EventHandler>::ParseEngine(ParseEngine &&that) noexcept
|
|
: m_options(that.m_options)
|
|
, m_file(that.m_file)
|
|
, m_buf(that.m_buf)
|
|
, m_evt_handler(that.m_evt_handler)
|
|
, m_pending_anchors(that.m_pending_anchors)
|
|
, m_pending_tags(that.m_pending_tags)
|
|
, m_was_inside_qmrk(false)
|
|
, m_doc_empty(false)
|
|
, m_prev_colon(npos)
|
|
, m_encoding(NOBOM)
|
|
, m_newline_offsets(that.m_newline_offsets)
|
|
, m_newline_offsets_size(that.m_newline_offsets_size)
|
|
, m_newline_offsets_capacity(that.m_newline_offsets_capacity)
|
|
, m_newline_offsets_buf(that.m_newline_offsets_buf)
|
|
{
|
|
that._clr();
|
|
}
|
|
|
|
template<class EventHandler>
|
|
ParseEngine<EventHandler>::ParseEngine(ParseEngine const& that)
|
|
: m_options(that.m_options)
|
|
, m_file(that.m_file)
|
|
, m_buf(that.m_buf)
|
|
, m_evt_handler(that.m_evt_handler)
|
|
, m_pending_anchors(that.m_pending_anchors)
|
|
, m_pending_tags(that.m_pending_tags)
|
|
, m_was_inside_qmrk(false)
|
|
, m_doc_empty(false)
|
|
, m_prev_colon(npos)
|
|
, m_encoding(NOBOM)
|
|
, m_newline_offsets()
|
|
, m_newline_offsets_size()
|
|
, m_newline_offsets_capacity()
|
|
, m_newline_offsets_buf()
|
|
{
|
|
if(that.m_newline_offsets_capacity)
|
|
{
|
|
_resize_locations(that.m_newline_offsets_capacity);
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
|
|
memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
|
|
m_newline_offsets_size = that.m_newline_offsets_size;
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
ParseEngine<EventHandler>& ParseEngine<EventHandler>::operator=(ParseEngine &&that) noexcept
|
|
{
|
|
_free();
|
|
m_options = (that.m_options);
|
|
m_file = (that.m_file);
|
|
m_buf = (that.m_buf);
|
|
m_evt_handler = that.m_evt_handler;
|
|
m_pending_anchors = that.m_pending_anchors;
|
|
m_pending_tags = that.m_pending_tags;
|
|
m_was_inside_qmrk = that.m_was_inside_qmrk;
|
|
m_doc_empty = that.m_doc_empty;
|
|
m_prev_colon = that.m_prev_colon;
|
|
m_encoding = that.m_encoding;
|
|
m_newline_offsets = (that.m_newline_offsets);
|
|
m_newline_offsets_size = (that.m_newline_offsets_size);
|
|
m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
|
|
m_newline_offsets_buf = (that.m_newline_offsets_buf);
|
|
that._clr();
|
|
return *this;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
ParseEngine<EventHandler>& ParseEngine<EventHandler>::operator=(ParseEngine const& that)
|
|
{
|
|
if(&that != this)
|
|
{
|
|
_free();
|
|
m_options = (that.m_options);
|
|
m_file = (that.m_file);
|
|
m_buf = (that.m_buf);
|
|
m_evt_handler = that.m_evt_handler;
|
|
m_pending_anchors = that.m_pending_anchors;
|
|
m_pending_tags = that.m_pending_tags;
|
|
m_was_inside_qmrk = that.m_was_inside_qmrk;
|
|
m_doc_empty = that.m_doc_empty;
|
|
m_prev_colon = that.m_prev_colon;
|
|
m_encoding = that.m_encoding;
|
|
if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
|
|
_resize_locations(that.m_newline_offsets_capacity);
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
|
|
memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
|
|
m_newline_offsets_size = that.m_newline_offsets_size;
|
|
m_newline_offsets_buf = that.m_newline_offsets_buf;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_clr()
|
|
{
|
|
m_options = {};
|
|
m_file = {};
|
|
m_buf = {};
|
|
m_evt_handler = {};
|
|
m_pending_anchors = {};
|
|
m_pending_tags = {};
|
|
m_was_inside_qmrk = false;
|
|
m_doc_empty = true;
|
|
m_prev_colon = npos;
|
|
m_encoding = NOBOM;
|
|
m_newline_offsets = {};
|
|
m_newline_offsets_size = {};
|
|
m_newline_offsets_capacity = {};
|
|
m_newline_offsets_buf = {};
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_free()
|
|
{
|
|
if(m_newline_offsets)
|
|
{
|
|
_RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
|
|
m_newline_offsets = nullptr;
|
|
m_newline_offsets_size = 0u;
|
|
m_newline_offsets_capacity = 0u;
|
|
m_newline_offsets_buf = nullptr;
|
|
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_reset()
|
|
{
|
|
m_pending_anchors = {};
|
|
m_pending_tags = {};
|
|
m_doc_empty = true;
|
|
m_was_inside_qmrk = false;
|
|
m_prev_colon = npos;
|
|
m_encoding = NOBOM;
|
|
if(m_options.locations())
|
|
{
|
|
_prepare_locations();
|
|
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
|
|
{
|
|
#define _ryml_relocate(s) \
|
|
if((s).is_sub(prev_arena)) \
|
|
{ \
|
|
(s).str = next_arena.str + ((s).str - prev_arena.str); \
|
|
}
|
|
_ryml_relocate(m_buf);
|
|
_ryml_relocate(m_newline_offsets_buf);
|
|
for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
|
|
_ryml_relocate(m_pending_tags.annotations[i].str);
|
|
for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
|
|
_ryml_relocate(m_pending_anchors.annotations[i].str);
|
|
#undef _ryml_relocate
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena)
|
|
{
|
|
((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
template<class DumpFn>
|
|
void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
|
|
{
|
|
auto const *const C4_RESTRICT st = m_evt_handler->m_curr;
|
|
auto const& lc = st->line_contents;
|
|
csubstr contents = lc.stripped;
|
|
if(contents.len)
|
|
{
|
|
// print the yaml src line
|
|
size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
|
|
if(m_file.len)
|
|
{
|
|
detail::_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
|
|
offs += m_file.len + 1;
|
|
}
|
|
detail::_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
|
|
csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
|
|
csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
|
|
detail::_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
|
|
// highlight the remaining portion of the previous line
|
|
size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
|
|
size_t lastcol = firstcol + lc.rem.len;
|
|
for(size_t i = 0; i < offs + firstcol; ++i)
|
|
std::forward<DumpFn>(dumpfn)(" ");
|
|
std::forward<DumpFn>(dumpfn)("^");
|
|
for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
|
|
std::forward<DumpFn>(dumpfn)("~");
|
|
detail::_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
|
|
}
|
|
else
|
|
{
|
|
std::forward<DumpFn>(dumpfn)("\n");
|
|
}
|
|
|
|
#ifdef RYML_DBG
|
|
// next line: print the state flags
|
|
{
|
|
char flagbuf_[128];
|
|
detail::_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
template<class ...Args>
|
|
void ParseEngine<EventHandler>::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const
|
|
{
|
|
char errmsg[RYML_ERRMSG_SIZE];
|
|
detail::_SubstrWriter writer(errmsg);
|
|
auto dumpfn = [&writer](csubstr s){ writer.append(s); };
|
|
detail::_dump(dumpfn, fmt, args...);
|
|
writer.append('\n');
|
|
_fmt_msg(dumpfn);
|
|
size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE;
|
|
m_evt_handler->cancel_parse();
|
|
m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
#ifdef RYML_DBG
|
|
template<class EventHandler>
|
|
template<class ...Args>
|
|
void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const
|
|
{
|
|
if(_dbg_enabled())
|
|
{
|
|
auto dumpfn = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); };
|
|
detail::_dump(dumpfn, fmt, args...);
|
|
dumpfn("\n");
|
|
_fmt_msg(dumpfn);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_finished_file() const
|
|
{
|
|
bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
|
|
if(ret)
|
|
{
|
|
_c4dbgp("finished file!!!");
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const
|
|
{
|
|
return m_evt_handler->m_curr->line_contents.rem.empty();
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
|
|
{
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t')))
|
|
{
|
|
size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
|
|
if(pos == npos)
|
|
pos = rem.len; // maybe the line is just all whitespace
|
|
_c4dbgpf("skip {} whitespace characters", pos);
|
|
_line_progressed(pos);
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_maybe_skipchars(char c)
|
|
{
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(rem.len && rem.str[0] == c)
|
|
{
|
|
size_t pos = rem.first_not_of(c);
|
|
if(pos == npos)
|
|
pos = rem.len; // maybe the line is just all c
|
|
_c4dbgpf("skip {}x'{}'", pos, c);
|
|
_line_progressed(pos);
|
|
}
|
|
}
|
|
|
|
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_maybe_skipchars_up_to(char c, size_t max_to_skip)
|
|
{
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(rem.len && rem.str[0] == c)
|
|
{
|
|
size_t pos = rem.first_not_of(c);
|
|
if(pos == npos)
|
|
pos = rem.len; // maybe the line is just all c
|
|
if(pos > max_to_skip)
|
|
pos = max_to_skip;
|
|
_c4dbgpf("skip {}x'{}'", pos, c);
|
|
_line_progressed(pos);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
template<class EventHandler>
|
|
template<size_t N>
|
|
void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
|
|
size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
|
|
if(pos == npos)
|
|
pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
|
|
_c4dbgpf("skip {} characters", pos);
|
|
_line_progressed(pos);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_skip_comment()
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#'));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
csubstr full = m_evt_handler->m_curr->line_contents.full;
|
|
// raise an error if the comment is not preceded by whitespace
|
|
if(!full.begins_with('#'))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
|
|
const char c = full[(size_t)(rem.str - full.str - 1)];
|
|
if(C4_UNLIKELY(c != ' ' && c != '\t'))
|
|
_RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "comment not preceded by whitespace");
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
|
|
}
|
|
_c4dbgpf("comment was '{}'", rem);
|
|
_line_progressed(rem.len);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_maybe_skip_comment()
|
|
{
|
|
csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(' ');
|
|
if(s.begins_with('#'))
|
|
{
|
|
_line_progressed((size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
|
|
_skip_comment();
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
|
|
{
|
|
if(m_evt_handler->m_curr->line_contents.rem.len)
|
|
{
|
|
if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
|
|
{
|
|
size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
|
|
if(pos == npos)
|
|
pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
|
|
_c4dbgpf("skip {}x'{}'", pos, ' ');
|
|
_line_progressed(pos);
|
|
}
|
|
if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':'))
|
|
{
|
|
_c4dbgp("found ':' colon next");
|
|
_line_progressed(1);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
|
|
{
|
|
if(m_evt_handler->m_curr->line_contents.rem.len)
|
|
{
|
|
if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
|
|
{
|
|
size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
|
|
if(pos == npos)
|
|
pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
|
|
_c4dbgpf("skip {}x'{}'", pos, ' ');
|
|
_line_progressed(pos);
|
|
}
|
|
if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ','))
|
|
{
|
|
_c4dbgp("found ',' comma next");
|
|
_line_progressed(1);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_scan_anchor()
|
|
{
|
|
csubstr s = m_evt_handler->m_curr->line_contents.rem;
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'));
|
|
csubstr anchor = s.range(1, s.first_of(' '));
|
|
_line_progressed(1u + anchor.len);
|
|
_maybe_skipchars(' ');
|
|
return anchor;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_scan_ref_seq()
|
|
{
|
|
csubstr s = m_evt_handler->m_curr->line_contents.rem;
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
|
|
csubstr ref = s.first(s.first_of(",] :"));
|
|
_line_progressed(ref.len);
|
|
return ref;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_scan_ref_map()
|
|
{
|
|
csubstr s = m_evt_handler->m_curr->line_contents.rem;
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
|
|
csubstr ref = s.first(s.first_of(",} "));
|
|
_line_progressed(ref.len);
|
|
return ref;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_scan_tag()
|
|
{
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' ');
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
|
|
csubstr t;
|
|
if(rem.begins_with("!!"))
|
|
{
|
|
_c4dbgp("begins with '!!'");
|
|
if(has_any(FLOW))
|
|
t = rem.left_of(rem.first_of(" ,"));
|
|
else
|
|
t = rem.left_of(rem.first_of(' '));
|
|
}
|
|
else if(rem.begins_with("!<"))
|
|
{
|
|
_c4dbgp("begins with '!<'");
|
|
t = rem.left_of(rem.first_of('>'), true);
|
|
}
|
|
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
|
|
else if(rem.begins_with("!h!"))
|
|
{
|
|
_c4dbgp("begins with '!h!'");
|
|
t = rem.left_of(rem.first_of(' '));
|
|
}
|
|
#endif
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
|
|
_c4dbgp("begins with '!'");
|
|
if(has_any(FLOW))
|
|
t = rem.left_of(rem.first_of(" ,"));
|
|
else
|
|
t = rem.left_of(rem.first_of(' '));
|
|
}
|
|
_line_progressed(t.len);
|
|
_maybe_skip_whitespace_tokens();
|
|
return t;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
|
|
|
|
// it's not a scalar if it starts with any of these characters:
|
|
switch(s.str[0])
|
|
{
|
|
// these are all legal tokens which mean no scalar is starting:
|
|
case '[':
|
|
case ']':
|
|
case '{':
|
|
case '}':
|
|
case '!':
|
|
case '&':
|
|
case '*':
|
|
case '|':
|
|
case '>':
|
|
case '#':
|
|
_c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
|
|
return false;
|
|
// '-' and ':' are illegal at the beginning if not followed by a scalar character
|
|
case '-':
|
|
case ':':
|
|
if(s.len > 1)
|
|
{
|
|
switch(s.str[1])
|
|
{
|
|
case '\n':
|
|
case '\r':
|
|
case '{':
|
|
case '[':
|
|
//_RYML_WITHOUT_TAB_TOKENS(case '\t'):
|
|
_c4err("invalid token \":{}\"", _c4prc(s.str[1]));
|
|
break;
|
|
case ' ':
|
|
case '}':
|
|
case ']':
|
|
if(s.str[0] == ':')
|
|
{
|
|
_c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
|
|
return false;
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
break;
|
|
case '?':
|
|
if(s.len > 1)
|
|
{
|
|
switch(s.str[1])
|
|
{
|
|
case ' ':
|
|
case '\n':
|
|
case '\r':
|
|
_RYML_WITHOUT_TAB_TOKENS(case '\t':)
|
|
_c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
|
|
return false;
|
|
case '{':
|
|
case '}':
|
|
case '[':
|
|
case ']':
|
|
_c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
break;
|
|
// everything else is a legal starting character
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
|
|
|
|
substr s = m_evt_handler->m_curr->line_contents.rem;
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n'));
|
|
|
|
if(!s.len)
|
|
return false;
|
|
|
|
if(!_is_valid_start_scalar_plain_flow(s))
|
|
return false;
|
|
|
|
_c4dbgp("scanning seqflow scalar...");
|
|
|
|
const size_t start_offset = m_evt_handler->m_curr->pos.offset;
|
|
bool needs_filter = false;
|
|
while(true)
|
|
{
|
|
_c4dbgpf("scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
|
|
for(size_t i = 0; i < s.len; ++i)
|
|
{
|
|
const char c = s.str[i];
|
|
switch(c)
|
|
{
|
|
case ',':
|
|
_c4dbgpf("found terminating character at {}: '{}'", i, c);
|
|
_line_progressed(i);
|
|
if(m_evt_handler->m_curr->pos.offset + i > start_offset)
|
|
{
|
|
goto ended_scalar;
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("at the beginning. no scalar here.");
|
|
return false;
|
|
}
|
|
break;
|
|
case ']':
|
|
_c4dbgpf("found terminating character at {}: '{}'", i, c);
|
|
_line_progressed(i);
|
|
goto ended_scalar;
|
|
break;
|
|
case '#':
|
|
_c4dbgp("found suspicious '#'");
|
|
if(!i || (s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t')))
|
|
{
|
|
_c4dbgpf("found terminating character at {}: '{}'", i, c);
|
|
_line_progressed(i);
|
|
goto ended_scalar;
|
|
}
|
|
break;
|
|
case ':':
|
|
_c4dbgp("found suspicious ':'");
|
|
if(s.len > i+1)
|
|
{
|
|
const char next = s.str[i+1];
|
|
_c4dbgpf("next char is '{}'", _c4prc(next));
|
|
if(next == ' ' || next == ',' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
|
|
{
|
|
_c4dbgp("map starting!");
|
|
if(m_evt_handler->m_curr->pos.offset + i > start_offset)
|
|
{
|
|
_c4dbgp("scalar finished!");
|
|
_line_progressed(i);
|
|
goto ended_scalar;
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("at the beginning. no scalar here.");
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("it's a scalar indeed.");
|
|
++i; // skip the next char
|
|
}
|
|
}
|
|
else if(s.len == i+1)
|
|
{
|
|
_c4dbgp("':' at line end. map starting!");
|
|
return false;
|
|
}
|
|
break;
|
|
case '[':
|
|
case '{':
|
|
case '}':
|
|
_line_progressed(i);
|
|
_c4err("invalid character: '{}'", c); // noreturn
|
|
default:
|
|
;
|
|
}
|
|
}
|
|
_line_progressed(s.len);
|
|
if(!_finished_file())
|
|
{
|
|
_c4dbgp("next line!");
|
|
_line_ended();
|
|
_scan_line();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("file finished!");
|
|
goto ended_scalar;
|
|
}
|
|
s = m_evt_handler->m_curr->line_contents.rem;
|
|
needs_filter = true;
|
|
}
|
|
|
|
ended_scalar:
|
|
|
|
sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
|
|
sc->needs_filter = needs_filter;
|
|
|
|
_c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
|
|
|
|
return true;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
|
|
|
|
substr s = m_evt_handler->m_curr->line_contents.rem;
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
|
|
|
|
if(!s.len)
|
|
return false;
|
|
|
|
if(!_is_valid_start_scalar_plain_flow(s))
|
|
return false;
|
|
|
|
_c4dbgp("scanning scalar...");
|
|
|
|
const size_t start_offset = m_evt_handler->m_curr->pos.offset;
|
|
bool needs_filter = false;
|
|
while(true)
|
|
{
|
|
for(size_t i = 0; i < s.len; ++i)
|
|
{
|
|
const char c = s.str[i];
|
|
switch(c)
|
|
{
|
|
case ',':
|
|
case '}':
|
|
_line_progressed(i);
|
|
_c4dbgpf("found terminating character: '{}'", c);
|
|
goto ended_scalar;
|
|
case ':':
|
|
if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t'))
|
|
{
|
|
_line_progressed(i);
|
|
_c4dbgpf("found terminating character: '{}'", c);
|
|
goto ended_scalar;
|
|
}
|
|
break;
|
|
case '{':
|
|
case '[':
|
|
_line_progressed(i);
|
|
_c4err("invalid character: '{}'", c); // noreturn
|
|
break;
|
|
case ']':
|
|
_line_progressed(i);
|
|
if(has_any(RSEQIMAP))
|
|
goto ended_scalar;
|
|
else
|
|
_c4err("invalid character: '{}'", c); // noreturn
|
|
break;
|
|
case '#':
|
|
if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))
|
|
{
|
|
_line_progressed(i);
|
|
_c4dbgpf("found terminating character: '{}'", c);
|
|
goto ended_scalar;
|
|
}
|
|
break;
|
|
default:
|
|
;
|
|
}
|
|
}
|
|
_c4dbgp("next line!");
|
|
_line_progressed(s.len);
|
|
if(!_finished_file())
|
|
{
|
|
_c4dbgp("next line!");
|
|
_line_ended();
|
|
_scan_line();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("file finished!");
|
|
goto ended_scalar;
|
|
}
|
|
s = m_evt_handler->m_curr->line_contents.rem;
|
|
needs_filter = true;
|
|
}
|
|
|
|
ended_scalar:
|
|
|
|
sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r"));
|
|
sc->needs_filter = needs_filter;
|
|
|
|
_c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
|
|
|
|
return sc->scalar.len > 0u;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
|
|
|
|
substr s = m_evt_handler->m_curr->line_contents.rem;
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
|
|
|
|
if(!s.len)
|
|
return false;
|
|
|
|
_c4dbgp("scanning scalar...");
|
|
|
|
switch(s.str[0])
|
|
{
|
|
case ']':
|
|
case '{':
|
|
case ',':
|
|
_c4dbgp("not a scalar.");
|
|
return false;
|
|
}
|
|
|
|
{
|
|
const size_t len = _is_special_json_scalar(s);
|
|
if(len)
|
|
{
|
|
sc->scalar = s.first(len);
|
|
sc->needs_filter = false;
|
|
_c4dbgpf("special json scalar: '{}'", sc->scalar);
|
|
_line_progressed(len);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// must be a number
|
|
size_t i = 0;
|
|
for( ; i < s.len; ++i)
|
|
{
|
|
const char c = s.str[i];
|
|
switch(c)
|
|
{
|
|
case ',':
|
|
case ']':
|
|
case ' ':
|
|
case '\t':
|
|
_c4dbgpf("found terminating character: '{}'", c);
|
|
goto ended_scalar;
|
|
case '#':
|
|
if(!i || s.str[i-1] == ' ')
|
|
{
|
|
_c4dbgpf("found terminating character: '{}'", c);
|
|
goto ended_scalar;
|
|
}
|
|
break;
|
|
default:
|
|
;
|
|
}
|
|
}
|
|
|
|
ended_scalar:
|
|
|
|
if(C4_LIKELY(i > 0))
|
|
{
|
|
_line_progressed(i);
|
|
sc->scalar = s.first(i);
|
|
sc->needs_filter = false;
|
|
_c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL));
|
|
|
|
substr s = m_evt_handler->m_curr->line_contents.rem;
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
|
|
|
|
if(!s.len)
|
|
return false;
|
|
|
|
_c4dbgp("scanning scalar...");
|
|
|
|
{
|
|
const size_t len = _is_special_json_scalar(s);
|
|
if(len)
|
|
{
|
|
sc->scalar = s.first(len);
|
|
sc->needs_filter = false;
|
|
_c4dbgpf("special json scalar: '{}'", sc->scalar);
|
|
_line_progressed(len);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// must be a number
|
|
size_t i = 0;
|
|
for( ; i < s.len; ++i)
|
|
{
|
|
const char c = s.str[i];
|
|
switch(c)
|
|
{
|
|
case ',':
|
|
case '}':
|
|
case ' ':
|
|
case '\t':
|
|
_c4dbgpf("found terminating character: '{}'", c);
|
|
goto ended_scalar;
|
|
case '#':
|
|
if(!i || s.str[i-1] == ' ')
|
|
{
|
|
_c4dbgpf("found terminating character: '{}'", c);
|
|
goto ended_scalar;
|
|
}
|
|
break;
|
|
default:
|
|
;
|
|
}
|
|
}
|
|
|
|
ended_scalar:
|
|
|
|
if(C4_LIKELY(i > 0))
|
|
{
|
|
_line_progressed(i);
|
|
sc->scalar = s.first(i);
|
|
sc->needs_filter = false;
|
|
_c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '-');
|
|
return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '.');
|
|
return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK|RUNK|USTY));
|
|
|
|
substr s = m_evt_handler->m_curr->line_contents.rem;
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
|
|
|
|
if(!s.len)
|
|
return false;
|
|
|
|
switch(s.str[0])
|
|
{
|
|
case '-':
|
|
if(_is_blck_token(s))
|
|
{
|
|
return false;
|
|
}
|
|
else if(_is_doc_begin(s))
|
|
{
|
|
_c4dbgp("token is doc start");
|
|
return false;
|
|
}
|
|
break;
|
|
case ':':
|
|
case '?':
|
|
if(_is_blck_token(s))
|
|
return false;
|
|
break;
|
|
case '[':
|
|
case '{':
|
|
case '&':
|
|
case '*':
|
|
case '!':
|
|
_RYML_WITH_TAB_TOKENS(case '\t':)
|
|
return false;
|
|
case '.':
|
|
if(_is_doc_end(s))
|
|
{
|
|
_c4dbgp("token is doc end");
|
|
return false;
|
|
}
|
|
break;
|
|
}
|
|
|
|
_c4dbgpf("plain scalar! indentation={}", indentation);
|
|
|
|
const size_t start_offset = m_evt_handler->m_curr->pos.offset;
|
|
const size_t start_line = m_evt_handler->m_curr->pos.line;
|
|
|
|
bool needs_filter = false;
|
|
while(true)
|
|
{
|
|
_c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s);
|
|
for(size_t i = 0; i < s.len; ++i)
|
|
{
|
|
const char curr = s.str[i];
|
|
//_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
|
|
switch(curr)
|
|
{
|
|
case ':':
|
|
_c4dbgpf("[{}]: got suspicious ':'", i);
|
|
// are there more characters?
|
|
if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
|
|
{
|
|
_c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
|
|
_line_progressed(i);
|
|
// ': ' is accepted only on the first line
|
|
if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
|
|
{
|
|
_c4dbgp("start line. scalar ends here");
|
|
goto ended_scalar;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
size_t j = i;
|
|
while(j + 1 < s.len && s.str[j+1] == ':')
|
|
{
|
|
_c4dbgp("skip colon");
|
|
++j;
|
|
}
|
|
i = j > i ? j-1 : i;
|
|
_c4dbgp("nothing to see here");
|
|
}
|
|
break;
|
|
case '#':
|
|
_c4dbgp("got suspicious '#'");
|
|
if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
|
|
{
|
|
_c4dbgp("comment! scalar ends here");
|
|
_line_progressed(i);
|
|
goto ended_scalar;
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("nothing to see here");
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
_line_progressed(s.len);
|
|
csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
|
|
next_peeked = next_peeked.trimr("\n\r");
|
|
const size_t next_indentation = next_peeked.first_not_of(' ');
|
|
_c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
|
|
if(next_indentation < indentation)
|
|
{
|
|
_c4dbgp("smaller indentation! scalar ended");
|
|
goto ended_scalar;
|
|
}
|
|
else if(next_indentation == 0 && next_peeked.len > 0)
|
|
{
|
|
const char first = next_peeked.str[0];
|
|
switch(first)
|
|
{
|
|
case '-':
|
|
next_peeked = next_peeked.trimr("\n\r");
|
|
_c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
|
|
if(_is_doc_begin_token(next_peeked))
|
|
{
|
|
_c4dbgp("doc begin! scalar ended");
|
|
goto ended_scalar;
|
|
}
|
|
break;
|
|
case '.':
|
|
next_peeked = next_peeked.trimr("\n\r");
|
|
_c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
|
|
if(_is_doc_end_token(next_peeked))
|
|
{
|
|
_c4dbgp("doc end! scalar ended");
|
|
goto ended_scalar;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
// load with next line
|
|
_c4dbgp("next line!");
|
|
if(!_finished_file())
|
|
{
|
|
_c4dbgp("next line!");
|
|
_line_ended();
|
|
_scan_line();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("file finished!");
|
|
goto ended_scalar;
|
|
}
|
|
s = m_evt_handler->m_curr->line_contents.rem;
|
|
needs_filter = true;
|
|
}
|
|
|
|
ended_scalar:
|
|
|
|
sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
|
|
sc->needs_filter = needs_filter;
|
|
|
|
_c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
|
|
|
|
return true;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
|
|
return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
|
|
return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY));
|
|
return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
|
|
{
|
|
substr rem{}; // declare here because of the goto
|
|
size_t nlpos{}; // declare here because of the goto
|
|
pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
|
|
if(pos >= m_buf.len)
|
|
goto next_is_empty;
|
|
|
|
// look for the next newline chars, and jump to the right of those
|
|
rem = from_next_line(m_buf.sub(pos));
|
|
if(rem.empty())
|
|
goto next_is_empty;
|
|
|
|
// now get everything up to and including the following newline chars
|
|
nlpos = rem.first_of("\r\n");
|
|
if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
|
|
nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
|
|
rem = rem.left_of(nlpos, /*include_pos*/true);
|
|
|
|
_c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
|
|
return rem;
|
|
|
|
next_is_empty:
|
|
_c4dbgpf("peek next line @ {}: (len=0)''", pos);
|
|
return {};
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_scan_line()
|
|
{
|
|
if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
|
|
m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
|
|
else
|
|
m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
|
|
{
|
|
_c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
|
|
m_evt_handler->m_curr->pos.offset += ahead;
|
|
m_evt_handler->m_curr->pos.col += ahead;
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
|
|
m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_line_ended()
|
|
{
|
|
_c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
|
|
m_evt_handler->m_curr->pos.line,
|
|
m_evt_handler->m_curr->line_contents.full.len,
|
|
m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
|
|
m_evt_handler->m_curr->pos.col, 1);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
|
|
m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
|
|
++m_evt_handler->m_curr->pos.line;
|
|
m_evt_handler->m_curr->pos.col = 1;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_line_ended_undo()
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
|
|
const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
|
|
_c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
|
|
m_evt_handler->m_curr->pos.offset -= delta;
|
|
--m_evt_handler->m_curr->pos.line;
|
|
m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
|
|
// don't forget to undo also the changes to the remainder of the line
|
|
//_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r');
|
|
m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
|
|
{
|
|
m_evt_handler->m_curr->indref = indentation;
|
|
_c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_save_indentation()
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
|
|
m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
|
|
_c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_end_map_blck()
|
|
{
|
|
_c4dbgp("mapblck: end");
|
|
if(has_any(RKCL|RVAL))
|
|
{
|
|
_c4dbgp("mapblck: set missing val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
}
|
|
else if(has_any(QMRK))
|
|
{
|
|
_c4dbgp("mapblck: set missing keyval");
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
}
|
|
m_evt_handler->end_map();
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_end_seq_blck()
|
|
{
|
|
if(has_any(RVAL))
|
|
{
|
|
_c4dbgp("seqblck: set missing val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
}
|
|
m_evt_handler->end_seq();
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_end2_map()
|
|
{
|
|
_c4dbgp("map: end");
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
|
|
if(has_any(BLCK))
|
|
{
|
|
_end_map_blck();
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
|
|
m_evt_handler->_pop();
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_end2_seq()
|
|
{
|
|
_c4dbgp("seq: end");
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
|
|
if(has_any(BLCK))
|
|
{
|
|
_end_seq_blck();
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY));
|
|
m_evt_handler->_pop();
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_begin2_doc()
|
|
{
|
|
m_doc_empty = true;
|
|
add_flags(RDOC);
|
|
m_evt_handler->begin_doc();
|
|
m_evt_handler->m_curr->indref = 0; // ?
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_begin2_doc_expl()
|
|
{
|
|
m_doc_empty = true;
|
|
add_flags(RDOC);
|
|
m_evt_handler->begin_doc_expl();
|
|
m_evt_handler->m_curr->indref = 0; // ?
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_end2_doc()
|
|
{
|
|
_c4dbgp("doc: end");
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
|
|
if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
|
|
{
|
|
_c4dbgp("doc was empty; add empty val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
}
|
|
m_evt_handler->end_doc();
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_end2_doc_expl()
|
|
{
|
|
_c4dbgp("doc: end");
|
|
if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
|
|
{
|
|
_c4dbgp("doc: no children; add empty val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
}
|
|
m_evt_handler->end_doc_expl();
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_maybe_begin_doc()
|
|
{
|
|
if(has_none(RDOC))
|
|
{
|
|
_c4dbgp("doc must be started");
|
|
_begin2_doc();
|
|
}
|
|
}
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_maybe_end_doc()
|
|
{
|
|
if(has_any(RDOC))
|
|
{
|
|
_c4dbgp("doc must be finished");
|
|
_end2_doc();
|
|
}
|
|
else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
|
|
{
|
|
_c4dbgp("no doc to finish, but pending annotations");
|
|
m_evt_handler->begin_doc();
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->end_doc();
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
|
|
if(m_evt_handler->m_stack[0].flags & RDOC)
|
|
{
|
|
_c4dbgp("root is RDOC");
|
|
if(m_evt_handler->m_curr->level != 0)
|
|
_handle_indentation_pop(&m_evt_handler->m_stack[0]);
|
|
}
|
|
else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
|
|
{
|
|
_c4dbgp("root is STREAM");
|
|
if(m_evt_handler->m_curr->level != 1)
|
|
_handle_indentation_pop(&m_evt_handler->m_stack[1]);
|
|
}
|
|
else
|
|
{
|
|
_c4err("internal error");
|
|
}
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC));
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_end_doc_suddenly()
|
|
{
|
|
_c4dbgp("end doc suddenly");
|
|
_end_doc_suddenly__pop();
|
|
_end2_doc_expl();
|
|
addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_start_doc_suddenly()
|
|
{
|
|
_c4dbgp("start doc suddenly");
|
|
_end_doc_suddenly__pop();
|
|
_end2_doc();
|
|
_begin2_doc_expl();
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_end_stream()
|
|
{
|
|
_c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
|
|
if(has_all(RSEQ|FLOW))
|
|
_c4err("missing terminating ]");
|
|
else if(has_all(RMAP|FLOW))
|
|
_c4err("missing terminating }");
|
|
if(m_evt_handler->m_stack.size() > 1)
|
|
_handle_indentation_pop(m_evt_handler->m_stack.begin());
|
|
if(has_all(RDOC))
|
|
{
|
|
_end2_doc();
|
|
}
|
|
else if(has_all(RTOP|RUNK))
|
|
{
|
|
if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
|
|
{
|
|
if(m_doc_empty)
|
|
{
|
|
m_evt_handler->begin_doc();
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->end_doc();
|
|
}
|
|
}
|
|
}
|
|
m_evt_handler->end_stream();
|
|
}
|
|
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
|
|
{
|
|
_c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
|
|
while(m_evt_handler->m_curr != popto)
|
|
{
|
|
if(has_any(RSEQ))
|
|
{
|
|
_c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
|
|
_end2_seq();
|
|
}
|
|
else if(has_any(RMAP))
|
|
{
|
|
_c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
|
|
_end2_map();
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
_c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
|
|
{
|
|
// search the stack frame to jump to based on its indentation
|
|
using state_type = typename EventHandler::state;
|
|
state_type const* popto = nullptr;
|
|
auto &stack = m_evt_handler->m_stack;
|
|
_RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
|
|
_RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
|
|
const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
|
|
#ifdef RYML_DBG
|
|
if(_dbg_enabled())
|
|
{
|
|
char flagbuf_[128];
|
|
for(state_type const& s : stack)
|
|
_dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
|
|
}
|
|
#endif
|
|
for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
|
|
{
|
|
_c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
|
|
if(s->indref == ind)
|
|
{
|
|
_c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
|
|
popto = s;
|
|
break;
|
|
}
|
|
}
|
|
if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
|
|
{
|
|
_c4err("parse error: incorrect indentation?");
|
|
}
|
|
_handle_indentation_pop(popto);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
|
|
{
|
|
// search the stack frame to jump to based on its indentation
|
|
using state_type = typename EventHandler::state;
|
|
auto &stack = m_evt_handler->m_stack;
|
|
_RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
|
|
_RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
|
|
const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
|
|
state_type const* popto = nullptr;
|
|
#ifdef RYML_DBG
|
|
char flagbuf_[128];
|
|
if(_dbg_enabled())
|
|
{
|
|
for(state_type const& s : stack)
|
|
_dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
|
|
}
|
|
#endif
|
|
for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
|
|
{
|
|
_c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
|
|
if(s->indref < ind)
|
|
{
|
|
break;
|
|
}
|
|
else if(s->indref == ind)
|
|
{
|
|
_c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
|
|
if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
|
|
{
|
|
break;
|
|
}
|
|
popto = s;
|
|
if(has_all(RSEQ|BLCK, s))
|
|
{
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
const size_t first = rem.first_not_of(' ');
|
|
_RYML_CB_ASSERT(stack.m_callbacks, first == ind || first == npos);
|
|
rem = rem.right_of(first, true);
|
|
_c4dbgpf("indentless? rem='{}' first={}", rem, first);
|
|
if(rem.begins_with('-') && _is_blck_token(rem))
|
|
{
|
|
_c4dbgp("parent was indentless seq");
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
|
|
{
|
|
_c4err("parse error: incorrect indentation?");
|
|
}
|
|
_handle_indentation_pop(popto);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
template<class EventHandler>
|
|
typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
|
|
{
|
|
// quoted scalars can spread over multiple lines!
|
|
// nice explanation here: http://yaml-multiline.info/
|
|
|
|
// a span to the end of the file
|
|
size_t b = m_evt_handler->m_curr->pos.offset;
|
|
substr s = m_buf.sub(b);
|
|
if(s.begins_with(' '))
|
|
{
|
|
s = s.triml(' ');
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
|
|
_line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
|
|
}
|
|
b = m_evt_handler->m_curr->pos.offset; // take this into account
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''));
|
|
|
|
// skip the opening quote
|
|
_line_progressed(1);
|
|
s = s.sub(1);
|
|
|
|
bool needs_filter = false;
|
|
|
|
size_t numlines = 1; // we already have one line
|
|
size_t pos = npos; // find the pos of the matching quote
|
|
while( ! _finished_file())
|
|
{
|
|
const csubstr line = m_evt_handler->m_curr->line_contents.rem;
|
|
bool line_is_blank = true;
|
|
_c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
|
|
for(size_t i = 0; i < line.len; ++i)
|
|
{
|
|
const char curr = line.str[i];
|
|
if(curr == '\'') // single quotes are escaped with two single quotes
|
|
{
|
|
const char next = i+1 < line.len ? line.str[i+1] : '~';
|
|
if(next != '\'') // so just look for the first quote
|
|
{ // without another after it
|
|
pos = i;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
needs_filter = true; // needs filter to remove escaped quotes
|
|
++i; // skip the escaped quote
|
|
}
|
|
}
|
|
else if(curr != ' ')
|
|
{
|
|
line_is_blank = false;
|
|
}
|
|
}
|
|
|
|
// leading whitespace also needs filtering
|
|
needs_filter = needs_filter
|
|
|| (numlines > 1)
|
|
|| line_is_blank
|
|
|| (_at_line_begin() && line.begins_with(' '));
|
|
|
|
if(pos == npos)
|
|
{
|
|
_line_progressed(line.len);
|
|
++numlines;
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\'');
|
|
_line_progressed(pos + 1); // progress beyond the quote
|
|
pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
|
|
break;
|
|
}
|
|
|
|
_line_ended();
|
|
_scan_line();
|
|
}
|
|
|
|
if(pos == npos)
|
|
{
|
|
_c4err("reached end of file while looking for closing quote");
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
|
|
s = s.sub(0, pos-1);
|
|
}
|
|
|
|
_c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
|
|
|
|
return ScannedScalar { s, needs_filter };
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
template<class EventHandler>
|
|
typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
|
|
{
|
|
// quoted scalars can spread over multiple lines!
|
|
// nice explanation here: http://yaml-multiline.info/
|
|
|
|
// a span to the end of the file
|
|
size_t b = m_evt_handler->m_curr->pos.offset;
|
|
substr s = m_buf.sub(b);
|
|
if(s.begins_with(' '))
|
|
{
|
|
s = s.triml(' ');
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
|
|
_line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
|
|
}
|
|
b = m_evt_handler->m_curr->pos.offset; // take this into account
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'));
|
|
|
|
// skip the opening quote
|
|
_line_progressed(1);
|
|
s = s.sub(1);
|
|
|
|
bool needs_filter = false;
|
|
|
|
size_t numlines = 1; // we already have one line
|
|
size_t pos = npos; // find the pos of the matching quote
|
|
while( ! _finished_file())
|
|
{
|
|
const csubstr line = m_evt_handler->m_curr->line_contents.rem;
|
|
#if defined(__GNUC__) && __GNUC__ == 11
|
|
C4_DONT_OPTIMIZE(line); // prevent erroneous hoist of the assignment out of the loop
|
|
#endif
|
|
bool line_is_blank = true;
|
|
_c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
|
|
for(size_t i = 0; i < line.len; ++i)
|
|
{
|
|
const char curr = line.str[i];
|
|
if(curr != ' ')
|
|
line_is_blank = false;
|
|
// every \ is an escape
|
|
if(curr == '\\')
|
|
{
|
|
const char next = i+1 < line.len ? line.str[i+1] : '~';
|
|
needs_filter = true;
|
|
if(next == '"' || next == '\\')
|
|
++i;
|
|
}
|
|
else if(curr == '"')
|
|
{
|
|
pos = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// leading whitespace also needs filtering
|
|
needs_filter = needs_filter
|
|
|| (numlines > 1)
|
|
|| line_is_blank
|
|
|| (_at_line_begin() && line.begins_with(' '));
|
|
|
|
if(pos == npos)
|
|
{
|
|
_line_progressed(line.len);
|
|
++numlines;
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '"');
|
|
_line_progressed(pos + 1); // progress beyond the quote
|
|
pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
|
|
break;
|
|
}
|
|
|
|
_line_ended();
|
|
_scan_line();
|
|
}
|
|
|
|
if(pos == npos)
|
|
{
|
|
_c4err("reached end of file looking for closing quote");
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
|
|
s = s.sub(0, pos-1);
|
|
}
|
|
|
|
_c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
|
|
|
|
return ScannedScalar { s, needs_filter };
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
|
|
{
|
|
_c4dbgpf("blck: indref={}", indref);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref != npos);
|
|
|
|
// nice explanation here: http://yaml-multiline.info/
|
|
csubstr s = m_evt_handler->m_curr->line_contents.rem;
|
|
csubstr trimmed = s.triml(' ');
|
|
if(trimmed.str > s.str)
|
|
{
|
|
_c4dbgp("skipping whitespace");
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
|
|
_line_progressed(static_cast<size_t>(trimmed.str - s.str));
|
|
s = trimmed;
|
|
}
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
|
|
|
|
_c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s);
|
|
|
|
// parse the spec
|
|
BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
|
|
size_t indentation = npos; // have to find out if no spec is given
|
|
csubstr digits;
|
|
if(s.len > 1)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"));
|
|
csubstr t = s.sub(1);
|
|
_c4dbgpf("blck: spec is multichar: '{}'", t);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
|
|
size_t pos = t.first_of("-+");
|
|
_c4dbgpf("blck: spec chomp char at {}", pos);
|
|
if(pos != npos)
|
|
{
|
|
if(t[pos] == '-')
|
|
chomp = CHOMP_STRIP;
|
|
else if(t[pos] == '+')
|
|
chomp = CHOMP_KEEP;
|
|
if(pos == 0)
|
|
t = t.sub(1);
|
|
else
|
|
t = t.first(pos);
|
|
}
|
|
// from here to the end, only digits are considered
|
|
digits = t.left_of(t.first_not_of("0123456789"));
|
|
if( ! digits.empty())
|
|
{
|
|
if(C4_UNLIKELY(digits.len > 1))
|
|
_c4err("parse error: invalid indentation");
|
|
_c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
|
|
if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
|
|
_c4err("parse error: could not read indentation as decimal");
|
|
if(C4_UNLIKELY( ! indentation))
|
|
_c4err("parse error: null indentation");
|
|
_c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
|
|
indentation += m_evt_handler->m_curr->indref;
|
|
}
|
|
}
|
|
|
|
_c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
|
|
|
|
// finish the current line
|
|
_line_progressed(s.len);
|
|
_line_ended();
|
|
_scan_line();
|
|
|
|
// start with a zero-length block, already pointing at the right place
|
|
substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
|
|
|
|
// read every full line into a raw block,
|
|
// from which newlines are to be stripped as needed.
|
|
//
|
|
// If no explicit indentation was given, pick it from the first
|
|
// non-empty line. See
|
|
// https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
|
|
size_t num_lines = 0;
|
|
size_t first = m_evt_handler->m_curr->pos.line;
|
|
size_t provisional_indentation = npos;
|
|
LineContents lc;
|
|
while(( ! _finished_file()))
|
|
{
|
|
// peek next line, but do not advance immediately
|
|
lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
|
|
#if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
|
|
C4_DONT_OPTIMIZE(lc.rem);
|
|
#endif
|
|
_c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
|
|
// evaluate termination conditions
|
|
if(indentation != npos)
|
|
{
|
|
_c4dbgpf("blck: indentation={}", indentation);
|
|
// stop when the line is deindented and not empty
|
|
if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
|
|
{
|
|
if(raw_block.len)
|
|
{
|
|
_c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
|
|
}
|
|
else
|
|
{
|
|
_c4err("indentation decreased without any scalar");
|
|
}
|
|
break;
|
|
}
|
|
else if(indentation == 0)
|
|
{
|
|
_c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
|
|
if(_is_doc_token(lc.rem))
|
|
{
|
|
_c4dbgp("blck: stop. indentation=0 and doc ended");
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const size_t fns = lc.stripped.first_not_of(' ');
|
|
_c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
|
|
if(fns != npos) // non-empty line
|
|
{
|
|
_RYML_WITH_TAB_TOKENS(
|
|
if(C4_UNLIKELY(lc.stripped.begins_with('\t')))
|
|
_c4err("parse error");
|
|
)
|
|
_c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
|
|
if(provisional_indentation == npos)
|
|
{
|
|
if(lc.indentation < indref)
|
|
{
|
|
_c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
|
|
if(raw_block.len == 0)
|
|
{
|
|
_c4dbgp("blck: was empty, undo next line");
|
|
_line_ended_undo();
|
|
}
|
|
break;
|
|
}
|
|
else if(lc.indentation == m_evt_handler->m_curr->indref)
|
|
{
|
|
if(has_any(RSEQ|RMAP))
|
|
{
|
|
_c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
|
|
break;
|
|
}
|
|
}
|
|
_c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
|
|
indentation = lc.indentation;
|
|
}
|
|
else
|
|
{
|
|
if(lc.indentation >= provisional_indentation)
|
|
{
|
|
_c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
|
|
//indentation = provisional_indentation ? provisional_indentation : lc.indentation;
|
|
indentation = lc.indentation;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
//_c4err("parse error: first non-empty block line should have at least the original indentation");
|
|
}
|
|
}
|
|
}
|
|
else // empty line
|
|
{
|
|
_c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
|
|
if(provisional_indentation != npos)
|
|
{
|
|
if(lc.stripped.len >= provisional_indentation)
|
|
{
|
|
_c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
|
|
provisional_indentation = lc.stripped.len;
|
|
}
|
|
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
|
|
else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
|
|
{
|
|
_c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
|
|
provisional_indentation = lc.indentation;
|
|
}
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
|
|
_c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
|
|
if(provisional_indentation == npos)
|
|
{
|
|
provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL);
|
|
_c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
|
|
}
|
|
if(provisional_indentation < indref)
|
|
{
|
|
provisional_indentation = indref;
|
|
_c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// advance now that we know the folded scalar continues
|
|
m_evt_handler->m_curr->line_contents = lc;
|
|
_c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
|
|
raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
|
|
_line_ended();
|
|
++num_lines;
|
|
}
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
|
|
C4_UNUSED(num_lines);
|
|
C4_UNUSED(first);
|
|
|
|
if(indentation == npos)
|
|
{
|
|
_c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
|
|
indentation = provisional_indentation;
|
|
}
|
|
|
|
if(num_lines)
|
|
_line_ended_undo();
|
|
|
|
_c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
|
|
|
|
sb->scalar = raw_block;
|
|
sb->indentation = indentation;
|
|
sb->chomp = chomp;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
/** @cond dev */
|
|
|
|
// a debugging scaffold:
|
|
#if 0
|
|
#define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
|
|
#else
|
|
#define _c4dbgfws(...)
|
|
#endif
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
|
|
{
|
|
_c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
|
|
_RYML_CB_ASSERT(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t');
|
|
|
|
const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
|
|
if(first_pos != npos)
|
|
{
|
|
const char first_char = proc.src[first_pos];
|
|
_c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
|
|
if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
|
|
{
|
|
_c4dbgfws("whitespace is trailing on line", "");
|
|
proc.skip(first_pos - proc.rpos);
|
|
}
|
|
else // a legit whitespace
|
|
{
|
|
proc.copy();
|
|
_c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
|
|
}
|
|
return true;
|
|
}
|
|
_c4dbgfws("whitespace is trailing on line", "");
|
|
return false;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
|
|
{
|
|
if(!_filter_ws_handle_to_first_non_space(proc))
|
|
{
|
|
_c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
|
|
proc.copy(proc.src.len - proc.rpos);
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
|
|
{
|
|
if(!_filter_ws_handle_to_first_non_space(proc))
|
|
{
|
|
_c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
|
|
proc.skip(proc.src.len - proc.rpos);
|
|
}
|
|
}
|
|
|
|
#undef _c4dbgfws
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
/* plain scalars */
|
|
|
|
// a debugging scaffold:
|
|
#if 0
|
|
#define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
|
|
#else
|
|
#define _c4dbgfps(fmt, ...)
|
|
#endif
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
|
|
{
|
|
_RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
|
|
|
|
_c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
|
|
size_t ii = proc.rpos;
|
|
const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
|
|
if(numnl_following)
|
|
{
|
|
proc.set('\n', numnl_following);
|
|
_c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
|
|
}
|
|
else
|
|
{
|
|
const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
|
|
if(ret != npos)
|
|
{
|
|
proc.set(' ');
|
|
_c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
|
|
}
|
|
else
|
|
{
|
|
_c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
|
|
ii = proc.src.len;
|
|
}
|
|
}
|
|
proc.rpos = ii;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
|
|
{
|
|
_RYML_CB_ASSERT(this->callbacks(), indentation != npos);
|
|
_c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src);
|
|
|
|
while(proc.has_more_chars())
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
|
|
switch(curr)
|
|
{
|
|
case ' ':
|
|
_RYML_WITH_TAB_TOKENS(case '\t':)
|
|
_c4dbgfps("whitespace", curr);
|
|
_filter_ws_skip_trailing(proc);
|
|
break;
|
|
case '\n':
|
|
_c4dbgfps("newline", curr);
|
|
_filter_nl_plain(proc, /*indentation*/indentation);
|
|
break;
|
|
case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
|
|
_c4dbgfps("carriage return, ignore", curr);
|
|
proc.skip();
|
|
break;
|
|
default:
|
|
proc.copy();
|
|
break;
|
|
}
|
|
}
|
|
|
|
_c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
|
|
|
|
return proc.result();
|
|
}
|
|
|
|
#undef _c4dbgfps
|
|
|
|
|
|
template<class EventHandler>
|
|
FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
|
|
{
|
|
FilterProcessorSrcDst proc(scalar, dst);
|
|
return _filter_plain(proc, indentation);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
|
|
{
|
|
FilterProcessorInplaceEndExtending proc(dst, cap);
|
|
return _filter_plain(proc, indentation);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
/* single quoted */
|
|
|
|
// a debugging scaffold:
|
|
#if 0
|
|
#define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
|
|
#else
|
|
#define _c4dbgfsq(fmt, ...)
|
|
#endif
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
|
|
{
|
|
_RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
|
|
|
|
_c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
|
|
size_t ii = proc.rpos;
|
|
const size_t numnl_following = _count_following_newlines(proc.src, &ii);
|
|
if(numnl_following)
|
|
{
|
|
proc.set('\n', numnl_following);
|
|
_c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
|
|
}
|
|
else
|
|
{
|
|
const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
|
|
if(ret != npos)
|
|
{
|
|
proc.set(' ');
|
|
_c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
|
|
}
|
|
else
|
|
{
|
|
proc.set(' ');
|
|
_c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
|
|
}
|
|
}
|
|
proc.rpos = ii;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
|
|
{
|
|
_c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
|
|
|
|
// from the YAML spec for double-quoted scalars:
|
|
// https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
|
|
while(proc.has_more_chars())
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
|
|
switch(curr)
|
|
{
|
|
case ' ':
|
|
case '\t':
|
|
_c4dbgfsq("whitespace", curr);
|
|
_filter_ws_copy_trailing(proc);
|
|
break;
|
|
case '\n':
|
|
_c4dbgfsq("newline", curr);
|
|
_filter_nl_squoted(proc);
|
|
break;
|
|
case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
|
|
_c4dbgfsq("skip cr", curr);
|
|
proc.skip();
|
|
break;
|
|
case '\'':
|
|
_c4dbgfsq("squote", curr);
|
|
if(proc.next() == '\'')
|
|
{
|
|
_c4dbgfsq("two consecutive squotes", curr);
|
|
proc.skip();
|
|
proc.copy();
|
|
}
|
|
else
|
|
{
|
|
_c4err("filter error");
|
|
}
|
|
break;
|
|
default:
|
|
proc.copy();
|
|
break;
|
|
}
|
|
}
|
|
|
|
_c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
|
|
|
|
return proc.result();
|
|
}
|
|
|
|
#undef _c4dbgfsq
|
|
|
|
template<class EventHandler>
|
|
FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
|
|
{
|
|
FilterProcessorSrcDst proc(scalar, dst);
|
|
return _filter_squoted(proc);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
FilterResult ParseEngine<EventHandler>::filter_scalar_squoted_in_place(substr dst, size_t cap)
|
|
{
|
|
FilterProcessorInplaceEndExtending proc(dst, cap);
|
|
return _filter_squoted(proc);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
/* double quoted */
|
|
|
|
// a debugging scaffold:
|
|
#if 0
|
|
#define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
|
|
#else
|
|
#define _c4dbgfdq(...)
|
|
#endif
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
|
|
{
|
|
_RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
|
|
|
|
_c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
|
|
size_t ii = proc.rpos;
|
|
const size_t numnl_following = _count_following_newlines(proc.src, &ii);
|
|
if(numnl_following)
|
|
{
|
|
proc.set('\n', numnl_following);
|
|
_c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
|
|
}
|
|
else
|
|
{
|
|
const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
|
|
if(ret != npos)
|
|
{
|
|
proc.set(' ');
|
|
_c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
|
|
}
|
|
else
|
|
{
|
|
proc.set(' ');
|
|
_c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
|
|
}
|
|
if(ii < proc.src.len && proc.src.str[ii] == '\\')
|
|
{
|
|
_c4dbgfdq("backslash at [{}]", ii);
|
|
const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
|
|
if(next == ' ' || next == '\t')
|
|
{
|
|
_c4dbgfdq("extend skip to backslash", "");
|
|
++ii;
|
|
}
|
|
}
|
|
}
|
|
proc.rpos = ii;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
|
|
{
|
|
char next = proc.next();
|
|
_c4dbgfdq("backslash, next='{}'", _c4prc(next));
|
|
if(next == '\r')
|
|
{
|
|
if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
|
|
{
|
|
proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
|
|
next = '\n';
|
|
_c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
|
|
}
|
|
}
|
|
|
|
if(next == '\n')
|
|
{
|
|
size_t ii = proc.rpos + 2;
|
|
for( ; ii < proc.src.len; ++ii)
|
|
{
|
|
// skip leading whitespace
|
|
if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
|
|
;
|
|
else
|
|
break;
|
|
}
|
|
proc.skip(ii - proc.rpos);
|
|
}
|
|
else if(next == '"' || next == '/' || next == ' ' || next == '\t')
|
|
{
|
|
// escapes for json compatibility
|
|
proc.translate_esc(next);
|
|
_c4dbgfdq("here, used '{}'", _c4prc(next));
|
|
}
|
|
else if(next == '\r')
|
|
{
|
|
proc.skip();
|
|
}
|
|
else if(next == 'n')
|
|
{
|
|
proc.translate_esc('\n');
|
|
}
|
|
else if(next == 'r')
|
|
{
|
|
proc.translate_esc('\r');
|
|
}
|
|
else if(next == 't')
|
|
{
|
|
proc.translate_esc('\t');
|
|
}
|
|
else if(next == '\\')
|
|
{
|
|
proc.translate_esc('\\');
|
|
}
|
|
else if(next == 'x') // UTF8
|
|
{
|
|
if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
|
|
_c4err("\\x requires 2 hex digits. scalar pos={}", proc.rpos);
|
|
csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
|
|
_c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
|
|
uint8_t byteval = {};
|
|
if(C4_UNLIKELY(!read_hex(codepoint, &byteval)))
|
|
_c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos);
|
|
proc.translate_esc_bulk((const char*)&byteval, 1u, /*nread*/3u);
|
|
_c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
|
|
}
|
|
else if(next == 'u') // UTF16
|
|
{
|
|
if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
|
|
_c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos);
|
|
char readbuf[8];
|
|
csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
|
|
uint32_t codepoint_val = {};
|
|
if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
|
|
_c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
|
|
const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
|
|
if(C4_UNLIKELY(numbytes == 0))
|
|
_c4err("failed to decode code point={}", proc.rpos);
|
|
_RYML_CB_ASSERT(callbacks(), numbytes <= 4);
|
|
proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
|
|
}
|
|
else if(next == 'U') // UTF32
|
|
{
|
|
if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
|
|
_c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos);
|
|
char readbuf[8];
|
|
csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
|
|
uint32_t codepoint_val = {};
|
|
if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
|
|
_c4err("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
|
|
const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
|
|
if(C4_UNLIKELY(numbytes == 0))
|
|
_c4err("failed to decode code point={}", proc.rpos);
|
|
_RYML_CB_ASSERT(callbacks(), numbytes <= 4);
|
|
proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
|
|
}
|
|
// https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
|
|
else if(next == '0')
|
|
{
|
|
proc.translate_esc('\0');
|
|
}
|
|
else if(next == 'b') // backspace
|
|
{
|
|
proc.translate_esc('\b');
|
|
}
|
|
else if(next == 'f') // form feed
|
|
{
|
|
proc.translate_esc('\f');
|
|
}
|
|
else if(next == 'a') // bell character
|
|
{
|
|
proc.translate_esc('\a');
|
|
}
|
|
else if(next == 'v') // vertical tab
|
|
{
|
|
proc.translate_esc('\v');
|
|
}
|
|
else if(next == 'e') // escape character
|
|
{
|
|
proc.translate_esc('\x1b');
|
|
}
|
|
else if(next == '_') // unicode non breaking space \u00a0
|
|
{
|
|
// https://www.compart.com/en/unicode/U+00a0
|
|
const char payload[] = {
|
|
_RYML_CHCONST(-0x3e, 0xc2),
|
|
_RYML_CHCONST(-0x60, 0xa0),
|
|
};
|
|
proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
|
|
}
|
|
else if(next == 'N') // unicode next line \u0085
|
|
{
|
|
// https://www.compart.com/en/unicode/U+0085
|
|
const char payload[] = {
|
|
_RYML_CHCONST(-0x3e, 0xc2),
|
|
_RYML_CHCONST(-0x7b, 0x85),
|
|
};
|
|
proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
|
|
}
|
|
else if(next == 'L') // unicode line separator \u2028
|
|
{
|
|
// https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
|
|
const char payload[] = {
|
|
_RYML_CHCONST(-0x1e, 0xe2),
|
|
_RYML_CHCONST(-0x80, 0x80),
|
|
_RYML_CHCONST(-0x58, 0xa8),
|
|
};
|
|
proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
|
|
}
|
|
else if(next == 'P') // unicode paragraph separator \u2029
|
|
{
|
|
// https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
|
|
const char payload[] = {
|
|
_RYML_CHCONST(-0x1e, 0xe2),
|
|
_RYML_CHCONST(-0x80, 0x80),
|
|
_RYML_CHCONST(-0x57, 0xa9),
|
|
};
|
|
proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
|
|
}
|
|
else if(next == '\0')
|
|
{
|
|
proc.skip();
|
|
}
|
|
else
|
|
{
|
|
_c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
|
|
}
|
|
_c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
|
|
}
|
|
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
|
|
{
|
|
_c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
|
|
// from the YAML spec for double-quoted scalars:
|
|
// https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
|
|
while(proc.has_more_chars())
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
|
|
switch(curr)
|
|
{
|
|
case ' ':
|
|
case '\t':
|
|
{
|
|
_c4dbgfdq("whitespace", curr);
|
|
_filter_ws_copy_trailing(proc);
|
|
break;
|
|
}
|
|
case '\n':
|
|
{
|
|
_c4dbgfdq("newline", curr);
|
|
_filter_nl_dquoted(proc);
|
|
break;
|
|
}
|
|
case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
|
|
{
|
|
_c4dbgfdq("carriage return, ignore", curr);
|
|
proc.skip();
|
|
break;
|
|
}
|
|
case '\\':
|
|
{
|
|
_filter_dquoted_backslash(proc);
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
proc.copy();
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
_c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
|
|
return proc.result();
|
|
}
|
|
|
|
#undef _c4dbgfdq
|
|
|
|
|
|
template<class EventHandler>
|
|
FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
|
|
{
|
|
FilterProcessorSrcDst proc(scalar, dst);
|
|
return _filter_dquoted(proc);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
|
|
{
|
|
FilterProcessorInplaceMidExtending proc(dst, cap);
|
|
return _filter_dquoted(proc);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
// block filtering helpers
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
|
|
{
|
|
_RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
|
|
_RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos);
|
|
|
|
// a debugging scaffold:
|
|
#if 0
|
|
#define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
|
|
#else
|
|
#define _c4dbgchomp(...)
|
|
#endif
|
|
|
|
// advance to the last line having spaces beyond the indentation
|
|
{
|
|
size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
|
|
if(last != npos)
|
|
{
|
|
_c4dbgchomp("found newline and larger indentation. last={}", last);
|
|
last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
|
|
_RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
|
|
// remove indentation spaces, copy the rest
|
|
while((proc.rpos < last) && proc.has_more_chars())
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgchomp("curr='{}'", _c4prc(curr));
|
|
switch(curr)
|
|
{
|
|
case '\n':
|
|
{
|
|
_c4dbgchomp("newline! remlen={}", proc.rem().len);
|
|
proc.copy();
|
|
// are there spaces after the newline?
|
|
csubstr at_next_line = proc.rem();
|
|
if(at_next_line.begins_with(' '))
|
|
{
|
|
_c4dbgchomp("next line begins with spaces. indentation={}", indentation);
|
|
// there are spaces.
|
|
size_t first_non_space = at_next_line.first_not_of(' ');
|
|
_c4dbgchomp("first_non_space={}", first_non_space);
|
|
if(first_non_space == npos)
|
|
{
|
|
_c4dbgchomp("{} spaces, to the end", at_next_line.len);
|
|
first_non_space = at_next_line.len;
|
|
}
|
|
if(first_non_space <= indentation)
|
|
{
|
|
_c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
|
|
proc.skip(first_non_space);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
|
|
proc.skip(indentation);
|
|
// copy the spaces after the indentation
|
|
_c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
|
|
proc.copy(first_non_space - indentation);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case '\r':
|
|
proc.skip();
|
|
break;
|
|
default:
|
|
_c4err("parse error");
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// from now on, we only have line ends (or indentation spaces)
|
|
switch(chomp)
|
|
{
|
|
case CHOMP_CLIP:
|
|
{
|
|
bool had_one = false;
|
|
while(proc.has_more_chars())
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgchomp("CLIP: '{}'", _c4prc(curr));
|
|
switch(curr)
|
|
{
|
|
case '\n':
|
|
{
|
|
_c4dbgchomp("copy newline!", curr);
|
|
proc.copy();
|
|
proc.set_at_end();
|
|
had_one = true;
|
|
break;
|
|
}
|
|
case ' ':
|
|
case '\r':
|
|
_c4dbgchomp("skip!", curr);
|
|
proc.skip();
|
|
break;
|
|
}
|
|
}
|
|
if(!had_one) // there were no newline characters. add one.
|
|
{
|
|
_c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
|
|
proc.set('\n');
|
|
}
|
|
break;
|
|
}
|
|
case CHOMP_KEEP:
|
|
{
|
|
_c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
|
|
while(proc.has_more_chars())
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgchomp("KEEP: '{}'", _c4prc(curr));
|
|
switch(curr)
|
|
{
|
|
case '\n':
|
|
_c4dbgchomp("copy newline!", curr);
|
|
proc.copy();
|
|
break;
|
|
case ' ':
|
|
case '\r':
|
|
_c4dbgchomp("skip!", curr);
|
|
proc.skip();
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case CHOMP_STRIP:
|
|
{
|
|
_c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
|
|
// nothing to do!
|
|
break;
|
|
}
|
|
}
|
|
|
|
#undef _c4dbgchomp
|
|
}
|
|
|
|
|
|
// Debugging scaffold for the shared block-scalar filter helpers. Change
// the condition below to 1 to trace them; every message is then prefixed
// with proc.rpos and proc.wpos of the processor in scope.
#if 0
#define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
#else
#define _c4dbgfb(...)
#endif
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
|
|
{
|
|
csubstr rem = proc.rem(); // remaining
|
|
if(rem.len)
|
|
{
|
|
size_t first = rem.first_not_of(' ');
|
|
if(first != npos)
|
|
{
|
|
_c4dbgfb("{} spaces follow before next nonws character", first);
|
|
if(first < indentation)
|
|
{
|
|
_c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
|
|
proc.skip(first);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgfb("skip {} spaces from indentation", indentation);
|
|
proc.skip(indentation);
|
|
}
|
|
}
|
|
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
|
|
else
|
|
{
|
|
_c4dbgfb("all spaces to the end: {} spaces", first);
|
|
first = rem.len;
|
|
if(first)
|
|
{
|
|
if(first < indentation)
|
|
{
|
|
_c4dbgfb("skip everything", first);
|
|
proc.skip(proc.src.len - proc.rpos);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgfb("skip {} spaces from indentation", indentation);
|
|
proc.skip(indentation);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
|
|
{
|
|
csubstr contents = proc.src.trimr(" \n\r");
|
|
_c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
|
|
if(!contents.len)
|
|
{
|
|
_c4dbgfb("ws: all whitespace: len={}", proc.src.len);
|
|
if(chomp == CHOMP_KEEP && proc.src.len)
|
|
{
|
|
_c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
|
|
while(proc.has_more_chars())
|
|
{
|
|
const char curr = proc.curr();
|
|
if(curr == '\n')
|
|
proc.copy();
|
|
else
|
|
proc.skip();
|
|
}
|
|
if(!proc.wpos)
|
|
{
|
|
proc.set('\n');
|
|
}
|
|
}
|
|
}
|
|
return contents.len;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
|
|
{
|
|
_c4dbgfb("contents_len={}", contents_len);
|
|
|
|
_RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
|
|
|
|
// extend contents to just before the first newline at the end,
|
|
// in case it is preceded by spaces
|
|
size_t firstnewl = proc.src.first_of('\n', contents_len);
|
|
if(firstnewl != npos)
|
|
{
|
|
contents_len = firstnewl;
|
|
_c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
|
|
}
|
|
else
|
|
{
|
|
contents_len = proc.src.len;
|
|
_c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
|
|
}
|
|
|
|
return contents_len;
|
|
}
|
|
|
|
#undef _c4dbgfb
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Debugging scaffold for the block literal filter. Change the condition
// below to 1 to trace it; every message is then prefixed with proc.rpos
// and proc.wpos of the processor in scope.
#if 0
#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
#else
#define _c4dbgfbl(...)
#endif
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
|
|
{
|
|
_c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
|
|
|
|
size_t contents_len = _handle_all_whitespace(proc, chomp);
|
|
if(!contents_len)
|
|
return proc.result();
|
|
|
|
contents_len = _extend_to_chomp(proc, contents_len);
|
|
|
|
_c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
|
|
|
|
_filter_block_indentation(proc, indentation);
|
|
|
|
// now filter the bulk
|
|
while(proc.has_more_chars(/*maxpos*/contents_len))
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
|
|
switch(curr)
|
|
{
|
|
case '\n':
|
|
{
|
|
_c4dbgfbl("found newline. skip indentation on the next line", curr);
|
|
proc.copy(); // copy the newline
|
|
_filter_block_indentation(proc, indentation);
|
|
break;
|
|
}
|
|
case '\r':
|
|
proc.skip();
|
|
break;
|
|
default:
|
|
proc.copy();
|
|
break;
|
|
}
|
|
}
|
|
|
|
_c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
|
|
|
|
_filter_chomp(proc, chomp, indentation);
|
|
|
|
_c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
|
|
|
|
return proc.result();
|
|
}
|
|
|
|
#undef _c4dbgfbl
|
|
|
|
template<class EventHandler>
|
|
FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
|
|
{
|
|
FilterProcessorSrcDst proc(scalar, dst);
|
|
return _filter_block_literal(proc, indentation, chomp);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
|
|
{
|
|
FilterProcessorInplaceEndExtending proc(scalar, cap);
|
|
return _filter_block_literal(proc, indentation, chomp);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Debugging scaffold for the block folded filter. Change the condition
// below to 1 to trace it; every message is then prefixed with proc.rpos
// and proc.wpos of the processor in scope.
#if 0
#define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
#else
#define _c4dbgfbf(...)
#endif
|
|
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
|
|
{
|
|
_filter_block_indentation(proc, indentation);
|
|
while(proc.has_more_chars(len))
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
|
|
switch(curr)
|
|
{
|
|
case '\n':
|
|
_c4dbgfbf("newline.", curr);
|
|
proc.copy();
|
|
_filter_block_indentation(proc, indentation);
|
|
break;
|
|
case '\r':
|
|
proc.skip();
|
|
break;
|
|
case ' ':
|
|
case '\t':
|
|
{
|
|
size_t first = proc.rem().first_not_of(" \t");
|
|
_c4dbgfbf("space. first={}", first);
|
|
if(first == npos)
|
|
first = proc.rem().len;
|
|
_c4dbgfbf("... indentation increased to {}", first);
|
|
_filter_block_folded_indented_block(proc, indentation, len, first);
|
|
break;
|
|
}
|
|
default:
|
|
_c4dbgfbf("newl leading: not space, not newline. stop.", 0);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
|
|
{
|
|
switch(num_newl)
|
|
{
|
|
case 1u:
|
|
_c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
|
|
wpos_at_first_newl = proc.wpos;
|
|
proc.skip();
|
|
proc.set(' ');
|
|
break;
|
|
case 2u:
|
|
_c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
|
|
_RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl != npos);
|
|
_RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ');
|
|
_RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
|
|
proc.skip();
|
|
proc.set_at(wpos_at_first_newl, '\n');
|
|
_RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n');
|
|
break;
|
|
default:
|
|
_c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
|
|
proc.copy();
|
|
break;
|
|
}
|
|
return wpos_at_first_newl;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
|
|
{
|
|
_RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
|
|
size_t num_newl = 0;
|
|
size_t wpos_at_first_newl = npos;
|
|
while(proc.has_more_chars(len))
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
|
|
switch(curr)
|
|
{
|
|
case '\n':
|
|
{
|
|
_c4dbgfbf("newline. sofar={}", num_newl);
|
|
// NOTE: vs2022-32bit-release builds were giving wrong
|
|
// results in this block, if it was written as either
|
|
// as a switch(num_newl) or its equivalent if-form.
|
|
//
|
|
// For this reason, we're using a dedicated function
|
|
// (**_compress), which seems to work around the issue.
|
|
//
|
|
// The manifested problem was that somewhere between the
|
|
// assignment to curr and this point, proc.wpos (the
|
|
// write-position of the processor) jumped to npos, which
|
|
// made the write wrap-around! To make things worse,
|
|
// enabling prints via _c4dbgpf() and _c4dbgfbf() made the
|
|
// problem go away!
|
|
//
|
|
// The only way to make the problem appear with prints
|
|
// enabled was by disabling all prints in this function
|
|
// (including in the block which was moved to the compress
|
|
// function) and then selectively enabling only some of
|
|
// those prints.
|
|
//
|
|
// This may be due to some bug in the cl-x86 optimizer; or
|
|
// it may be triggered by some UB which may be
|
|
// inadvertedly present in this function or in the filter
|
|
// processor. This is despite our best efforts to weed out
|
|
// any such UB problem: neither clang-tidy nor none of the
|
|
// sanitizers, or gcc's -fanalyzer pointed to any problems
|
|
// in this code.
|
|
//
|
|
// In the end, moving this block to a separate function
|
|
// was the only way to bury the problem. But it may
|
|
// resurface again, as The Undead, rising to from the
|
|
// grave to haunt us with his terrible presence.
|
|
//
|
|
// We may have to revisit this. With a stake, and lots of
|
|
// garlic.
|
|
wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
|
|
_filter_block_indentation(proc, indentation);
|
|
break;
|
|
}
|
|
case ' ':
|
|
case '\t':
|
|
{
|
|
size_t first = proc.rem().first_not_of(" \t");
|
|
_c4dbgfbf("space. first={}", first);
|
|
if(first == npos)
|
|
first = proc.rem().len;
|
|
_c4dbgfbf("... indentation increased to {}", first);
|
|
if(num_newl)
|
|
{
|
|
_c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
|
|
proc.set_at(wpos_at_first_newl, '\n');
|
|
}
|
|
if(num_newl > 1u)
|
|
{
|
|
_c4dbgfbf("... add missing newline", wpos_at_first_newl);
|
|
proc.set('\n');
|
|
}
|
|
_filter_block_folded_indented_block(proc, indentation, len, first);
|
|
num_newl = 0;
|
|
wpos_at_first_newl = npos;
|
|
break;
|
|
}
|
|
case '\r':
|
|
proc.skip();
|
|
break;
|
|
default:
|
|
_c4dbgfbf("not space, not newline. stop.", 0);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
|
|
{
|
|
_RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos));
|
|
if(curr_indentation)
|
|
proc.copy(curr_indentation);
|
|
while(proc.has_more_chars(len))
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
|
|
switch(curr)
|
|
{
|
|
case '\n':
|
|
{
|
|
proc.copy();
|
|
_filter_block_indentation(proc, indentation);
|
|
csubstr rem = proc.rem();
|
|
const size_t first = rem.first_not_of(' ');
|
|
_c4dbgfbf("newline. firstns={}", first);
|
|
if(first == 0)
|
|
{
|
|
const char c = rem[first];
|
|
_c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
|
|
if(c == '\n' || c == '\r')
|
|
{
|
|
;
|
|
}
|
|
else
|
|
{
|
|
_c4dbgfbf("done with indented block", first);
|
|
goto endloop;
|
|
}
|
|
}
|
|
else if(first != npos)
|
|
{
|
|
proc.copy(first);
|
|
_c4dbgfbf("copy all {} spaces", first);
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
case '\r':
|
|
proc.skip();
|
|
break;
|
|
default:
|
|
proc.copy();
|
|
break;
|
|
}
|
|
}
|
|
endloop:
|
|
return;
|
|
}
|
|
|
|
|
|
template<class EventHandler>
|
|
template<class FilterProcessor>
|
|
auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
|
|
{
|
|
_c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
|
|
|
|
size_t contents_len = _handle_all_whitespace(proc, chomp);
|
|
if(!contents_len)
|
|
return proc.result();
|
|
|
|
contents_len = _extend_to_chomp(proc, contents_len);
|
|
|
|
_c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
|
|
|
|
_filter_block_folded_newlines_leading(proc, indentation, contents_len);
|
|
|
|
// now filter the bulk
|
|
while(proc.has_more_chars(/*maxpos*/contents_len))
|
|
{
|
|
const char curr = proc.curr();
|
|
_c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
|
|
switch(curr)
|
|
{
|
|
case '\n':
|
|
{
|
|
_c4dbgfbf("found newline", curr);
|
|
_filter_block_folded_newlines(proc, indentation, contents_len);
|
|
break;
|
|
}
|
|
case '\r':
|
|
proc.skip();
|
|
break;
|
|
default:
|
|
proc.copy();
|
|
break;
|
|
}
|
|
}
|
|
|
|
_c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
|
|
|
|
_filter_chomp(proc, chomp, indentation);
|
|
|
|
_c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
|
|
|
|
return proc.result();
|
|
}
|
|
|
|
#undef _c4dbgfbf
|
|
|
|
template<class EventHandler>
|
|
FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
|
|
{
|
|
FilterProcessorSrcDst proc(scalar, dst);
|
|
return _filter_block_folded(proc, indentation, chomp);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
|
|
{
|
|
FilterProcessorInplaceEndExtending proc(scalar, cap);
|
|
return _filter_block_folded(proc, indentation, chomp);
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
|
|
{
|
|
_c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
|
|
FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
|
|
_c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
|
|
return r.get();
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
|
|
{
|
|
_c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
|
|
FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
|
|
_RYML_CB_ASSERT(this->callbacks(), r.valid());
|
|
_c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
|
|
return r.get();
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
|
|
{
|
|
_c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
|
|
FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
|
|
if(C4_LIKELY(r.valid()))
|
|
{
|
|
_c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
|
|
return r.get();
|
|
}
|
|
else
|
|
{
|
|
const size_t len = r.required_len();
|
|
_c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
|
|
substr dst = m_evt_handler->alloc_arena(len, &s);
|
|
_c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
|
|
_RYML_CB_ASSERT(this->callbacks(), dst.len == len);
|
|
FilterResult rsd = this->filter_scalar_dquoted(s, dst);
|
|
_c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
|
|
_RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller!
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
|
|
_c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
|
|
return rsd.get();
|
|
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
|
|
{
|
|
_c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
|
|
FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
|
|
if(C4_LIKELY(r.valid()))
|
|
{
|
|
_c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
|
|
return r.get();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
|
|
substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
|
|
FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
|
|
_c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
|
|
return rsd.get();
|
|
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
|
|
{
|
|
_c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
|
|
FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
|
|
if(C4_LIKELY(r.valid()))
|
|
{
|
|
_c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
|
|
return r.get();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
|
|
substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
|
|
FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
|
|
_c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
|
|
return rsd.get();
|
|
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
|
|
{
|
|
if(sc.needs_filter)
|
|
{
|
|
if(m_options.scalar_filtering())
|
|
{
|
|
return _filter_scalar_plain(sc.scalar, indentation);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("plain scalar left unfiltered");
|
|
m_evt_handler->mark_key_scalar_unfiltered();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("plain scalar doesn't need filtering");
|
|
}
|
|
return sc.scalar;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
|
|
{
|
|
if(sc.needs_filter)
|
|
{
|
|
if(m_options.scalar_filtering())
|
|
{
|
|
return _filter_scalar_plain(sc.scalar, indentation);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("plain scalar left unfiltered");
|
|
m_evt_handler->mark_val_scalar_unfiltered();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("plain scalar doesn't need filtering");
|
|
}
|
|
return sc.scalar;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
|
|
{
|
|
if(sc.needs_filter)
|
|
{
|
|
if(m_options.scalar_filtering())
|
|
{
|
|
return _filter_scalar_squot(sc.scalar);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("squo key scalar left unfiltered");
|
|
m_evt_handler->mark_key_scalar_unfiltered();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("squo key scalar doesn't need filtering");
|
|
}
|
|
return sc.scalar;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
|
|
{
|
|
if(sc.needs_filter)
|
|
{
|
|
if(m_options.scalar_filtering())
|
|
{
|
|
return _filter_scalar_squot(sc.scalar);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("squo val scalar left unfiltered");
|
|
m_evt_handler->mark_val_scalar_unfiltered();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("squo val scalar doesn't need filtering");
|
|
}
|
|
return sc.scalar;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
|
|
{
|
|
if(sc.needs_filter)
|
|
{
|
|
if(m_options.scalar_filtering())
|
|
{
|
|
return _filter_scalar_dquot(sc.scalar);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("dquo scalar left unfiltered");
|
|
m_evt_handler->mark_key_scalar_unfiltered();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("dquo scalar doesn't need filtering");
|
|
}
|
|
return sc.scalar;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
|
|
{
|
|
if(sc.needs_filter)
|
|
{
|
|
if(m_options.scalar_filtering())
|
|
{
|
|
return _filter_scalar_dquot(sc.scalar);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("dquo scalar left unfiltered");
|
|
m_evt_handler->mark_val_scalar_unfiltered();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("dquo scalar doesn't need filtering");
|
|
}
|
|
return sc.scalar;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
|
|
{
|
|
if(m_options.scalar_filtering())
|
|
{
|
|
return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("literal scalar left unfiltered");
|
|
m_evt_handler->mark_key_scalar_unfiltered();
|
|
}
|
|
return sb.scalar;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
|
|
{
|
|
if(m_options.scalar_filtering())
|
|
{
|
|
return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("literal scalar left unfiltered");
|
|
m_evt_handler->mark_val_scalar_unfiltered();
|
|
}
|
|
return sb.scalar;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
|
|
{
|
|
if(m_options.scalar_filtering())
|
|
{
|
|
return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("folded scalar left unfiltered");
|
|
m_evt_handler->mark_key_scalar_unfiltered();
|
|
}
|
|
return sb.scalar;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
|
|
{
|
|
if(m_options.scalar_filtering())
|
|
{
|
|
return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("folded scalar left unfiltered");
|
|
m_evt_handler->mark_val_scalar_unfiltered();
|
|
}
|
|
return sb.scalar;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
|
|
#ifdef RYML_DBG // !!! <----------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::add_flags(ParserFlag_t on, ParserState * s)
|
|
{
|
|
char buf1_[64], buf2_[64], buf3_[64];
|
|
csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
|
|
csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
|
|
csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
|
|
_c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
|
|
s->flags |= on;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s)
|
|
{
|
|
char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
|
|
csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
|
|
csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
|
|
csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
|
|
csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
|
|
_c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
|
|
s->flags |= on;
|
|
s->flags &= ~off;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off, ParserState * s)
|
|
{
|
|
char buf1_[64], buf2_[64], buf3_[64];
|
|
csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
|
|
csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
|
|
csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
|
|
_c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
|
|
s->flags &= ~off;
|
|
}
|
|
|
|
/** Write the names of the parser flags set in @p flags into @p buf,
 * separated by '|'. When no flag matches, "0" is written (if the
 * buffer is not empty).
 *
 * Characters are only written while they fit in @p buf, but the
 * position keeps advancing; a final check fails when the full text
 * would not have fit.
 *
 * @return the written prefix of @p buf */
inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
{
    size_t pos = 0;
    bool need_separator = false; // true once a flag name was emitted

    // append the name of flag fl when all of its bits are set in flags
    #define _prflag(fl)                                         \
    do {                                                        \
        if((flags & fl) == (fl))                                \
        {                                                       \
            if(need_separator)                                  \
            {                                                   \
                if(pos + 1 < buf.len)                           \
                    buf[pos] = '|';                             \
                ++pos;                                          \
            }                                                   \
            const csubstr fltxt = #fl;                          \
            if(pos + fltxt.len <= buf.len)                      \
                memcpy(buf.str + pos, fltxt.str, fltxt.len);    \
            pos += fltxt.len;                                   \
            need_separator = true;                              \
        }                                                       \
    } while(0)

    _prflag(RTOP);
    _prflag(RUNK);
    _prflag(RMAP);
    _prflag(RSEQ);
    _prflag(FLOW);
    _prflag(BLCK);
    _prflag(QMRK);
    _prflag(RKEY);
    _prflag(RVAL);
    _prflag(RKCL);
    _prflag(RNXT);
    _prflag(SSCL);
    _prflag(QSCL);
    _prflag(RSET);
    _prflag(RDOC);
    _prflag(NDOC);
    _prflag(USTY);
    _prflag(RSEQIMAP);

    #undef _prflag

    // no flag matched
    if(pos == 0 && buf.len > 0)
        buf[pos++] = '0';

    RYML_CHECK(pos <= buf.len);

    return buf.first(pos);
}
|
|
|
|
#endif // RYML_DBG !!! <----------------------------------
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
csubstr ParseEngine<EventHandler>::location_contents(Location const& loc) const
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
|
|
return m_buf.sub(loc.offset);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
Location ParseEngine<EventHandler>::location(ConstNodeRef node) const
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable());
|
|
return location(*node.tree(), node.id());
|
|
}
|
|
|
|
template<class EventHandler>
|
|
Location ParseEngine<EventHandler>::location(Tree const& tree, id_type node) const
|
|
{
|
|
// try hard to avoid getting the location from a null string.
|
|
Location loc;
|
|
if(_location_from_node(tree, node, &loc, 0))
|
|
return loc;
|
|
return val_location(m_buf.str);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const
|
|
{
|
|
if(tree.has_key(node))
|
|
{
|
|
csubstr k = tree.key(node);
|
|
if(C4_LIKELY(k.str != nullptr))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
|
|
*loc = val_location(k.str);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
if(tree.has_val(node))
|
|
{
|
|
csubstr v = tree.val(node);
|
|
if(C4_LIKELY(v.str != nullptr))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
|
|
*loc = val_location(v.str);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
if(tree.is_container(node))
|
|
{
|
|
if(_location_from_cont(tree, node, loc))
|
|
return true;
|
|
}
|
|
|
|
if(tree.type(node) != NOTYPE && level == 0)
|
|
{
|
|
// try the prev sibling
|
|
{
|
|
const id_type prev = tree.prev_sibling(node);
|
|
if(prev != NONE)
|
|
{
|
|
if(_location_from_node(tree, prev, loc, level+1))
|
|
return true;
|
|
}
|
|
}
|
|
// try the next sibling
|
|
{
|
|
const id_type next = tree.next_sibling(node);
|
|
if(next != NONE)
|
|
{
|
|
if(_location_from_node(tree, next, loc, level+1))
|
|
return true;
|
|
}
|
|
}
|
|
// try the parent
|
|
{
|
|
const id_type parent = tree.parent(node);
|
|
if(parent != NONE)
|
|
{
|
|
if(_location_from_node(tree, parent, loc, level+1))
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
|
|
if(!tree.is_stream(node))
|
|
{
|
|
const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container
|
|
if(tree.has_children(node))
|
|
{
|
|
id_type child = tree.first_child(node);
|
|
if(tree.has_key(child))
|
|
{
|
|
// when a map starts, the container was set after the key
|
|
csubstr k = tree.key(child);
|
|
if(k.str && node_start > k.str)
|
|
node_start = k.str;
|
|
}
|
|
}
|
|
*loc = val_location(node_start);
|
|
return true;
|
|
}
|
|
else // it's a stream
|
|
{
|
|
*loc = val_location(m_buf.str); // just return the front of the buffer
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
template<class EventHandler>
|
|
Location ParseEngine<EventHandler>::val_location(const char *val) const
|
|
{
|
|
if(C4_UNLIKELY(val == nullptr))
|
|
return {m_file, 0, 0, 0};
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
|
|
// NOTE: if any of these checks fails, the parser needs to be
|
|
// instantiated with locations enabled.
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
|
|
// NOTE: the pointer needs to belong to the buffer that was used to parse.
|
|
csubstr src = m_buf;
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
|
|
// ok. search the first stored newline after the given ptr
|
|
using lineptr_type = size_t const* C4_RESTRICT;
|
|
lineptr_type lineptr = nullptr;
|
|
size_t offset = (size_t)(val - src.begin());
|
|
if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
|
|
{
|
|
// just do a linear search if the size is small.
|
|
for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
|
|
{
|
|
if(*curr > offset)
|
|
{
|
|
lineptr = curr;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// do a bisection search if the size is not small.
|
|
//
|
|
// We could use std::lower_bound but this is simple enough and
|
|
// spares the costly include of <algorithm>.
|
|
size_t count = m_newline_offsets_size;
|
|
size_t step;
|
|
lineptr_type it;
|
|
lineptr = m_newline_offsets;
|
|
while(count)
|
|
{
|
|
step = count >> 1;
|
|
it = lineptr + step;
|
|
if(*it < offset)
|
|
{
|
|
lineptr = ++it;
|
|
count -= step + 1;
|
|
}
|
|
else
|
|
{
|
|
count = step;
|
|
}
|
|
}
|
|
}
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
|
|
Location loc;
|
|
loc.name = m_file;
|
|
loc.offset = offset;
|
|
loc.line = (size_t)(lineptr - m_newline_offsets);
|
|
if(lineptr > m_newline_offsets)
|
|
loc.col = (offset - *(lineptr-1) - 1u);
|
|
else
|
|
loc.col = offset;
|
|
return loc;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_prepare_locations()
|
|
{
|
|
m_newline_offsets_buf = m_buf;
|
|
size_t numnewlines = 1u + m_buf.count('\n');
|
|
_resize_locations(numnewlines);
|
|
m_newline_offsets_size = 0;
|
|
for(size_t i = 0; i < m_buf.len; i++)
|
|
if(m_buf[i] == '\n')
|
|
m_newline_offsets[m_newline_offsets_size++] = i;
|
|
m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
|
|
{
|
|
if(numnewlines > m_newline_offsets_capacity)
|
|
{
|
|
if(m_newline_offsets)
|
|
_RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
|
|
m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
|
|
m_newline_offsets_capacity = numnewlines;
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_locations_dirty() const
|
|
{
|
|
return !m_newline_offsets_size;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
|
|
{
|
|
// don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
|
|
if(m_evt_handler->m_curr->line_contents.rem.len > 0)
|
|
{
|
|
if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
|
|
{
|
|
_c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
|
|
_skipchars(" \t");
|
|
}
|
|
// comments
|
|
if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
|
|
{
|
|
_c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_colon()
|
|
{
|
|
size_t curr = m_evt_handler->m_curr->pos.line;
|
|
if(m_prev_colon != npos)
|
|
{
|
|
if(curr == m_prev_colon)
|
|
_c4err("two colons on same line");
|
|
}
|
|
m_prev_colon = curr;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
|
|
{
|
|
_c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
|
|
if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations))) // NOLINT(bugprone-sizeof-expression)
|
|
_c4err("too many annotations");
|
|
dst->annotations[dst->num_entries].str = str;
|
|
dst->annotations[dst->num_entries].indentation = indentation;
|
|
dst->annotations[dst->num_entries].line = line;
|
|
++dst->num_entries;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
|
|
{
|
|
dst->num_entries = 0;
|
|
}
|
|
|
|
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
/** When exactly one anchor and/or one tag is pending, skip the leading
 * spaces of the current line up to the largest of the current reference
 * indentation and the indentation of the pending annotation(s).
 *
 * @return true if annotations were pending and the skip was attempted,
 *         false otherwise.
 *
 * Only compiled when RYML_NO_COVERAGE__TO_BE_DELETED is defined. */
template<class EventHandler>
bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
{
    if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
    {
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
        // At most one entry is pending in each list, so only slot 0 can be
        // meaningful, and only when the respective list is not empty.
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !m_pending_anchors.num_entries || m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !m_pending_tags.num_entries || m_pending_tags.annotations[0].line < m_evt_handler->m_curr->pos.line);
        size_t to_skip = m_evt_handler->m_curr->indref;
        if(m_pending_anchors.num_entries)
            to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
        if(m_pending_tags.num_entries)
            to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
        _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip);
        _maybe_skipchars_up_to(' ', to_skip);
        return true;
    }
    return false;
}
#endif
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_annotations_require_key_container() const
|
|
{
|
|
return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_check_tag(csubstr tag)
|
|
{
|
|
if(!tag.begins_with("!<"))
|
|
{
|
|
if(C4_UNLIKELY(tag.first_of("[]{},") != npos))
|
|
_RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
|
|
}
|
|
else
|
|
{
|
|
if(C4_UNLIKELY(!tag.ends_with('>')))
|
|
_RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "malformed tag", m_evt_handler->m_curr->pos);
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
|
|
{
|
|
_c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
|
|
if(m_pending_tags.num_entries)
|
|
{
|
|
_c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
|
|
if(C4_LIKELY(m_pending_tags.num_entries == 1))
|
|
{
|
|
_check_tag(m_pending_tags.annotations[0].str);
|
|
m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
|
|
_clear_annotations(&m_pending_tags);
|
|
}
|
|
else
|
|
{
|
|
_c4err("too many tags");
|
|
}
|
|
}
|
|
if(m_pending_anchors.num_entries)
|
|
{
|
|
_c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
|
|
if(C4_LIKELY(m_pending_anchors.num_entries == 1))
|
|
{
|
|
m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
|
|
_clear_annotations(&m_pending_anchors);
|
|
}
|
|
else
|
|
{
|
|
_c4err("too many anchors");
|
|
}
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
|
|
{
|
|
_c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
|
|
if(m_pending_tags.num_entries)
|
|
{
|
|
_c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
|
|
if(C4_LIKELY(m_pending_tags.num_entries == 1))
|
|
{
|
|
_check_tag(m_pending_tags.annotations[0].str);
|
|
m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
|
|
_clear_annotations(&m_pending_tags);
|
|
}
|
|
else
|
|
{
|
|
_c4err("too many tags");
|
|
}
|
|
}
|
|
if(m_pending_anchors.num_entries)
|
|
{
|
|
_c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
|
|
if(C4_LIKELY(m_pending_anchors.num_entries == 1))
|
|
{
|
|
m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
|
|
_clear_annotations(&m_pending_anchors);
|
|
}
|
|
else
|
|
{
|
|
_c4err("too many anchors");
|
|
}
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
|
|
{
|
|
_c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
|
|
if(m_pending_tags.num_entries == 2)
|
|
{
|
|
_c4dbgp("2 tags, setting entry 0");
|
|
_check_tag(m_pending_tags.annotations[0].str);
|
|
m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
|
|
}
|
|
else if(m_pending_tags.num_entries == 1)
|
|
{
|
|
_c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
|
|
if(m_pending_tags.annotations[0].line < current_line)
|
|
{
|
|
_c4dbgp("...tag is for the map. setting it.");
|
|
_check_tag(m_pending_tags.annotations[0].str);
|
|
m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
|
|
_clear_annotations(&m_pending_tags);
|
|
}
|
|
}
|
|
//
|
|
if(m_pending_anchors.num_entries == 2)
|
|
{
|
|
_c4dbgp("2 anchors, setting entry 0");
|
|
m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
|
|
}
|
|
else if(m_pending_anchors.num_entries == 1)
|
|
{
|
|
_c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
|
|
if(m_pending_anchors.annotations[0].line < current_line)
|
|
{
|
|
_c4dbgp("...anchor is for the map. setting it.");
|
|
m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
|
|
_clear_annotations(&m_pending_anchors);
|
|
}
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
|
|
{
|
|
_c4dbgp("annotations_before_start_mapblck_as_key");
|
|
if(m_pending_tags.num_entries == 2)
|
|
{
|
|
_check_tag(m_pending_tags.annotations[0].str);
|
|
m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
|
|
}
|
|
if(m_pending_anchors.num_entries == 2)
|
|
{
|
|
m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
|
|
{
|
|
_c4dbgp("annotations_after_start_mapblck");
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
|
|
if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
|
|
{
|
|
key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
|
|
switch(m_pending_tags.num_entries)
|
|
{
|
|
case 1u:
|
|
_check_tag(m_pending_tags.annotations[0].str);
|
|
m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
|
|
_clear_annotations(&m_pending_tags);
|
|
break;
|
|
case 2u:
|
|
_check_tag(m_pending_tags.annotations[1].str);
|
|
m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
|
|
_clear_annotations(&m_pending_tags);
|
|
break;
|
|
}
|
|
switch(m_pending_anchors.num_entries)
|
|
{
|
|
case 1u:
|
|
m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
|
|
_clear_annotations(&m_pending_anchors);
|
|
break;
|
|
case 2u:
|
|
m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
|
|
_clear_annotations(&m_pending_anchors);
|
|
break;
|
|
}
|
|
}
|
|
_set_indentation(key_indentation);
|
|
}
|
|
|
|
template<class EventHandler>
|
|
size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
|
|
// select the left-most annotation on the max line
|
|
auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
|
|
for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
|
|
{
|
|
auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
|
|
if(ann.line > curr->line)
|
|
curr = &ann;
|
|
else if(ann.indentation < curr->indentation)
|
|
curr = &ann;
|
|
}
|
|
for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
|
|
{
|
|
auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
|
|
if(ann.line > curr->line)
|
|
curr = &ann;
|
|
else if(ann.indentation < curr->indentation)
|
|
curr = &ann;
|
|
}
|
|
return curr->line < val_line ? val_indentation : curr->indentation;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
|
|
const size_t pos = rem.find('#');
|
|
_c4dbgpf("handle_directive: pos={} rem={}", pos, rem);
|
|
if(pos == npos) // no comments
|
|
{
|
|
m_evt_handler->add_directive(rem);
|
|
_line_progressed(rem.len);
|
|
}
|
|
else
|
|
{
|
|
csubstr to_comment = rem.first(pos);
|
|
csubstr trimmed = to_comment.trimr(" \t");
|
|
m_evt_handler->add_directive(trimmed);
|
|
_line_progressed(pos);
|
|
_skip_comment();
|
|
}
|
|
}
|
|
|
|
template<class EventHandler>
|
|
bool ParseEngine<EventHandler>::_handle_bom()
|
|
{
|
|
const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(rem.len)
|
|
{
|
|
const csubstr rest = rem.sub(1);
|
|
// https://yaml.org/spec/1.2.2/#52-character-encodings
|
|
#define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
|
|
if(rem.begins_with({"\x00\x00\xfe\xff", 4}) || (rem.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
|
|
{
|
|
_c4dbgp("byte order mark: UTF32BE");
|
|
_handle_bom(UTF32BE);
|
|
_line_progressed(4);
|
|
return true;
|
|
}
|
|
else if(rem.begins_with("\xff\xfe\x00\x00") || (rest.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
|
|
{
|
|
_c4dbgp("byte order mark: UTF32LE");
|
|
_handle_bom(UTF32LE);
|
|
_line_progressed(4);
|
|
return true;
|
|
}
|
|
else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
|
|
{
|
|
_c4dbgp("byte order mark: UTF16BE");
|
|
_handle_bom(UTF16BE);
|
|
_line_progressed(2);
|
|
return true;
|
|
}
|
|
else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
|
|
{
|
|
_c4dbgp("byte order mark: UTF16LE");
|
|
_handle_bom(UTF16LE);
|
|
_line_progressed(2);
|
|
return true;
|
|
}
|
|
else if(rem.begins_with("\xef\xbb\xbf"))
|
|
{
|
|
_c4dbgp("byte order mark: UTF8");
|
|
_handle_bom(UTF8);
|
|
_line_progressed(3);
|
|
return true;
|
|
}
|
|
#undef _rymlisascii
|
|
}
|
|
return false;
|
|
}
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
|
|
{
|
|
if(m_encoding == NOBOM)
|
|
{
|
|
const bool is_beginning_of_file = m_evt_handler->m_curr->line_contents.rem.str == m_buf.str;
|
|
if(enc == UTF8 || is_beginning_of_file)
|
|
m_encoding = enc;
|
|
else
|
|
_c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
|
|
}
|
|
else if(enc != m_encoding)
|
|
{
|
|
_c4err("byte order mark can only be set once");
|
|
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_seq_json()
|
|
{
|
|
seqjson_start:
|
|
_c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
|
|
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
|
|
|
|
_handle_flow_skip_whitespace();
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto seqjson_again;
|
|
|
|
if(has_any(RVAL))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapjson[RVAL]: '{}'", first);
|
|
switch(first)
|
|
{
|
|
case '"':
|
|
{
|
|
_c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
|
|
ScannedScalar sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
|
|
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
break;
|
|
}
|
|
case '[':
|
|
{
|
|
_c4dbgp("seqjson[RVAL]: start child seqjson");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_seq_val_flow();
|
|
addrem_flags(RVAL, RNXT);
|
|
_line_progressed(1);
|
|
break;
|
|
}
|
|
case '{':
|
|
{
|
|
_c4dbgp("seqjson[RVAL]: start child mapjson");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_map_val_flow();
|
|
addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
|
|
_line_progressed(1);
|
|
goto seqjson_finish;
|
|
}
|
|
case ']': // this happens on a trailing comma like ", ]"
|
|
{
|
|
_c4dbgp("seqjson[RVAL]: end!");
|
|
rem_flags(RSEQ);
|
|
m_evt_handler->end_seq();
|
|
_line_progressed(1);
|
|
if(!has_all(RSEQ|FLOW))
|
|
goto seqjson_finish;
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
ScannedScalar sc;
|
|
if(_scan_scalar_seq_json(&sc))
|
|
{
|
|
_c4dbgp("seqjson[RVAL]: it's a plain scalar.");
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
|
|
m_evt_handler->set_val_scalar_plain(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else // RNXT
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapjson[RNXT]: '{}'", first);
|
|
switch(first)
|
|
{
|
|
case ',':
|
|
{
|
|
_c4dbgp("seqjson[RNXT]: expect next val");
|
|
addrem_flags(RVAL, RNXT);
|
|
m_evt_handler->add_sibling();
|
|
_line_progressed(1);
|
|
break;
|
|
}
|
|
case ']':
|
|
{
|
|
_c4dbgp("seqjson[RNXT]: end!");
|
|
m_evt_handler->end_seq();
|
|
_line_progressed(1);
|
|
goto seqjson_finish;
|
|
}
|
|
default:
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
|
|
seqjson_again:
|
|
_c4dbgt("seqjson: go again", 0);
|
|
if(_finished_line())
|
|
{
|
|
if(C4_LIKELY(!_finished_file()))
|
|
{
|
|
_line_ended();
|
|
_scan_line();
|
|
_c4dbgnextline();
|
|
}
|
|
else
|
|
{
|
|
_c4err("missing terminating ]");
|
|
}
|
|
}
|
|
goto seqjson_start;
|
|
|
|
seqjson_finish:
|
|
_c4dbgp("seqjson: finish");
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_map_json()
|
|
{
|
|
mapjson_start:
|
|
_c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
|
|
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)));
|
|
|
|
_handle_flow_skip_whitespace();
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapjson_again;
|
|
|
|
if(has_any(RKEY))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapjson[RKEY]: '{}'", first);
|
|
switch(first)
|
|
{
|
|
case '"':
|
|
{
|
|
_c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
|
|
ScannedScalar sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RKCL, RKEY);
|
|
break;
|
|
}
|
|
case '}': // this happens on a trailing comma like ", }"
|
|
{
|
|
_c4dbgp("mapjson[RKEY]: end!");
|
|
m_evt_handler->end_map();
|
|
_line_progressed(1);
|
|
goto mapjson_finish;
|
|
}
|
|
default:
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(RVAL))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapjson[RVAL]: '{}'", first);
|
|
switch(first)
|
|
{
|
|
case '"':
|
|
{
|
|
_c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
|
|
ScannedScalar sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
|
|
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
break;
|
|
}
|
|
case '[':
|
|
{
|
|
_c4dbgp("mapjson[RVAL]: start val seqjson");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_seq_val_flow();
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
addrem_flags(RSEQ|RVAL, RMAP|RNXT);
|
|
_line_progressed(1);
|
|
goto mapjson_finish;
|
|
}
|
|
case '{':
|
|
{
|
|
_c4dbgp("mapjson[RVAL]: start val mapjson");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_map_val_flow();
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
addrem_flags(RKEY, RNXT);
|
|
_line_progressed(1);
|
|
// keep going in this function
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
ScannedScalar sc;
|
|
if(_scan_scalar_map_json(&sc))
|
|
{
|
|
_c4dbgp("mapjson[RVAL]: plain scalar.");
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
|
|
m_evt_handler->set_val_scalar_plain(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else if(has_any(RKCL)) // read the key colon
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapjson[RKCL]: '{}'", first);
|
|
if(first == ':')
|
|
{
|
|
_c4dbgp("mapjson[RKCL]: found the colon");
|
|
addrem_flags(RVAL, RKCL);
|
|
_line_progressed(1);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(RNXT))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
_c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
|
|
if(rem.begins_with(','))
|
|
{
|
|
_c4dbgp("mapjson[RNXT]: expect next keyval");
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(RKEY, RNXT);
|
|
_line_progressed(1);
|
|
}
|
|
else if(rem.begins_with('}'))
|
|
{
|
|
_c4dbgp("mapjson[RNXT]: end!");
|
|
m_evt_handler->end_map();
|
|
_line_progressed(1);
|
|
goto mapjson_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
|
|
mapjson_again:
|
|
_c4dbgt("mapjson: go again", 0);
|
|
if(_finished_line())
|
|
{
|
|
if(C4_LIKELY(!_finished_file()))
|
|
{
|
|
_line_ended();
|
|
_scan_line();
|
|
_c4dbgnextline();
|
|
}
|
|
else
|
|
{
|
|
_c4err("missing terminating }");
|
|
}
|
|
}
|
|
goto mapjson_start;
|
|
|
|
mapjson_finish:
|
|
_c4dbgp("mapjson: finish");
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_seq_imap()
|
|
{
|
|
seqimap_start:
|
|
_c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
|
|
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
|
|
|
|
_handle_flow_skip_whitespace();
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto seqimap_again;
|
|
|
|
if(has_any(RVAL))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
|
|
m_evt_handler->set_val_scalar_squoted(maybe_filtered);
|
|
m_evt_handler->end_map();
|
|
goto seqimap_finish;
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
|
|
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
|
|
m_evt_handler->end_map();
|
|
goto seqimap_finish;
|
|
}
|
|
// block scalars (ie | and >) cannot appear in flow containers
|
|
else if(_scan_scalar_plain_map_flow(&sc))
|
|
{
|
|
_c4dbgp("seqimap[RVAL]: it's a scalar.");
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
|
|
m_evt_handler->set_val_scalar_plain(maybe_filtered);
|
|
m_evt_handler->end_map();
|
|
goto seqimap_finish;
|
|
}
|
|
else if(first == '[')
|
|
{
|
|
_c4dbgp("seqimap[RVAL]: start child seqflow");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_seq_val_flow();
|
|
addrem_flags(RVAL, RNXT|RSEQIMAP);
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
_line_progressed(1);
|
|
goto seqimap_finish;
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
_c4dbgp("seqimap[RVAL]: start child mapflow");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_map_val_flow();
|
|
addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
_line_progressed(1);
|
|
goto seqimap_finish;
|
|
}
|
|
else if(first == ',' || first == ']')
|
|
{
|
|
_c4dbgp("seqimap[RVAL]: finish without val.");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->end_map();
|
|
goto seqimap_finish;
|
|
}
|
|
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgp("seqimap[RVAL]: anchor!");
|
|
m_evt_handler->set_val_anchor(anchor);
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_seq();
|
|
_c4dbgp("seqimap[RVAL]: ref!");
|
|
m_evt_handler->set_val_ref(ref);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(RNXT))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
|
|
if(first == ',' || first == ']')
|
|
{
|
|
// we may get here because a map or a seq started and we
|
|
// return later
|
|
_c4dbgp("seqimap: done");
|
|
m_evt_handler->end_map();
|
|
goto seqimap_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(QMRK))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
goto seqimap_again;
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
goto seqimap_again;
|
|
}
|
|
// block scalars (ie | and >) cannot appear in flow containers
|
|
else if(_scan_scalar_plain_map_flow(&sc))
|
|
{
|
|
_c4dbgp("seqimap[QMRK]: it's a scalar.");
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
goto seqimap_again;
|
|
}
|
|
else if(first == '[')
|
|
{
|
|
_c4dbgp("seqimap[QMRK]: start child seqflow");
|
|
addrem_flags(RKCL, QMRK);
|
|
m_evt_handler->begin_seq_key_flow();
|
|
addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
_line_progressed(1);
|
|
goto seqimap_finish;
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
_c4dbgp("seqimap[QMRK]: start child mapflow");
|
|
addrem_flags(RKCL, QMRK);
|
|
m_evt_handler->begin_map_key_flow();
|
|
addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
_line_progressed(1);
|
|
goto seqimap_finish;
|
|
}
|
|
else if(first == ',' || first == ']')
|
|
{
|
|
_c4dbgp("seqimap[QMRK]: finish without key.");
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->end_map();
|
|
goto seqimap_finish;
|
|
}
|
|
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgp("seqimap[QMRK]: anchor!");
|
|
m_evt_handler->set_key_anchor(anchor);
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_seq();
|
|
_c4dbgp("seqimap[QMRK]: ref!");
|
|
m_evt_handler->set_key_ref(ref);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(RKCL))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKCL));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
|
|
if(first == ':')
|
|
{
|
|
_c4dbgp("seqimap[RKCL]: found ':'");
|
|
addrem_flags(RVAL, RKCL);
|
|
_line_progressed(1);
|
|
goto seqimap_again;
|
|
}
|
|
else if(first == ',' || first == ']')
|
|
{
|
|
_c4dbgp("seqimap[RKCL]: found ','. finish without val");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->end_map();
|
|
goto seqimap_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
|
|
seqimap_again:
|
|
_c4dbgt("seqimap: go again", 0);
|
|
if(_finished_line())
|
|
{
|
|
if(C4_LIKELY(!_finished_file()))
|
|
{
|
|
_line_ended();
|
|
_scan_line();
|
|
_c4dbgnextline();
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
goto seqimap_start;
|
|
|
|
seqimap_finish:
|
|
_c4dbgp("seqimap: finish");
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Handle the tokens of a flow sequence ("[a, b, ...]").
//
// The function is a small state machine driven by gotos:
//   seqflow_start  - check the state invariants, skip whitespace, and
//                    handle one token from the remainder of the line
//   seqflow_again  - when the current line is finished, load the next
//                    line (or fail with "missing terminating ]" at end
//                    of file) and jump back to seqflow_start
//   seqflow_finish - leave the function
//
// On entry the asserts require RSEQ and FLOW set, RKEY unset, exactly
// one of RVAL / RNXT set, and a valid indref. In RVAL a value token is
// read; in RNXT a separator (',' , ']' or ':') is read.
//
// The function leaves through seqflow_finish when the sequence ends
// (']') or when the state flags are switched away from RSEQ (child flow
// map, or an implicit map inside the sequence -- RSEQIMAP).
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_seq_flow()
|
|
{
|
|
seqflow_start:
|
|
_c4dbgpf("handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
|
|
|
|
// state invariants, re-checked on every pass through seqflow_start
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
|
|
// exactly one of RVAL / RNXT must be set
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos);
|
|
|
|
_handle_flow_skip_whitespace();
|
|
// don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
|
|
// nothing left on this line: go fetch the next one
if(!m_evt_handler->m_curr->line_contents.rem.len)
|
|
goto seqflow_again;
|
|
|
|
// RVAL: a value is expected; dispatch on the first remaining character
if(has_any(RVAL))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
|
|
m_evt_handler->set_val_scalar_squoted(maybe_filtered);
|
|
// value done: now expect a separator
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
|
|
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
// block scalars (ie | and >) cannot appear in flow containers
|
|
// the plain-scalar scan is attempted before the single-character
// tokens below; those are only reached when the scan returns false
else if(_scan_scalar_plain_seq_flow(&sc))
|
|
{
|
|
_c4dbgp("seqflow[RVAL]: it's a scalar.");
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
|
|
m_evt_handler->set_val_scalar_plain(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
// nested flow sequence: the flags are set back to RVAL after the
// begin event and there is no goto, so the loop in this function
// continues with the child sequence
else if(first == '[')
|
|
{
|
|
_c4dbgp("seqflow[RVAL]: start child seqflow");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_seq_val_flow();
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
addrem_flags(RVAL, RNXT);
|
|
_line_progressed(1);
|
|
}
|
|
// nested flow map: flags are switched from RSEQ to RMAP|RKEY and
// this function is left
else if(first == '{')
|
|
{
|
|
_c4dbgp("seqflow[RVAL]: start child mapflow");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_map_val_flow();
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
|
|
_line_progressed(1);
|
|
goto seqflow_finish;
|
|
}
|
|
else if(first == ']') // this happens on a trailing comma like ", ]"
|
|
{
|
|
_c4dbgp("seqflow[RVAL]: end!");
|
|
_line_progressed(1);
|
|
m_evt_handler->end_seq();
|
|
goto seqflow_finish;
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_seq();
|
|
_c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
|
|
m_evt_handler->set_val_ref(ref);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
// anchor: stays in RVAL, since the anchored value is still to come
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
m_evt_handler->set_val_anchor(anchor);
|
|
// NOTE(review): presumably this also consumes the ',' when it
// returns true -- confirm in _maybe_scan_following_comma().
// The anchored value is then empty, and the next sibling starts.
if(_maybe_scan_following_comma())
|
|
{
|
|
_c4dbgp("seqflow[RVAL]: empty scalar!");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
}
|
|
}
|
|
// tag: same handling as the anchor above
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
|
|
_check_tag(tag);
|
|
m_evt_handler->set_val_tag(tag);
|
|
if(_maybe_scan_following_comma())
|
|
{
|
|
_c4dbgp("seqflow[RVAL]: empty scalar!");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
}
|
|
}
|
|
// ':' where a value was expected: the element is a map with an
// empty key. Switch to RSEQIMAP|RVAL and leave this function.
else if(first == ':')
|
|
{
|
|
_c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_map_val_flow();
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
|
|
_line_progressed(1);
|
|
goto seqflow_finish;
|
|
}
|
|
// '?': the element is a map with an explicit key. Switch to
// RSEQIMAP|QMRK and leave this function.
else if(first == '?')
|
|
{
|
|
_c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_was_inside_qmrk = true;
|
|
m_evt_handler->begin_map_val_flow();
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto seqflow_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
// RNXT: a value was just read; only ',', ']' or ':' may follow
else // RNXT
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
|
|
if(first == ',')
|
|
{
|
|
_c4dbgp("seqflow[RNXT]: expect next val");
|
|
addrem_flags(RVAL, RNXT);
|
|
m_evt_handler->add_sibling();
|
|
_line_progressed(1);
|
|
}
|
|
else if(first == ']')
|
|
{
|
|
_c4dbgp("seqflow[RNXT]: end!");
|
|
m_evt_handler->end_seq();
|
|
_line_progressed(1);
|
|
goto seqflow_finish;
|
|
}
|
|
// ':' after a value: the value just read is reinterpreted by the
// handler as the first key of a new map; leave in RSEQIMAP|RVAL
else if(first == ':')
|
|
{
|
|
_c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
|
|
m_evt_handler->actually_val_is_first_key_of_new_map_flow();
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
_line_progressed(1);
|
|
addrem_flags(RSEQIMAP|RVAL, RNXT);
|
|
goto seqflow_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
|
|
// one token handled and the sequence is still open: advance to the next
// line if this one is exhausted, then handle the next token
seqflow_again:
|
|
_c4dbgt("seqflow: go again", 0);
|
|
if(_finished_line())
|
|
{
|
|
if(C4_LIKELY(!_finished_file()))
|
|
{
|
|
_line_ended();
|
|
_scan_line();
|
|
_c4dbgnextline();
|
|
}
|
|
else
|
|
{
|
|
// end of input reached while the sequence is still open
_c4err("missing terminating ]");
|
|
}
|
|
}
|
|
goto seqflow_start;
|
|
|
|
seqflow_finish:
|
|
_c4dbgp("seqflow: finish");
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_map_flow()
|
|
{
|
|
mapflow_start:
|
|
_c4dbgpf("handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
|
|
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
|
|
|
|
_handle_flow_skip_whitespace();
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapflow_again;
|
|
|
|
if(has_any(RKEY))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapflow[RKEY]: '{}'", first);
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
addrem_flags(RKCL, RKEY|QMRK);
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RKCL, RKEY|QMRK);
|
|
}
|
|
// block scalars (ie | and >) cannot appear in flow containers
|
|
else if(_scan_scalar_plain_map_flow(&sc))
|
|
{
|
|
_c4dbgp("mapflow[RKEY]: plain scalar");
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
addrem_flags(RKCL, RKEY|QMRK);
|
|
}
|
|
else if(first == '?')
|
|
{
|
|
_c4dbgp("mapflow[RKEY]: explicit key");
|
|
_line_progressed(1);
|
|
addrem_flags(QMRK, RKEY);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == ':')
|
|
{
|
|
_c4dbgp("mapflow[RKEY]: setting empty key");
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
addrem_flags(RVAL, RKEY|QMRK);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == ',')
|
|
{
|
|
_c4dbgp("mapflow[RKEY]: empty key+val!");
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
addrem_flags(RNXT, RKEY|QMRK);
|
|
// keep going in this function
|
|
}
|
|
else if(first == '}') // this happens on a trailing comma like ", }"
|
|
{
|
|
_c4dbgp("mapflow[RKEY]: end!");
|
|
m_evt_handler->end_map();
|
|
_line_progressed(1);
|
|
goto mapflow_finish;
|
|
}
|
|
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
m_evt_handler->set_key_anchor(anchor);
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_map();
|
|
_c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
|
|
m_evt_handler->set_key_ref(ref);
|
|
addrem_flags(RKCL, RKEY);
|
|
}
|
|
else if(first == '[')
|
|
{
|
|
// RYML's tree cannot store container keys, but that's
|
|
// handled inside the tree sink. Other sink types may be
|
|
// able to handle it.
|
|
_c4dbgp("mapflow[RKEY]: start child seqflow (!)");
|
|
addrem_flags(RKCL, RKEY);
|
|
m_evt_handler->begin_seq_key_flow();
|
|
addrem_flags(RSEQ|RVAL, RMAP|RKCL);
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
_line_progressed(1);
|
|
goto mapflow_finish;
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
// RYML's tree cannot store container keys, but that's
|
|
// handled inside the tree sink. Other sink types may be
|
|
// able to handle it.
|
|
_c4dbgp("mapflow[RKEY]: start child mapflow (!)");
|
|
addrem_flags(RKCL, RKEY);
|
|
m_evt_handler->begin_map_key_flow();
|
|
addrem_flags(RKEY, RVAL|RKCL);
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
_line_progressed(1);
|
|
// keep going in this function
|
|
}
|
|
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
|
|
_check_tag(tag);
|
|
m_evt_handler->set_key_tag(tag);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(RKCL)) // read the key colon
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapflow[RKCL]: '{}'", first);
|
|
if(first == ':')
|
|
{
|
|
_c4dbgp("mapflow[RKCL]: found the colon");
|
|
addrem_flags(RVAL, RKCL);
|
|
_line_progressed(1);
|
|
}
|
|
else if(first == '}')
|
|
{
|
|
_c4dbgp("mapflow[RKCL]: end with missing val!");
|
|
addrem_flags(RVAL, RKCL);
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->end_map();
|
|
_line_progressed(1);
|
|
goto mapflow_finish;
|
|
}
|
|
else if(first == ',')
|
|
{
|
|
_c4dbgp("mapflow[RKCL]: got comma. val is missing");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(RKEY, RKCL);
|
|
_line_progressed(1);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(RVAL))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapflow[RVAL]: '{}'", first);
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
|
|
m_evt_handler->set_val_scalar_squoted(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
|
|
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
// block scalars (ie | and >) cannot appear in flow containers
|
|
else if(_scan_scalar_plain_map_flow(&sc))
|
|
{
|
|
_c4dbgp("mapflow[RVAL]: plain scalar.");
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
|
|
m_evt_handler->set_val_scalar_plain(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else if(first == '[')
|
|
{
|
|
_c4dbgp("mapflow[RVAL]: start val seqflow");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_seq_val_flow();
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
addrem_flags(RSEQ|RVAL, RMAP|RNXT);
|
|
_line_progressed(1);
|
|
goto mapflow_finish;
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
_c4dbgp("mapflow[RVAL]: start val mapflow");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_evt_handler->begin_map_val_flow();
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
addrem_flags(RKEY, RNXT);
|
|
_line_progressed(1);
|
|
// keep going in this function
|
|
}
|
|
else if(first == '}')
|
|
{
|
|
_c4dbgp("mapflow[RVAL]: end!");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->end_map();
|
|
_line_progressed(1);
|
|
goto mapflow_finish;
|
|
}
|
|
else if(first == ',')
|
|
{
|
|
_c4dbgp("mapflow[RVAL]: empty val!");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
addrem_flags(RNXT, RVAL);
|
|
// keep going in this function
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_map();
|
|
_c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
|
|
m_evt_handler->set_val_ref(ref);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
m_evt_handler->set_val_anchor(anchor);
|
|
}
|
|
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
|
|
_check_tag(tag);
|
|
m_evt_handler->set_val_tag(tag);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(RNXT))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
_c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]);
|
|
if(rem.begins_with(','))
|
|
{
|
|
_c4dbgp("mapflow[RNXT]: expect next keyval");
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(RKEY, RNXT);
|
|
_line_progressed(1);
|
|
}
|
|
else if(rem.begins_with('}'))
|
|
{
|
|
_c4dbgp("mapflow[RNXT]: end!");
|
|
m_evt_handler->end_map();
|
|
_line_progressed(1);
|
|
goto mapflow_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(QMRK))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapflow[QMRK]: '{}'", first);
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
// block scalars (ie | and >) cannot appear in flow containers
|
|
else if(_scan_scalar_plain_map_flow(&sc))
|
|
{
|
|
_c4dbgp("mapflow[QMRK]: plain scalar");
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
else if(first == ':')
|
|
{
|
|
_c4dbgp("mapflow[QMRK]: setting empty key");
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
addrem_flags(RVAL, QMRK);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '}') // this happens on a trailing comma like ", }"
|
|
{
|
|
_c4dbgp("mapflow[QMRK]: end!");
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->end_map();
|
|
_line_progressed(1);
|
|
goto mapflow_finish;
|
|
}
|
|
else if(first == ',')
|
|
{
|
|
_c4dbgp("mapflow[QMRK]: empty key+val!");
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
addrem_flags(RNXT, QMRK);
|
|
}
|
|
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
m_evt_handler->set_key_anchor(anchor);
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_map();
|
|
_c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
|
|
m_evt_handler->set_key_ref(ref);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
else if(first == '[')
|
|
{
|
|
// RYML's tree cannot store container keys, but that's
|
|
// handled inside the tree sink. Other sink types may be
|
|
// able to handle it.
|
|
_c4dbgp("mapflow[QMRK]: start child seqflow (!)");
|
|
addrem_flags(RKCL, QMRK);
|
|
m_evt_handler->begin_seq_key_flow();
|
|
addrem_flags(RSEQ|RVAL, RMAP|RKCL);
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
_line_progressed(1);
|
|
goto mapflow_finish;
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
// RYML's tree cannot store container keys, but that's
|
|
// handled inside the tree sink. Other sink types may be
|
|
// able to handle it.
|
|
_c4dbgp("mapflow[QMRK]: start child mapflow (!)");
|
|
addrem_flags(RKCL, QMRK);
|
|
m_evt_handler->begin_map_key_flow();
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
addrem_flags(RKEY, RKCL);
|
|
_line_progressed(1);
|
|
// keep going in this function
|
|
}
|
|
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
|
|
_check_tag(tag);
|
|
m_evt_handler->set_key_tag(tag);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
|
|
mapflow_again:
|
|
_c4dbgt("mapflow: go again", 0);
|
|
if(_finished_line())
|
|
{
|
|
if(C4_LIKELY(!_finished_file()))
|
|
{
|
|
_line_ended();
|
|
_scan_line();
|
|
_c4dbgnextline();
|
|
}
|
|
else
|
|
{
|
|
_c4err("missing terminating }");
|
|
}
|
|
}
|
|
goto mapflow_start;
|
|
|
|
mapflow_finish:
|
|
_c4dbgp("mapflow: finish");
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_seq_block()
|
|
{
|
|
seqblck_start:
|
|
_c4dbgpf("handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
|
|
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)));
|
|
|
|
_maybe_skip_comment();
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto seqblck_again;
|
|
|
|
if(has_any(RVAL))
|
|
{
|
|
_c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
if(m_evt_handler->m_curr->at_line_beginning())
|
|
{
|
|
_c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
|
|
if(m_evt_handler->m_curr->indentation_ge())
|
|
{
|
|
_c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto seqblck_again;
|
|
}
|
|
else if(m_evt_handler->m_curr->indentation_lt())
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: smaller indentation!");
|
|
_handle_indentation_pop_from_block_seq();
|
|
goto seqblck_finish;
|
|
}
|
|
else if(m_evt_handler->m_curr->line_contents.indentation == npos)
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: empty line!");
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
|
|
goto seqblck_again;
|
|
}
|
|
}
|
|
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
|
|
else
|
|
{
|
|
// accommodate annotation on the previous line. eg:
|
|
// - &elm
|
|
// foo # <-- on this line
|
|
// - &elm
|
|
// &foo foo: bar # <-- on this line
|
|
if(rem.str[0] == ' ')
|
|
{
|
|
if(_handle_indentation_from_annotations())
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: annotations!");
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto seqblck_again;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
_RYML_CB_ASSERT(callbacks(), rem.len);
|
|
_c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
|
|
const char first = rem.str[0];
|
|
const size_t startline = m_evt_handler->m_curr->pos.line;
|
|
// warning: the gcc optimizer on x86 builds is brittle with
|
|
// this function:
|
|
const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
|
|
m_evt_handler->set_val_scalar_squoted(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
addrem_flags(RMAP|RVAL, RSEQ|RNXT);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto seqblck_finish;
|
|
}
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
|
|
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RMAP|RVAL, RSEQ|RNXT);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto seqblck_finish;
|
|
}
|
|
}
|
|
// block scalars can only appear as keys when in QMRK scope
|
|
// (ie, after ? tokens), so no need to scan following colon in
|
|
// here.
|
|
else if(first == '|')
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: block-literal scalar");
|
|
ScannedBlock sb;
|
|
_scan_block(&sb, m_evt_handler->m_curr->indref + 1);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
|
|
m_evt_handler->set_val_scalar_literal(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else if(first == '>')
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: block-folded scalar");
|
|
ScannedBlock sb;
|
|
_scan_block(&sb, m_evt_handler->m_curr->indref + 1);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
|
|
m_evt_handler->set_val_scalar_folded(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else if(_scan_scalar_plain_seq_blck(&sc))
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: plain scalar.");
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
|
|
m_evt_handler->set_val_scalar_plain(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
if(startindent > m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
addrem_flags(RMAP|RVAL, RSEQ|RNXT);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto seqblck_finish;
|
|
}
|
|
else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|BLCK, m_evt_handler->m_parent))
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->end_seq();
|
|
m_evt_handler->add_sibling();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
addrem_flags(RVAL, RNXT|RKEY);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto seqblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
}
|
|
else if(first == '[')
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: start child seqflow");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_seq_val_flow();
|
|
addrem_flags(FLOW|RVAL, BLCK|RNXT);
|
|
_line_progressed(1);
|
|
_set_indentation(m_evt_handler->m_parent->indref + 1u);
|
|
goto seqblck_finish;
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: start child mapflow");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_map_val_flow();
|
|
addrem_flags(RMAP|RKEY|FLOW, BLCK|RSEQ|RVAL|RNXT);
|
|
_line_progressed(1);
|
|
_set_indentation(m_evt_handler->m_parent->indref + 1u);
|
|
goto seqblck_finish;
|
|
}
|
|
else if(first == '-')
|
|
{
|
|
if(startindent == m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: prev val was empty");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
// keep in RVAL, but for the next sibling
|
|
m_evt_handler->add_sibling();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: start child seqblck");
|
|
_RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_seq_val_block();
|
|
addrem_flags(RVAL, RNXT);
|
|
_save_indentation();
|
|
// keep going on inside this function
|
|
}
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == ':')
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
addrem_flags(RMAP|RVAL, RSEQ|RNXT);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto seqblck_finish;
|
|
}
|
|
else if(first == '&')
|
|
{
|
|
const csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
// we need to buffer the anchors, as there may be two
|
|
// consecutive anchors in here
|
|
_add_annotation(&m_pending_anchors, anchor, startindent, startline);
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_seq();
|
|
_c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: set ref as val!");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_ref(ref);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: ref is key of map");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_ref(ref);
|
|
addrem_flags(RMAP|RVAL, RSEQ|RNXT);
|
|
_set_indentation(startindent);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto seqblck_finish;
|
|
}
|
|
}
|
|
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
|
|
// we need to buffer the tags, as there may be two
|
|
// consecutive tags in here
|
|
_add_annotation(&m_pending_tags, tag, startindent, startline);
|
|
}
|
|
else if(first == '?')
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
|
|
addrem_flags(RNXT, RVAL);
|
|
m_was_inside_qmrk = true;
|
|
m_evt_handler->begin_map_val_block();
|
|
addrem_flags(RMAP|QMRK, RSEQ|RNXT);
|
|
_save_indentation();
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto seqblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else // RNXT
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
//
|
|
// handle indentation
|
|
//
|
|
_c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
|
|
if(C4_LIKELY(_at_line_begin()))
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: at line begin");
|
|
if(m_evt_handler->m_curr->indentation_ge())
|
|
{
|
|
_c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
|
|
_line_progressed(m_evt_handler->m_curr->indref);
|
|
_maybe_skip_whitespace_tokens();
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto seqblck_again;
|
|
}
|
|
else if(m_evt_handler->m_curr->indentation_lt())
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: smaller indentation!");
|
|
_handle_indentation_pop_from_block_seq();
|
|
if(has_all(RSEQ|BLCK))
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: still seqblck!");
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto seqblck_again;
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: no longer seqblck!");
|
|
goto seqblck_finish;
|
|
}
|
|
}
|
|
else if(m_evt_handler->m_curr->line_contents.indentation == npos)
|
|
{
|
|
_c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto seqblck_again;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: NOT at line begin");
|
|
if(!rem.begins_with_any(" \t"))
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
else
|
|
{
|
|
_skipchars(" \t");
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: again");
|
|
goto seqblck_again;
|
|
}
|
|
}
|
|
}
|
|
//
|
|
// now handle the tokens
|
|
//
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
|
|
if(first == '-')
|
|
{
|
|
if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: expect next val");
|
|
addrem_flags(RVAL, RNXT);
|
|
m_evt_handler->add_sibling();
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: start doc");
|
|
_start_doc_suddenly();
|
|
_line_progressed(3);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto seqblck_finish;
|
|
}
|
|
}
|
|
else if(first == ':')
|
|
{
|
|
// This happens for example in `- [a: b]: c` (after
|
|
// terminating the seq, ie, after `]`). All other cases
|
|
// (ie colon after scalars) are caught elsewhere (ie, in
|
|
// RVAL state).
|
|
auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
|
|
if(C4_LIKELY(prev_state && (prev_state->flags & RMAP)))
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
|
|
m_evt_handler->end_seq();
|
|
goto seqblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(first == '.')
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: maybe doc?");
|
|
csubstr rs = rem.sub(1);
|
|
if(rs == ".." || rs.begins_with(".. "))
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: end+start doc");
|
|
_end_doc_suddenly();
|
|
_line_progressed(3);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto seqblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// may be an indentless sequence nested in a map...
|
|
//if(m_evt_handler->m_stack.size() >= 2)
|
|
#ifdef RYML_DBG
|
|
char flagbuf_[128];
|
|
for(auto const& s : m_evt_handler->m_stack)
|
|
{
|
|
_dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
|
|
}
|
|
#endif
|
|
if(m_evt_handler->m_parent && has_all(RMAP|BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
|
|
{
|
|
_c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
|
|
_RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
|
|
_handle_indentation_pop(m_evt_handler->m_parent);
|
|
_RYML_CB_ASSERT(this->callbacks(), has_all(RMAP|BLCK));
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(RKEY, RNXT);
|
|
goto seqblck_finish;
|
|
}
|
|
else //if(first != '*')
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
}
|
|
|
|
seqblck_again:
|
|
_c4dbgt("seqblck: go again", 0);
|
|
if(_finished_line())
|
|
{
|
|
_line_ended();
|
|
_scan_line();
|
|
if(_finished_file())
|
|
{
|
|
_c4dbgp("seqblck: finish!");
|
|
_end_seq_blck();
|
|
goto seqblck_finish;
|
|
}
|
|
_c4dbgnextline();
|
|
}
|
|
goto seqblck_start;
|
|
|
|
seqblck_finish:
|
|
_c4dbgp("seqblck: finish");
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_map_block()
|
|
{
|
|
mapblck_start:
|
|
_c4dbgpf("handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
|
|
|
|
// states: RKEY|QMRK -> RKCL -> RVAL -> RNXT
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
|
|
|
|
_maybe_skip_comment();
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapblck_again;
|
|
|
|
if(has_any(RKEY))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
//
|
|
// handle indentation
|
|
//
|
|
if(m_evt_handler->m_curr->at_line_beginning())
|
|
{
|
|
if(m_evt_handler->m_curr->indentation_eq())
|
|
{
|
|
_c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
|
|
_line_progressed(m_evt_handler->m_curr->indref);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapblck_again;
|
|
}
|
|
else if(m_evt_handler->m_curr->indentation_lt())
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: smaller indentation!");
|
|
_handle_indentation_pop_from_block_map();
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
|
|
if(has_all(RMAP|BLCK))
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: still mapblck!");
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY));
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapblck_again;
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: no longer mapblck!");
|
|
goto mapblck_finish;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
|
|
_c4err("invalid indentation");
|
|
}
|
|
}
|
|
//
|
|
// now handle the tokens
|
|
//
|
|
const char first = rem.str[0];
|
|
const size_t startline = m_evt_handler->m_curr->pos.line;
|
|
const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
|
|
_c4dbgpf("mapblck[RKEY]: '{}'", first);
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
addrem_flags(RVAL, RKEY);
|
|
if(!_maybe_scan_following_colon())
|
|
_c4err("could not find ':' colon after key");
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RVAL, RKEY);
|
|
if(!_maybe_scan_following_colon())
|
|
_c4err("could not find ':' colon after key");
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
// block scalars (| and >) can not be used as keys unless they
|
|
// appear in an explicit QMRK scope (ie, after the ? token),
|
|
else if(C4_UNLIKELY(first == '|'))
|
|
{
|
|
_c4err("block literal keys must be enclosed in '?'");
|
|
}
|
|
else if(C4_UNLIKELY(first == '>'))
|
|
{
|
|
_c4err("block literal keys must be enclosed in '?'");
|
|
}
|
|
else if(_scan_scalar_plain_map_blck(&sc))
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: plain scalar");
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
addrem_flags(RVAL, RKEY);
|
|
if(!_maybe_scan_following_colon())
|
|
_c4err("could not find ':' colon after key");
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '?')
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: key token!");
|
|
addrem_flags(QMRK, RKEY);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
m_was_inside_qmrk = true;
|
|
goto mapblck_again;
|
|
}
|
|
else if(first == ':')
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: setting empty key");
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
addrem_flags(RVAL, RKEY);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_map();
|
|
_c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_ref(ref);
|
|
addrem_flags(RVAL, RKEY);
|
|
if(!_maybe_scan_following_colon())
|
|
_c4err("could not find ':' colon after key");
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
_add_annotation(&m_pending_anchors, anchor, startindent, startline);
|
|
}
|
|
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
|
|
_add_annotation(&m_pending_tags, tag, startindent, startline);
|
|
}
|
|
else if(first == '[')
|
|
{
|
|
// RYML's tree cannot store container keys, but that's
|
|
// handled inside the tree handler. Other handlers may be
|
|
// able to handle it.
|
|
_c4dbgp("mapblck[RKEY]: start child seqflow (!)");
|
|
addrem_flags(RKCL, RKEY);
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->begin_seq_key_flow();
|
|
addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
|
|
_line_progressed(1);
|
|
_set_indentation(startindent);
|
|
goto mapblck_finish;
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
// RYML's tree cannot store container keys, but that's
|
|
// handled inside the tree handler. Other handlers may be
|
|
// able to handle it.
|
|
_c4dbgp("mapblck[RKEY]: start child mapflow (!)");
|
|
addrem_flags(RKCL, RKEY);
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->begin_map_key_flow();
|
|
addrem_flags(FLOW|RKEY, BLCK|RKCL);
|
|
_line_progressed(1);
|
|
_set_indentation(startindent);
|
|
goto mapblck_finish;
|
|
}
|
|
else if(first == '-')
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: maybe doc?");
|
|
if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: end+start doc");
|
|
_start_doc_suddenly();
|
|
_line_progressed(3);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto mapblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(first == '.')
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: maybe end doc?");
|
|
if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: end doc");
|
|
_end_doc_suddenly();
|
|
_line_progressed(3);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto mapblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
_RYML_WITH_TAB_TOKENS(
|
|
else if(first == '\t')
|
|
{
|
|
_c4dbgp("mapblck[RKEY]: skip tabs");
|
|
_maybe_skipchars('\t');
|
|
})
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(RKCL)) // read the key colon
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
//
|
|
// handle indentation
|
|
//
|
|
if(m_evt_handler->m_curr->at_line_beginning())
|
|
{
|
|
if(m_evt_handler->m_curr->indentation_eq())
|
|
{
|
|
_c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
|
|
_line_progressed(m_evt_handler->m_curr->indref);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapblck_again;
|
|
}
|
|
else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
|
|
{
|
|
_c4err("invalid indentation");
|
|
}
|
|
}
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapblck[RKCL]: '{}'", first);
|
|
if(first == ':')
|
|
{
|
|
_c4dbgp("mapblck[RKCL]: found the colon");
|
|
addrem_flags(RVAL, RKCL);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '?')
|
|
{
|
|
_c4dbgp("mapblck[RKCL]: got '?'. val was empty");
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(QMRK, RKCL);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '-')
|
|
{
|
|
if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
|
|
{
|
|
_c4dbgp("mapblck[RKCL]: end+start doc");
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
|
|
_start_doc_suddenly();
|
|
_line_progressed(3);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto mapblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(first == '.')
|
|
{
|
|
_c4dbgp("mapblck[RKCL]: maybe end doc?");
|
|
csubstr rs = rem.sub(1);
|
|
if(rs == ".." || rs.begins_with(".. "))
|
|
{
|
|
_c4dbgp("mapblck[RKCL]: end+start doc");
|
|
_end_doc_suddenly();
|
|
_line_progressed(3);
|
|
goto mapblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(m_was_inside_qmrk)
|
|
{
|
|
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
|
|
_c4dbgp("mapblck[RKCL]: missing :");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
m_was_inside_qmrk = false;
|
|
addrem_flags(RKEY, RKCL);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(RVAL))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
//
|
|
// handle indentation
|
|
//
|
|
if(m_evt_handler->m_curr->at_line_beginning())
|
|
{
|
|
_c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
|
|
m_evt_handler->m_curr->more_indented = false;
|
|
if(m_evt_handler->m_curr->indref == npos)
|
|
{
|
|
_c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
|
|
_set_indentation(m_evt_handler->m_curr->line_contents.indentation);
|
|
_line_progressed(m_evt_handler->m_curr->indref);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapblck_again;
|
|
}
|
|
else if(m_evt_handler->m_curr->indentation_eq())
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: skip indentation!");
|
|
_line_progressed(m_evt_handler->m_curr->indref);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapblck_again;
|
|
// TODO: this is valid:
|
|
//
|
|
// ```yaml
|
|
// a:
|
|
// b:
|
|
// ---
|
|
// a:
|
|
// b
|
|
// ---
|
|
// a:
|
|
// b: c
|
|
// ```
|
|
//
|
|
// ... but this is not:
|
|
//
|
|
// ```yaml
|
|
// a:
|
|
// v
|
|
// ---
|
|
// a: b: c
|
|
// ```
|
|
//
|
|
// here, we probably need to set a boolean on the state
|
|
// to disambiguate between these cases.
|
|
}
|
|
else if(m_evt_handler->m_curr->indentation_gt())
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: more indented!");
|
|
m_evt_handler->m_curr->more_indented = true;
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapblck_again;
|
|
}
|
|
else if(m_evt_handler->m_curr->indentation_lt())
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: smaller indentation!");
|
|
_handle_indentation_pop_from_block_map();
|
|
if(has_all(RMAP|BLCK))
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: still mapblck!");
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
|
|
if(has_any(RNXT))
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(RKEY, RNXT);
|
|
}
|
|
goto mapblck_again;
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: no longer mapblck!");
|
|
goto mapblck_finish;
|
|
}
|
|
}
|
|
else if(m_evt_handler->m_curr->line_contents.indentation == npos)
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: empty line!");
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
|
|
goto mapblck_again;
|
|
}
|
|
}
|
|
//
|
|
// now handle the tokens
|
|
//
|
|
const char first = rem.str[0];
|
|
const size_t startline = m_evt_handler->m_curr->pos.line;
|
|
const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
|
|
_c4dbgpf("mapblck[RVAL]: '{}'", first);
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
|
|
m_evt_handler->set_val_scalar_squoted(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
if(startindent != m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(m_evt_handler->m_curr->line_contents.indentation);
|
|
// keep the child state on RVAL
|
|
addrem_flags(RVAL, RNXT);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
// keep going on RVAL
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
}
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
|
|
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
if(startindent != m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(m_evt_handler->m_curr->line_contents.indentation);
|
|
// keep the child state on RVAL
|
|
addrem_flags(RVAL, RNXT);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
// keep going on RVAL
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
}
|
|
}
|
|
// block scalars can only appear as keys when in QMRK scope
|
|
// (ie, after ? tokens), so no need to scan following colon
|
|
else if(first == '|')
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
|
|
ScannedBlock sb;
|
|
_scan_block(&sb, m_evt_handler->m_curr->indref + 1);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
|
|
m_evt_handler->set_val_scalar_literal(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else if(first == '>')
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
|
|
ScannedBlock sb;
|
|
_scan_block(&sb, m_evt_handler->m_curr->indref + 1);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
|
|
m_evt_handler->set_val_scalar_folded(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else if(_scan_scalar_plain_map_blck(&sc))
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: plain scalar.");
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
|
|
m_evt_handler->set_val_scalar_plain(maybe_filtered);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
if(startindent != m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(m_evt_handler->m_curr->line_contents.indentation);
|
|
// keep the child state on RVAL
|
|
addrem_flags(RVAL, RNXT);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
// keep going on RVAL
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
}
|
|
}
|
|
else if(first == '-')
|
|
{
|
|
if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t'))
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: start val seqblck");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_seq_val_block();
|
|
addrem_flags(RSEQ|RVAL, RMAP|RNXT);
|
|
_set_indentation(startindent);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto mapblck_finish;
|
|
}
|
|
else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: end+start doc");
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
|
|
_start_doc_suddenly();
|
|
_line_progressed(3);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto mapblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(first == '[')
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: start val seqflow");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_seq_val_flow();
|
|
addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RNXT);
|
|
_set_indentation(m_evt_handler->m_curr->indref + 1u);
|
|
_line_progressed(1);
|
|
goto mapblck_finish;
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: start val mapflow");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_map_val_flow();
|
|
addrem_flags(RKEY|FLOW, BLCK|RVAL|RNXT);
|
|
m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
|
|
_set_indentation(m_evt_handler->m_curr->indref + 1u);
|
|
_line_progressed(1);
|
|
goto mapblck_finish;
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_map();
|
|
_c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
|
|
if(startindent == m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgpf("mapblck[RVAL]: same indentation {}", startindent);
|
|
m_evt_handler->set_val_ref(ref);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
|
|
if(_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: start child map, block");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_map_val_block();
|
|
m_evt_handler->set_key_ref(ref);
|
|
_set_indentation(startindent);
|
|
// keep going in RVAL
|
|
addrem_flags(RVAL, RNXT);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: was val ref");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_ref(ref);
|
|
addrem_flags(RNXT, RVAL);
|
|
}
|
|
}
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
if(startindent == m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(RKEY, RVAL);
|
|
}
|
|
// we need to buffer the anchors, as there may be two
|
|
// consecutive anchors in here
|
|
_add_annotation(&m_pending_anchors, anchor, startindent, startline);
|
|
}
|
|
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
|
|
if(startindent == m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: tag for next key. val is missing!");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(RKEY, RVAL);
|
|
}
|
|
// we need to buffer the tags, as there may be two
|
|
// consecutive tags in here
|
|
_add_annotation(&m_pending_tags, tag, startindent, startline);
|
|
}
|
|
else if(first == '?')
|
|
{
|
|
if(startindent == m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: got '?'. val was empty");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(QMRK, RVAL);
|
|
}
|
|
else if(startindent > m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: start val mapblck");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_map_val_block();
|
|
addrem_flags(QMRK|BLCK, RNXT);
|
|
_set_indentation(startindent);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
m_was_inside_qmrk = true;
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto mapblck_again;
|
|
}
|
|
else if(first == ':')
|
|
{
|
|
if(startindent == m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well");
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
}
|
|
else if(startindent > m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: start val mapblck");
|
|
addrem_flags(RNXT, RVAL);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
_set_indentation(m_evt_handler->m_curr->line_contents.indentation);
|
|
// keep the child state on RVAL
|
|
addrem_flags(RVAL, RNXT);
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto mapblck_again;
|
|
}
|
|
else if(first == '.')
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: maybe doc?");
|
|
csubstr rs = rem.sub(1);
|
|
if(rs == ".." || rs.begins_with(".. "))
|
|
{
|
|
_c4dbgp("seqblck[RVAL]: end doc expl");
|
|
_end_doc_suddenly();
|
|
_line_progressed(3);
|
|
_maybe_skip_whitespace_tokens();
|
|
goto mapblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
_RYML_WITH_TAB_TOKENS(
|
|
else if(first == '\t')
|
|
{
|
|
_c4dbgp("mapblck[RVAL]: skip tabs");
|
|
_maybe_skipchars('\t');
|
|
})
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(RNXT))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
|
|
//
|
|
// handle indentation
|
|
//
|
|
if(m_evt_handler->m_curr->at_line_beginning())
|
|
{
|
|
_c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
|
|
if(m_evt_handler->m_curr->indentation_eq())
|
|
{
|
|
_c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
|
|
_line_progressed(m_evt_handler->m_curr->indref);
|
|
_c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(RKEY, RNXT);
|
|
goto mapblck_again;
|
|
}
|
|
else if(m_evt_handler->m_curr->indentation_lt())
|
|
{
|
|
_c4dbgp("mapblck[RNXT]: smaller indentation!");
|
|
_handle_indentation_pop_from_block_map();
|
|
if(has_all(RMAP|BLCK))
|
|
{
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
|
|
if(!has_any(RKCL))
|
|
{
|
|
_c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
|
|
m_evt_handler->add_sibling();
|
|
addrem_flags(RKEY, RNXT);
|
|
}
|
|
goto mapblck_again;
|
|
}
|
|
else
|
|
{
|
|
goto mapblck_finish;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[RNXT]: NOT at line begin");
|
|
if(!rem.begins_with_any(" \t"))
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
else
|
|
{
|
|
_skipchars(" \t");
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
{
|
|
_c4dbgp("seqblck[RNXT]: again");
|
|
goto mapblck_again;
|
|
}
|
|
}
|
|
}
|
|
//
|
|
// handle tokens
|
|
//
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
|
|
const char first = rem.str[0];
|
|
_c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
|
|
if(first == ':')
|
|
{
|
|
if(m_evt_handler->m_curr->more_indented)
|
|
{
|
|
_c4dbgp("mapblck[RNXT]: start child block map");
|
|
C4_NOT_IMPLEMENTED();
|
|
//m_evt_handler->actually_as_block_map();
|
|
_line_progressed(1);
|
|
_set_indentation(m_evt_handler->m_curr->scalar_col);
|
|
m_evt_handler->m_curr->more_indented = false;
|
|
goto mapblck_again;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(first == ' ')
|
|
{
|
|
_c4dbgp("mapblck[RNXT]: skip spaces");
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else if(has_any(QMRK))
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
|
|
//
|
|
// handle indentation
|
|
//
|
|
if(m_evt_handler->m_curr->at_line_beginning())
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos);
|
|
if(m_evt_handler->m_curr->indentation_eq())
|
|
{
|
|
_c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
|
|
_line_progressed(m_evt_handler->m_curr->indref);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapblck_again;
|
|
}
|
|
else if(m_evt_handler->m_curr->indentation_lt())
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: smaller indentation!");
|
|
_handle_indentation_pop_from_block_map();
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
|
|
if(has_all(RMAP|BLCK))
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: still mapblck!");
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapblck_again;
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: no longer mapblck!");
|
|
goto mapblck_finish;
|
|
}
|
|
}
|
|
// indentation can be larger in QMRK state
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: larger indentation !");
|
|
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
goto mapblck_again;
|
|
}
|
|
}
|
|
//
|
|
// now handle the tokens
|
|
//
|
|
const char first = rem.str[0];
|
|
const size_t startline = m_evt_handler->m_curr->pos.line;
|
|
const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
|
|
_c4dbgpf("mapblck[QMRK]: '{}'", first);
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: set as key");
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
|
|
addrem_flags(RKCL, QMRK);
|
|
_handle_annotations_before_start_mapblck_as_key();
|
|
m_evt_handler->begin_map_key_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(startindent);
|
|
// keep the child state on RVAL
|
|
addrem_flags(RVAL, RKCL|QMRK);
|
|
}
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: set as key");
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
|
|
addrem_flags(RKCL, QMRK);
|
|
_handle_annotations_before_start_mapblck_as_key();
|
|
m_evt_handler->begin_map_key_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(startindent);
|
|
// keep the child state on RVAL
|
|
addrem_flags(RVAL, RKCL|QMRK);
|
|
}
|
|
}
|
|
else if(first == '|')
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
|
|
ScannedBlock sb;
|
|
_scan_block(&sb, m_evt_handler->m_curr->indref + 1);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_literal(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
else if(first == '>')
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
|
|
ScannedBlock sb;
|
|
_scan_block(&sb, m_evt_handler->m_curr->indref + 1);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_folded(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
else if(_scan_scalar_plain_map_blck(&sc))
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: plain scalar");
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: set as key");
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
|
|
addrem_flags(RKCL, QMRK);
|
|
_handle_annotations_before_start_mapblck_as_key();
|
|
m_evt_handler->begin_map_key_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(startindent);
|
|
// keep the child state on RVAL
|
|
addrem_flags(RVAL, RKCL|QMRK);
|
|
}
|
|
}
|
|
else if(first == ':')
|
|
{
|
|
if(startindent == m_evt_handler->m_curr->indref)
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: empty key");
|
|
addrem_flags(RVAL, QMRK);
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
|
|
addrem_flags(RKCL, QMRK);
|
|
_handle_annotations_before_start_mapblck_as_key();
|
|
m_evt_handler->begin_map_key_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(startindent);
|
|
// keep the child state on RVAL
|
|
addrem_flags(RVAL, RKCL|QMRK);
|
|
}
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_map();
|
|
_c4dbgpf("mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: set ref as key");
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->set_key_ref(ref);
|
|
addrem_flags(RKCL, QMRK);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
|
|
addrem_flags(RKCL, QMRK);
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->begin_map_key_block();
|
|
m_evt_handler->set_key_ref(ref);
|
|
_set_indentation(startindent);
|
|
// keep the child state on RVAL
|
|
addrem_flags(RVAL, RKCL|QMRK);
|
|
}
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
_add_annotation(&m_pending_anchors, anchor, startindent, startline);
|
|
}
|
|
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
|
|
_add_annotation(&m_pending_tags, tag, startindent, startline);
|
|
}
|
|
else if(first == '-')
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: maybe doc?");
|
|
csubstr rs = rem.sub(1);
|
|
if(rs == "--" || rs.begins_with("-- "))
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: end+start doc");
|
|
_start_doc_suddenly();
|
|
_line_progressed(3);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: start child seqblck (!)");
|
|
addrem_flags(RKCL, RKEY|QMRK);
|
|
_handle_annotations_before_blck_key_scalar();
|
|
m_evt_handler->begin_seq_key_block();
|
|
addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK);
|
|
_set_indentation(startindent);
|
|
_line_progressed(1);
|
|
}
|
|
_maybe_skip_whitespace_tokens();
|
|
goto mapblck_finish;
|
|
}
|
|
else if(first == '[')
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: start child seqflow (!)");
|
|
addrem_flags(RKCL, RKEY|QMRK);
|
|
m_evt_handler->begin_seq_key_flow();
|
|
addrem_flags(RVAL|RSEQ|FLOW, RMAP|RKCL|QMRK|BLCK);
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
_line_progressed(1);
|
|
goto mapblck_finish;
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: start child mapblck (!)");
|
|
addrem_flags(RKCL, RKEY|QMRK);
|
|
m_evt_handler->begin_map_key_flow();
|
|
addrem_flags(RKEY|FLOW, RVAL|RKCL|QMRK|BLCK);
|
|
_set_indentation(m_evt_handler->m_parent->indref);
|
|
_line_progressed(1);
|
|
goto mapblck_finish;
|
|
}
|
|
else if(first == '?')
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: another QMRK '?'");
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
m_evt_handler->set_val_scalar_plain_empty();
|
|
m_evt_handler->add_sibling();
|
|
_line_progressed(1);
|
|
}
|
|
else if(first == '.')
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: maybe end doc?");
|
|
csubstr rs = rem.sub(1);
|
|
if(rs == ".." || rs.begins_with(".. "))
|
|
{
|
|
_c4dbgp("mapblck[QMRK]: end+start doc");
|
|
_end_doc_suddenly();
|
|
_line_progressed(3);
|
|
goto mapblck_finish;
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
|
|
mapblck_again:
|
|
_c4dbgt("mapblck: again", 0);
|
|
if(_finished_line())
|
|
{
|
|
_line_ended();
|
|
_scan_line();
|
|
if(_finished_file())
|
|
{
|
|
_c4dbgp("mapblck: file finished!");
|
|
_end_map_blck();
|
|
goto mapblck_finish;
|
|
}
|
|
_c4dbgnextline();
|
|
}
|
|
goto mapblck_start;
|
|
|
|
mapblck_finish:
|
|
_c4dbgp("mapblck: finish");
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Handles the current line in the "unk" state for the variant named _json.
// Asserted state on entry: RTOP set; RNXT, RSEQ and RMAP all clear.
// NOTE(review): the suffix suggests this is the JSON-mode counterpart of
// _handle_unk(); the caller that selects it is outside this view - confirm.
//
// After _maybe_skip_comment() and after stepping over leading spaces/tabs,
// the first remaining character decides what happens:
//   '['  -> begin_seq_val_flow()
//   '{'  -> begin_map_val_flow()
//   _handle_bom() returning true -> nothing else is done in this call
//   '"'  -> double-quoted scalar, set as the value
//   plain scalar accepted by _scan_scalar_plain_unk() -> set as the value
// A scalar that is followed by a colon, and any other input, raises
// "parse error". Returns early (doing nothing) whenever the line remainder
// is empty.
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_unk_json()
|
|
{
|
|
_c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
|
|
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
|
|
|
|
_maybe_skip_comment();
|
|
// rem is a local copy of the unconsumed part of the line; it must be
// re-read after every call that advances the line.
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
return;
|
|
|
|
// step over leading spaces/tabs; pos == 0 means there is nothing to skip
size_t pos = rem.first_not_of(" \t");
|
|
if(pos)
|
|
{
|
|
// npos (no non-blank character found) is replaced by the whole remaining length
pos = pos != npos ? pos : rem.len;
|
|
_c4dbgpf("skipping indentation of {}", pos);
|
|
_line_progressed(pos);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
return;
|
|
_c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
|
|
}
|
|
|
|
// flow sequence at the top level
if(rem.begins_with('['))
|
|
{
|
|
_c4dbgp("it's a seq");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
m_evt_handler->begin_seq_val_flow();
|
|
addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
|
|
// indentation is taken from current_col(rem) before the '[' is consumed below
_set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
|
|
m_doc_empty = false;
|
|
_line_progressed(1);
|
|
}
|
|
// flow map at the top level
else if(rem.begins_with('{'))
|
|
{
|
|
_c4dbgp("it's a map");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
m_evt_handler->begin_map_val_flow();
|
|
addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
|
|
m_doc_empty = false;
|
|
_set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
|
|
_line_progressed(1);
|
|
}
|
|
// NOTE(review): _handle_bom() is only reached when the line does not start
// with '[' or '{'; what it consumes is not visible here.
else if(_handle_bom())
|
|
{
|
|
_c4dbgp("byte order mark");
|
|
}
|
|
// anything else must be a scalar
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
|
|
_maybe_skip_whitespace_tokens();
|
|
// re-read the remainder after the call above
csubstr s = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!s.len)
|
|
return;
|
|
// captured before any scalar is scanned; used below when filtering the plain scalar
const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
|
|
const char first = s.str[0];
|
|
ScannedScalar sc;
|
|
if(first == '"')
|
|
{
|
|
_c4dbgp("runk_json: scanning double-quoted scalar");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
add_flags(RDOC);
|
|
m_doc_empty = false;
|
|
sc = _scan_scalar_dquot();
|
|
// the scalar is filtered as a value before the colon check below
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("runk_json: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
|
|
}
|
|
// a colon after the scalar is rejected on this path
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
// the call in the condition fills sc when it returns true
else if(_scan_scalar_plain_unk(&sc))
|
|
{
|
|
_c4dbgp("runk_json: got a plain scalar");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
add_flags(RDOC);
|
|
m_doc_empty = false;
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("runk_json: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
|
|
m_evt_handler->set_val_scalar_plain(maybe_filtered);
|
|
}
|
|
// a colon after the scalar is rejected on this path
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
// neither a double-quoted nor a plain scalar
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Handles the current line in the "unk" state.
// Asserted state on entry: RTOP set; RNXT, RSEQ and RMAP all clear.
//
// Order of processing, as visible in the body:
//  1. _maybe_skip_comment(), then leading spaces/tabs are stepped over.
//     Returns whenever the line remainder becomes empty.
//  2. Only when the line indentation is zero and _at_line_begin() holds:
//     _handle_bom(), then a document-begin token (first char '-'), a
//     document-end token (first char '.') or a directive (first char '%').
//     Each of these returns once handled; otherwise control falls through.
//  3. Dispatch on the first remaining character:
//       '['  flow seq             '{'  flow map
//       '-'  block seq            '?'  block map with an explicit key
//       ':'  block map with an empty key, or the previous value becomes
//            the first key of a new block map
//       '&'  anchor (buffered)    '!'  tag (buffered)
//       '*'  reference
//       else a scalar: single-quoted, double-quoted, '|', '>' or plain.
//     A quoted/plain scalar or a reference followed by a colon starts a
//     block map with it as the key; for '|' and '>' that is an error.
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::_handle_unk()
|
|
{
|
|
_c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
|
|
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
|
|
|
|
_maybe_skip_comment();
|
|
// rem is a local copy of the unconsumed part of the line; it must be
// re-read after every call that advances the line.
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
return;
|
|
|
|
// step over leading spaces/tabs; pos == 0 means there is nothing to skip
size_t pos = rem.first_not_of(" \t");
|
|
if(pos)
|
|
{
|
|
// npos (no non-blank character found) is replaced by the whole remaining length
pos = pos != npos ? pos : rem.len;
|
|
_c4dbgpf("skipping {} whitespace characters", pos);
|
|
_line_progressed(pos);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
return;
|
|
_c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
|
|
}
|
|
|
|
// Tokens that are only looked for at zero indentation and at line begin:
// byte order mark, document begin/end tokens and directives.
if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
|
|
{
|
|
_c4dbgp("rtop: zero indent + at line begin");
|
|
if(_handle_bom())
|
|
{
|
|
_c4dbgp("byte order mark!");
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
return;
|
|
}
|
|
// scoped to this block; a second `first` is declared after the block
const char first = rem.str[0];
|
|
if(first == '-')
|
|
{
|
|
_c4dbgp("rtop: suspecting doc");
|
|
if(_is_doc_begin_token(rem))
|
|
{
|
|
_c4dbgp("rtop: begin doc");
|
|
_maybe_end_doc();
|
|
_begin2_doc_expl();
|
|
_set_indentation(0);
|
|
addrem_flags(RDOC|RUNK, NDOC);
|
|
// the document-begin token takes 3 characters
_line_progressed(3u);
|
|
_maybe_skip_whitespace_tokens();
|
|
return;
|
|
}
|
|
// not a document-begin token: falls through to the generic dispatch below
}
|
|
else if(first == '.')
|
|
{
|
|
_c4dbgp("rtop: suspecting doc end");
|
|
if(_is_doc_end_token(rem))
|
|
{
|
|
_c4dbgp("rtop: end doc");
|
|
// the document is only explicitly ended when RDOC is set
if(has_any(RDOC))
|
|
{
|
|
_end2_doc_expl();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("rtop: ignore end doc");
|
|
}
|
|
// flags and line position are updated in both cases
addrem_flags(NDOC|RUNK, RDOC);
|
|
_line_progressed(3u);
|
|
_maybe_skip_whitespace_tokens();
|
|
return;
|
|
}
|
|
// not a document-end token: falls through to the generic dispatch below
}
|
|
else if(first == '%')
|
|
{
|
|
_c4dbgpf("directive: {}", rem);
|
|
// a directive is refused when the document already has content and NDOC is not set
if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC)))
|
|
_RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "need document footer before directives");
|
|
_handle_directive(rem);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// The block above only returns for the tokens it fully handled; every
// other case must continue into the dispatch below, hence a plain `if`.
/* no else-if! */
|
|
char first = rem.str[0];
|
|
|
|
if(first == '[')
|
|
{
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
m_doc_empty = false;
|
|
// taken before the '[' is consumed by _line_progressed(1) below
const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
|
|
if(C4_LIKELY( ! _annotations_require_key_container()))
|
|
{
|
|
_c4dbgp("it's a seq, flow");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_seq_val_flow();
|
|
addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
|
|
_set_indentation(startindent);
|
|
}
|
|
// pending annotations require a key container: a block map is begun
// first and the flow seq is then begun as its key
else
|
|
{
|
|
_c4dbgp("start new block map, set flow seq as key (!)");
|
|
_handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
|
|
m_evt_handler->begin_map_val_block();
|
|
addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
|
|
m_evt_handler->begin_seq_key_flow();
|
|
addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
|
|
_set_indentation(startindent);
|
|
}
|
|
// consume the '[' in both cases
_line_progressed(1);
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
m_doc_empty = false;
|
|
// taken before the '{' is consumed by _line_progressed(1) below
const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
|
|
if(C4_LIKELY( ! _annotations_require_key_container()))
|
|
{
|
|
_c4dbgp("it's a map, flow");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_map_val_flow();
|
|
addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
|
|
_set_indentation(startindent);
|
|
}
|
|
// pending annotations require a key container: a block map is begun
// first and the flow map is then begun as its key
else
|
|
{
|
|
_c4dbgp("start new block map, set flow map as key (!)");
|
|
_handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
|
|
m_evt_handler->begin_map_val_block();
|
|
addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
|
|
m_evt_handler->begin_map_key_flow();
|
|
addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL);
|
|
_set_indentation(startindent);
|
|
}
|
|
// consume the '{' in both cases
_line_progressed(1);
|
|
}
|
|
// '-' only starts a block seq when _is_blck_token() accepts it
else if(first == '-' && _is_blck_token(rem))
|
|
{
|
|
_c4dbgp("it's a seq, block");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_seq_val_block();
|
|
addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
|
|
m_doc_empty = false;
|
|
_set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
// '?' starts a block map whose first key is read in the QMRK state
else if(first == '?' && _is_blck_token(rem))
|
|
{
|
|
_c4dbgp("it's a map + this key is complex");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_map_val_block();
|
|
// NOTE(review): unlike the other container branches, RDOC is not removed here - confirm intended
addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
|
|
m_doc_empty = false;
|
|
m_was_inside_qmrk = true;
|
|
_save_indentation();
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
// ':' with no key before it on this line
else if(first == ':' && _is_blck_token(rem))
|
|
{
|
|
// nothing has been put in the document yet: the key is empty
if(m_doc_empty)
|
|
{
|
|
_c4dbgp("it's a map with an empty key");
|
|
const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
|
|
const size_t startline = m_evt_handler->m_curr->pos.line; // save
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
m_doc_empty = false;
|
|
_set_indentation(startindent);
|
|
}
|
|
// the document already has a value: the handler is asked to turn it
// into the first key of a new block map
else
|
|
{
|
|
_c4dbgp("actually prev val is a key!");
|
|
// indref is read before the handler call and re-applied after it
size_t prev_indentation = m_evt_handler->m_curr->indref;
|
|
m_evt_handler->actually_val_is_first_key_of_new_map_block();
|
|
_set_indentation(prev_indentation);
|
|
}
|
|
// common to both cases: switch to reading the value, then consume the ':'
addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
// anchor: buffered in m_pending_anchors together with its column and line
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
// rem was captured before _scan_anchor(), i.e. it still starts at the '&'
const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
|
|
const size_t line = m_evt_handler->m_curr->pos.line;
|
|
_add_annotation(&m_pending_anchors, anchor, indentation, line);
|
|
// NOTE(review): same call and argument as `indentation` above; presumably the same value - confirm
_set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
|
|
m_doc_empty = false;
|
|
}
|
|
// reference: either the document value, or the first key of a new block map
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_map();
|
|
_c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref);
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
m_doc_empty = false;
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("runk: set val ref");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_ref(ref);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("runk: start new block map, set ref as key");
|
|
// note: the line indentation is used here, not current_col(rem)
const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
|
|
const size_t startline = m_evt_handler->m_curr->pos.line; // save
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_ref(ref);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
|
|
}
|
|
}
|
|
// tag: buffered in m_pending_tags together with its column and line
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag);
|
|
// we need to buffer the tags, as there may be two
|
|
// consecutive tags in here
|
|
const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
|
|
const size_t line = m_evt_handler->m_curr->pos.line;
|
|
_add_annotation(&m_pending_tags, tag, indentation, line);
|
|
}
|
|
// none of the tokens above: try the scalar styles in turn
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
|
|
_maybe_skip_whitespace_tokens();
|
|
// re-read the remainder after the call above
csubstr s = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!s.len)
|
|
return;
|
|
// both are captured before any scalar is scanned
const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
|
|
const size_t startline = m_evt_handler->m_curr->pos.line; // save
|
|
first = s.str[0];
|
|
ScannedScalar sc;
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("runk: scanning single-quoted scalar");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
add_flags(RDOC);
|
|
m_doc_empty = false;
|
|
sc = _scan_scalar_squot();
|
|
// filtering is deferred until it is known whether the scalar is a val or a key
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("runk: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
|
|
m_evt_handler->set_val_scalar_squoted(maybe_filtered);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("runk: start new block map, set scalar as key");
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
|
|
}
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("runk: scanning double-quoted scalar");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
add_flags(RDOC);
|
|
m_doc_empty = false;
|
|
sc = _scan_scalar_dquot();
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("runk: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
|
|
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("runk: start new block map, set double-quoted scalar as key");
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
// NOTE(review): here begin_map_val_block() precedes _handle_colon(), while the
// single-quoted and plain branches call them in the opposite order - confirm
// whether the order matters.
m_evt_handler->begin_map_val_block();
|
|
_handle_colon();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
|
|
}
|
|
}
|
|
else if(first == '|')
|
|
{
|
|
_c4dbgp("runk: scanning block-literal scalar");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
add_flags(RDOC);
|
|
m_doc_empty = false;
|
|
ScannedBlock sb;
|
|
_scan_block(&sb, startindent);
|
|
// a block scalar can only be a value on this path; a following colon is an error
if(C4_LIKELY(!_maybe_scan_following_colon()))
|
|
{
|
|
_c4dbgp("runk: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
|
|
m_evt_handler->set_val_scalar_literal(maybe_filtered);
|
|
}
|
|
else
|
|
{
|
|
_c4err("block literal keys must be enclosed in '?'");
|
|
}
|
|
}
|
|
else if(first == '>')
|
|
{
|
|
_c4dbgp("runk: scanning block-folded scalar");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
add_flags(RDOC);
|
|
m_doc_empty = false;
|
|
ScannedBlock sb;
|
|
_scan_block(&sb, startindent);
|
|
// a block scalar can only be a value on this path; a following colon is an error
if(C4_LIKELY(!_maybe_scan_following_colon()))
|
|
{
|
|
_c4dbgp("runk: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
|
|
m_evt_handler->set_val_scalar_folded(maybe_filtered);
|
|
}
|
|
else
|
|
{
|
|
_c4err("block folded keys must be enclosed in '?'");
|
|
}
|
|
}
|
|
// the call in the condition fills sc when it returns true
else if(_scan_scalar_plain_unk(&sc))
|
|
{
|
|
_c4dbgp("runk: got a plain scalar");
|
|
m_evt_handler->check_trailing_doc_token();
|
|
_maybe_begin_doc();
|
|
add_flags(RDOC);
|
|
m_doc_empty = false;
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("runk: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
|
|
m_evt_handler->set_val_scalar_plain(maybe_filtered);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("runk: start new block map, set scalar as key");
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
_handle_colon();
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
|
|
}
|
|
}
|
|
// NOTE(review): there is no final else. When _scan_scalar_plain_unk() returns
// false this function returns without reporting an error, whereas
// _handle_unk_json() raises "parse error" at the same point - confirm intended.
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
C4_COLD void ParseEngine<EventHandler>::_handle_usty()
|
|
{
|
|
_c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
|
|
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK|FLOW));
|
|
|
|
#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
|
|
if(has_any(RNXT))
|
|
{
|
|
_c4dbgp("usty[RNXT]: finishing!");
|
|
_end_stream();
|
|
}
|
|
#endif
|
|
|
|
_maybe_skip_comment();
|
|
csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
return;
|
|
|
|
size_t pos = rem.first_not_of(" \t");
|
|
if(pos)
|
|
{
|
|
pos = pos != npos ? pos : rem.len;
|
|
_c4dbgpf("skipping indentation of {}", pos);
|
|
_line_progressed(pos);
|
|
rem = m_evt_handler->m_curr->line_contents.rem;
|
|
if(!rem.len)
|
|
return;
|
|
_c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
|
|
}
|
|
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
|
|
size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
|
|
char first = rem.str[0];
|
|
if(has_any(RSEQ)) // destination is a sequence
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP));
|
|
_c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
|
|
if(first == '[')
|
|
{
|
|
_c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
|
|
add_flags(RNXT);
|
|
m_evt_handler->_push();
|
|
addrem_flags(FLOW|RVAL, RNXT|USTY);
|
|
_set_indentation(startindent);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '-' && _is_blck_token(rem))
|
|
{
|
|
_c4dbgp("usty[RSEQ]: it's a block seq. merging it");
|
|
add_flags(RNXT);
|
|
m_evt_handler->_push();
|
|
addrem_flags(BLCK|RVAL, RNXT|USTY);
|
|
_set_indentation(startindent);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else
|
|
{
|
|
_c4err("can only parse a seq into an existing seq");
|
|
}
|
|
}
|
|
else if(has_any(RMAP)) // destination is a map
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
|
|
_c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
|
|
if(first == '{')
|
|
{
|
|
_c4dbgp("usty[RMAP]: it's a flow map. merging it");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->_push();
|
|
addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
|
|
_set_indentation(startindent);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '?' && _is_blck_token(rem))
|
|
{
|
|
_c4dbgp("usty[RMAP]: it's a block map + this key is complex");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->_push();
|
|
addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
|
|
m_was_inside_qmrk = true;
|
|
_save_indentation();
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == ':' && _is_blck_token(rem))
|
|
{
|
|
_c4dbgp("usty[RMAP]: it's a map with an empty key");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->_push();
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
|
|
_save_indentation();
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(rem.begins_with('&'))
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
|
|
const size_t line = m_evt_handler->m_curr->pos.line;
|
|
_add_annotation(&m_pending_anchors, anchor, indentation, line);
|
|
_set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_map();
|
|
_c4dbgpf("usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4err("cannot read a VAL to a map");
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("usty[RMAP]: start new block map, set ref as key");
|
|
const size_t startline = m_evt_handler->m_curr->pos.line; // save
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
m_evt_handler->_push();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_ref(ref);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
|
|
}
|
|
}
|
|
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
|
|
// we need to buffer the tags, as there may be two
|
|
// consecutive tags in here
|
|
const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
|
|
const size_t line = m_evt_handler->m_curr->pos.line;
|
|
_add_annotation(&m_pending_tags, tag, indentation, line);
|
|
}
|
|
else if(first == '[' || (first == '-' && _is_blck_token(rem)))
|
|
{
|
|
_c4err("cannot parse a seq into an existing map");
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
|
|
startindent = m_evt_handler->m_curr->line_contents.indentation; // save
|
|
const size_t startline = m_evt_handler->m_curr->pos.line; // save
|
|
ScannedScalar sc;
|
|
_c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("usty[RMAP]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4err("cannot read a VAL to a map");
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("usty[RMAP]: start new block map, set scalar as key");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
m_evt_handler->_push();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("usty[RMAP]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4err("cannot read a VAL to a map");
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
m_evt_handler->_push();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
}
|
|
else if(first == '|')
|
|
{
|
|
_c4err("block literal keys must be enclosed in '?'");
|
|
}
|
|
else if(first == '>')
|
|
{
|
|
_c4err("block literal keys must be enclosed in '?'");
|
|
}
|
|
else if(_scan_scalar_plain_unk(&sc))
|
|
{
|
|
_c4dbgp("usty[RMAP]: got a plain scalar");
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4err("cannot read a VAL to a map");
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("usty[RMAP]: start new block map, set scalar as key");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
m_evt_handler->_push();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
}
|
|
else // destination is unknown
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ));
|
|
_c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
|
|
if(first == '[')
|
|
{
|
|
_c4dbgp("usty[UNK]: it's a flow seq");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_seq_val_flow();
|
|
addrem_flags(RSEQ|FLOW|RVAL, RNXT|USTY);
|
|
_set_indentation(startindent);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '-' && _is_blck_token(rem))
|
|
{
|
|
_c4dbgp("usty[UNK]: it's a block seq");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_seq_val_block();
|
|
addrem_flags(RSEQ|BLCK|RVAL, RNXT|USTY);
|
|
_set_indentation(startindent);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '{')
|
|
{
|
|
_c4dbgp("usty[UNK]: it's a flow map");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_map_val_flow();
|
|
addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
|
|
_set_indentation(startindent);
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '?' && _is_blck_token(rem))
|
|
{
|
|
_c4dbgp("usty[UNK]: it's a map + this key is complex");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_map_val_block();
|
|
addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
|
|
m_was_inside_qmrk = true;
|
|
_save_indentation();
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == ':' && _is_blck_token(rem))
|
|
{
|
|
_c4dbgp("usty[UNK]: it's a map with an empty key");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->begin_map_val_block();
|
|
m_evt_handler->set_key_scalar_plain_empty();
|
|
addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
|
|
_save_indentation();
|
|
_line_progressed(1);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
else if(first == '&')
|
|
{
|
|
csubstr anchor = _scan_anchor();
|
|
_c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
|
const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
|
|
const size_t line = m_evt_handler->m_curr->pos.line;
|
|
_add_annotation(&m_pending_anchors, anchor, indentation, line);
|
|
_set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
|
|
}
|
|
else if(first == '*')
|
|
{
|
|
csubstr ref = _scan_ref_map();
|
|
_c4dbgpf("usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("usty[UNK]: set val ref");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
m_evt_handler->set_val_ref(ref);
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("usty[UNK]: start new block map, set ref as key");
|
|
const size_t startline = m_evt_handler->m_curr->pos.line; // save
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
m_evt_handler->set_key_ref(ref);
|
|
_maybe_skip_whitespace_tokens();
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
|
|
}
|
|
}
|
|
else if(first == '!')
|
|
{
|
|
csubstr tag = _scan_tag();
|
|
_c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
|
|
// we need to buffer the tags, as there may be two
|
|
// consecutive tags in here
|
|
const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
|
|
const size_t line = m_evt_handler->m_curr->pos.line;
|
|
_add_annotation(&m_pending_tags, tag, indentation, line);
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
|
|
startindent = m_evt_handler->m_curr->line_contents.indentation; // save
|
|
const size_t startline = m_evt_handler->m_curr->pos.line; // save
|
|
first = rem.str[0];
|
|
ScannedScalar sc;
|
|
_c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
|
|
if(first == '\'')
|
|
{
|
|
_c4dbgp("usty[UNK]: scanning single-quoted scalar");
|
|
sc = _scan_scalar_squot();
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("usty[UNK]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
|
|
m_evt_handler->set_val_scalar_squoted(maybe_filtered);
|
|
_end_stream();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("usty[UNK]: start new block map, set scalar as key");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
|
|
m_evt_handler->set_key_scalar_squoted(maybe_filtered);
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
}
|
|
else if(first == '"')
|
|
{
|
|
_c4dbgp("usty[UNK]: scanning double-quoted scalar");
|
|
sc = _scan_scalar_dquot();
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("usty[UNK]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
|
|
m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
|
|
_end_stream();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
|
|
m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
}
|
|
else if(first == '|')
|
|
{
|
|
_c4dbgp("usty[UNK]: scanning block-literal scalar");
|
|
ScannedBlock sb;
|
|
_scan_block(&sb, startindent);
|
|
_c4dbgp("usty[UNK]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
|
|
m_evt_handler->set_val_scalar_literal(maybe_filtered);
|
|
_end_stream();
|
|
}
|
|
else if(first == '>')
|
|
{
|
|
_c4dbgp("usty[UNK]: scanning block-folded scalar");
|
|
ScannedBlock sb;
|
|
_scan_block(&sb, startindent);
|
|
_c4dbgp("usty[UNK]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
|
|
m_evt_handler->set_val_scalar_folded(maybe_filtered);
|
|
_end_stream();
|
|
}
|
|
else if(_scan_scalar_plain_unk(&sc))
|
|
{
|
|
_c4dbgp("usty[UNK]: got a plain scalar");
|
|
if(!_maybe_scan_following_colon())
|
|
{
|
|
_c4dbgp("usty[UNK]: set as val");
|
|
_handle_annotations_before_blck_val_scalar();
|
|
csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
|
|
m_evt_handler->set_val_scalar_plain(maybe_filtered);
|
|
_end_stream();
|
|
}
|
|
else
|
|
{
|
|
_c4dbgp("usty[UNK]: start new block map, set scalar as key");
|
|
add_flags(RNXT);
|
|
_handle_annotations_before_start_mapblck(startline);
|
|
m_evt_handler->begin_map_val_block();
|
|
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
|
|
csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
|
|
m_evt_handler->set_key_scalar_plain(maybe_filtered);
|
|
_set_indentation(startindent);
|
|
addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
|
|
_maybe_skip_whitespace_tokens();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_c4err("parse error");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
|
|
m_file = filename;
|
|
m_buf = src;
|
|
_reset();
|
|
m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
|
|
m_evt_handler->begin_stream();
|
|
while( ! _finished_file())
|
|
{
|
|
_scan_line();
|
|
while( ! _finished_line())
|
|
{
|
|
_c4dbgnextline();
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
|
|
if(has_any(RSEQ))
|
|
{
|
|
_handle_seq_json();
|
|
}
|
|
else if(has_any(RMAP))
|
|
{
|
|
_handle_map_json();
|
|
}
|
|
else if(has_any(RUNK))
|
|
{
|
|
_handle_unk_json();
|
|
}
|
|
else
|
|
{
|
|
_c4err("internal error");
|
|
}
|
|
}
|
|
if(_finished_file())
|
|
break; // it may have finished because of multiline blocks
|
|
_line_ended();
|
|
}
|
|
_end_stream();
|
|
m_evt_handler->finish_parse();
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
template<class EventHandler>
|
|
void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
|
|
m_file = filename;
|
|
m_buf = src;
|
|
_reset();
|
|
m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
|
|
m_evt_handler->begin_stream();
|
|
while( ! _finished_file())
|
|
{
|
|
_scan_line();
|
|
while( ! _finished_line())
|
|
{
|
|
_c4dbgnextline();
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
|
|
if(has_any(FLOW))
|
|
{
|
|
if(has_none(RSEQIMAP))
|
|
{
|
|
if(has_any(RSEQ))
|
|
{
|
|
_handle_seq_flow();
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
|
|
_handle_map_flow();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_handle_seq_imap();
|
|
}
|
|
}
|
|
else if(has_any(BLCK))
|
|
{
|
|
if(has_any(RSEQ))
|
|
{
|
|
_handle_seq_block();
|
|
}
|
|
else
|
|
{
|
|
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
|
|
_handle_map_block();
|
|
}
|
|
}
|
|
else if(has_any(RUNK))
|
|
{
|
|
_handle_unk();
|
|
}
|
|
else if(has_any(USTY))
|
|
{
|
|
_handle_usty();
|
|
}
|
|
else
|
|
{
|
|
_c4err("internal error");
|
|
}
|
|
}
|
|
if(_finished_file())
|
|
break; // it may have finished because of multiline blocks
|
|
_line_ended();
|
|
}
|
|
_end_stream();
|
|
m_evt_handler->finish_parse();
|
|
}
|
|
/** @endcond */
|
|
|
|
} // namespace yml
|
|
} // namespace c4
|
|
|
|
// NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered)
|
|
|
|
#undef _c4dbgnextline
|
|
|
|
#if defined(_MSC_VER)
|
|
# pragma warning(pop)
|
|
#elif defined(__clang__)
|
|
# pragma clang diagnostic pop
|
|
#elif defined(__GNUC__)
|
|
# pragma GCC diagnostic pop
|
|
#endif
|
|
|
|
#endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
|