From 91e946d55bfea650390d488af8559f298b8d2864 Mon Sep 17 00:00:00 2001 From: mhoffm Date: Wed, 23 Jun 2021 00:34:26 +0200 Subject: [PATCH] wip heredoc --- src/scanner.cc | 51 ++++++++++++++++++++--------------------- test/corpus/strings.txt | 38 ------------------------------ 2 files changed, 25 insertions(+), 64 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 52ba60a..478c954 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -30,8 +30,7 @@ struct Context { ContextType type; // valid if type == HEREDOC_TEMPLATE - char* identifier; - size_t identifier_size; + string heredoc_identifier; }; struct Scanner { @@ -141,31 +140,27 @@ public: } } - // handle heredoc identifier - if (valid_symbols[HEREDOC_IDENTIFIER]) { - if (lexer->lookahead != 'E') { - if (valid_symbols[TEMPLATE_LITERAL_CHUNK]) { - return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK); + // handle heredoc context + if (valid_symbols[HEREDOC_IDENTIFIER] && !in_heredoc_context()) { + string identifier; + while (iswalnum(lexer->lookahead) || lexer->lookahead == '_' || lexer->lookahead == '-') { + identifier.push_back(lexer->lookahead); + advance(lexer); + } + context_stack.push_back({ .type = HEREDOC_TEMPLATE, .heredoc_identifier = identifier }); + return accept_and_advance(lexer, HEREDOC_IDENTIFIER); + } + if (valid_symbols[HEREDOC_IDENTIFIER] && in_heredoc_context()) { + string expected_identifier = context_stack.back().heredoc_identifier; + + for (string::iterator it = expected_identifier.begin(); it != expected_identifier.end(); ++it) { + if (lexer->lookahead == *it) { + advance(lexer); } else { - return false; - } - } - advance(lexer); - if (lexer->lookahead != 'O') { - if (valid_symbols[TEMPLATE_LITERAL_CHUNK]) { - return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK); - } else { - return false; - } - } - advance(lexer); - if (lexer->lookahead != 'F') { - if (valid_symbols[TEMPLATE_LITERAL_CHUNK]) { - return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK); - } else { - return false; + return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK); } } + context_stack.pop_back(); return accept_and_advance(lexer, HEREDOC_IDENTIFIER); } @@ -180,7 +175,6 @@ public: private: vector context_stack; - vector heredoc_identifier_stack; void advance(TSLexer* lexer) { lexer->advance(lexer, false); @@ -197,6 +191,7 @@ private: advance(lexer); return accept_inplace(lexer, token); } + bool consume_wxdigit(TSLexer* lexer) { advance(lexer); return iswxdigit(lexer->lookahead); @@ -213,8 +208,12 @@ private: return in_context_type(QUOTED_TEMPLATE); } + bool in_heredoc_context() { + return in_context_type(HEREDOC_TEMPLATE); + } + bool in_template_context() { - return in_context_type(QUOTED_TEMPLATE) || in_context_type(HEREDOC_TEMPLATE); + return in_quoted_context() || in_heredoc_context(); } bool in_interpolation_context() { diff --git a/test/corpus/strings.txt b/test/corpus/strings.txt index 40bbf3f..4cd71a5 100644 --- a/test/corpus/strings.txt +++ b/test/corpus/strings.txt @@ -1,22 +1,3 @@ -================================================================================ -bad escape sequence 2 -================================================================================ - -foo = "bar\uZZ" - --------------------------------------------------------------------------------- - -(config_file - (body - (attribute - (identifier) - (expression - (literal_value - (string_lit - (template_literal - (ERROR - (UNEXPECTED '\'))))))))) - ================================================================================ unescaped tab ================================================================================ @@ -34,25 +15,6 @@ foo = "foo bar" (string_lit (template_literal))))))) -================================================================================ -unescaped backslash -================================================================================ - -foo = "foo\bar" - --------------------------------------------------------------------------------- - -(config_file - (body - (attribute - (identifier) - (expression - (literal_value - (string_lit - (template_literal - (ERROR - (UNEXPECTED '\'))))))))) - ================================================================================ escaped backslash at end ================================================================================