From 5160a52f2daba2a3b290c88b843b7ce1c64f134f Mon Sep 17 00:00:00 2001
From: Amaan Qureshi <amaanq12@gmail.com>
Date: Sun, 18 Jun 2023 14:09:06 -0400
Subject: [PATCH] feat: rewrite the scanner in C

---
 dialects/terraform/src/scanner.c  | 436 ++++++++++++++++++++++++++++++
 dialects/terraform/src/scanner.cc | 336 -----------------------
 src/scanner.c                     | 436 ++++++++++++++++++++++++++++++
 src/scanner.cc                    | 336 -----------------------
 4 files changed, 872 insertions(+), 672 deletions(-)
 create mode 100644 dialects/terraform/src/scanner.c
 delete mode 100644 dialects/terraform/src/scanner.cc
 create mode 100644 src/scanner.c
 delete mode 100644 src/scanner.cc

diff --git a/dialects/terraform/src/scanner.c b/dialects/terraform/src/scanner.c
new file mode 100644
index 0000000..c9f0938
--- /dev/null
+++ b/dialects/terraform/src/scanner.c
@@ -0,0 +1,436 @@
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <tree_sitter/parser.h>
+#include <wctype.h>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))  // may evaluate an argument twice
+// Reallocates (vec).data to hold _cap elements; declares a local named `tmp`.
+#define VEC_RESIZE(vec, _cap)                                                  \
+    void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0]));           \
+    assert(tmp != NULL);                                                       \
+    (vec).data = tmp;                                                          \
+    (vec).cap = (_cap);
+// Appends el, growing capacity to MAX(16, 2 * len) when the vector is full.
+#define VEC_PUSH(vec, el)                                                      \
+    if ((vec).cap == (vec).len) {                                              \
+        VEC_RESIZE((vec), MAX(16, (vec).len * 2));                             \
+    }                                                                          \
+    (vec).data[(vec).len++] = (el);
+// Frees the last element's heredoc_identifier and drops it; no empty check.
+#define VEC_POP(vec)                                                           \
+    {                                                                          \
+        STRING_FREE(VEC_BACK((vec)).heredoc_identifier);                       \
+        (vec).len--;                                                           \
+    }
+// Last element of vec; does not check that the vector is non-empty.
+#define VEC_BACK(vec) ((vec).data[(vec).len - 1])
+// Frees the vector's storage only; element strings are not freed here.
+#define VEC_FREE(vec)                                                          \
+    {                                                                          \
+        if ((vec).data != NULL)                                                \
+            free((vec).data);                                                  \
+    }
+// Frees every element's heredoc_identifier and resets len to 0; cap is kept.
+#define VEC_CLEAR(vec)                                                         \
+    {                                                                          \
+        for (int i = 0; i < (vec).len; i++) {                                  \
+            STRING_FREE((vec).data[i].heredoc_identifier);                     \
+        }                                                                      \
+        (vec).len = 0;                                                         \
+    }
+// Reallocates to _cap + 1 elements and zeroes everything from index len onward.
+#define STRING_RESIZE(vec, _cap)                                               \
+    void *tmp = realloc((vec).data, (_cap + 1) * sizeof((vec).data[0]));       \
+    assert(tmp != NULL);                                                       \
+    (vec).data = tmp;                                                          \
+    memset((vec).data + (vec).len, 0,                                          \
+           ((_cap + 1) - (vec).len) * sizeof((vec).data[0]));                  \
+    (vec).cap = (_cap);
+// Ensures capacity is at least _cap; does nothing when it already is.
+#define STRING_GROW(vec, _cap)                                                 \
+    if ((vec).cap < (_cap)) {                                                  \
+        STRING_RESIZE((vec), (_cap));                                          \
+    }
+// Appends el, growing capacity to MAX(16, 2 * len) when the string is full.
+#define STRING_PUSH(vec, el)                                                   \
+    if ((vec).cap == (vec).len) {                                              \
+        STRING_RESIZE((vec), MAX(16, (vec).len * 2));                          \
+    }                                                                          \
+    (vec).data[(vec).len++] = (el);
+// Frees the string's buffer when non-NULL; the pointer itself is not reset.
+#define STRING_FREE(vec)                                                       \
+    {                                                                          \
+        if ((vec).data != NULL)                                                \
+            free((vec).data);                                                  \
+    }
+// Token kinds this scanner can emit; scan() indexes valid_symbols with them.
+enum TokenType {
+    QUOTED_TEMPLATE_START,
+    QUOTED_TEMPLATE_END,
+    TEMPLATE_LITERAL_CHUNK,
+    TEMPLATE_INTERPOLATION_START,
+    TEMPLATE_INTERPOLATION_END,
+    TEMPLATE_DIRECTIVE_START,
+    TEMPLATE_DIRECTIVE_END,
+    HEREDOC_IDENTIFIER,
+};
+// Kind of construct that a Context entry on the scanner's stack represents.
+enum ContextType {
+    TEMPLATE_INTERPOLATION,
+    TEMPLATE_DIRECTIVE,
+    QUOTED_TEMPLATE,
+    HEREDOC_TEMPLATE,
+};
+// Growable char buffer: len chars used out of cap (allocations hold cap + 1).
+typedef struct {
+    uint32_t cap;
+    uint32_t len;
+    char *data;
+} String;
+// Returns an empty String with capacity 16 backed by a zeroed 17-byte buffer.
+String string_new() {
+    String fresh;
+    fresh.cap = 16;
+    fresh.len = 0;
+    fresh.data = calloc(1, sizeof(char) * 17);  // cap + 1 bytes, all zero
+    return fresh;
+}
+// One entry of the scanner's context stack.
+typedef struct {
+    enum ContextType type;
+
+    // meaningful only if type == HEREDOC_TEMPLATE (allocated for every type)
+    String heredoc_identifier;
+} Context;
+// Creates a context of `type` whose heredoc identifier is a copy of `data`.
+Context context_new(enum ContextType type, const char *data) {
+    Context ctx = {.type = type, .heredoc_identifier = string_new()};
+    uint32_t data_len = (uint32_t)strlen(data);
+    // string_new() reserves only 16 chars: grow before copying so a longer
+    // `data` cannot overrun the buffer, and keep cap equal to what is allocated.
+    STRING_GROW(ctx.heredoc_identifier, data_len);
+    memcpy(ctx.heredoc_identifier.data, data, data_len);
+    ctx.heredoc_identifier.len = data_len;
+    return ctx;
+}
+// Growable array of Context, used as a stack through VEC_PUSH / VEC_POP.
+typedef struct {
+    uint32_t len;
+    uint32_t cap;
+    Context *data;
+} context_vec;
+// Scanner state: the context stack that scan() pushes to and pops from.
+typedef struct {
+    context_vec context_stack;
+} Scanner;
+// Moves past the lookahead character, calling lexer->advance with skip = false.
+static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
+// Moves past the lookahead character, calling lexer->advance with skip = true.
+static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
+// Encodes the context stack into buf; returns bytes written, 0 if it won't fit.
+static unsigned serialize(Scanner *scanner, char *buf) {
+    const context_vec *stack = &scanner->context_stack;
+    // The entry count and each identifier length are stored in a single char.
+    if (stack->len > CHAR_MAX) {
+        return 0;
+    }
+
+    unsigned written = 0;
+    buf[written++] = (char)stack->len;
+    for (uint32_t idx = 0; idx < stack->len; idx++) {
+        const String *ident = &stack->data[idx].heredoc_identifier;
+        // Entry layout: type byte, length byte, then the identifier bytes.
+        if (written + 2 + ident->len >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
+            return 0;
+        }
+        if (ident->len > CHAR_MAX) {
+            return 0;
+        }
+        buf[written++] = stack->data[idx].type;
+        buf[written++] = (char)ident->len;
+        memcpy(&buf[written], ident->data, ident->len);
+        written += ident->len;
+    }
+    return written;
+}
+// Rebuilds the context stack from a buffer produced by serialize().
+static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
+    // Clear first: an empty buffer means "no contexts", not "keep old state".
+    VEC_CLEAR(scanner->context_stack);
+    if (length == 0) {
+        return;
+    }
+    unsigned size = 0;
+    uint8_t context_stack_size = buffer[size++];
+    for (uint32_t j = 0; j < context_stack_size; j++) {
+        Context ctx = {
+            .type = (enum ContextType)buffer[size++],
+            .heredoc_identifier = string_new(),
+        };
+        uint8_t heredoc_identifier_size = buffer[size++];
+        STRING_GROW(ctx.heredoc_identifier, heredoc_identifier_size);
+        memcpy(ctx.heredoc_identifier.data, buffer + size,
+               heredoc_identifier_size);
+        ctx.heredoc_identifier.len = heredoc_identifier_size;
+        size += heredoc_identifier_size;
+        VEC_PUSH(scanner->context_stack, ctx);
+    }
+    assert(size == length);
+}
+// Records token as the scan result without consuming input; always true.
+static inline bool accept_inplace(TSLexer *lexer, enum TokenType token) {
+    lexer->result_symbol = token;
+    return true;
+}
+// Advances past the lookahead character, then records token; always true.
+static inline bool accept_and_advance(TSLexer *lexer, enum TokenType token) {
+    advance(lexer);
+    return accept_inplace(lexer, token);
+}
+// Advances first, then tests whether the NEW lookahead is a hex digit.
+static inline bool consume_wxdigit(TSLexer *lexer) {
+    advance(lexer);
+    return iswxdigit(lexer->lookahead);
+}
+// Skips whitespace and one '#' comment; true only if a newline ends the comment.
+static inline bool skip_comment(TSLexer *lexer) {
+    // Skip any leading whitespace.
+    for (; iswspace(lexer->lookahead); skip(lexer)) {
+    }
+    if (lexer->lookahead != '#') {
+        return false;
+    }
+    // Skip the '#', then everything up to (not including) the newline.
+    for (skip(lexer); lexer->lookahead != '\n';) {
+        skip(lexer);
+        if (lexer->eof(lexer)) {
+            return false;
+        }
+    }
+    return true;
+}
+// True when the stack is non-empty and its top entry has the given type.
+static inline bool in_context_type(Scanner *scanner, enum ContextType type) {
+    const context_vec *stack = &scanner->context_stack;
+    bool has_top = stack->len > 0;
+    // Short-circuit: data[len - 1] is only read when the stack is non-empty.
+    return has_top && stack->data[stack->len - 1].type == type;
+}
+// True when the top of the context stack is a QUOTED_TEMPLATE.
+static inline bool in_quoted_context(Scanner *scanner) {
+    return in_context_type(scanner, QUOTED_TEMPLATE);
+}
+// True when the top of the context stack is a HEREDOC_TEMPLATE.
+static inline bool in_heredoc_context(Scanner *scanner) {
+    return in_context_type(scanner, HEREDOC_TEMPLATE);
+}
+// True when the top of the context stack is a quoted or heredoc template.
+static inline bool in_template_context(Scanner *scanner) {
+    return in_quoted_context(scanner) || in_heredoc_context(scanner);
+}
+// True when the top of the context stack is a TEMPLATE_INTERPOLATION.
+static inline bool in_interpolation_context(Scanner *scanner) {
+    return in_context_type(scanner, TEMPLATE_INTERPOLATION);
+}
+// True when the top of the context stack is a TEMPLATE_DIRECTIVE.
+static inline bool in_directive_context(Scanner *scanner) {
+    return in_context_type(scanner, TEMPLATE_DIRECTIVE);
+}
+// Scans one external token; true means lexer->result_symbol has been set.
+static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
+    bool has_leading_whitespace_with_newline = false;
+    while (iswspace(lexer->lookahead)) {
+        if (lexer->lookahead == '\n') {
+            has_leading_whitespace_with_newline = true;
+        }
+        skip(lexer);
+    }
+    if (lexer->lookahead == '\0') {
+        return false;
+    }
+    // manage quoted context: '"' pushes QUOTED_TEMPLATE, or pops it when on top
+    if (valid_symbols[QUOTED_TEMPLATE_START] && !in_quoted_context(scanner) &&
+        lexer->lookahead == '"') {
+        Context ctx = context_new(QUOTED_TEMPLATE, "");
+        VEC_PUSH(scanner->context_stack, ctx);
+        return accept_and_advance(lexer, QUOTED_TEMPLATE_START);
+    }
+    if (valid_symbols[QUOTED_TEMPLATE_END] && in_quoted_context(scanner) &&
+        lexer->lookahead == '"') {
+        VEC_POP(scanner->context_stack);
+        return accept_and_advance(lexer, QUOTED_TEMPLATE_END);
+    }
+
+    // manage template interpolations: "${" pushes a context, '}' pops it
+    if (valid_symbols[TEMPLATE_INTERPOLATION_START] &&
+        valid_symbols[TEMPLATE_LITERAL_CHUNK] &&
+        !in_interpolation_context(scanner) && lexer->lookahead == '$') {
+        advance(lexer);
+        if (lexer->lookahead == '{') {
+            Context ctx = context_new(TEMPLATE_INTERPOLATION, "");
+            VEC_PUSH(scanner->context_stack, ctx);
+            return accept_and_advance(lexer, TEMPLATE_INTERPOLATION_START);
+        }
+        // try to scan escape sequence ("$${" is emitted as a literal chunk)
+        if (lexer->lookahead == '$') {
+            advance(lexer);
+            if (lexer->lookahead == '{') {
+                // $${
+                return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
+            }
+        }
+        return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
+    }
+    if (valid_symbols[TEMPLATE_INTERPOLATION_END] &&
+        in_interpolation_context(scanner) && lexer->lookahead == '}') {
+        VEC_POP(scanner->context_stack);
+        return accept_and_advance(lexer, TEMPLATE_INTERPOLATION_END);
+    }
+
+    // manage template directives: "%{" pushes a context, '}' pops it
+    if (valid_symbols[TEMPLATE_DIRECTIVE_START] &&
+        valid_symbols[TEMPLATE_LITERAL_CHUNK] &&
+        !in_directive_context(scanner) && lexer->lookahead == '%') {
+        advance(lexer);
+        if (lexer->lookahead == '{') {
+            Context ctx = context_new(TEMPLATE_DIRECTIVE, "");
+            VEC_PUSH(scanner->context_stack, ctx);
+            return accept_and_advance(lexer, TEMPLATE_DIRECTIVE_START);
+        }
+        // try to scan escape sequence ("%%{" is emitted as a literal chunk)
+        if (lexer->lookahead == '%') {
+            advance(lexer);
+            if (lexer->lookahead == '{') {
+                // %%{
+                return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
+            }
+        }
+        return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
+    }
+    if (valid_symbols[TEMPLATE_DIRECTIVE_END] &&
+        in_directive_context(scanner) && lexer->lookahead == '}') {
+        VEC_POP(scanner->context_stack);
+        return accept_and_advance(lexer, TEMPLATE_DIRECTIVE_END);
+    }
+
+    // manage heredoc context: read the opening identifier, later match it
+    if (valid_symbols[HEREDOC_IDENTIFIER] && !in_heredoc_context(scanner)) {
+        String identifier = string_new();
+        // TODO: check that this is a valid identifier
+        while (iswalnum(lexer->lookahead) || lexer->lookahead == '_' ||
+               lexer->lookahead == '-') {
+            STRING_PUSH(identifier, lexer->lookahead);
+            advance(lexer);
+        }
+        Context ctx = {HEREDOC_TEMPLATE, identifier};
+        VEC_PUSH(scanner->context_stack, ctx);
+        return accept_inplace(lexer, HEREDOC_IDENTIFIER);
+    }
+    if (valid_symbols[HEREDOC_IDENTIFIER] && in_heredoc_context(scanner) &&
+        has_leading_whitespace_with_newline) {
+        String expected_identifier =
+            VEC_BACK(scanner->context_stack).heredoc_identifier;
+
+        for (size_t i = 0; i < expected_identifier.len; i++) {
+            if (lexer->lookahead == expected_identifier.data[i]) {
+                advance(lexer);
+            } else {
+                return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
+            }
+        }
+        // check if the identifier is on a line of its own
+        lexer->mark_end(lexer);
+        while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
+            advance(lexer);
+        }
+        if (lexer->lookahead == '\n') {
+            VEC_POP(scanner->context_stack);
+            return accept_inplace(lexer, HEREDOC_IDENTIFIER);
+        }
+        advance(lexer);
+        lexer->mark_end(lexer);
+        return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
+    }
+    // manage template literal chunks
+
+    // handle template literal chunks in quoted contexts
+    //
+    // they may not contain newlines and may contain escape sequences
+    if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && in_quoted_context(scanner)) {
+        switch (lexer->lookahead) {
+            case '\\':
+                advance(lexer);
+                switch (lexer->lookahead) {
+                    case '"':
+                    case 'n':
+                    case 'r':
+                    case 't':
+                    case '\\':
+                        return accept_and_advance(lexer,
+                                                  TEMPLATE_LITERAL_CHUNK);
+                    case 'u':
+                        for (int i = 0; i < 4; i++) {
+                            if (!consume_wxdigit(lexer)) {
+                                return false;
+                            }
+                        }
+                        return accept_and_advance(lexer,
+                                                  TEMPLATE_LITERAL_CHUNK);
+                    case 'U':
+                        for (int i = 0; i < 8; i++) {
+                            if (!consume_wxdigit(lexer)) {
+                                return false;
+                            }
+                        }
+                        return accept_and_advance(lexer,
+                                                  TEMPLATE_LITERAL_CHUNK);
+                    default:
+                        return false;
+                }
+        }
+    }
+
+    // handle all other quoted template or string literal characters
+    if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && in_template_context(scanner)) {
+        return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
+    }
+
+    // probably not handled by the external scanner
+    return false;
+}
+// Allocates the scanner state. NOTE(review): neither calloc result is checked.
+void *tree_sitter_terraform_external_scanner_create() {
+    Scanner *scanner = calloc(1, sizeof(Scanner));
+    scanner->context_stack.data = calloc(1, sizeof(Context));
+    return scanner;
+}
+// Public serialize entry point; delegates to the static serialize().
+unsigned tree_sitter_terraform_external_scanner_serialize(void *payload,
+                                                          char *buffer) {
+    // payload is treated as a Scanner *.
+    return serialize((Scanner *)payload, buffer);
+}
+// Public deserialize entry point; delegates to the static deserialize().
+void tree_sitter_terraform_external_scanner_deserialize(void *payload,
+                                                        const char *buffer,
+                                                        unsigned length) {
+    // payload is treated as a Scanner *.
+    deserialize((Scanner *)payload, buffer, length);
+}
+// Public scan entry point; delegates to the static scan().
+bool tree_sitter_terraform_external_scanner_scan(void *payload, TSLexer *lexer,
+                                                 const bool *valid_symbols) {
+    // payload is treated as a Scanner *.
+    return scan((Scanner *)payload, lexer, valid_symbols);
+}
+// Frees everything the scanner owns: identifiers, stack storage, the scanner.
+void tree_sitter_terraform_external_scanner_destroy(void *payload) {
+    Scanner *state = (Scanner *)payload;
+    // VEC_CLEAR frees each remaining heredoc identifier (and zeroes len);
+    // VEC_FREE then releases the array that held the contexts.
+    VEC_CLEAR(state->context_stack);
+    VEC_FREE(state->context_stack);
+    free(state);
+}
diff --git a/dialects/terraform/src/scanner.cc b/dialects/terraform/src/scanner.cc
deleted file mode 100644
index 18c3892..0000000
--- a/dialects/terraform/src/scanner.cc
+++ /dev/null
@@ -1,336 +0,0 @@
-#include <tree_sitter/parser.h>
-
-#include <assert.h>
-#include <climits>
-#include <string>
-#include <vector>
-#include <wctype.h>
-
-namespace {
-
-using std::string;
-using std::vector;
-
-enum TokenType {
-  QUOTED_TEMPLATE_START,
-  QUOTED_TEMPLATE_END,
-  TEMPLATE_LITERAL_CHUNK,
-  TEMPLATE_INTERPOLATION_START,
-  TEMPLATE_INTERPOLATION_END,
-  TEMPLATE_DIRECTIVE_START,
-  TEMPLATE_DIRECTIVE_END,
-  HEREDOC_IDENTIFIER,
-};
-
-enum ContextType {
-  TEMPLATE_INTERPOLATION,
-  TEMPLATE_DIRECTIVE,
-  QUOTED_TEMPLATE,
-  HEREDOC_TEMPLATE,
-};
-
-struct Context {
-  ContextType type;
-
-  // valid if type == HEREDOC_TEMPLATE
-  string heredoc_identifier;
-};
-
-struct Scanner {
-
-public:
-  unsigned serialize(char *buf) {
-    unsigned size = 0;
-
-    if (context_stack.size() > CHAR_MAX) {
-      return 0;
-    }
-
-    buf[size++] = context_stack.size();
-    for (vector<Context>::iterator it = context_stack.begin();
-         it != context_stack.end(); ++it) {
-      if (size + 2 + it->heredoc_identifier.size() >=
-          TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
-        return 0;
-      }
-      if (it->heredoc_identifier.size() > CHAR_MAX) {
-        return 0;
-      }
-      buf[size++] = it->type;
-      buf[size++] = it->heredoc_identifier.size();
-      it->heredoc_identifier.copy(&buf[size], it->heredoc_identifier.size());
-      size += it->heredoc_identifier.size();
-    }
-    return size;
-  }
-
-  void deserialize(const char *buf, unsigned n) {
-    context_stack.clear();
-
-    if (n == 0) {
-      return;
-    }
-
-    unsigned size = 0;
-    uint8_t context_stack_size = buf[size++];
-    for (unsigned j = 0; j < context_stack_size; j++) {
-      Context ctx;
-      ctx.type = static_cast<ContextType>(buf[size++]);
-      uint8_t heredoc_identifier_size = buf[size++];
-      ctx.heredoc_identifier.assign(buf + size,
-                                    buf + size + heredoc_identifier_size);
-      size += heredoc_identifier_size;
-      context_stack.push_back(ctx);
-    }
-    assert(size == n);
-  }
-
-  bool scan(TSLexer *lexer, const bool *valid_symbols) {
-    bool has_leading_whitespace_with_newline = false;
-    while (iswspace(lexer->lookahead)) {
-      if (lexer->lookahead == '\n') {
-        has_leading_whitespace_with_newline = true;
-      }
-      skip(lexer);
-    }
-    if (lexer->lookahead == '\0') {
-      return false;
-    }
-    // manage quoted context
-    if (valid_symbols[QUOTED_TEMPLATE_START] && !in_quoted_context() &&
-        lexer->lookahead == '"') {
-      Context ctx = {QUOTED_TEMPLATE, ""};
-      context_stack.push_back(ctx);
-      return accept_and_advance(lexer, QUOTED_TEMPLATE_START);
-    }
-    if (valid_symbols[QUOTED_TEMPLATE_END] && in_quoted_context() &&
-        lexer->lookahead == '"') {
-      context_stack.pop_back();
-      return accept_and_advance(lexer, QUOTED_TEMPLATE_END);
-    }
-
-    // manage template interpolations
-    if (valid_symbols[TEMPLATE_INTERPOLATION_START] &&
-        valid_symbols[TEMPLATE_LITERAL_CHUNK] && !in_interpolation_context() &&
-        lexer->lookahead == '$') {
-      advance(lexer);
-      if (lexer->lookahead == '{') {
-        Context ctx = {TEMPLATE_INTERPOLATION, ""};
-        context_stack.push_back(ctx);
-        return accept_and_advance(lexer, TEMPLATE_INTERPOLATION_START);
-      }
-      // try to scan escape sequence
-      if (lexer->lookahead == '$') {
-        advance(lexer);
-        if (lexer->lookahead == '{') {
-          // $${
-          return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-        }
-      }
-      return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
-    }
-    if (valid_symbols[TEMPLATE_INTERPOLATION_END] &&
-        in_interpolation_context() && lexer->lookahead == '}') {
-      context_stack.pop_back();
-      return accept_and_advance(lexer, TEMPLATE_INTERPOLATION_END);
-    }
-
-    // manage template directives
-    if (valid_symbols[TEMPLATE_DIRECTIVE_START] &&
-        valid_symbols[TEMPLATE_LITERAL_CHUNK] && !in_directive_context() &&
-        lexer->lookahead == '%') {
-      advance(lexer);
-      if (lexer->lookahead == '{') {
-        Context ctx = {TEMPLATE_DIRECTIVE, ""};
-        context_stack.push_back(ctx);
-        return accept_and_advance(lexer, TEMPLATE_DIRECTIVE_START);
-      }
-      // try to scan escape sequence
-      if (lexer->lookahead == '%') {
-        advance(lexer);
-        if (lexer->lookahead == '{') {
-          // $${
-          return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-        }
-      }
-      return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
-    }
-    if (valid_symbols[TEMPLATE_DIRECTIVE_END] && in_directive_context() &&
-        lexer->lookahead == '}') {
-      context_stack.pop_back();
-      return accept_and_advance(lexer, TEMPLATE_DIRECTIVE_END);
-    }
-
-    // manage heredoc context
-    if (valid_symbols[HEREDOC_IDENTIFIER] && !in_heredoc_context()) {
-      string identifier;
-      // TODO: check that this is a valid identifier
-      while (iswalnum(lexer->lookahead) || lexer->lookahead == '_' ||
-             lexer->lookahead == '-') {
-        identifier.push_back(lexer->lookahead);
-        advance(lexer);
-      }
-      Context ctx = {HEREDOC_TEMPLATE, identifier};
-      context_stack.push_back(ctx);
-      return accept_inplace(lexer, HEREDOC_IDENTIFIER);
-    }
-    if (valid_symbols[HEREDOC_IDENTIFIER] && in_heredoc_context() &&
-        has_leading_whitespace_with_newline) {
-      string expected_identifier = context_stack.back().heredoc_identifier;
-
-      for (string::iterator it = expected_identifier.begin();
-           it != expected_identifier.end(); ++it) {
-        if (lexer->lookahead == *it) {
-          advance(lexer);
-        } else {
-          return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
-        }
-      }
-      // check if the identifier is on a line of its own
-      lexer->mark_end(lexer);
-      while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
-        advance(lexer);
-      }
-      if (lexer->lookahead == '\n') {
-        context_stack.pop_back();
-        return accept_inplace(lexer, HEREDOC_IDENTIFIER);
-      } else {
-        advance(lexer);
-        lexer->mark_end(lexer);
-        return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
-      }
-    }
-    // manage template literal chunks
-
-    // handle template literal chunks in quoted contexts
-    //
-    // they may not contain newlines and may contain escape sequences
-    if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && in_quoted_context()) {
-      switch (lexer->lookahead) {
-      case '\\':
-        advance(lexer);
-        switch (lexer->lookahead) {
-        case '"':
-        case 'n':
-        case 'r':
-        case 't':
-        case '\\':
-          return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-        case 'u':
-          for (int i = 0; i < 4; i++) {
-            if (!consume_wxdigit(lexer))
-              return false;
-          }
-          return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-        case 'U':
-          for (int i = 0; i < 8; i++) {
-            if (!consume_wxdigit(lexer))
-              return false;
-          }
-          return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-        default:
-          return false;
-        }
-      }
-    }
-
-    // handle all other quoted template or string literal characters
-    if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && in_template_context()) {
-      return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-    }
-
-    // probably not handled by the external scanner
-    return false;
-  }
-
-private:
-  vector<Context> context_stack;
-
-  void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
-
-  void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
-
-  bool accept_inplace(TSLexer *lexer, TokenType token) {
-    lexer->result_symbol = token;
-    return true;
-  }
-
-  bool accept_and_advance(TSLexer *lexer, TokenType token) {
-    advance(lexer);
-    return accept_inplace(lexer, token);
-  }
-
-  bool consume_wxdigit(TSLexer *lexer) {
-    advance(lexer);
-    return iswxdigit(lexer->lookahead);
-  }
-
-  bool skip_comment(TSLexer* lexer) {
-    while (iswspace(lexer->lookahead)) {
-      skip(lexer);
-    }
-    if (lexer->lookahead != '#') {
-      return false;
-    }
-    skip(lexer);
-    while (lexer->lookahead != '\n') {
-      skip(lexer);
-      if (lexer->eof(lexer)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  bool in_context_type(ContextType type) {
-    if (context_stack.empty()) {
-      return false;
-    }
-    return context_stack.back().type == type;
-  }
-
-  bool in_quoted_context() { return in_context_type(QUOTED_TEMPLATE); }
-
-  bool in_heredoc_context() { return in_context_type(HEREDOC_TEMPLATE); }
-
-  bool in_template_context() {
-    return in_quoted_context() || in_heredoc_context();
-  }
-
-  bool in_interpolation_context() {
-    return in_context_type(TEMPLATE_INTERPOLATION);
-  }
-
-  bool in_directive_context() { return in_context_type(TEMPLATE_DIRECTIVE); }
-};
-
-} // namespace
-
-extern "C" {
-
-// tree sitter callbacks
-void *tree_sitter_terraform_external_scanner_create() { return new Scanner(); }
-
-void tree_sitter_terraform_external_scanner_destroy(void *p) {
-  Scanner *scanner = static_cast<Scanner *>(p);
-  delete scanner;
-}
-
-unsigned tree_sitter_terraform_external_scanner_serialize(void *p, char *b) {
-  Scanner *scanner = static_cast<Scanner *>(p);
-  return scanner->serialize(b);
-}
-
-void tree_sitter_terraform_external_scanner_deserialize(void *p, const char *b,
-                                                  unsigned n) {
-  Scanner *scanner = static_cast<Scanner *>(p);
-  return scanner->deserialize(b, n);
-}
-
-bool tree_sitter_terraform_external_scanner_scan(void *p, TSLexer *lexer,
-                                           const bool *valid_symbols) {
-  Scanner *scanner = static_cast<Scanner *>(p);
-  return scanner->scan(lexer, valid_symbols);
-}
-
-} // extern "C"
diff --git a/src/scanner.c b/src/scanner.c
new file mode 100644
index 0000000..06821d3
--- /dev/null
+++ b/src/scanner.c
@@ -0,0 +1,436 @@
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <tree_sitter/parser.h>
+#include <wctype.h>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b)) // NOTE: the larger argument is evaluated twice
+// Reallocates (vec).data to _cap elements (asserts on failure); declares `tmp`.
+#define VEC_RESIZE(vec, _cap)                                                  \
+    void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0]));           \
+    assert(tmp != NULL);                                                       \
+    (vec).data = tmp;                                                          \
+    (vec).cap = (_cap);
+// Appends el, first growing the buffer to MAX(16, 2 * len) slots when it is full.
+#define VEC_PUSH(vec, el)                                                      \
+    if ((vec).cap == (vec).len) {                                              \
+        VEC_RESIZE((vec), MAX(16, (vec).len * 2));                             \
+    }                                                                          \
+    (vec).data[(vec).len++] = (el);
+// Frees the top context's heredoc_identifier and drops it; vec must be non-empty.
+#define VEC_POP(vec)                                                           \
+    {                                                                          \
+        STRING_FREE(VEC_BACK((vec)).heredoc_identifier);                       \
+        (vec).len--;                                                           \
+    }
+// Last element of vec; only meaningful while (vec).len > 0.
+#define VEC_BACK(vec) ((vec).data[(vec).len - 1])
+// Frees the element buffer only; strings owned by the elements are not freed here.
+#define VEC_FREE(vec)                                                          \
+    {                                                                          \
+        if ((vec).data != NULL)                                                \
+            free((vec).data);                                                  \
+    }
+// Frees every element's heredoc_identifier and resets len to 0; cap is unchanged.
+#define VEC_CLEAR(vec)                                                         \
+    {                                                                          \
+        for (int i = 0; i < (vec).len; i++) {                                  \
+            STRING_FREE((vec).data[i].heredoc_identifier);                     \
+        }                                                                      \
+        (vec).len = 0;                                                         \
+    }
+// Reallocates to _cap + 1 bytes and zero-fills from len onward; declares `tmp`.
+#define STRING_RESIZE(vec, _cap)                                               \
+    void *tmp = realloc((vec).data, (_cap + 1) * sizeof((vec).data[0]));       \
+    assert(tmp != NULL);                                                       \
+    (vec).data = tmp;                                                          \
+    memset((vec).data + (vec).len, 0,                                          \
+           ((_cap + 1) - (vec).len) * sizeof((vec).data[0]));                  \
+    (vec).cap = (_cap);
+// Ensures the string has capacity for at least _cap chars.
+#define STRING_GROW(vec, _cap)                                                 \
+    if ((vec).cap < (_cap)) {                                                  \
+        STRING_RESIZE((vec), (_cap));                                          \
+    }
+// Appends one char, first growing the buffer to MAX(16, 2 * len) when it is full.
+#define STRING_PUSH(vec, el)                                                   \
+    if ((vec).cap == (vec).len) {                                              \
+        STRING_RESIZE((vec), MAX(16, (vec).len * 2));                          \
+    }                                                                          \
+    (vec).data[(vec).len++] = (el);
+// Frees the character buffer if one is allocated; the pointer is not reset.
+#define STRING_FREE(vec)                                                       \
+    {                                                                          \
+        if ((vec).data != NULL)                                                \
+            free((vec).data);                                                  \
+    }
+// Tokens produced by scan(); the values also index the valid_symbols array.
+enum TokenType {
+    QUOTED_TEMPLATE_START,        // opening '"'
+    QUOTED_TEMPLATE_END,          // closing '"'
+    TEMPLATE_LITERAL_CHUNK,       // literal template text or an escape sequence
+    TEMPLATE_INTERPOLATION_START, // "${"
+    TEMPLATE_INTERPOLATION_END,   // '}' closing an interpolation
+    TEMPLATE_DIRECTIVE_START,     // "%{"
+    TEMPLATE_DIRECTIVE_END,       // '}' closing a directive
+    HEREDOC_IDENTIFIER,           // heredoc delimiter, opening or closing
+};
+// Kind of template construct that a Context stack entry stands for.
+enum ContextType {
+    TEMPLATE_INTERPOLATION, // pushed on "${", popped on '}'
+    TEMPLATE_DIRECTIVE,     // pushed on "%{", popped on '}'
+    QUOTED_TEMPLATE,        // pushed on an opening '"', popped on the closing '"'
+    HEREDOC_TEMPLATE,       // pushed with the heredoc identifier, popped when it recurs
+};
+// Growable character buffer; in this file it stores heredoc identifiers.
+typedef struct {
+    uint32_t cap; // capacity in chars (STRING_RESIZE allocates cap + 1 bytes)
+    uint32_t len; // number of chars currently stored
+    char *data;   // heap buffer, released with STRING_FREE
+} String;
+
+// Returns an empty String with room for 16 chars plus a terminating NUL.
+static String string_new(void) {
+    String str = {.cap = 16, .len = 0, .data = calloc(17, sizeof(char))};
+    // Treat OOM as fatal, matching the realloc checks in the macros above.
+    assert(str.data != NULL);
+    return str;
+}
+// One entry of the scanner's context stack.
+typedef struct {
+    enum ContextType type; // which construct this entry was pushed for
+
+    // allocated for every entry, but only meaningful if type == HEREDOC_TEMPLATE
+    String heredoc_identifier;
+} Context;
+
+// Builds a context of `type` whose heredoc identifier is a copy of `data`.
+static Context context_new(enum ContextType type, const char *data) {
+    Context ctx = {.type = type, .heredoc_identifier = string_new()};
+    uint32_t id_len = (uint32_t)strlen(data);
+    // string_new() reserves only 16 chars; grow first so memcpy cannot overflow.
+    STRING_GROW(ctx.heredoc_identifier, id_len);
+    memcpy(ctx.heredoc_identifier.data, data, id_len);
+    ctx.heredoc_identifier.len = id_len;
+    return ctx;
+}
+// Growable array of Context values, used as a stack through the VEC_* macros.
+typedef struct {
+    uint32_t len;  // number of live contexts
+    uint32_t cap;  // number of allocated slots in data
+    Context *data; // heap buffer, released with VEC_FREE
+} context_vec;
+// Scanner state: the stack of template contexts that are currently open.
+typedef struct {
+    context_vec context_stack;
+} Scanner;
+// Consumes the lookahead character as part of the current token (skip = false).
+static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
+// Consumes the lookahead character with skip = true (tree-sitter: whitespace).
+static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
+
+// Encodes the context stack into `buf` as a count byte followed by, for each
+// context, {type, identifier length, identifier bytes}. Returns the number of
+// bytes written, or 0 if the stack cannot be represented.
+static unsigned serialize(Scanner *scanner, char *buf) {
+    const context_vec *stack = &scanner->context_stack;
+    // The context count is stored in a single char.
+    if (stack->len > CHAR_MAX) {
+        return 0;
+    }
+    unsigned written = 0;
+    buf[written++] = (char)stack->len;
+    for (uint32_t idx = 0; idx < stack->len; idx++) {
+        const String *id = &stack->data[idx].heredoc_identifier;
+        // Bail out when the entry overflows the buffer or its length byte.
+        if (written + 2 + id->len >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE ||
+            id->len > CHAR_MAX) {
+            return 0;
+        }
+        buf[written++] = stack->data[idx].type;
+        buf[written++] = (char)id->len;
+        memcpy(&buf[written], id->data, id->len);
+        written += id->len;
+    }
+    return written;
+}
+
+// Restores the context stack from a buffer produced by serialize(); an empty
+// buffer (length 0) restores the initial state with no open contexts.
+static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
+    // Reset first: an empty buffer must still discard any previous contexts.
+    VEC_CLEAR(scanner->context_stack);
+    if (length == 0) {
+        return;
+    }
+    unsigned size = 0;
+    uint8_t context_stack_size = buffer[size++];
+    for (uint32_t j = 0; j < context_stack_size; j++) {
+        Context ctx = {.type = (enum ContextType)buffer[size++],
+                       .heredoc_identifier = string_new()};
+        uint8_t heredoc_identifier_size = buffer[size++];
+        STRING_GROW(ctx.heredoc_identifier, heredoc_identifier_size);
+        memcpy(ctx.heredoc_identifier.data, buffer + size,
+               heredoc_identifier_size);
+        ctx.heredoc_identifier.len = heredoc_identifier_size;
+        size += heredoc_identifier_size;
+        VEC_PUSH(scanner->context_stack, ctx);
+    }
+    assert(size == length);
+}
+// Reports `token` as the scan result without consuming input; always true.
+static inline bool accept_inplace(TSLexer *lexer, enum TokenType token) {
+    lexer->result_symbol = token;
+    return true;
+}
+// Consumes the lookahead character, then reports `token`; always true.
+static inline bool accept_and_advance(TSLexer *lexer, enum TokenType token) {
+    advance(lexer);
+    return accept_inplace(lexer, token);
+}
+// Advances one character and reports whether the new lookahead is a hex digit.
+static inline bool consume_wxdigit(TSLexer *lexer) {
+    advance(lexer);
+    return iswxdigit(lexer->lookahead);
+}
+// Skips leading whitespace and, if present, one '#' line comment. Returns true
+// only when a comment was skipped up to (not including) its newline.
+static inline bool skip_comment(TSLexer *lexer) {
+    for (; iswspace(lexer->lookahead); skip(lexer)) {
+    }
+    if (lexer->lookahead != '#') {
+        return false;
+    }
+    // Consume the '#', then everything up to the end of the line.
+    for (skip(lexer); lexer->lookahead != '\n';) {
+        skip(lexer);
+        if (lexer->eof(lexer)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// True when the innermost (top-of-stack) context has the given type.
+static inline bool in_context_type(Scanner *scanner, enum ContextType type) {
+    const context_vec *stack = &scanner->context_stack;
+    // An empty stack means no context is open at all.
+    return stack->len != 0 && stack->data[stack->len - 1].type == type;
+}
+// True when the innermost context is a quoted template.
+static inline bool in_quoted_context(Scanner *scanner) {
+    return in_context_type(scanner, QUOTED_TEMPLATE);
+}
+// True when the innermost context is a heredoc template.
+static inline bool in_heredoc_context(Scanner *scanner) {
+    return in_context_type(scanner, HEREDOC_TEMPLATE);
+}
+// True when the innermost context is a quoted or a heredoc template.
+static inline bool in_template_context(Scanner *scanner) {
+    return in_quoted_context(scanner) || in_heredoc_context(scanner);
+}
+// True when the innermost context is a template interpolation.
+static inline bool in_interpolation_context(Scanner *scanner) {
+    return in_context_type(scanner, TEMPLATE_INTERPOLATION);
+}
+// True when the innermost context is a template directive.
+static inline bool in_directive_context(Scanner *scanner) {
+    return in_context_type(scanner, TEMPLATE_DIRECTIVE);
+}
+// Tries to scan one external token at the lexer position; returns true on success.
+static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
+    bool has_leading_whitespace_with_newline = false; // a '\n' was skipped below
+    while (iswspace(lexer->lookahead)) {
+        if (lexer->lookahead == '\n') {
+            has_leading_whitespace_with_newline = true;
+        }
+        skip(lexer);
+    }
+    if (lexer->lookahead == '\0') {
+        return false;
+    }
+    // manage quoted context: '"' opens a quoted template unless one is innermost
+    if (valid_symbols[QUOTED_TEMPLATE_START] && !in_quoted_context(scanner) &&
+        lexer->lookahead == '"') {
+        Context ctx = context_new(QUOTED_TEMPLATE, "");
+        VEC_PUSH(scanner->context_stack, ctx);
+        return accept_and_advance(lexer, QUOTED_TEMPLATE_START);
+    }
+    if (valid_symbols[QUOTED_TEMPLATE_END] && in_quoted_context(scanner) &&
+        lexer->lookahead == '"') {
+        VEC_POP(scanner->context_stack);
+        return accept_and_advance(lexer, QUOTED_TEMPLATE_END);
+    }
+
+    // manage template interpolations
+    if (valid_symbols[TEMPLATE_INTERPOLATION_START] &&
+        valid_symbols[TEMPLATE_LITERAL_CHUNK] &&
+        !in_interpolation_context(scanner) && lexer->lookahead == '$') {
+        advance(lexer);
+        if (lexer->lookahead == '{') {
+            Context ctx = context_new(TEMPLATE_INTERPOLATION, "");
+            VEC_PUSH(scanner->context_stack, ctx);
+            return accept_and_advance(lexer, TEMPLATE_INTERPOLATION_START);
+        }
+        // try to scan escape sequence
+        if (lexer->lookahead == '$') {
+            advance(lexer);
+            if (lexer->lookahead == '{') {
+                // $${ is an escaped "${": emit all three characters as a chunk
+                return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
+            }
+        }
+        return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK); // plain "$" or "$$"
+    }
+    if (valid_symbols[TEMPLATE_INTERPOLATION_END] &&
+        in_interpolation_context(scanner) && lexer->lookahead == '}') {
+        VEC_POP(scanner->context_stack);
+        return accept_and_advance(lexer, TEMPLATE_INTERPOLATION_END);
+    }
+
+    // manage template directives
+    if (valid_symbols[TEMPLATE_DIRECTIVE_START] &&
+        valid_symbols[TEMPLATE_LITERAL_CHUNK] &&
+        !in_directive_context(scanner) && lexer->lookahead == '%') {
+        advance(lexer);
+        if (lexer->lookahead == '{') {
+            Context ctx = context_new(TEMPLATE_DIRECTIVE, "");
+            VEC_PUSH(scanner->context_stack, ctx);
+            return accept_and_advance(lexer, TEMPLATE_DIRECTIVE_START);
+        }
+        // try to scan escape sequence
+        if (lexer->lookahead == '%') {
+            advance(lexer);
+            if (lexer->lookahead == '{') {
+                // %%{ is an escaped "%{": emit all three characters as a chunk
+                return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
+            }
+        }
+        return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK); // plain "%" or "%%"
+    }
+    if (valid_symbols[TEMPLATE_DIRECTIVE_END] &&
+        in_directive_context(scanner) && lexer->lookahead == '}') {
+        VEC_POP(scanner->context_stack);
+        return accept_and_advance(lexer, TEMPLATE_DIRECTIVE_END);
+    }
+
+    // manage heredoc context: opening identifier (not already inside a heredoc)
+    if (valid_symbols[HEREDOC_IDENTIFIER] && !in_heredoc_context(scanner)) {
+        String identifier = string_new();
+        // TODO: check that this is a valid identifier
+        while (iswalnum(lexer->lookahead) || lexer->lookahead == '_' ||
+               lexer->lookahead == '-') {
+            STRING_PUSH(identifier, lexer->lookahead);
+            advance(lexer);
+        }
+        Context ctx = {HEREDOC_TEMPLATE, identifier}; // stack takes ownership
+        VEC_PUSH(scanner->context_stack, ctx); // pushed even if nothing matched
+        return accept_inplace(lexer, HEREDOC_IDENTIFIER);
+    }
+    if (valid_symbols[HEREDOC_IDENTIFIER] && in_heredoc_context(scanner) &&
+        has_leading_whitespace_with_newline) {
+        String expected_identifier =
+            VEC_BACK(scanner->context_stack).heredoc_identifier;
+        // shallow copy: shares its buffer with the stack entry, so do not free it
+        for (size_t i = 0; i < expected_identifier.len; i++) {
+            if (lexer->lookahead == expected_identifier.data[i]) {
+                advance(lexer);
+            } else {
+                return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
+            }
+        }
+        // check if the identifier is on a line of its own
+        lexer->mark_end(lexer);
+        while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
+            advance(lexer);
+        }
+        if (lexer->lookahead == '\n') {
+            VEC_POP(scanner->context_stack);
+            return accept_inplace(lexer, HEREDOC_IDENTIFIER);
+        }
+        advance(lexer); // other text follows the identifier: treat it all as a chunk
+        lexer->mark_end(lexer);
+        return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
+    }
+    // manage template literal chunks
+
+    // handle template literal chunks in quoted contexts
+    //
+    // they may not contain newlines and may contain escape sequences
+    if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && in_quoted_context(scanner)) {
+        switch (lexer->lookahead) {
+            case '\\':
+                advance(lexer);
+                switch (lexer->lookahead) {
+                    case '"':
+                    case 'n':
+                    case 'r':
+                    case 't':
+                    case '\\':
+                        return accept_and_advance(lexer,
+                                                  TEMPLATE_LITERAL_CHUNK);
+                    case 'u': // \u followed by exactly 4 hex digits
+                        for (int i = 0; i < 4; i++) {
+                            if (!consume_wxdigit(lexer)) {
+                                return false;
+                            }
+                        }
+                        return accept_and_advance(lexer,
+                                                  TEMPLATE_LITERAL_CHUNK);
+                    case 'U': // \U followed by exactly 8 hex digits
+                        for (int i = 0; i < 8; i++) {
+                            if (!consume_wxdigit(lexer)) {
+                                return false;
+                            }
+                        }
+                        return accept_and_advance(lexer,
+                                                  TEMPLATE_LITERAL_CHUNK);
+                    default: // unknown escape: no token
+                        return false;
+                }
+        }
+    }
+
+    // handle all other quoted template or string literal characters
+    if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && in_template_context(scanner)) {
+        return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
+    }
+
+    // probably not handled by the external scanner
+    return false;
+}
+// Creates a zeroed scanner (fatal on OOM); VEC_PUSH allocates the stack lazily.
+void *tree_sitter_hcl_external_scanner_create() {
+    Scanner *scanner = calloc(1, sizeof(Scanner));
+    assert(scanner != NULL);
+    return scanner;
+}
+// Tree-sitter entry point: writes the scanner state to `buffer` via serialize().
+unsigned tree_sitter_hcl_external_scanner_serialize(void *payload,
+                                                    char *buffer) {
+    Scanner *scanner = (Scanner *)payload;
+    return serialize(scanner, buffer);
+}
+// Tree-sitter entry point: restores the scanner state via deserialize().
+void tree_sitter_hcl_external_scanner_deserialize(void *payload,
+                                                  const char *buffer,
+                                                  unsigned length) {
+    Scanner *scanner = (Scanner *)payload;
+    deserialize(scanner, buffer, length);
+}
+// Tree-sitter entry point: forwards to scan() with the payload cast to Scanner.
+bool tree_sitter_hcl_external_scanner_scan(void *payload, TSLexer *lexer,
+                                           const bool *valid_symbols) {
+    Scanner *scanner = (Scanner *)payload;
+    return scan(scanner, lexer, valid_symbols);
+}
+// Releases everything owned by the scanner: each context's identifier
+// buffer, the context stack storage, and finally the scanner itself.
+void tree_sitter_hcl_external_scanner_destroy(void *payload) {
+    Scanner *self = payload;
+    // VEC_CLEAR frees every heredoc_identifier and resets the length.
+    VEC_CLEAR(self->context_stack);
+    VEC_FREE(self->context_stack);
+    free(self);
+}
diff --git a/src/scanner.cc b/src/scanner.cc
deleted file mode 100644
index 74296e6..0000000
--- a/src/scanner.cc
+++ /dev/null
@@ -1,336 +0,0 @@
-#include <tree_sitter/parser.h>
-
-#include <assert.h>
-#include <climits>
-#include <string>
-#include <vector>
-#include <wctype.h>
-
-namespace {
-
-using std::string;
-using std::vector;
-
-enum TokenType {
-  QUOTED_TEMPLATE_START,
-  QUOTED_TEMPLATE_END,
-  TEMPLATE_LITERAL_CHUNK,
-  TEMPLATE_INTERPOLATION_START,
-  TEMPLATE_INTERPOLATION_END,
-  TEMPLATE_DIRECTIVE_START,
-  TEMPLATE_DIRECTIVE_END,
-  HEREDOC_IDENTIFIER,
-};
-
-enum ContextType {
-  TEMPLATE_INTERPOLATION,
-  TEMPLATE_DIRECTIVE,
-  QUOTED_TEMPLATE,
-  HEREDOC_TEMPLATE,
-};
-
-struct Context {
-  ContextType type;
-
-  // valid if type == HEREDOC_TEMPLATE
-  string heredoc_identifier;
-};
-
-struct Scanner {
-
-public:
-  unsigned serialize(char *buf) {
-    unsigned size = 0;
-
-    if (context_stack.size() > CHAR_MAX) {
-      return 0;
-    }
-
-    buf[size++] = context_stack.size();
-    for (vector<Context>::iterator it = context_stack.begin();
-         it != context_stack.end(); ++it) {
-      if (size + 2 + it->heredoc_identifier.size() >=
-          TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
-        return 0;
-      }
-      if (it->heredoc_identifier.size() > CHAR_MAX) {
-        return 0;
-      }
-      buf[size++] = it->type;
-      buf[size++] = it->heredoc_identifier.size();
-      it->heredoc_identifier.copy(&buf[size], it->heredoc_identifier.size());
-      size += it->heredoc_identifier.size();
-    }
-    return size;
-  }
-
-  void deserialize(const char *buf, unsigned n) {
-    context_stack.clear();
-
-    if (n == 0) {
-      return;
-    }
-
-    unsigned size = 0;
-    uint8_t context_stack_size = buf[size++];
-    for (unsigned j = 0; j < context_stack_size; j++) {
-      Context ctx;
-      ctx.type = static_cast<ContextType>(buf[size++]);
-      uint8_t heredoc_identifier_size = buf[size++];
-      ctx.heredoc_identifier.assign(buf + size,
-                                    buf + size + heredoc_identifier_size);
-      size += heredoc_identifier_size;
-      context_stack.push_back(ctx);
-    }
-    assert(size == n);
-  }
-
-  bool scan(TSLexer *lexer, const bool *valid_symbols) {
-    bool has_leading_whitespace_with_newline = false;
-    while (iswspace(lexer->lookahead)) {
-      if (lexer->lookahead == '\n') {
-        has_leading_whitespace_with_newline = true;
-      }
-      skip(lexer);
-    }
-    if (lexer->lookahead == '\0') {
-      return false;
-    }
-    // manage quoted context
-    if (valid_symbols[QUOTED_TEMPLATE_START] && !in_quoted_context() &&
-        lexer->lookahead == '"') {
-      Context ctx = {QUOTED_TEMPLATE, ""};
-      context_stack.push_back(ctx);
-      return accept_and_advance(lexer, QUOTED_TEMPLATE_START);
-    }
-    if (valid_symbols[QUOTED_TEMPLATE_END] && in_quoted_context() &&
-        lexer->lookahead == '"') {
-      context_stack.pop_back();
-      return accept_and_advance(lexer, QUOTED_TEMPLATE_END);
-    }
-
-    // manage template interpolations
-    if (valid_symbols[TEMPLATE_INTERPOLATION_START] &&
-        valid_symbols[TEMPLATE_LITERAL_CHUNK] && !in_interpolation_context() &&
-        lexer->lookahead == '$') {
-      advance(lexer);
-      if (lexer->lookahead == '{') {
-        Context ctx = {TEMPLATE_INTERPOLATION, ""};
-        context_stack.push_back(ctx);
-        return accept_and_advance(lexer, TEMPLATE_INTERPOLATION_START);
-      }
-      // try to scan escape sequence
-      if (lexer->lookahead == '$') {
-        advance(lexer);
-        if (lexer->lookahead == '{') {
-          // $${
-          return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-        }
-      }
-      return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
-    }
-    if (valid_symbols[TEMPLATE_INTERPOLATION_END] &&
-        in_interpolation_context() && lexer->lookahead == '}') {
-      context_stack.pop_back();
-      return accept_and_advance(lexer, TEMPLATE_INTERPOLATION_END);
-    }
-
-    // manage template directives
-    if (valid_symbols[TEMPLATE_DIRECTIVE_START] &&
-        valid_symbols[TEMPLATE_LITERAL_CHUNK] && !in_directive_context() &&
-        lexer->lookahead == '%') {
-      advance(lexer);
-      if (lexer->lookahead == '{') {
-        Context ctx = {TEMPLATE_DIRECTIVE, ""};
-        context_stack.push_back(ctx);
-        return accept_and_advance(lexer, TEMPLATE_DIRECTIVE_START);
-      }
-      // try to scan escape sequence
-      if (lexer->lookahead == '%') {
-        advance(lexer);
-        if (lexer->lookahead == '{') {
-          // $${
-          return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-        }
-      }
-      return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
-    }
-    if (valid_symbols[TEMPLATE_DIRECTIVE_END] && in_directive_context() &&
-        lexer->lookahead == '}') {
-      context_stack.pop_back();
-      return accept_and_advance(lexer, TEMPLATE_DIRECTIVE_END);
-    }
-
-    // manage heredoc context
-    if (valid_symbols[HEREDOC_IDENTIFIER] && !in_heredoc_context()) {
-      string identifier;
-      // TODO: check that this is a valid identifier
-      while (iswalnum(lexer->lookahead) || lexer->lookahead == '_' ||
-             lexer->lookahead == '-') {
-        identifier.push_back(lexer->lookahead);
-        advance(lexer);
-      }
-      Context ctx = {HEREDOC_TEMPLATE, identifier};
-      context_stack.push_back(ctx);
-      return accept_inplace(lexer, HEREDOC_IDENTIFIER);
-    }
-    if (valid_symbols[HEREDOC_IDENTIFIER] && in_heredoc_context() &&
-        has_leading_whitespace_with_newline) {
-      string expected_identifier = context_stack.back().heredoc_identifier;
-
-      for (string::iterator it = expected_identifier.begin();
-           it != expected_identifier.end(); ++it) {
-        if (lexer->lookahead == *it) {
-          advance(lexer);
-        } else {
-          return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
-        }
-      }
-      // check if the identifier is on a line of its own
-      lexer->mark_end(lexer);
-      while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
-        advance(lexer);
-      }
-      if (lexer->lookahead == '\n') {
-        context_stack.pop_back();
-        return accept_inplace(lexer, HEREDOC_IDENTIFIER);
-      } else {
-        advance(lexer);
-        lexer->mark_end(lexer);
-        return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
-      }
-    }
-    // manage template literal chunks
-
-    // handle template literal chunks in quoted contexts
-    //
-    // they may not contain newlines and may contain escape sequences
-    if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && in_quoted_context()) {
-      switch (lexer->lookahead) {
-      case '\\':
-        advance(lexer);
-        switch (lexer->lookahead) {
-        case '"':
-        case 'n':
-        case 'r':
-        case 't':
-        case '\\':
-          return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-        case 'u':
-          for (int i = 0; i < 4; i++) {
-            if (!consume_wxdigit(lexer))
-              return false;
-          }
-          return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-        case 'U':
-          for (int i = 0; i < 8; i++) {
-            if (!consume_wxdigit(lexer))
-              return false;
-          }
-          return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-        default:
-          return false;
-        }
-      }
-    }
-
-    // handle all other quoted template or string literal characters
-    if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && in_template_context()) {
-      return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
-    }
-
-    // probably not handled by the external scanner
-    return false;
-  }
-
-private:
-  vector<Context> context_stack;
-
-  void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
-
-  void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
-
-  bool accept_inplace(TSLexer *lexer, TokenType token) {
-    lexer->result_symbol = token;
-    return true;
-  }
-
-  bool accept_and_advance(TSLexer *lexer, TokenType token) {
-    advance(lexer);
-    return accept_inplace(lexer, token);
-  }
-
-  bool consume_wxdigit(TSLexer *lexer) {
-    advance(lexer);
-    return iswxdigit(lexer->lookahead);
-  }
-
-  bool skip_comment(TSLexer* lexer) {
-    while (iswspace(lexer->lookahead)) {
-      skip(lexer);
-    }
-    if (lexer->lookahead != '#') {
-      return false;
-    }
-    skip(lexer);
-    while (lexer->lookahead != '\n') {
-      skip(lexer);
-      if (lexer->eof(lexer)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  bool in_context_type(ContextType type) {
-    if (context_stack.empty()) {
-      return false;
-    }
-    return context_stack.back().type == type;
-  }
-
-  bool in_quoted_context() { return in_context_type(QUOTED_TEMPLATE); }
-
-  bool in_heredoc_context() { return in_context_type(HEREDOC_TEMPLATE); }
-
-  bool in_template_context() {
-    return in_quoted_context() || in_heredoc_context();
-  }
-
-  bool in_interpolation_context() {
-    return in_context_type(TEMPLATE_INTERPOLATION);
-  }
-
-  bool in_directive_context() { return in_context_type(TEMPLATE_DIRECTIVE); }
-};
-
-} // namespace
-
-extern "C" {
-
-// tree sitter callbacks
-void *tree_sitter_hcl_external_scanner_create() { return new Scanner(); }
-
-void tree_sitter_hcl_external_scanner_destroy(void *p) {
-  Scanner *scanner = static_cast<Scanner *>(p);
-  delete scanner;
-}
-
-unsigned tree_sitter_hcl_external_scanner_serialize(void *p, char *b) {
-  Scanner *scanner = static_cast<Scanner *>(p);
-  return scanner->serialize(b);
-}
-
-void tree_sitter_hcl_external_scanner_deserialize(void *p, const char *b,
-                                                  unsigned n) {
-  Scanner *scanner = static_cast<Scanner *>(p);
-  return scanner->deserialize(b, n);
-}
-
-bool tree_sitter_hcl_external_scanner_scan(void *p, TSLexer *lexer,
-                                           const bool *valid_symbols) {
-  Scanner *scanner = static_cast<Scanner *>(p);
-  return scanner->scan(lexer, valid_symbols);
-}
-
-} // extern "C"