grammar: fix structure of coments in block bodies
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
|
||||
feature
|
||||
* add dialects so we can have different queries in `nvim-treesitter`
|
||||
* fix structure of comments in block bodies
|
||||
|
||||
housekeeping:
|
||||
* reformat using LSPs, ditch editorconfig
|
||||
|
||||
@@ -23,6 +23,28 @@
|
||||
]
|
||||
},
|
||||
"body": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_shim"
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_shim"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "REPEAT1",
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
@@ -37,6 +59,10 @@
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"attribute": {
|
||||
"type": "SEQ",
|
||||
@@ -1779,6 +1805,10 @@
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "heredoc_identifier"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_shim"
|
||||
}
|
||||
],
|
||||
"inline": [],
|
||||
|
||||
@@ -139,7 +139,7 @@
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": true,
|
||||
"required": false,
|
||||
"types": [
|
||||
{
|
||||
"type": "attribute",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,26 +1,341 @@
|
||||
#include "../../../src/scanner.cc"
|
||||
#include <tree_sitter/parser.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <climits>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <wctype.h>
|
||||
|
||||
namespace {
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
enum TokenType {
|
||||
QUOTED_TEMPLATE_START,
|
||||
QUOTED_TEMPLATE_END,
|
||||
TEMPLATE_LITERAL_CHUNK,
|
||||
TEMPLATE_INTERPOLATION_START,
|
||||
TEMPLATE_INTERPOLATION_END,
|
||||
TEMPLATE_DIRECTIVE_START,
|
||||
TEMPLATE_DIRECTIVE_END,
|
||||
HEREDOC_IDENTIFIER,
|
||||
SHIM,
|
||||
};
|
||||
|
||||
enum ContextType {
|
||||
TEMPLATE_INTERPOLATION,
|
||||
TEMPLATE_DIRECTIVE,
|
||||
QUOTED_TEMPLATE,
|
||||
HEREDOC_TEMPLATE,
|
||||
};
|
||||
|
||||
struct Context {
|
||||
ContextType type;
|
||||
|
||||
// valid if type == HEREDOC_TEMPLATE
|
||||
string heredoc_identifier;
|
||||
};
|
||||
|
||||
struct Scanner {
|
||||
|
||||
public:
|
||||
unsigned serialize(char *buf) {
|
||||
unsigned size = 0;
|
||||
|
||||
if (context_stack.size() > CHAR_MAX) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
buf[size++] = context_stack.size();
|
||||
for (vector<Context>::iterator it = context_stack.begin();
|
||||
it != context_stack.end(); ++it) {
|
||||
if (size + 2 + it->heredoc_identifier.size() >=
|
||||
TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
|
||||
return 0;
|
||||
}
|
||||
if (it->heredoc_identifier.size() > CHAR_MAX) {
|
||||
return 0;
|
||||
}
|
||||
buf[size++] = it->type;
|
||||
buf[size++] = it->heredoc_identifier.size();
|
||||
it->heredoc_identifier.copy(&buf[size], it->heredoc_identifier.size());
|
||||
size += it->heredoc_identifier.size();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
void deserialize(const char *buf, unsigned n) {
|
||||
context_stack.clear();
|
||||
|
||||
if (n == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned size = 0;
|
||||
uint8_t context_stack_size = buf[size++];
|
||||
for (unsigned j = 0; j < context_stack_size; j++) {
|
||||
Context ctx;
|
||||
ctx.type = static_cast<ContextType>(buf[size++]);
|
||||
uint8_t heredoc_identifier_size = buf[size++];
|
||||
ctx.heredoc_identifier.assign(buf + size,
|
||||
buf + size + heredoc_identifier_size);
|
||||
size += heredoc_identifier_size;
|
||||
context_stack.push_back(ctx);
|
||||
}
|
||||
assert(size == n);
|
||||
}
|
||||
|
||||
bool scan(TSLexer *lexer, const bool *valid_symbols) {
|
||||
bool has_leading_whitespace_with_newline = false;
|
||||
while (iswspace(lexer->lookahead)) {
|
||||
if (lexer->lookahead == '\n') {
|
||||
has_leading_whitespace_with_newline = true;
|
||||
}
|
||||
skip(lexer);
|
||||
}
|
||||
if (lexer->lookahead == '\0') {
|
||||
return false;
|
||||
}
|
||||
if (valid_symbols[SHIM]) {
|
||||
lexer->mark_end(lexer);
|
||||
while(skip_comment(lexer));
|
||||
if (lexer->lookahead != '}') {
|
||||
return accept_inplace(lexer, SHIM);
|
||||
}
|
||||
}
|
||||
// manage quoted context
|
||||
if (valid_symbols[QUOTED_TEMPLATE_START] && !in_quoted_context() &&
|
||||
lexer->lookahead == '"') {
|
||||
Context ctx = {QUOTED_TEMPLATE, ""};
|
||||
context_stack.push_back(ctx);
|
||||
return accept_and_advance(lexer, QUOTED_TEMPLATE_START);
|
||||
}
|
||||
if (valid_symbols[QUOTED_TEMPLATE_END] && in_quoted_context() &&
|
||||
lexer->lookahead == '"') {
|
||||
context_stack.pop_back();
|
||||
return accept_and_advance(lexer, QUOTED_TEMPLATE_END);
|
||||
}
|
||||
|
||||
// manage template interpolations
|
||||
if (valid_symbols[TEMPLATE_INTERPOLATION_START] &&
|
||||
valid_symbols[TEMPLATE_LITERAL_CHUNK] && !in_interpolation_context() &&
|
||||
lexer->lookahead == '$') {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '{') {
|
||||
Context ctx = {TEMPLATE_INTERPOLATION, ""};
|
||||
context_stack.push_back(ctx);
|
||||
return accept_and_advance(lexer, TEMPLATE_INTERPOLATION_START);
|
||||
}
|
||||
// try to scan escape sequence
|
||||
if (lexer->lookahead == '$') {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '{') {
|
||||
// $${
|
||||
return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
|
||||
}
|
||||
}
|
||||
return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
|
||||
}
|
||||
if (valid_symbols[TEMPLATE_INTERPOLATION_END] &&
|
||||
in_interpolation_context() && lexer->lookahead == '}') {
|
||||
context_stack.pop_back();
|
||||
return accept_and_advance(lexer, TEMPLATE_INTERPOLATION_END);
|
||||
}
|
||||
|
||||
// manage template directives
|
||||
if (valid_symbols[TEMPLATE_DIRECTIVE_START] &&
|
||||
valid_symbols[TEMPLATE_LITERAL_CHUNK] && !in_directive_context() &&
|
||||
lexer->lookahead == '%') {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '{') {
|
||||
Context ctx = {TEMPLATE_DIRECTIVE, ""};
|
||||
context_stack.push_back(ctx);
|
||||
return accept_and_advance(lexer, TEMPLATE_DIRECTIVE_START);
|
||||
}
|
||||
// try to scan escape sequence
|
||||
if (lexer->lookahead == '%') {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '{') {
|
||||
// $${
|
||||
return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
|
||||
}
|
||||
}
|
||||
return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
|
||||
}
|
||||
if (valid_symbols[TEMPLATE_DIRECTIVE_END] && in_directive_context() &&
|
||||
lexer->lookahead == '}') {
|
||||
context_stack.pop_back();
|
||||
return accept_and_advance(lexer, TEMPLATE_DIRECTIVE_END);
|
||||
}
|
||||
|
||||
// manage heredoc context
|
||||
if (valid_symbols[HEREDOC_IDENTIFIER] && !in_heredoc_context()) {
|
||||
string identifier;
|
||||
// TODO: check that this is a valid identifier
|
||||
while (iswalnum(lexer->lookahead) || lexer->lookahead == '_' ||
|
||||
lexer->lookahead == '-') {
|
||||
identifier.push_back(lexer->lookahead);
|
||||
advance(lexer);
|
||||
}
|
||||
Context ctx = {HEREDOC_TEMPLATE, identifier};
|
||||
context_stack.push_back(ctx);
|
||||
return accept_inplace(lexer, HEREDOC_IDENTIFIER);
|
||||
}
|
||||
if (valid_symbols[HEREDOC_IDENTIFIER] && in_heredoc_context() &&
|
||||
has_leading_whitespace_with_newline) {
|
||||
string expected_identifier = context_stack.back().heredoc_identifier;
|
||||
|
||||
for (string::iterator it = expected_identifier.begin();
|
||||
it != expected_identifier.end(); ++it) {
|
||||
if (lexer->lookahead == *it) {
|
||||
advance(lexer);
|
||||
} else {
|
||||
return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
|
||||
}
|
||||
}
|
||||
// check if the identifier is on a line of its own
|
||||
lexer->mark_end(lexer);
|
||||
while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
|
||||
advance(lexer);
|
||||
}
|
||||
if (lexer->lookahead == '\n') {
|
||||
context_stack.pop_back();
|
||||
return accept_inplace(lexer, HEREDOC_IDENTIFIER);
|
||||
} else {
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
return accept_inplace(lexer, TEMPLATE_LITERAL_CHUNK);
|
||||
}
|
||||
}
|
||||
// manage template literal chunks
|
||||
|
||||
// handle template literal chunks in quoted contexts
|
||||
//
|
||||
// they may not contain newlines and may contain escape sequences
|
||||
if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && in_quoted_context()) {
|
||||
switch (lexer->lookahead) {
|
||||
case '\\':
|
||||
advance(lexer);
|
||||
switch (lexer->lookahead) {
|
||||
case '"':
|
||||
case 'n':
|
||||
case 'r':
|
||||
case 't':
|
||||
case '\\':
|
||||
return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
|
||||
case 'u':
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (!consume_wxdigit(lexer))
|
||||
return false;
|
||||
}
|
||||
return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
|
||||
case 'U':
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (!consume_wxdigit(lexer))
|
||||
return false;
|
||||
}
|
||||
return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handle all other quoted template or string literal characters
|
||||
if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && in_template_context()) {
|
||||
return accept_and_advance(lexer, TEMPLATE_LITERAL_CHUNK);
|
||||
}
|
||||
|
||||
// probably not handled by the external scanner
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
vector<Context> context_stack;
|
||||
|
||||
void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
|
||||
|
||||
void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
|
||||
|
||||
bool accept_inplace(TSLexer *lexer, TokenType token) {
|
||||
lexer->result_symbol = token;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool accept_and_advance(TSLexer *lexer, TokenType token) {
|
||||
advance(lexer);
|
||||
return accept_inplace(lexer, token);
|
||||
}
|
||||
|
||||
bool consume_wxdigit(TSLexer *lexer) {
|
||||
advance(lexer);
|
||||
return iswxdigit(lexer->lookahead);
|
||||
}
|
||||
|
||||
bool skip_comment(TSLexer* lexer) {
|
||||
while (iswspace(lexer->lookahead)) {
|
||||
skip(lexer);
|
||||
}
|
||||
if (lexer->lookahead != '#') {
|
||||
return false;
|
||||
}
|
||||
skip(lexer);
|
||||
while (lexer->lookahead != '\n') {
|
||||
skip(lexer);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool in_context_type(ContextType type) {
|
||||
if (context_stack.empty()) {
|
||||
return false;
|
||||
}
|
||||
return context_stack.back().type == type;
|
||||
}
|
||||
|
||||
bool in_quoted_context() { return in_context_type(QUOTED_TEMPLATE); }
|
||||
|
||||
bool in_heredoc_context() { return in_context_type(HEREDOC_TEMPLATE); }
|
||||
|
||||
bool in_template_context() {
|
||||
return in_quoted_context() || in_heredoc_context();
|
||||
}
|
||||
|
||||
bool in_interpolation_context() {
|
||||
return in_context_type(TEMPLATE_INTERPOLATION);
|
||||
}
|
||||
|
||||
bool in_directive_context() { return in_context_type(TEMPLATE_DIRECTIVE); }
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
extern "C" {
|
||||
|
||||
// tree sitter callbacks
|
||||
void* tree_sitter_terraform_external_scanner_create() {
|
||||
return tree_sitter_hcl_external_scanner_create();
|
||||
}
|
||||
void *tree_sitter_terraform_external_scanner_create() { return new Scanner(); }
|
||||
|
||||
void tree_sitter_terraform_external_scanner_destroy(void *p) {
|
||||
return tree_sitter_hcl_external_scanner_destroy(p);
|
||||
Scanner *scanner = static_cast<Scanner *>(p);
|
||||
delete scanner;
|
||||
}
|
||||
|
||||
unsigned tree_sitter_terraform_external_scanner_serialize(void *p, char *b) {
|
||||
return tree_sitter_hcl_external_scanner_serialize(p, b);
|
||||
Scanner *scanner = static_cast<Scanner *>(p);
|
||||
return scanner->serialize(b);
|
||||
}
|
||||
|
||||
void tree_sitter_terraform_external_scanner_deserialize(void* p, const char* b, unsigned n) {
|
||||
return tree_sitter_hcl_external_scanner_deserialize(p, b, n);
|
||||
void tree_sitter_terraform_external_scanner_deserialize(void *p, const char *b,
|
||||
unsigned n) {
|
||||
Scanner *scanner = static_cast<Scanner *>(p);
|
||||
return scanner->deserialize(b, n);
|
||||
}
|
||||
|
||||
bool tree_sitter_terraform_external_scanner_scan(void* p, TSLexer* lexer, const bool* valid_symbols) {
|
||||
return tree_sitter_hcl_external_scanner_scan(p, lexer, valid_symbols);
|
||||
bool tree_sitter_terraform_external_scanner_scan(void *p, TSLexer *lexer,
|
||||
const bool *valid_symbols) {
|
||||
Scanner *scanner = static_cast<Scanner *>(p);
|
||||
return scanner->scan(lexer, valid_symbols);
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -23,6 +23,28 @@
|
||||
]
|
||||
},
|
||||
"body": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_shim"
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_shim"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "REPEAT1",
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
@@ -37,6 +59,10 @@
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"attribute": {
|
||||
"type": "SEQ",
|
||||
@@ -1779,6 +1805,10 @@
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "heredoc_identifier"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_shim"
|
||||
}
|
||||
],
|
||||
"inline": [],
|
||||
|
||||
@@ -139,7 +139,7 @@
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": true,
|
||||
"required": false,
|
||||
"types": [
|
||||
{
|
||||
"type": "attribute",
|
||||
|
||||
26558
src/parser.c
26558
src/parser.c
File diff suppressed because it is too large
Load Diff
146
test/corpus/comments.txt
Normal file
146
test/corpus/comments.txt
Normal file
@@ -0,0 +1,146 @@
|
||||
================================================================================
|
||||
comment in empty block body
|
||||
================================================================================
|
||||
|
||||
block {
|
||||
# foo
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(config_file
|
||||
(body
|
||||
(block
|
||||
(identifier)
|
||||
(block_start)
|
||||
(comment)
|
||||
(block_end))))
|
||||
|
||||
================================================================================
|
||||
multiline comment in empty block body
|
||||
================================================================================
|
||||
|
||||
block {
|
||||
/*
|
||||
foo
|
||||
*/
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(config_file
|
||||
(body
|
||||
(block
|
||||
(identifier)
|
||||
(block_start)
|
||||
(body)
|
||||
(comment)
|
||||
(block_end))))
|
||||
|
||||
================================================================================
|
||||
multiline comment above attribute in block body
|
||||
================================================================================
|
||||
|
||||
block {
|
||||
/*
|
||||
foo
|
||||
*/
|
||||
foo = bar
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(config_file
|
||||
(body
|
||||
(block
|
||||
(identifier)
|
||||
(block_start)
|
||||
(body
|
||||
(comment)
|
||||
(attribute
|
||||
(identifier)
|
||||
(expression
|
||||
(variable_expr
|
||||
(identifier)))))
|
||||
(block_end))))
|
||||
|
||||
================================================================================
|
||||
comment above first attribute in block body
|
||||
================================================================================
|
||||
|
||||
block {
|
||||
# foo
|
||||
foo = bar
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(config_file
|
||||
(body
|
||||
(block
|
||||
(identifier)
|
||||
(block_start)
|
||||
(body
|
||||
(comment)
|
||||
(attribute
|
||||
(identifier)
|
||||
(expression
|
||||
(variable_expr
|
||||
(identifier)))))
|
||||
(block_end))))
|
||||
|
||||
================================================================================
|
||||
comment after last attribute in block body
|
||||
================================================================================
|
||||
|
||||
block {
|
||||
foo = bar
|
||||
# foo
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(config_file
|
||||
(body
|
||||
(block
|
||||
(identifier)
|
||||
(block_start)
|
||||
(body
|
||||
(attribute
|
||||
(identifier)
|
||||
(expression
|
||||
(variable_expr
|
||||
(identifier)))))
|
||||
(comment)
|
||||
(block_end))))
|
||||
|
||||
================================================================================
|
||||
comment between attributes in block body
|
||||
================================================================================
|
||||
|
||||
block {
|
||||
foo = bar
|
||||
# foo
|
||||
baz = quz
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
(config_file
|
||||
(body
|
||||
(block
|
||||
(identifier)
|
||||
(block_start)
|
||||
(body
|
||||
(attribute
|
||||
(identifier)
|
||||
(expression
|
||||
(variable_expr
|
||||
(identifier))))
|
||||
(comment)
|
||||
(attribute
|
||||
(identifier)
|
||||
(expression
|
||||
(variable_expr
|
||||
(identifier)))))
|
||||
(block_end))))
|
||||
Reference in New Issue
Block a user