simplify whitespace handling; support hexadecimal numeric literals; allow empty configs

This commit is contained in:
mhoffm
2021-06-20 00:03:58 +02:00
parent b1c3109853
commit 22d0a7253c
15 changed files with 12984 additions and 13226 deletions

View File

@@ -36,5 +36,6 @@ resource_1 "strlit1" "strlit2" {
attr4 = 2.112e+12
attr5 = 2.112E+12
attr6 = 2.112E-12
attr7 = 0x21FF
}
}

View File

@@ -15,12 +15,9 @@ module.exports = grammar({
conflicts: $ => [
// string literals are just quoted template without template stuff
[$.string_lit, $.quoted_template],
// empty block may be both
[$.block, $.one_line_block],
],
externals: $ => [
$._newline,
$._quoted_template_start,
$._quoted_template_end,
$._template_literal_chunk,
@@ -30,46 +27,31 @@ module.exports = grammar({
extras: $ => [
$.comment,
' ',
'\t',
$._whitespace,
],
rules: {
config_file: $ => $.body,
// Root rule of the grammar (first entry in `rules`). The body is optional so
// that an empty configuration file still parses successfully.
config_file: $ => optional($.body),
body: $ => repeat1(prec.left(
body: $ => repeat1(
choice(
$.attribute,
$.block,
$.one_line_block,
$._newlines,
),
)),
),
attribute: $ => prec.left(seq(
attribute: $ => seq(
$.identifier,
'=',
$.expression,
$._newlines,
)),
),
block: $ => prec.left(seq(
block: $ => seq(
$.identifier,
repeat(choice($.string_lit, $.identifier)),
$._block_start,
optional($._newlines),
optional($.body),
$._block_end,
optional($._newlines),
)),
one_line_block: $ => seq(
$.identifier,
repeat(choice($.string_lit, $.identifier)),
$._block_start,
optional(seq($.identifier, '=', $.expression)),
$._block_end,
$._newlines,
),
_block_start: $ => '{',
@@ -77,14 +59,14 @@ module.exports = grammar({
// TODO: not to spec but good enough for now
identifier: $ => token(seq(
/\p{L}/,
choice(/\p{L}/, '_'),
repeat(choice(/\p{L}/, /[0-9]/, /(-|_)/)),
)),
expression: $ => choice(
expression: $ => prec.right(choice(
$._expr_term,
$.conditional,
),
)),
// operations are documented as expressions, but our real world samples
// contain instances of operations without parentheses. think for example:
@@ -110,7 +92,10 @@ module.exports = grammar({
$.string_lit,
),
numeric_lit: $ => /[0-9]+(\.[0-9]+([eE][-+]?[0-9]+)?)?/,
numeric_lit: $ => choice(
/[0-9]+(\.[0-9]+([eE][-+]?[0-9]+)?)?/,
/0x[0-9a-zA-Z]+/
),
bool_lit: $ => choice('true', 'false'),
@@ -133,41 +118,38 @@ module.exports = grammar({
tuple: $ => seq(
$._tuple_start,
optional($._tuple_elems),
optional($._newlines),
$._tuple_end,
),
_tuple_start: $ => '[',
_tuple_end: $ => ']',
_tuple_elems: $ => prec.right(seq(
optional($._newlines),
_tuple_elems: $ => seq(
$.expression,
repeat(seq($._comma, optional($._newlines), $.expression)),
repeat(seq(
$._comma,
$.expression,
)),
optional($._comma),
optional($._newlines),
)),
),
object: $ => seq(
$._object_start,
optional($._object_elems),
optional($._newlines),
$._object_end,
),
_object_start: $ => '{',
_object_end: $ => '}',
_object_elems: $ => prec.right(seq(
optional($._newlines),
_object_elems: $ => seq(
$.object_elem,
repeat(seq(
choice($._comma, $._newlines, seq($._comma, $._newlines)),
optional($._comma),
$.object_elem
)),
optional($._comma),
optional($._newlines),
)),
),
object_elem: $ => seq(
$.expression,
@@ -196,28 +178,22 @@ module.exports = grammar({
for_expr: $ => choice($.for_tuple_expr, $.for_object_expr),
// newlines
for_tuple_expr: $ => seq(
$._tuple_start,
optional($._newlines),
$.for_intro,
$.expression,
optional($.for_cond),
optional($._newlines),
$._tuple_end,
),
// newlines
for_object_expr: $ => seq(
$._object_start,
optional($._newlines),
$.for_intro,
$.expression,
'=>',
$.expression,
optional($.ellipsis),
optional($.for_cond),
optional($._newlines),
$._object_end,
),
@@ -235,13 +211,12 @@ module.exports = grammar({
$.expression,
),
variable_expr: $ => $.identifier,
variable_expr: $ => prec.right($.identifier),
function_call: $ => seq(
$.identifier,
$._function_call_start,
optional($.function_arguments),
optional($._newlines),
$._function_call_end,
),
@@ -249,10 +224,8 @@ module.exports = grammar({
_function_call_end: $ => ')',
function_arguments: $ => prec.right(seq(
optional($._newline),
$.expression,
repeat(seq(',', optional($._newline), $.expression,)),
optional($._newline),
repeat(seq($._comma, $.expression,)),
optional(choice(',', $.ellipsis)),
)),
@@ -321,8 +294,6 @@ module.exports = grammar({
//$.template_if,
),
_newlines: $ => prec.right(repeat1($._newline)),
// http://stackoverflow.com/questions/13014947/regex-to-match-a-c-style-multiline-comment/36328890#36328890
comment: $ => token(choice(
seq('#', /.*/),
@@ -333,5 +304,7 @@ module.exports = grammar({
'/'
)
)),
// A single whitespace character. `\s` also matches line breaks, so newlines
// are treated like any other whitespace. This rule is referenced from
// `extras`, which lets it appear between any two tokens.
_whitespace: $ => token(/\s/),
}
});

View File

@@ -2,136 +2,51 @@
"name": "hcl",
"rules": {
"config_file": {
"type": "SYMBOL",
"name": "body"
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "body"
},
{
"type": "BLANK"
}
]
},
"body": {
"type": "REPEAT1",
"content": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "attribute"
},
{
"type": "SYMBOL",
"name": "block"
},
{
"type": "SYMBOL",
"name": "one_line_block"
},
{
"type": "SYMBOL",
"name": "_newlines"
}
]
}
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "attribute"
},
{
"type": "SYMBOL",
"name": "block"
}
]
}
},
"attribute": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "STRING",
"value": "="
},
{
"type": "SYMBOL",
"name": "expression"
},
{
"type": "SYMBOL",
"name": "_newlines"
}
]
}
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "STRING",
"value": "="
},
{
"type": "SYMBOL",
"name": "expression"
}
]
},
"block": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "string_lit"
},
{
"type": "SYMBOL",
"name": "identifier"
}
]
}
},
{
"type": "SYMBOL",
"name": "_block_start"
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "body"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "_block_end"
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
}
]
}
},
"one_line_block": {
"type": "SEQ",
"members": [
{
@@ -162,21 +77,8 @@
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "STRING",
"value": "="
},
{
"type": "SYMBOL",
"name": "expression"
}
]
"type": "SYMBOL",
"name": "body"
},
{
"type": "BLANK"
@@ -186,10 +88,6 @@
{
"type": "SYMBOL",
"name": "_block_end"
},
{
"type": "SYMBOL",
"name": "_newlines"
}
]
},
@@ -207,8 +105,17 @@
"type": "SEQ",
"members": [
{
"type": "PATTERN",
"value": "\\p{L}"
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "\\p{L}"
},
{
"type": "STRING",
"value": "_"
}
]
},
{
"type": "REPEAT",
@@ -234,17 +141,21 @@
}
},
"expression": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_expr_term"
},
{
"type": "SYMBOL",
"name": "conditional"
}
]
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_expr_term"
},
{
"type": "SYMBOL",
"name": "conditional"
}
]
}
},
"_expr_term": {
"type": "CHOICE",
@@ -357,8 +268,17 @@
]
},
"numeric_lit": {
"type": "PATTERN",
"value": "[0-9]+(\\.[0-9]+([eE][-+]?[0-9]+)?)?"
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[0-9]+(\\.[0-9]+([eE][-+]?[0-9]+)?)?"
},
{
"type": "PATTERN",
"value": "0x[0-9a-zA-Z]+"
}
]
},
"bool_lit": {
"type": "CHOICE",
@@ -430,18 +350,6 @@
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "_tuple_end"
@@ -457,81 +365,41 @@
"value": "]"
},
"_tuple_elems": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "expression"
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_comma"
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "expression"
}
]
}
},
{
"type": "CHOICE",
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "expression"
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_comma"
},
{
"type": "BLANK"
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
"name": "expression"
}
]
}
]
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_comma"
},
{
"type": "BLANK"
}
]
}
]
},
"object": {
"type": "SEQ",
@@ -552,18 +420,6 @@
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "_object_end"
@@ -579,91 +435,49 @@
"value": "}"
},
"_object_elems": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "object_elem"
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_comma"
},
{
"type": "BLANK"
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "object_elem"
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_comma"
},
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_comma"
},
{
"type": "SYMBOL",
"name": "_newlines"
}
]
}
]
},
{
"type": "SYMBOL",
"name": "object_elem"
}
]
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_comma"
},
{
"type": "BLANK"
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
"name": "object_elem"
}
]
}
]
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_comma"
},
{
"type": "BLANK"
}
]
}
]
},
"object_elem": {
"type": "SEQ",
@@ -829,18 +643,6 @@
"type": "SYMBOL",
"name": "_tuple_start"
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "for_intro"
@@ -861,18 +663,6 @@
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "_tuple_end"
@@ -886,18 +676,6 @@
"type": "SYMBOL",
"name": "_object_start"
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "for_intro"
@@ -938,18 +716,6 @@
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "_object_end"
@@ -1016,8 +782,12 @@
]
},
"variable_expr": {
"type": "SYMBOL",
"name": "identifier"
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "SYMBOL",
"name": "identifier"
}
},
"function_call": {
"type": "SEQ",
@@ -1042,18 +812,6 @@
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newlines"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "_function_call_end"
@@ -1074,18 +832,6 @@
"content": {
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newline"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "expression"
@@ -1096,20 +842,8 @@
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ","
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newline"
},
{
"type": "BLANK"
}
]
"type": "SYMBOL",
"name": "_comma"
},
{
"type": "SYMBOL",
@@ -1118,18 +852,6 @@
]
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newline"
},
{
"type": "BLANK"
}
]
},
{
"type": "CHOICE",
"members": [
@@ -1520,17 +1242,6 @@
"type": "CHOICE",
"members": []
},
"_newlines": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "REPEAT1",
"content": {
"type": "SYMBOL",
"name": "_newline"
}
}
},
"comment": {
"type": "TOKEN",
"content": {
@@ -1581,6 +1292,13 @@
}
]
}
},
"_whitespace": {
"type": "TOKEN",
"content": {
"type": "PATTERN",
"value": "\\s"
}
}
},
"extras": [
@@ -1589,30 +1307,18 @@
"name": "comment"
},
{
"type": "STRING",
"value": " "
},
{
"type": "STRING",
"value": "\t"
"type": "SYMBOL",
"name": "_whitespace"
}
],
"conflicts": [
[
"string_lit",
"quoted_template"
],
[
"block",
"one_line_block"
]
],
"precedences": [],
"externals": [
{
"type": "SYMBOL",
"name": "_newline"
},
{
"type": "SYMBOL",
"name": "_quoted_template_start"

View File

@@ -117,7 +117,7 @@
"fields": {},
"children": {
"multiple": true,
"required": false,
"required": true,
"types": [
{
"type": "attribute",
@@ -126,10 +126,6 @@
{
"type": "block",
"named": true
},
{
"type": "one_line_block",
"named": true
}
]
}
@@ -179,7 +175,7 @@
"fields": {},
"children": {
"multiple": false,
"required": true,
"required": false,
"types": [
{
"type": "body",
@@ -488,6 +484,11 @@
]
}
},
{
"type": "numeric_lit",
"named": true,
"fields": {}
},
{
"type": "object",
"named": true,
@@ -518,29 +519,6 @@
]
}
},
{
"type": "one_line_block",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "expression",
"named": true
},
{
"type": "identifier",
"named": true
},
{
"type": "string_lit",
"named": true
}
]
}
},
{
"type": "operation",
"named": true,
@@ -878,10 +856,6 @@
"type": "null_lit",
"named": true
},
{
"type": "numeric_lit",
"named": true
},
{
"type": "strip_marker",
"named": true

25382
src/parser.c

File diff suppressed because it is too large Load Diff

View File

@@ -5,7 +5,6 @@
#include <stdio.h>
enum TokenType {
NEWLINE,
QUOTED_TEMPLATE_START,
QUOTED_TEMPLATE_END,
TEMPLATE_LITERAL_CHUNK,
@@ -48,7 +47,6 @@ void print_debug_info(Scanner *scanner, TSLexer *lexer, const bool *valid_symbol
printf("template_literal_chunk: %x\n", valid_symbols[TEMPLATE_LITERAL_CHUNK]);
printf("template_interpolation_start: %x\n", valid_symbols[TEMPLATE_INTERPOLATION_START]);
printf("template_interpolation_end: %x\n", valid_symbols[TEMPLATE_INTERPOLATION_END]);
printf("newline: %x\n", valid_symbols[NEWLINE]);
printf("\n");
printf("scanner state:\n");
printf("in_template_interpolation %x\n", scanner->in_template_interpolation);
@@ -86,31 +84,13 @@ void scanner_exit_quoted_context(Scanner *scanner) {
}
}
bool is_newline(char c) {
return c == '\n' || c == '\r';
}
bool is_skippable_whitespace_outside_of_quoted_context(char c) {
return c == ' ' || c == '\t';
}
bool scanner_scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
// print_debug_info(scanner, lexer, valid_symbols);
while (
is_skippable_whitespace_outside_of_quoted_context(lexer->lookahead) &&
!scanner->in_quoted_context
) {
while (iswspace(lexer->lookahead) && !scanner->in_quoted_context) {
skip(lexer);
}
if (valid_symbols[NEWLINE] &&
is_newline(lexer->lookahead) &&
scanner->quoted_context_depth == 0
) {
return accept_and_advance(lexer, NEWLINE);
}
// manage quoted context
if (
valid_symbols[QUOTED_TEMPLATE_START] &&
@@ -162,9 +142,6 @@ bool scanner_scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
//
// they may not contain newlines and may contain escape sequences
if (valid_symbols[TEMPLATE_LITERAL_CHUNK] && scanner->in_quoted_context) {
if (is_newline(lexer->lookahead)) {
return false;
}
switch (lexer->lookahead) {
case '\\':
advance(lexer);

View File

@@ -10,8 +10,7 @@ block_1 {
(config_file
(body
(block
(identifier)
(body))))
(identifier))))
================================================================================
basic block on one line
@@ -69,8 +68,7 @@ block_1 {
(identifier)
(body
(block
(identifier)
(body))))))
(identifier))))))
================================================================================
nested block on one line
@@ -122,20 +120,22 @@ locals { timestamp = regex_replace(timestamp(), "[- TZ:]", "") }
(config_file
(body
(one_line_block
(block
(identifier)
(identifier)
(expression
(function_call
(body
(attribute
(identifier)
(function_arguments
(expression
(function_call
(identifier)))
(expression
(literal_value
(string_lit
(template_literal))))
(expression
(template_expr
(quoted_template)))))))))
(expression
(function_call
(identifier)
(function_arguments
(expression
(function_call
(identifier)))
(expression
(literal_value
(string_lit
(template_literal))))
(expression
(template_expr
(quoted_template)))))))))))

View File

@@ -365,3 +365,4 @@ worker_groups = [
(expression
(literal_value
(numeric_lit)))))))))))))

View File

@@ -1,3 +1,19 @@
================================================================================
numeric literal hex 1
================================================================================
pi = 0x314F
--------------------------------------------------------------------------------
(config_file
(body
(attribute
(identifier)
(expression
(literal_value
(numeric_lit))))))
================================================================================
numeric literal scientific notation 1
================================================================================

View File

@@ -11,32 +11,12 @@ foo = "bar\uZZ"
(attribute
(identifier)
(expression
(literal_value
(string_lit
(template_expr
(quoted_template
(template_literal
(ERROR
(UNEXPECTED '\')))))))))
================================================================================
literal multi line error
================================================================================
foo = "
bar"
--------------------------------------------------------------------------------
(config_file
(body
(attribute
(identifier)
(expression
(literal_value
(string_lit
(ERROR
(UNEXPECTED '\n'))
(template_literal)))))))
================================================================================
unescaped tab
================================================================================
@@ -67,8 +47,8 @@ foo = "foo\bar"
(attribute
(identifier)
(expression
(literal_value
(string_lit
(template_expr
(quoted_template
(template_literal
(ERROR
(UNEXPECTED '\')))))))))