properly handle escape sequences in string literals

This commit is contained in:
mhoffm
2021-06-12 22:36:10 +02:00
parent 67680e4cda
commit 52247dee63
6 changed files with 2369 additions and 2020 deletions

View File

@@ -14,4 +14,4 @@ To run tests simply run `nix-shell --run 'tree-sitter test'`.
* use [Unicode® Standard Annex #31](https://www.unicode.org/reports/tr31/) (augmented with '-') for identifiers * use [Unicode® Standard Annex #31](https://www.unicode.org/reports/tr31/) (augmented with '-') for identifiers
* add [operations](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#operations) * add [operations](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#operations)
* add [template expressions](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#template-expressions) and express string literals using them * add [template expressions](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#template-expressions)

View File

@@ -76,7 +76,24 @@ module.exports = grammar({
numeric_lit: $ => /[0-9]+(\.[0-9]+([eE][-+]?[0-9]+)?)?/, numeric_lit: $ => /[0-9]+(\.[0-9]+([eE][-+]?[0-9]+)?)?/,
string_lit: $ => (seq('"', token.immediate(repeat(choice(/[^\\"\n]/, /\\(.|\n)/))), '"')), string_lit: $ => seq(
'"',
repeat(choice(token.immediate(prec(1, /[^\\"\n\r\t]+/)), $.escape_sequence)),
'"',
),
// Escape sequence inside a quoted string literal: a backslash followed by
// exactly one of the alternatives listed below. The sequence is wrapped in
// token.immediate so it is lexed as a single token (per the tree-sitter DSL,
// an immediate token only matches when no whitespace precedes it).
// A backslash followed by anything else matches none of the alternatives.
escape_sequence: $ => token.immediate(seq(
'\\', // the backslash that introduces the escape
choice(
'\\', // \\ : escaped backslash
'"', // \" : escaped double quote
'n', // \n
'r', // \r
't', // \t
/u[0-9a-fA-F]{4}/, // \u followed by exactly four hex digits
/U[0-9a-fA-F]{8}/ // \U followed by exactly eight hex digits
)
)),
bool_lit: $ => choice('true', 'false'), bool_lit: $ => choice('true', 'false'),

View File

@@ -255,22 +255,26 @@
"value": "\"" "value": "\""
}, },
{ {
"type": "IMMEDIATE_TOKEN", "type": "REPEAT",
"content": { "content": {
"type": "REPEAT", "type": "CHOICE",
"content": { "members": [
"type": "CHOICE", {
"members": [ "type": "IMMEDIATE_TOKEN",
{ "content": {
"type": "PATTERN", "type": "PREC",
"value": "[^\\\\\"\\n]" "value": 1,
}, "content": {
{ "type": "PATTERN",
"type": "PATTERN", "value": "[^\\\\\"\\n\\r\\t]+"
"value": "\\\\(.|\\n)" }
} }
] },
} {
"type": "SYMBOL",
"name": "escape_sequence"
}
]
} }
}, },
{ {
@@ -279,6 +283,51 @@
} }
] ]
}, },
"escape_sequence": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "STRING",
"value": "\""
},
{
"type": "STRING",
"value": "n"
},
{
"type": "STRING",
"value": "r"
},
{
"type": "STRING",
"value": "t"
},
{
"type": "PATTERN",
"value": "u[0-9a-fA-F]{4}"
},
{
"type": "PATTERN",
"value": "U[0-9a-fA-F]{8}"
}
]
}
]
}
},
"bool_lit": { "bool_lit": {
"type": "CHOICE", "type": "CHOICE",
"members": [ "members": [

View File

@@ -497,7 +497,17 @@
{ {
"type": "string_lit", "type": "string_lit",
"named": true, "named": true,
"fields": {} "fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "escape_sequence",
"named": true
}
]
}
}, },
{ {
"type": "tuple", "type": "tuple",
@@ -585,6 +595,10 @@
"type": "ellipsis", "type": "ellipsis",
"named": true "named": true
}, },
{
"type": "escape_sequence",
"named": true
},
{ {
"type": "false", "type": "false",
"named": false "named": false

File diff suppressed because it is too large Load Diff

View File

@@ -101,59 +101,6 @@ small_pi = 3.14E-10
(literal_value (literal_value
(numeric_lit))))))) (numeric_lit)))))))
==================
string literal one line
==================
foo = "bar"
---
(config_file
(body
(attribute
(identifier)
(expression
(expr_term
(literal_value
(string_lit)))))))
==================
string literal escaped newline
==================
foo = "bar\nbaz"
---
(config_file
(body
(attribute
(identifier)
(expression
(expr_term
(literal_value
(string_lit)))))))
==================
string literal multi line error
==================
foo = "
bar"
---
(config_file
(body
(attribute
(identifier)
(expression
(expr_term
(literal_value
(string_lit (ERROR (UNEXPECTED 'b')))))))))
================== ==================
bool literal true bool literal true
================== ==================
@@ -205,3 +152,147 @@ foo = null
(literal_value (literal_value
(null_lit))))))) (null_lit)))))))
==================
string literal one line
==================
foo = "bar"
---
(config_file
(body
(attribute
(identifier)
(expression
(expr_term
(literal_value
(string_lit)))))))
==================
string literal escaped newline
==================
foo = "bar\nbaz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (escape_sequence))))))))
==================
string literal escaped tab
==================
foo = "bar\tbaz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (escape_sequence))))))))
==================
string literal escaped "
==================
foo = "bar\"baz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (escape_sequence))))))))
==================
string literal escaped \
==================
foo = "bar\\baz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (escape_sequence))))))))
==================
string literal escaped \uFFFF
==================
foo = "bar\uFFFFbaz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (escape_sequence))))))))
==================
string bad escape sequence
==================
foo = "bar\pbaz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (ERROR (UNEXPECTED 'p')))))))))
==================
string bad escape sequence 2
==================
foo = "bar\uZZ"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (ERROR (UNEXPECTED 'Z')))))))))
==================
string literal multi line error
==================
foo = "
bar"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (ERROR (UNEXPECTED 'b')))))))))
==================
string literal unescaped tab
==================
foo = "foo bar"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (ERROR (UNEXPECTED 'b')))))))))