properly handle escape sequences in string literals

This commit is contained in:
mhoffm
2021-06-12 22:36:10 +02:00
parent 67680e4cda
commit 52247dee63
6 changed files with 2369 additions and 2020 deletions

View File

@@ -14,4 +14,4 @@ To run tests simply run `nix-shell --run 'tree-sitter test'`.
* use [Unicode® Standard Annex #31](https://www.unicode.org/reports/tr31/) (augmented with '-') for identifiers
* add [operations](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#operations)
* add [template expressions](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#template-expressions) and express string literals using them
* add [template expressions](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#template-expressions)

View File

@@ -76,7 +76,24 @@ module.exports = grammar({
// Numeric literal: one or more decimal digits, optionally followed by a
// fractional part (`.` and more digits). An exponent (`e`/`E`, optional sign,
// digits) is only accepted directly after a fractional part, because the
// exponent group is nested inside the fraction group.
// NOTE(review): as written, an input such as `1e10` is not matched as a single
// number — confirm against the HCL syntax spec whether that is intended.
numeric_lit: $ => /[0-9]+(\.[0-9]+([eE][-+]?[0-9]+)?)?/,
string_lit: $ => (seq('"', token.immediate(repeat(choice(/[^\\"\n]/, /\\(.|\n)/))), '"')),
string_lit: $ => seq(
'"',
repeat(choice(token.immediate(prec(1, /[^\\"\n\r\t]+/)), $.escape_sequence)),
'"',
),
escape_sequence: $ => token.immediate(seq(
'\\',
choice(
'\\',
'"',
'n',
'r',
't',
/u[0-9a-fA-F]{4}/,
/U[0-9a-fA-F]{8}/
)
)),
// Boolean literal: exactly one of the keywords `true` or `false`.
bool_lit: $ => choice('true', 'false'),

View File

@@ -255,22 +255,26 @@
"value": "\""
},
{
"type": "IMMEDIATE_TOKEN",
"type": "REPEAT",
"content": {
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^\\\\\"\\n]"
},
{
"type": "PATTERN",
"value": "\\\\(.|\\n)"
"type": "CHOICE",
"members": [
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PREC",
"value": 1,
"content": {
"type": "PATTERN",
"value": "[^\\\\\"\\n\\r\\t]+"
}
}
]
}
},
{
"type": "SYMBOL",
"name": "escape_sequence"
}
]
}
},
{
@@ -279,6 +283,51 @@
}
]
},
"escape_sequence": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "STRING",
"value": "\""
},
{
"type": "STRING",
"value": "n"
},
{
"type": "STRING",
"value": "r"
},
{
"type": "STRING",
"value": "t"
},
{
"type": "PATTERN",
"value": "u[0-9a-fA-F]{4}"
},
{
"type": "PATTERN",
"value": "U[0-9a-fA-F]{8}"
}
]
}
]
}
},
"bool_lit": {
"type": "CHOICE",
"members": [

View File

@@ -497,7 +497,17 @@
{
"type": "string_lit",
"named": true,
"fields": {}
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "escape_sequence",
"named": true
}
]
}
},
{
"type": "tuple",
@@ -585,6 +595,10 @@
"type": "ellipsis",
"named": true
},
{
"type": "escape_sequence",
"named": true
},
{
"type": "false",
"named": false

File diff suppressed because it is too large Load Diff

View File

@@ -101,59 +101,6 @@ small_pi = 3.14E-10
(literal_value
(numeric_lit)))))))
==================
string literal one line
==================
foo = "bar"
---
(config_file
(body
(attribute
(identifier)
(expression
(expr_term
(literal_value
(string_lit)))))))
==================
string literal escaped newline
==================
foo = "bar\nbaz"
---
(config_file
(body
(attribute
(identifier)
(expression
(expr_term
(literal_value
(string_lit)))))))
==================
string literal multi line error
==================
foo = "
bar"
---
(config_file
(body
(attribute
(identifier)
(expression
(expr_term
(literal_value
(string_lit (ERROR (UNEXPECTED 'b')))))))))
==================
bool literal true
==================
@@ -205,3 +152,147 @@ foo = null
(literal_value
(null_lit)))))))
==================
string literal one line
==================
foo = "bar"
---
(config_file
(body
(attribute
(identifier)
(expression
(expr_term
(literal_value
(string_lit)))))))
==================
string literal escaped newline
==================
foo = "bar\nbaz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (escape_sequence))))))))
==================
string literal escaped tab
==================
foo = "bar\tbaz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (escape_sequence))))))))
==================
string literal escaped "
==================
foo = "bar\"baz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (escape_sequence))))))))
==================
string literal escaped \
==================
foo = "bar\\baz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (escape_sequence))))))))
==================
string literal escaped \uFFFF
==================
foo = "bar\uFFFFbaz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (escape_sequence))))))))
==================
string bad escape sequence
==================
foo = "bar\pbaz"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (ERROR (UNEXPECTED 'p')))))))))
==================
string bad escape sequence 2
==================
foo = "bar\uZZ"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (ERROR (UNEXPECTED 'Z')))))))))
==================
string literal multi line error
==================
foo = "
bar"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (ERROR (UNEXPECTED 'b')))))))))
==================
string literal unescaped tab
==================
foo = "foo	bar"
---
(config_file
(body
(attribute
(identifier)
(expression (expr_term (literal_value (string_lit (ERROR (UNEXPECTED 'b')))))))))