wip: add dialects

This commit is contained in:
Michael Hoffmann
2022-12-03 00:40:51 +01:00
parent 6b74f88b3d
commit 0ff887f2a6
9 changed files with 36842 additions and 379 deletions

View File

@@ -1,5 +1,10 @@
# Changelog
## 1.1.0 - not yet released
feature:
* add dialects so we can have different queries in `nvim-treesitter`
## 1.0.0 - 2022-12-02
breaking:

View File

@@ -0,0 +1,3 @@
// Terraform dialect entry point: asks the shared grammar factory for a
// grammar named 'terraform' and exports the result.
const build_dialect_grammar = require('../../make_grammar');

module.exports = build_dialect_grammar('terraform');

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,26 @@
#include "../../../src/scanner.cc"
extern "C" {

// External scanner entry points for the terraform dialect. Every function
// forwards its arguments unchanged to the tree_sitter_hcl_* function of the
// same role (presumably defined by the scanner.cc included above).

// Returns whatever the hcl scanner's create function returns.
void* tree_sitter_terraform_external_scanner_create() {
  return tree_sitter_hcl_external_scanner_create();
}

// Hands the scanner payload to the hcl scanner's destroy function.
void tree_sitter_terraform_external_scanner_destroy(void* payload) {
  tree_sitter_hcl_external_scanner_destroy(payload);
}

// Forwards to the hcl scanner's serialize function and returns its result.
unsigned tree_sitter_terraform_external_scanner_serialize(void* payload, char* buffer) {
  return tree_sitter_hcl_external_scanner_serialize(payload, buffer);
}

// Forwards the buffer and its length to the hcl scanner's deserialize function.
void tree_sitter_terraform_external_scanner_deserialize(void* payload, const char* buffer, unsigned length) {
  tree_sitter_hcl_external_scanner_deserialize(payload, buffer, length);
}

// Forwards to the hcl scanner's scan function and returns its result.
bool tree_sitter_terraform_external_scanner_scan(void* payload, TSLexer* lexer, const bool* valid_symbols) {
  return tree_sitter_hcl_external_scanner_scan(payload, lexer, valid_symbols);
}

} // extern "C"

View File

@@ -0,0 +1,224 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
// Reserved symbol values: the error symbol is -1 cast to TSSymbol, the end
// symbol is 0.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// Presumably the capacity of the buffer handed to an external scanner's
// serialize function -- confirm against the tree-sitter runtime.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
// Parse state identifier (16-bit).
typedef uint16_t TSStateId;
// The following typedefs are skipped when TREE_SITTER_API_H_ is already
// defined, presumably because that header provides them itself.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One field-map record: a field id, a child index and an `inherited` flag.
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
// An (index, length) pair -- presumably a window into
// TSLanguage.field_map_entries; confirm against the runtime.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
// Per-symbol flags (see TSLanguage.symbol_metadata).
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
// Lexer interface passed to the generated lex functions and to external
// scanners (see lex_fn and external_scanner.scan in TSLanguage).
typedef struct TSLexer TSLexer;
struct TSLexer {
// Current lookahead value; the lexer macros read it after each advance.
int32_t lookahead;
// Symbol recorded by ACCEPT_TOKEN.
TSSymbol result_symbol;
// Moves the lexer forward; START_LEXER passes its `skip` flag as the bool.
void (*advance)(TSLexer *, bool);
// Called by ACCEPT_TOKEN right after result_symbol is set.
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
// Discriminator stored in the `type` member of TSParseAction by the
// SHIFT*/REDUCE/RECOVER/ACCEPT_INPUT macros.
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
// A single parse action. Every member of the union starts with a uint8_t
// `type`, which the macros fill with a TSParseActionType value.
typedef union {
// Layout used by SHIFT, SHIFT_REPEAT and SHIFT_EXTRA.
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
// Layout used by REDUCE.
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
// Bare discriminator, the only member set by RECOVER and ACCEPT_INPUT.
uint8_t type;
} TSParseAction;
// Pair of lex state ids (internal and external); see TSLanguage.lex_modes.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
// Element type of TSLanguage.parse_actions: either an action or an
// `entry` header carrying a count and a `reusable` flag.
// NOTE(review): presumably a header is followed by `count` actions -- confirm.
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
// Description of one generated language: counts, lookup tables and the
// lexing / external-scanner function pointers.
// NOTE(review): field order is presumably part of the ABI shared with the
// tree-sitter runtime -- do not reorder without confirming.
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
// Lex functions take the lexer and a state id and return a bool.
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
// Hooks and tables for the external scanner; the function pointer
// signatures match the *_external_scanner_* entry points a scanner exports.
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*/
// These macros expect variables named `lexer` and `state` to exist in the
// enclosing function.
//
// START_LEXER declares the locals result/skip/eof/lookahead and the two
// labels used by the other macros: `next_state` advances the lexer (passing
// the current skip flag), then `start` clears skip and reloads lookahead.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
// ADVANCE: switch to `state_value` and consume the current lookahead.
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// SKIP: like ADVANCE, but sets the skip flag that is passed to advance().
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
// ACCEPT_TOKEN: record a successful match. Expands to three statements with
// no enclosing braces, so it must not be the unbraced body of an `if`.
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
// END_STATE: return the accumulated result flag.
#define END_STATE() return result;
/*
* Parse Table Macros
*/
// SMALL_STATE subtracts LARGE_STATE_COUNT (defined elsewhere, presumably by
// the generated parser) from the id. The expansion is not parenthesised, so
// callers should not embed it in a larger arithmetic expression.
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
// STATE and ACTIONS expand to their argument unchanged.
#define STATE(id) id
#define ACTIONS(id) id
// The macros below expand to doubly-braced designated initializers that
// select one member of TSParseAction and set its `type` discriminator.
//
// SHIFT: shift action targeting `state_value`.
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
// SHIFT_REPEAT: shift action with the `repetition` flag set.
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
// SHIFT_EXTRA: shift action with the `extra` flag set and no target state.
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
// REDUCE: reduce action for `symbol_val` with `child_count_val` children; any
// further arguments are pasted in as additional designated initializers.
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
// RECOVER: action whose only initialised member is the Recover discriminator.
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
// ACCEPT_INPUT: action whose only initialised member is the Accept discriminator.
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

View File

@@ -1,380 +1,3 @@
// HCL dialect entry point. The rule set is defined once in ./make_grammar,
// which takes the grammar name as its only argument; this file just selects
// the name 'hcl' and exports the resulting grammar.
const make_grammar = require('./make_grammar');

module.exports = make_grammar('hcl');

381
make_grammar.js Normal file
View File

@@ -0,0 +1,381 @@
module.exports = function make_grammar(dialect) {
const
PREC = {
unary: 7,
binary_mult: 6,
binary_add: 5,
binary_ord: 4,
binary_comp: 3,
binary_and: 2,
binary_or: 1,
// if possible prefer string_literals to quoted templates
string_lit: 2,
quoted_template: 1,
}
return grammar({
name: dialect,
externals: $ => [
$.quoted_template_start,
$.quoted_template_end,
$._template_literal_chunk,
$.template_interpolation_start,
$.template_interpolation_end,
$.template_directive_start,
$.template_directive_end,
$.heredoc_identifier,
],
extras: $ => [
$.comment,
$._whitespace,
],
rules: {
// also allow objects to handle .tfvars in json format
config_file: $ => optional(choice($.body, $.object)),
body: $ => repeat1(
choice(
$.attribute,
$.block,
),
),
attribute: $ => seq(
$.identifier,
'=',
$.expression,
),
block: $ => seq(
$.identifier,
repeat(choice($.string_lit, $.identifier)),
$.block_start,
optional($.body),
$.block_end,
),
block_start: $ => '{',
block_end: $ => '}',
identifier: $ => token(seq(
choice(/\p{ID_Start}/, '_'),
repeat(choice(/\p{ID_Continue}/, '-')),
)),
expression: $ => prec.right(choice(
$._expr_term,
$.conditional,
)),
// operations are documented as expressions, but our real world samples
// contain instances of operations without parentheses. think for example:
// x = a == "" && b != ""
_expr_term: $ => choice(
$.literal_value,
$.template_expr,
$.collection_value,
$.variable_expr,
$.function_call,
$.for_expr,
$.operation,
seq($._expr_term, $.index),
seq($._expr_term, $.get_attr),
seq($._expr_term, $.splat),
seq('(', $.expression, ')'),
),
literal_value: $ => choice(
$.numeric_lit,
$.bool_lit,
$.null_lit,
$.string_lit,
),
numeric_lit: $ => choice(
/[0-9]+(\.[0-9]+([eE][-+]?[0-9]+)?)?/,
/0x[0-9a-zA-Z]+/
),
bool_lit: $ => choice('true', 'false'),
null_lit: $ => 'null',
string_lit: $ => prec(PREC.string_lit, seq(
$.quoted_template_start,
optional($.template_literal),
$.quoted_template_end,
)),
collection_value: $ => choice(
$.tuple,
$.object,
),
_comma: $ => ',',
tuple: $ => seq(
$.tuple_start,
optional($._tuple_elems),
$.tuple_end,
),
tuple_start: $ => '[',
tuple_end: $ => ']',
_tuple_elems: $ => seq(
$.expression,
repeat(seq(
$._comma,
$.expression,
)),
optional($._comma),
),
object: $ => seq(
$.object_start,
optional($._object_elems),
$.object_end,
),
object_start: $ => '{',
object_end: $ => '}',
_object_elems: $ => seq(
$.object_elem,
repeat(seq(
optional($._comma),
$.object_elem
)),
optional($._comma),
),
object_elem: $ => seq(
field("key", $.expression),
choice('=', ':'),
field("val", $.expression),
),
index: $ => choice($.new_index, $.legacy_index),
new_index: $ => seq('[', $.expression, ']'),
legacy_index: $ => seq('.', /[0-9]+/),
get_attr: $ => seq('.', $.identifier),
splat: $ => choice($.attr_splat, $.full_splat),
attr_splat: $ => prec.right(seq(
'.*',
repeat(choice($.get_attr, $.index)),
)),
full_splat: $ => prec.right(seq(
'[*]',
repeat(choice($.get_attr, $.index)),
)),
for_expr: $ => choice($.for_tuple_expr, $.for_object_expr),
for_tuple_expr: $ => seq(
$.tuple_start,
$.for_intro,
$.expression,
optional($.for_cond),
$.tuple_end,
),
for_object_expr: $ => seq(
$.object_start,
$.for_intro,
$.expression,
'=>',
$.expression,
optional($.ellipsis),
optional($.for_cond),
$.object_end,
),
for_intro: $ => seq(
'for',
$.identifier,
optional(seq(',', $.identifier)),
'in',
$.expression,
':',
),
for_cond: $ => seq(
'if',
$.expression,
),
variable_expr: $ => prec.right($.identifier),
function_call: $ => seq(
$.identifier,
$._function_call_start,
optional($.function_arguments),
$._function_call_end,
),
_function_call_start: $ => '(',
_function_call_end: $ => ')',
function_arguments: $ => prec.right(seq(
$.expression,
repeat(seq($._comma, $.expression,)),
optional(choice($._comma, $.ellipsis)),
)),
ellipsis: $ => token('...'),
conditional: $ => prec.left(seq(
$.expression,
'?',
$.expression,
':',
$.expression,
)),
operation: $ => choice($.unary_operation, $.binary_operation),
unary_operation: $ => prec.left(PREC.unary, seq(choice('-', '!'), $._expr_term)),
binary_operation: $ => {
const table = [
[PREC.binary_mult, choice('*', '/', '%')],
[PREC.binary_add, choice('+', '-')],
[PREC.binary_ord, choice('>', '>=', '<', '<=')],
[PREC.binary_comp, choice('==', '!=')],
[PREC.binary_and, choice('&&')],
[PREC.binary_or, choice('||')],
];
return choice(...table.map(([precedence, operator]) =>
prec.left(precedence, seq($._expr_term, operator, $._expr_term),
))
);
},
template_expr: $ => choice(
$.quoted_template,
$.heredoc_template,
),
quoted_template: $ => prec(PREC.quoted_template, seq(
$.quoted_template_start,
optional($._template),
$.quoted_template_end,
)),
heredoc_template: $ => seq(
$.heredoc_start,
$.heredoc_identifier,
optional($._template),
$.heredoc_identifier,
),
heredoc_start: $ => choice('<<', '<<-'),
strip_marker: $ => '~',
_template: $ => repeat1(choice(
$.template_interpolation,
$.template_directive,
$.template_literal,
)),
template_literal: $ => prec.right(repeat1(
$._template_literal_chunk,
)),
template_interpolation: $ => seq(
$.template_interpolation_start,
optional($.strip_marker),
optional($.expression),
optional($.strip_marker),
$.template_interpolation_end,
),
template_directive: $ => choice(
$.template_for,
$.template_if,
),
template_for: $ => seq(
$.template_for_start,
optional($._template),
$.template_for_end,
),
template_for_start: $ => seq(
$.template_directive_start,
optional($.strip_marker),
"for",
$.identifier,
optional(seq(",", $.identifier)),
"in",
$.expression,
optional($.strip_marker),
$.template_directive_end
),
template_for_end: $ => seq(
$.template_directive_start,
optional($.strip_marker),
"endfor",
optional($.strip_marker),
$.template_directive_end
),
template_if: $ => seq(
$.template_if_intro,
optional($._template),
optional(seq($.template_else_intro, optional($._template))),
$.template_if_end,
),
template_if_intro: $ => seq(
$.template_directive_start,
optional($.strip_marker),
"if",
$.expression,
optional($.strip_marker),
$.template_directive_end
),
template_else_intro: $ => seq(
$.template_directive_start,
optional($.strip_marker),
"else",
optional($.strip_marker),
$.template_directive_end
),
template_if_end: $ => seq(
$.template_directive_start,
optional($.strip_marker),
"endif",
optional($.strip_marker),
$.template_directive_end
),
// http://stackoverflow.com/questions/13014947/regex-to-match-a-c-style-multiline-comment/36328890#36328890
comment: $ => token(choice(
seq('#', /.*/),
seq('//', /.*/),
seq(
'/*',
/[^*]*\*+([^/*][^*]*\*+)*/,
'/'
)
)),
_whitespace: $ => token(/\s/),
}
});
}