Files
boring-lang/boring/parse.py

536 lines
13 KiB
Python
Raw Normal View History

2021-02-20 19:58:56 -07:00
import sys
import enum
2021-06-11 20:59:51 -06:00
from typing import Union, List, Optional, Dict
2021-02-20 19:58:56 -07:00
from dataclasses import dataclass, field
from lark import Lark, Transformer
2021-05-05 19:32:55 -06:00
2021-04-18 08:51:21 -06:00
def pretty_print(clas, indent=0):
    """Print a value to stdout as an indented tree.

    Args:
        clas: The value to print. Lists print a ``list:`` header followed by
            their elements; objects that expose ``__dict__`` print their class
            name followed by one entry per attribute; anything else is printed
            with ``str()``.
        indent: Number of spaces placed before the first line printed for
            ``clas``. Nested content is indented further from there.
    """
    pad = " " * indent
    if isinstance(clas, list):
        print(pad + type(clas).__name__ + ":")
        for element in clas:
            # Pass the indentation down so elements stay nested under the list.
            pretty_print(element, indent + 2)
        return
    if "__dict__" not in dir(clas):
        # Leaf values (e.g. str or int) have no attributes to walk.
        print(pad + str(clas))
        return

    print(pad + type(clas).__name__ + ":")
    inner = " " * (indent + 2)
    for key, value in clas.__dict__.items():
        if "__dict__" in dir(value):
            print(inner + key + ": ")
            pretty_print(value, indent + 4)
        elif isinstance(value, list):
            print(inner + key + ": [")
            for element in value:
                pretty_print(element, indent + 4)
            print(inner + "]")
        else:
            print(inner + key + ": " + str(value))
# Type name used for functions declared without a return type.
UNIT_TYPE = "()"
# Type name given to `return` statements.
NEVER_TYPE = "!"
2021-04-18 08:51:21 -06:00
2021-02-20 19:58:56 -07:00
2021-05-05 19:32:55 -06:00
@dataclass
class FunctionTypeUsage:
    """Type of a function: its argument types and its return type."""

    arguments: List["TypeUsage"]  # argument types, in declaration order
    return_type: "TypeUsage"
@dataclass
class DataTypeUsage:
    """Reference to a type by its name."""

    name: str
2021-05-05 19:32:55 -06:00
2021-05-28 23:57:07 -06:00
@dataclass
class UnknownTypeUsage:
    """Placeholder type; the transformer in this file uses it wherever the source text gives no type."""

    pass
# Any of the ways a type can be referred to in the AST.
TypeUsage = Union[FunctionTypeUsage, DataTypeUsage, UnknownTypeUsage]
2021-05-05 19:32:55 -06:00
2021-02-20 19:58:56 -07:00
class Operator(enum.Enum):
    """Binary operators; each member's value is its own name."""

    mult = "mult"
    div = "div"
    plus = "plus"
    minus = "minus"
@dataclass
class LiteralInt:
    """Integer literal and its type."""

    value: int
    type: TypeUsage
2021-02-20 19:58:56 -07:00
2021-05-29 10:50:15 -06:00
@dataclass
class LiteralFloat:
    """Floating-point literal and its type."""

    value: float
    type: TypeUsage
2021-06-12 12:26:53 -06:00
2021-06-12 10:59:58 -06:00
@dataclass
class LiteralStruct:
    """Struct literal: field values plus the struct's type."""

    fields: Dict[str, "Expression"]  # field name -> value expression
    type: TypeUsage  # the transformer stores the data type written before the braces
2021-05-29 10:50:15 -06:00
2021-02-20 19:58:56 -07:00
@dataclass
class FunctionCall:
    """Call of `source` with a list of argument expressions."""

    source: "Expression"  # the expression being called
    arguments: List["Expression"]
    type: TypeUsage
2021-02-20 19:58:56 -07:00
2021-06-12 12:26:53 -06:00
@dataclass
class StructGetter:
    """Attribute access of the form `source.attribute`."""

    source: "Expression"
    attribute: str
    type: TypeUsage
2021-02-20 19:58:56 -07:00
@dataclass
class Operation:
    """Binary operation `left op right`."""

    left: "Expression"
    op: Operator
    right: "Expression"
    type: TypeUsage
2021-05-05 19:32:55 -06:00
@dataclass
class VariableUsage:
    """Use of a variable by name."""

    name: str
    type: TypeUsage
2021-02-20 19:58:56 -07:00
2021-05-30 09:57:41 -06:00
@dataclass
class ReturnStatement:
    """`return <source>;` statement."""

    source: "Expression"  # the returned expression
    type: TypeUsage  # the transformer sets this to the type named by NEVER_TYPE
2021-02-20 19:58:56 -07:00
@dataclass
class Expression:
    """Wrapper holding one concrete expression node together with its type."""

    # The wrapped node. "Block" is a string forward reference because Block is
    # defined later in the module.
    expression: Union[
        LiteralInt,
        LiteralFloat,
        LiteralStruct,
        FunctionCall,
        StructGetter,
        "Block",
        ReturnStatement,
        VariableUsage,
        Operation,
    ]
    type: TypeUsage
2021-02-20 19:58:56 -07:00
@dataclass
class LetStatement:
    """`let` statement binding `variable_name` to `expression`."""

    variable_name: str
    type: TypeUsage  # declared type; UnknownTypeUsage when the source omits it
    expression: Expression
2021-06-01 23:05:17 -06:00
@dataclass
class AssignmentStatement:
    """Assignment of `expression` to a variable or a struct attribute."""

    source: Union[VariableUsage, StructGetter]  # the assignment target
    type: TypeUsage
    expression: Expression
# Node kinds that may appear in a Block's statement list.
Statement = Union[LetStatement, AssignmentStatement, Expression]
2021-04-18 08:51:21 -06:00
@dataclass
class Block:
    """Brace-delimited sequence of statements and the block's type."""

    statements: List[Statement]
    type: TypeUsage
2021-04-18 08:51:21 -06:00
2021-02-20 19:58:56 -07:00
@dataclass
class VariableDeclaration:
    """`name: type` pair, used for function arguments."""

    name: str
    type: TypeUsage
2021-02-20 19:58:56 -07:00
@dataclass
class Function:
    """Function node: a declaration plus the function's type.

    NOTE(review): TreeToBoring.function_with_return / function_without_return
    in this file construct Function with name/arguments/return_type/block
    keywords, which this dataclass does not declare, and no field here holds
    the function body — confirm the intended shape.
    """

    declaration: "FunctionDeclaration"
    type: TypeUsage
@dataclass
class FunctionDeclaration:
    """Signature of a function: name, arguments, return type and overall type."""

    name: str
    arguments: List[VariableDeclaration]
    return_type: TypeUsage
    type: TypeUsage
2021-02-20 19:58:56 -07:00
2021-06-11 20:59:51 -06:00
@dataclass
class PrimitiveTypeDeclaration:
    """Type declaration that carries only a name."""

    name: str
@dataclass
class StructTypeDeclaration:
    """Declaration of a struct type and its fields."""

    name: str
    fields: Dict[str, TypeUsage]  # field name -> field type
2021-06-13 10:38:13 -06:00
@dataclass
class AliasTypeDeclaration:
    """Type alias (`type new = old;` in the grammar)."""

    new: DataTypeUsage  # the name being introduced
    old: TypeUsage  # the existing type it refers to
2021-06-26 17:47:52 -06:00
# Kinds of type declaration stored in Module.types.
TypeDeclaration = Union[
    StructTypeDeclaration, PrimitiveTypeDeclaration, AliasTypeDeclaration
]
2021-06-13 10:38:13 -06:00
@dataclass
class Impl:
    """`impl` block: functions attached to the struct named `struct`."""

    struct: str
    functions: List[Function]
2021-06-11 20:59:51 -06:00
2021-07-01 12:29:00 -06:00
@dataclass
class FunctionDeclartation:
    """Function signature with the same fields as FunctionDeclaration.

    NOTE(review): the class name looks like a misspelling of
    FunctionDeclaration, which already exists with identical fields; nothing
    in the visible code references this class — confirm whether it can be
    removed.
    """

    name: str
    arguments: List[VariableDeclaration]
    return_type: TypeUsage
    type: TypeUsage
@dataclass
class TraitTypeDeclaration:
    """Trait declaration holding a name string and a list of functions.

    NOTE(review): not constructed anywhere in the visible code; `struct`
    presumably holds the trait's own name — confirm.
    """

    struct: str
    functions: List[Function]
@dataclass
class TraitImpl:
    """Functions implementing `trait` for `struct`.

    NOTE(review): not constructed anywhere in the visible code; presumably
    meant for the `impl A for B` grammar alternative — confirm which
    identifier maps to which field.
    """

    struct: str
    trait: str
    functions: List[Function]
2021-02-20 19:58:56 -07:00
@dataclass
class Module:
    """Top-level AST node: everything declared in one source file."""

    functions: List[Function]
    types: List[TypeDeclaration]
    impls: List[Impl]
2021-02-20 19:58:56 -07:00
boring_grammar = r"""
plus : "+"
minus : "-"
mult : "*"
div : "/"
2021-06-12 10:59:58 -06:00
identifier : CNAME
2021-05-29 10:50:15 -06:00
literal_float : SIGNED_FLOAT
literal_int : SIGNED_INT
2021-06-12 10:59:58 -06:00
literal_struct_field : identifier ":" expression
2021-06-13 10:46:41 -06:00
literal_struct : data_type "{" (literal_struct_field ",")* "}"
2021-05-05 19:32:55 -06:00
function_call : expression "(" [expression ("," expression)*] ")"
2021-02-20 19:58:56 -07:00
2021-06-12 12:26:53 -06:00
struct_getter : expression "." identifier
2021-02-20 19:58:56 -07:00
add_expression : expression plus factor
sub_expression : expression minus factor
mult_expression : expression mult term
div_expression : expression div term
2021-05-05 19:32:55 -06:00
variable_usage : identifier
2021-05-30 09:57:41 -06:00
return_statement : "return" expression ";"
2021-02-20 19:58:56 -07:00
expression : add_expression
| sub_expression
| factor
factor : mult_expression
| div_expression
| term
term : literal_int
2021-05-29 10:50:15 -06:00
| literal_float
2021-06-12 10:59:58 -06:00
| literal_struct
2021-05-05 19:32:55 -06:00
| variable_usage
2021-02-20 19:58:56 -07:00
| function_call
2021-06-12 12:26:53 -06:00
| struct_getter
2021-02-20 19:58:56 -07:00
| "(" expression ")"
2021-05-29 11:01:34 -06:00
| block
2021-02-20 19:58:56 -07:00
2021-04-18 08:51:21 -06:00
let_statement : "let" identifier "=" expression ";"
2021-05-05 19:32:55 -06:00
| "let" identifier ":" type_usage "=" expression ";"
2021-04-18 08:51:21 -06:00
2021-06-12 12:47:10 -06:00
assignment_statement : variable_usage "=" expression ";"
| struct_getter "=" expression ";"
2021-06-01 23:05:17 -06:00
2021-04-18 08:51:21 -06:00
statement : let_statement
2021-06-01 23:05:17 -06:00
| assignment_statement
2021-05-30 09:57:41 -06:00
| return_statement
2021-04-18 08:51:21 -06:00
| expression
block : "{" (statement)* "}"
2021-02-20 19:58:56 -07:00
2021-05-05 19:32:55 -06:00
data_type : identifier
function_type : "fn" "(" (type_usage)* ")"
function_type_with_return : "fn" "(" (type_usage)* ")" ":" type_usage
type_usage : data_type
| function_type
| function_type_with_return
variable_declaration : identifier ":" type_usage
2021-07-01 12:29:00 -06:00
function_declaration_without_return : "fn" identifier "(" [variable_declaration ("," variable_declaration)*] ")"
2021-05-05 19:32:55 -06:00
2021-07-01 12:29:00 -06:00
function_declaration_with_return : "fn" identifier "(" [variable_declaration ("," variable_declaration)*] ")" ":" type_usage
2021-02-20 19:58:56 -07:00
2021-07-01 12:29:00 -06:00
function_declaration : function_declaration_with_return
| function_declaration_without_return
2021-02-20 19:58:56 -07:00
2021-07-01 12:29:00 -06:00
function : function_declaration block
2021-06-11 20:59:51 -06:00
struct_definition_field : identifier ":" type_usage
struct_type_declaration : "type" identifier "struct" "{" (struct_definition_field ",")* "}"
2021-06-13 10:38:13 -06:00
type_alias_declaration : "type" identifier "=" type_usage ";"
2021-07-01 12:29:00 -06:00
trait_item : function_declaration ";"
| function
trait_declaration : "type" identifier "trait" "{" trait_item* "}"
2021-06-11 20:59:51 -06:00
type_declaration : struct_type_declaration
2021-06-13 10:38:13 -06:00
| type_alias_declaration
impl : "impl" identifier "{" function* "}"
2021-07-01 12:29:00 -06:00
| "impl" identifier "for" identifier "{" function* "}"
2021-06-11 20:59:51 -06:00
2021-06-13 10:38:13 -06:00
module : (function|type_declaration|impl)*
2021-02-20 19:58:56 -07:00
2021-05-29 10:50:15 -06:00
%import common.CNAME
%import common.SIGNED_INT
%import common.SIGNED_FLOAT
2021-02-20 19:58:56 -07:00
%import common.WS
2021-05-29 12:16:03 -06:00
%import common.CPP_COMMENT
2021-02-20 19:58:56 -07:00
%ignore WS
2021-05-29 12:16:03 -06:00
%ignore CPP_COMMENT
2021-02-20 19:58:56 -07:00
"""
2021-05-28 23:57:07 -06:00
# Module-level counter starting at 0.
# NOTE(review): not referenced elsewhere in the visible code — check for
# external users before removing.
next_sub_id = 0
2021-05-05 19:32:55 -06:00
2021-02-20 19:58:56 -07:00
class TreeToBoring(Transformer):
    """Lark transformer that converts a ``boring_grammar`` parse tree into AST dataclasses.

    Each public method is named after a grammar rule and receives the list of
    that rule's (already transformed) children. Nodes whose type is not
    written in the source text are created with ``UnknownTypeUsage()``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    # ---- operators -------------------------------------------------------

    def plus(self, p) -> Operator:
        """Return ``Operator.plus`` for the ``plus`` rule."""
        return Operator.plus

    def minus(self, m) -> Operator:
        """Return ``Operator.minus`` for the ``minus`` rule."""
        return Operator.minus

    def mult(self, m) -> Operator:
        """Return ``Operator.mult`` for the ``mult`` rule."""
        return Operator.mult

    def div(self, d) -> Operator:
        """Return ``Operator.div`` for the ``div`` rule."""
        return Operator.div

    # ---- literals and identifiers ----------------------------------------

    def literal_int(self, n) -> LiteralInt:
        """Build a LiteralInt from the single integer token."""
        (token,) = n
        return LiteralInt(value=int(token), type=UnknownTypeUsage())

    def literal_float(self, f) -> LiteralFloat:
        """Build a LiteralFloat from the single float token."""
        (token,) = f
        return LiteralFloat(value=float(token), type=UnknownTypeUsage())

    def literal_struct_field(self, lsf):
        """Return the ``(field name, value expression)`` pair of one struct field."""
        (name, expression) = lsf
        return name, expression

    def literal_struct(self, literal_struct) -> LiteralStruct:
        """Build a LiteralStruct: first child is the data type, the rest are field pairs."""
        data_type, *field_pairs = literal_struct
        return LiteralStruct(fields=dict(field_pairs), type=data_type)

    def identifier(self, i) -> str:
        """Return the identifier token as a plain ``str``."""
        (token,) = i
        return str(token)

    # ---- expressions -----------------------------------------------------

    def variable_usage(self, variable) -> VariableUsage:
        """Build a VariableUsage from the single identifier child."""
        (name,) = variable
        return VariableUsage(name=name, type=UnknownTypeUsage())

    def return_statement(self, return_expression) -> ReturnStatement:
        """Build a ReturnStatement typed with ``NEVER_TYPE``."""
        (source,) = return_expression
        return ReturnStatement(source=source, type=DataTypeUsage(name=NEVER_TYPE))

    def function_call(self, call) -> FunctionCall:
        """Build a FunctionCall: first child is the callee, the rest are arguments."""
        source, *arguments = call
        return FunctionCall(
            source=source, arguments=arguments, type=UnknownTypeUsage()
        )

    def struct_getter(self, getter) -> StructGetter:
        """Build a StructGetter from ``(source expression, attribute name)``."""
        source, attribute = getter
        return StructGetter(
            source=source, attribute=attribute, type=UnknownTypeUsage()
        )

    def add_expression(self, ae) -> Operation:
        """Build an Operation from ``(left, operator, right)``."""
        left, op, right = ae
        return Operation(left=left, op=op, right=right, type=UnknownTypeUsage())

    def sub_expression(self, se) -> Operation:
        """Build an Operation from ``(left, operator, right)``."""
        left, op, right = se
        return Operation(left=left, op=op, right=right, type=UnknownTypeUsage())

    def mult_expression(self, se) -> Operation:
        """Build an Operation from ``(left, operator, right)``."""
        left, op, right = se
        return Operation(left=left, op=op, right=right, type=UnknownTypeUsage())

    def div_expression(self, se) -> Operation:
        """Build an Operation from ``(left, operator, right)``."""
        left, op, right = se
        return Operation(left=left, op=op, right=right, type=UnknownTypeUsage())

    def expression(self, exp) -> Expression:
        """Wrap the single child in an Expression unless it already is one."""
        (inner,) = exp
        if isinstance(inner, Expression):
            return inner
        return Expression(expression=inner, type=UnknownTypeUsage())

    def factor(self, factor) -> Expression:
        """Wrap the single child in an Expression unless it already is one."""
        (inner,) = factor
        if isinstance(inner, Expression):
            return inner
        return Expression(expression=inner, type=UnknownTypeUsage())

    def term(self, term) -> Expression:
        """Always wrap the single child in a new Expression."""
        (inner,) = term
        return Expression(expression=inner, type=UnknownTypeUsage())

    # ---- statements ------------------------------------------------------

    def let_statement(self, let_statement) -> LetStatement:
        """Build a LetStatement; three children means an explicit type was given."""
        if len(let_statement) == 3:
            variable_name, type_usage, expression = let_statement
        else:
            variable_name, expression = let_statement
            type_usage = UnknownTypeUsage()
        return LetStatement(
            variable_name=variable_name,
            type=type_usage,
            expression=expression,
        )

    def assignment_statement(self, assignment_statement) -> AssignmentStatement:
        """Build an AssignmentStatement from ``(target, value expression)``."""
        (source, expression) = assignment_statement
        return AssignmentStatement(
            source=source,
            type=UnknownTypeUsage(),
            expression=expression,
        )

    def statement(self, statement):
        """Unwrap the single child of a ``statement`` rule."""
        (inner,) = statement
        return inner

    def block(self, block) -> Block:
        """Build a Block from the list of statement children."""
        return Block(statements=block, type=UnknownTypeUsage())

    # ---- types -----------------------------------------------------------

    def data_type(self, name) -> TypeUsage:
        """Build a DataTypeUsage from the single identifier child."""
        (type_name,) = name
        return DataTypeUsage(name=type_name)

    def function_type(self, type_usage) -> TypeUsage:
        """Build a FunctionTypeUsage whose return type is ``UNIT_TYPE``."""
        return FunctionTypeUsage(
            arguments=type_usage,
            return_type=DataTypeUsage(name=UNIT_TYPE),
        )

    def function_type_with_return(self, type_usage) -> TypeUsage:
        """Build a FunctionTypeUsage; the last child is the return type."""
        *arguments, return_type = type_usage
        return FunctionTypeUsage(arguments=arguments, return_type=return_type)

    def type_usage(self, type_usage):
        """Unwrap the single child of a ``type_usage`` rule."""
        (inner,) = type_usage
        return inner

    # ---- declarations ----------------------------------------------------

    def variable_declaration(self, identifier) -> VariableDeclaration:
        """Build a VariableDeclaration from ``(name, type)``."""
        (name, type_usage) = identifier
        return VariableDeclaration(name=name, type=type_usage)

    # NOTE(review): the grammar in this file names its rules
    # function_declaration_without_return / function_declaration_with_return,
    # so the two callbacks below are not matched by rule name, and they pass
    # keywords (name, arguments, return_type, block) that the Function
    # dataclass in this file does not declare. Left unchanged pending the
    # function-declaration refactor.
    def function_without_return(self, function) -> Function:
        return Function(
            name=function[0],
            arguments=function[1:-1],
            return_type=DataTypeUsage(name=UNIT_TYPE),
            block=function[-1],
            type=FunctionTypeUsage(
                arguments=[arg.type for arg in function[1:-1]],
                return_type=DataTypeUsage(name=UNIT_TYPE),
            ),
        )

    def function_with_return(self, function) -> Function:
        return Function(
            name=function[0],
            arguments=function[1:-2],
            return_type=function[-2],
            block=function[-1],
            type=FunctionTypeUsage(
                arguments=[arg.type for arg in function[1:-2]],
                return_type=function[-2],
            ),
        )

    def function(self, function):
        """Unwrap the single child of a ``function`` rule.

        NOTE(review): the grammar's ``function`` rule is
        ``function_declaration block`` (two children), which this one-element
        unpacking does not accept — confirm against the refactor in progress.
        """
        (inner,) = function
        return inner

    def struct_definition_field(self, struct_definition_field):
        """Return the ``(field name, field type)`` pair of one struct field."""
        (field, type_usage) = struct_definition_field
        return (field, type_usage)

    def struct_type_declaration(self, struct_type_declaration) -> StructTypeDeclaration:
        """Build a StructTypeDeclaration: first child is the name, the rest are field pairs."""
        name, *field_pairs = struct_type_declaration
        return StructTypeDeclaration(name=name, fields=dict(field_pairs))

    def type_alias_declaration(self, type_alias_declaration) -> AliasTypeDeclaration:
        """Build an AliasTypeDeclaration from ``(new name, existing type)``."""
        (name, existing) = type_alias_declaration
        # `old` is the aliased type itself, not the whole child list.
        return AliasTypeDeclaration(new=DataTypeUsage(name), old=existing)

    def type_declaration(self, type_declaration):
        """Unwrap the single child of a ``type_declaration`` rule."""
        (inner,) = type_declaration
        return inner

    def impl(self, impl) -> Impl:
        """Build an Impl: first child is the struct name, the rest are its functions.

        NOTE(review): for the ``impl A for B`` alternative the second
        identifier also lands in ``functions`` — confirm intended handling.
        """
        return Impl(struct=impl[0], functions=impl[1:])

    def module(self, module_items) -> Module:
        """Sort top-level items into functions, impls and (everything else) types."""
        functions = []
        types = []
        impls = []
        for item in module_items:
            if isinstance(item, Function):
                functions.append(item)
            elif isinstance(item, Impl):
                impls.append(item)
            else:
                types.append(item)
        return Module(functions=functions, types=types, impls=impls)
2021-02-20 19:58:56 -07:00
2021-05-05 19:32:55 -06:00
# Parser for boring_grammar, starting at the `module` rule.
# NOTE(review): newer lark releases name this lexer "basic" rather than
# "standard" — confirm against the lark version this project pins.
boring_parser = Lark(boring_grammar, start="module", lexer="standard")
2021-02-20 19:58:56 -07:00
2021-05-05 19:32:55 -06:00
if __name__ == "__main__":
    # Script entry point: parse the file named by the first command-line
    # argument, convert the parse tree to the AST, and print it.
    source_path = sys.argv[1]
    with open(source_path) as source_file:
        source_text = source_file.read()
        tree = boring_parser.parse(source_text)
    result = TreeToBoring().transform(tree)
    pretty_print(result)