added ast parsing

This commit is contained in:
2025-08-18 22:53:36 -06:00
parent 03662d980f
commit d370fb44a2
7 changed files with 635 additions and 5 deletions

View File

@@ -0,0 +1,29 @@
import { defineCommand } from "@bunli/core";
import { boringGrammar } from "../parse/grammar";
import { semantics } from "../parse/semantics";
/**
 * `run` command: reads the file named by the first positional argument,
 * parses it as a boringlang `Module`, and prints the resulting AST as
 * pretty-printed JSON on stdout.
 *
 * @throws Error when no path is supplied.
 *
 * On a parse failure the Ohm error message is written to stderr and the
 * process exit code is set to 1, so callers can tell a failed parse apart
 * from a successful one.
 */
export const run = defineCommand({
  name: "run",
  description: "Run a boringlang file",
  handler: async ({ positional }) => {
    const [path] = positional;
    if (!path) {
      throw new Error("Usage: run <path>");
    }

    const text = await Bun.file(path).text();
    const match = boringGrammar.match(text, "Module");

    if (match.failed()) {
      // Diagnostics belong on stderr; stdout is reserved for the AST dump.
      // Grammar debugging hint: boringGrammar.trace(text, "Module").toString()
      console.error(match.message);
      process.exitCode = 1;
      return;
    }

    const ast = semantics(match).toAST();
    console.log(JSON.stringify(ast, null, 2));
  },
});

View File

@@ -1,11 +1,14 @@
#!/usr/bin/env bun
import { createCLI } from "@bunli/core";
import { run } from "./commands/run.js";
// Build the CLI object, register the `run` command on it, then start it.
const cli = createCLI({
  description: "Boring programming language CLI",
  version: "0.1.0",
  name: "boringlang",
});

cli.command(run);

await cli.run();

View File

@@ -0,0 +1,194 @@
/**
 * Location of a node in the parsed input. The `toAST` semantics fill these
 * from the matched node's `source.startIdx` / `source.endIdx`.
 */
export interface Spanned {
spanStart: number;
spanEnd: number;
}
/** A name as written in the source, together with its span. */
export interface Identifier extends Spanned {
name: string;
}
/**
 * Integer literal. `value` is the literal's source text, not a parsed number.
 * The parser assigns it the named type `i64`.
 */
export interface LiteralInt {
expressionType: "LiteralInt";
value: string;
type: TypeUsage;
}
/**
 * Floating-point literal. `value` is the literal's source text, not a parsed
 * number. The parser assigns it the named type `f64`.
 */
export interface LiteralFloat {
expressionType: "LiteralFloat";
value: string;
type: TypeUsage;
}
/**
 * Boolean literal. `value` is the source text (`"true"` or `"false"` per the
 * grammar), kept as a string. The parser assigns it the named type `bool`.
 */
export interface LiteralBool {
expressionType: "LiteralBool";
value: string;
type: TypeUsage;
}
/**
 * String literal. `value` is the source text between the quotes; the parser
 * applies no escape processing to it. The parser assigns the named type
 * `String`.
 */
export interface LiteralString {
expressionType: "LiteralString";
value: string;
type: TypeUsage;
}
/** One `name: expression` entry inside a struct literal. */
export interface StructField {
name: Identifier;
expression: Expression;
}
/**
 * Struct literal `Name { field: expr, ... }`. The parser sets `type` to a
 * `NamedTypeUsage` built from the same identifier as `name`.
 */
export interface LiteralStruct {
expressionType: "LiteralStruct";
name: Identifier;
fields: StructField[];
type: TypeUsage;
}
/**
 * Call expression `source(arguments...)`. `source` is the callee expression.
 * The parser gives `type` a fresh `UnknownTypeUsage`.
 */
export interface FunctionCall {
expressionType: "FunctionCall";
source: Expression;
arguments: Expression[];
type: TypeUsage;
}
/**
 * Attribute access `source.attribute`. The parser gives `type` a fresh
 * `UnknownTypeUsage`.
 */
export interface StructGetter {
expressionType: "StructGetter";
source: Expression;
attribute: Identifier;
type: TypeUsage;
}
/**
 * Binary arithmetic expression `left op right`.
 *
 * NOTE(review): unlike every other expression variant in this file, this one
 * has no `type` field; confirm whether that is intentional.
 */
export interface Operation {
expressionType: "Operation";
left: Expression;
op: "+" | "-" | "*" | "/";
right: Expression;
}
/**
 * Reference to a variable by name. The parser gives `type` a fresh
 * `UnknownTypeUsage`.
 */
export interface VariableUsage {
expressionType: "VariableUsage";
name: Identifier;
type: TypeUsage;
}
/**
 * `if (condition) block else block`. `else` is `null` when the source has no
 * else branch. The parser gives `type` a fresh `UnknownTypeUsage`.
 */
export interface IfExpression {
expressionType: "IfExpression";
condition: Expression;
block: Block;
else: Block | null;
type: TypeUsage;
}
/**
 * Wrapper around one concrete expression variant; it is also a member of the
 * `Statement` union (tag `statementType: "Expression"`).
 *
 * NOTE(review): in the `toAST` semantics only the `ExpressionStatement` action
 * builds this wrapper. The `Term`, `Factor` and `Expression` actions return the
 * bare sub-expression object even though they are annotated with this type —
 * confirm which shape consumers should expect.
 */
export interface Expression {
statementType: "Expression";
subExpression:
| LiteralInt
| LiteralFloat
| LiteralBool
| LiteralString
| LiteralStruct
| FunctionCall
| VariableUsage
| IfExpression
| StructGetter
| Block
| Operation;
type: TypeUsage;
}
/** `return source;` — `source` is the returned expression. */
export interface ReturnStatement {
statementType: "ReturnStatement";
source: Expression;
}
/**
 * `let variableName[: type] = expression;`. When the source omits the type
 * annotation, the parser fills `type` with a fresh `UnknownTypeUsage`.
 */
export interface LetStatement {
statementType: "LetStatement";
variableName: Identifier;
expression: Expression;
type: TypeUsage;
}
/**
 * `source = expression;`. `source` is the assignment target: a variable or a
 * struct attribute.
 */
export interface AssignmentStatement {
statementType: "AssignmentStatement";
source: VariableUsage | StructGetter;
expression: Expression;
}
/** Any statement that may appear in a `Block`, discriminated by `statementType`. */
export type Statement =
| ReturnStatement
| LetStatement
| AssignmentStatement
| Expression;
/**
 * Brace-delimited sequence of statements. When the block ends with an
 * expression that has no trailing `;`, the parser places that expression last
 * in `statements`. The parser gives `type` a fresh `UnknownTypeUsage`.
 *
 * NOTE(review): `Block` is a member of `Expression["subExpression"]` but has
 * no `expressionType` tag, so it cannot be discriminated like its siblings.
 */
export interface Block {
statements: Statement[];
type: TypeUsage;
}
/** One `name: type` parameter in a function declaration. */
export interface FunctionArgument {
name: Identifier;
type: TypeUsage;
}
/**
 * Function signature `fn name(arguments...): returnType`, without a body.
 * Used by `Function` and, on its own, for trait methods.
 */
export interface FunctionDeclaration {
name: Identifier;
arguments: FunctionArgument[];
returnType: TypeUsage;
}
/** A function definition: a declaration followed by its body block. */
export interface Function {
moduleItem: "Function";
declaration: FunctionDeclaration;
block: Block;
}
/** One `name: type` field in a struct type declaration. */
export interface StructTypeField {
name: Identifier;
type: TypeUsage;
}
/**
 * `type Name struct { fields... }`. Carries both the `moduleItem` tag and the
 * `typeDeclaration` tag, set to the same literal.
 */
export interface StructTypeDeclaration {
moduleItem: "StructTypeDeclaration";
typeDeclaration: "StructTypeDeclaration";
name: Identifier;
fields: StructTypeField[];
}
/**
 * `type Name trait { fn ...; }`. `functions` holds the bodiless method
 * signatures declared by the trait.
 */
export interface TraitTypeDeclaration {
moduleItem: "TraitTypeDeclaration";
typeDeclaration: "TraitTypeDeclaration";
name: Identifier;
functions: FunctionDeclaration[];
}
/** Any type declaration, discriminated by `typeDeclaration`. */
export type TypeDeclaration = StructTypeDeclaration | TraitTypeDeclaration;
/**
 * `impl [Trait for] Struct { functions... }`. `trait` is `null` when the
 * source has no `Trait for` part.
 */
export interface Impl {
moduleItem: "Impl";
struct: NamedTypeUsage;
trait: NamedTypeUsage | null;
functions: Function[];
}
/** Any top-level item of a module, discriminated by `moduleItem`. */
export type ModuleItem = Function | TypeDeclaration | Impl;
/** Root of the AST: the module items of one parsed source file, in order. */
export interface Module {
items: ModuleItem[];
}
/** A type referred to by name. */
export interface NamedTypeUsage {
typeUsage: "NamedTypeUsage";
name: Identifier;
}
/** A function type `fn(arguments...): returnType`. */
export interface FunctionTypeUsage {
typeUsage: "FunctionTypeUsage";
arguments: TypeUsage[];
returnType: TypeUsage;
}
/**
 * Placeholder for a type the parser does not know. `name` is a generated
 * label of the form `S<n>` taken from a counter in the semantics module.
 */
export interface UnknownTypeUsage {
typeUsage: "UnknownTypeUsage";
name: string;
}
/** Any use of a type in the AST, discriminated by `typeUsage`. */
export type TypeUsage = NamedTypeUsage | FunctionTypeUsage | UnknownTypeUsage;

View File

@@ -0,0 +1,66 @@
import * as ohm from "ohm-js";
/**
 * Ohm grammar for Boringlang. The start rule used by callers is `Module`.
 *
 * Rule names and rule arities are part of the contract with the `toAST`
 * semantic actions and must not change without updating them.
 *
 * Token-level rules (`LiteralInt`, `LiteralFloat`, `LiteralString`,
 * `Identifier`) keep their capitalised names but wrap their bodies in the
 * lexification operator `#( ... )`. Capitalised rules are syntactic in Ohm, so
 * without lexification whitespace and comments are skipped between the
 * individual characters of a token: `return x;` matched as a single identifier
 * named "return x", `1 2` was one integer, and string contents lost
 * whitespace and `//...` sequences. Lexification preserves the arity of the
 * wrapped expression.
 *
 * `Term` tries `LiteralFloat` before `LiteralInt`. With the opposite order
 * `1.5` committed to the integer `1` and the float alternative was never
 * reached.
 *
 * `comment` may end at a line terminator or at end of input, so a `//`
 * comment on the last line of a file without a trailing newline is accepted.
 *
 * NOTE(review): keywords (`return`, `let`, `if`, ...) are not reserved, so
 * they can still be matched by `Identifier` where an expression is expected.
 */
export const boringGrammar = ohm.grammar(String.raw`
Boringlang {
  ReturnStatement = "return" Expression ";"
  LetStatement = "let" Identifier (":" TypeUsage)? "=" Expression ";"
  AssignmentStatement = VariableUsage "=" Expression ";"
                      | StructGetter "=" Expression ";"
  ExpressionStatement = Expression ";"
  Statement = ExpressionStatement
            | LetStatement
            | ReturnStatement
            | AssignmentStatement
  LiteralInt = #(digit+)
  LiteralFloat = #(digit* "." digit+)
  LiteralBool = "true" | "false"
  LiteralString = #("\"" (~"\"" any)* "\"")
                | #("'" (~"'" any)* "'")
  LiteralStructField = Identifier ":" Expression
  LiteralStruct = Identifier "{" ListOf<LiteralStructField, ","> "}"
  Identifier = #((letter | "_")+ (letter | digit | "_")*)
  FunctionCall = Expression "(" ListOf<Expression, ","> ")"
  StructGetter = Expression "." Identifier
  VariableUsage = Identifier
  IfExpression = "if" "(" Expression ")" Block ("else" Block)?
  Term = LiteralFloat
       | LiteralInt
       | LiteralBool
       | LiteralString
       | LiteralStruct
       | IfExpression
       | Block
       | "(" Expression ")" -- parens
       | VariableUsage
  Factor = Factor "*" Term -- mult
         | Factor "/" Term -- div
         | Term
  Expression = Expression "+" Factor -- plus
             | Expression "-" Factor -- minus
             | StructGetter
             | FunctionCall
             | Factor
  Block = "{" Statement* Expression? "}"
  NamedTypeUsage = Identifier
  TypeUsage = NamedTypeUsage
            | "fn" "(" ListOf<TypeUsage, ","> ")" ":" TypeUsage -- function_tu
  FunctionArgument = Identifier ":" TypeUsage
  FunctionDeclaration = "fn" Identifier "(" ListOf<FunctionArgument, ","> ")" ":" TypeUsage
  Function = FunctionDeclaration Block
  StructTypeField = Identifier ":" TypeUsage
  StructTypeDeclaration = "type" Identifier "struct" "{" ListOf<StructTypeField, ","> "}"
  TraitMethod = FunctionDeclaration ";"
  TraitTypeDeclaration = "type" Identifier "trait" "{" TraitMethod* "}"
  TypeDeclaration = StructTypeDeclaration
                  | TraitTypeDeclaration
  Impl = "impl" (NamedTypeUsage "for")? NamedTypeUsage "{" Function* "}"
  ModuleItem = Function
             | TypeDeclaration
             | Impl
  Module = ModuleItem*
  lineTerminator = "\n" | "\r" | "\u2028" | "\u2029"
  comment = "//" (~lineTerminator any)* (lineTerminator | end)
  space += comment
}
`);

View File

@@ -0,0 +1,335 @@
import {
AssignmentStatement,
Block,
Expression,
Function,
FunctionArgument,
FunctionCall,
FunctionDeclaration,
FunctionTypeUsage,
Identifier,
IfExpression,
Impl,
LetStatement,
LiteralBool,
LiteralFloat,
LiteralInt,
LiteralString,
LiteralStruct,
Module,
ModuleItem,
NamedTypeUsage,
Operation,
ReturnStatement,
Statement,
StructField,
StructGetter,
StructTypeDeclaration,
StructTypeField,
TraitTypeDeclaration,
TypeDeclaration,
TypeUsage,
VariableUsage,
} from "./ast";
import { boringGrammar } from "./grammar";
/** Number of placeholder type names handed out so far by `nextUnknown`. */
let unknownTypeCounter = 0;

/**
 * Produces the next placeholder type name: "S0", "S1", "S2", ...
 * Each call advances the module-level counter by one.
 */
function nextUnknown() {
  const label = `S${unknownTypeCounter}`;
  unknownTypeCounter++;
  return label;
}
/**
 * Builds the `NamedTypeUsage` the parser attaches to literals of a built-in
 * type. The type name does not occur in the source text, so its span is the
 * placeholder 0/0.
 */
function builtinType(name: string): NamedTypeUsage {
  return {
    typeUsage: "NamedTypeUsage",
    name: { name, spanStart: 0, spanEnd: 0 },
  };
}

/** Builds a fresh `UnknownTypeUsage` whose name comes from `nextUnknown()`. */
function unknownType(): TypeUsage {
  return { typeUsage: "UnknownTypeUsage", name: nextUnknown() };
}

/**
 * Semantics object for `boringGrammar`. Its `toAST` operation converts a
 * successful match into the plain-object AST declared in `./ast`.
 *
 * Each action's parameter count must equal the arity of the grammar rule it is
 * named after; parameters starting with `_` are tokens that carry no
 * information. Optional and repeated grammar parts arrive as iteration nodes
 * whose `toAST()` (via `_iter`) is an array, empty when the part is absent.
 */
export const semantics = boringGrammar.createSemantics();
semantics.addOperation<any>("toAST", {
  // ---- Literals -----------------------------------------------------------
  LiteralInt(_): LiteralInt {
    return {
      expressionType: "LiteralInt",
      value: this.sourceString,
      type: builtinType("i64"),
    };
  },
  LiteralFloat(_1, _2, _3): LiteralFloat {
    return {
      expressionType: "LiteralFloat",
      value: this.sourceString,
      type: builtinType("f64"),
    };
  },
  LiteralBool(_): LiteralBool {
    return {
      expressionType: "LiteralBool",
      value: this.sourceString,
      type: builtinType("bool"),
    };
  },
  LiteralString(_1, text, _3): LiteralString {
    return {
      expressionType: "LiteralString",
      // Only the text between the quotes; no escape processing is applied.
      value: text.sourceString,
      type: builtinType("String"),
    };
  },
  LiteralStructField(identifier, _2, expression): StructField {
    return {
      name: identifier.toAST(),
      expression: expression.toAST(),
    };
  },
  LiteralStruct(identifier, _2, fields, _4): LiteralStruct {
    return {
      expressionType: "LiteralStruct",
      name: identifier.toAST(),
      fields: fields.asIteration().children.map((c) => c.toAST()),
      // Converted a second time so `name` and `type.name` are distinct objects.
      type: { typeUsage: "NamedTypeUsage", name: identifier.toAST() },
    };
  },
  Identifier(_1, _2): Identifier {
    return {
      name: this.sourceString,
      spanStart: this.source.startIdx,
      spanEnd: this.source.endIdx,
    };
  },

  // ---- Expressions --------------------------------------------------------
  FunctionCall(expression, _2, args, _4): FunctionCall {
    return {
      expressionType: "FunctionCall",
      source: expression.toAST(),
      arguments: args.asIteration().children.map((c) => c.toAST()),
      type: unknownType(),
    };
  },
  StructGetter(expression, _2, identifier): StructGetter {
    return {
      expressionType: "StructGetter",
      source: expression.toAST(),
      attribute: identifier.toAST(),
      type: unknownType(),
    };
  },
  VariableUsage(identifier): VariableUsage {
    return {
      expressionType: "VariableUsage",
      name: identifier.toAST(),
      type: unknownType(),
    };
  },
  IfExpression(_1, _2, expression, _4, block, _6, elseBlock): IfExpression {
    // `elseBlock` is an optional part: an array with zero or one element.
    const eb = elseBlock.toAST();
    return {
      expressionType: "IfExpression",
      condition: expression.toAST(),
      block: block.toAST(),
      else: eb.length > 0 ? eb[0] : null,
      type: unknownType(),
    };
  },
  // Term / Factor / Expression without a case name simply forward to the
  // alternative that matched.
  Term(term): Expression {
    return term.toAST();
  },
  Term_parens(_1, term, _3): Expression {
    return term.toAST();
  },
  Factor(factor): Expression {
    return factor.toAST();
  },
  Expression(expression): Expression {
    return expression.toAST();
  },
  Expression_plus(expression, _2, factor): Operation {
    return {
      expressionType: "Operation",
      left: expression.toAST(),
      op: "+",
      right: factor.toAST(),
    };
  },
  Expression_minus(expression, _2, factor): Operation {
    return {
      expressionType: "Operation",
      left: expression.toAST(),
      op: "-",
      right: factor.toAST(),
    };
  },
  Factor_mult(factor, _2, term): Operation {
    return {
      expressionType: "Operation",
      left: factor.toAST(),
      op: "*",
      right: term.toAST(),
    };
  },
  Factor_div(factor, _2, term): Operation {
    return {
      expressionType: "Operation",
      left: factor.toAST(),
      op: "/",
      right: term.toAST(),
    };
  },

  // ---- Statements ---------------------------------------------------------
  Statement(statement): Statement {
    return statement.toAST();
  },
  ReturnStatement(_1, expression, _3): ReturnStatement {
    return {
      statementType: "ReturnStatement",
      source: expression.toAST(),
    };
  },
  LetStatement(_1, ident, _3, typeUsage, _5, expression, _7): LetStatement {
    // Optional `: Type` annotation: an array with zero or one element.
    const tu = typeUsage.toAST();
    return {
      statementType: "LetStatement",
      variableName: ident.toAST(),
      expression: expression.toAST(),
      type: tu.length > 0 ? tu[0] : unknownType(),
    };
  },
  AssignmentStatement(variable, _2, expression, _4): AssignmentStatement {
    return {
      statementType: "AssignmentStatement",
      source: variable.toAST(),
      expression: expression.toAST(),
    };
  },
  ExpressionStatement(expression, _2): Expression {
    return {
      statementType: "Expression",
      subExpression: expression.toAST(),
      type: unknownType(),
    };
  },
  Block(_1, statements, expression, _4): Block {
    const lines = statements.asIteration().children.map((c) => c.toAST());
    const finalExpression = expression.toAST();
    // Append the trailing expression only when the block has one, so
    // `statements` never contains `null` (its declared type is `Statement[]`).
    if (finalExpression.length > 0) {
      lines.push(finalExpression[0]);
    }
    return {
      statements: lines,
      type: unknownType(),
    };
  },

  // ---- Type usages --------------------------------------------------------
  NamedTypeUsage(name): NamedTypeUsage {
    return {
      typeUsage: "NamedTypeUsage",
      name: name.toAST(),
    };
  },
  TypeUsage_function_tu(_1, _2, args, _4, _5, returnType): FunctionTypeUsage {
    return {
      typeUsage: "FunctionTypeUsage",
      arguments: args.asIteration().children.map((c) => c.toAST()),
      returnType: returnType.toAST(),
    };
  },
  TypeUsage(typeUsage): TypeUsage {
    return typeUsage.toAST();
  },

  // ---- Functions ----------------------------------------------------------
  FunctionArgument(identifier, _2, typeUsage): FunctionArgument {
    return {
      name: identifier.toAST(),
      type: typeUsage.toAST(),
    };
  },
  FunctionDeclaration(
    _1,
    identifier,
    _3,
    args,
    _5,
    _6,
    returnType,
  ): FunctionDeclaration {
    return {
      name: identifier.toAST(),
      arguments: args.asIteration().children.map((c) => c.toAST()),
      returnType: returnType.toAST(),
    };
  },
  Function(declaration, block): Function {
    return {
      moduleItem: "Function",
      declaration: declaration.toAST(),
      block: block.toAST(),
    };
  },

  // ---- Type declarations and impls ----------------------------------------
  StructTypeField(identifier, _2, typeUsage): StructTypeField {
    return {
      name: identifier.toAST(),
      type: typeUsage.toAST(),
    };
  },
  StructTypeDeclaration(
    _1,
    identifier,
    _3,
    _4,
    fields,
    _6,
  ): StructTypeDeclaration {
    return {
      moduleItem: "StructTypeDeclaration",
      typeDeclaration: "StructTypeDeclaration",
      name: identifier.toAST(),
      fields: fields.asIteration().children.map((c) => c.toAST()),
    };
  },
  TraitMethod(declaration, _2): FunctionDeclaration {
    return declaration.toAST();
  },
  TraitTypeDeclaration(
    _1,
    identifier,
    _3,
    _4,
    methods,
    _6,
  ): TraitTypeDeclaration {
    return {
      moduleItem: "TraitTypeDeclaration",
      typeDeclaration: "TraitTypeDeclaration",
      name: identifier.toAST(),
      functions: methods.asIteration().children.map((c) => c.toAST()),
    };
  },
  TypeDeclaration(declaration): TypeDeclaration {
    return declaration.toAST();
  },
  Impl(_1, trait, _3, struct, _5, methods, _7): Impl {
    // Optional `Trait for` prefix: an array with zero or one element.
    const tr = trait.toAST();
    return {
      moduleItem: "Impl",
      struct: struct.toAST(),
      trait: tr.length > 0 ? tr[0] : null,
      functions: methods.asIteration().children.map((c) => c.toAST()),
    };
  },

  // ---- Module -------------------------------------------------------------
  ModuleItem(item): ModuleItem {
    return item.toAST();
  },
  Module(items): Module {
    return {
      items: items.asIteration().children.map((c) => c.toAST()),
    };
  },

  // Iteration nodes (`*`, `+`, `?`) convert to an array of their children.
  _iter(...children) {
    return children.map((c) => c.toAST());
  },
});