WIP dedicated recursive descent module

This commit is contained in:
mars 2022-03-25 17:26:06 -06:00
parent 62109e84ad
commit fb50827f2f
3 changed files with 454 additions and 0 deletions

View File

@ -121,6 +121,7 @@ impl<'a> Display for TokenInfo<'a> {
}
}
#[derive(Clone)]
pub struct Lexer<'a> {
inner: logos::Lexer<'a, Token>,
line_num: usize,

View File

@ -4,3 +4,8 @@
pub mod ast;
pub mod lexer;
pub mod pest;
pub mod rd;
/// Implemented by parser front-ends that can produce an [`ast::Ast`].
pub trait BuildAst<'a> {
/// Consumes the implementor (it takes `self` by value) and returns the AST.
fn build_ast(self) -> ast::Ast<'a>;
}

448
src/parse/rd.rs Normal file
View File

@ -0,0 +1,448 @@
// Copyright (c) 2022 Marceline Cramer
// SPDX-License-Identifier: GPL-3.0-or-later
use super::ast::*;
use super::lexer::{Lexer, Token};
use super::BuildAst;
/// Recursive descent parser that pulls its tokens from a [`Lexer`].
pub struct RecursiveDescent<'a> {
// Token source. The `Deref`/`DerefMut` impls for this type forward to it,
// which is why lexer methods are called directly on `self` in the parser.
lexer: Lexer<'a>,
}
impl<'a> RecursiveDescent<'a> {
pub fn new(lexer: Lexer<'a>) -> Self {
Self { lexer }
}
/// Parses every top-level definition until the lexer runs out of tokens.
///
/// A bare identifier at top level is remembered as the struct that the
/// *next* definition is associated with; that next definition must start with
/// `fn` or `mut fn`.
///
/// NOTE(review): if the input ends right after such an identifier, the loop
/// simply stops and the identifier is discarded without an error — confirm
/// whether that should be reported.
pub fn build_defs(&mut self) -> Vec<Definition<'a>> {
    let mut definitions = Vec::new();
    let mut pending_struct = None;

    while let Some(token) = self.next() {
        // An identifier was seen on the previous iteration: this token must
        // begin the associated function.
        if let Some(owner) = pending_struct.take() {
            let is_mutable = match token {
                Token::Mut => {
                    self.eat_expect(Token::Function);
                    true
                }
                Token::Function => false,
                _ => self.panic_message("Expected fn or mut"),
            };
            definitions.push(self.build_fn(is_mutable, Some(owner)));
            continue;
        }

        let parsed = match token {
            Token::Struct => Some(self.build_struct()),
            Token::Function => Some(self.build_fn(false, None)),
            Token::Interface => Some(self.build_iface()),
            Token::Impl => Some(self.build_impl()),
            Token::Identifier => {
                pending_struct = Some(self.slice());
                None
            }
            _ => self.panic_message("Expected function, struct, impl block, or interface"),
        };
        if let Some(definition) = parsed {
            definitions.push(definition);
        }
    }

    definitions
}
/// Parses a struct definition; `build_defs` calls this right after it has
/// seen `Token::Struct`.
///
/// Reads the struct name and an opening brace, then `type name` member pairs.
/// Commas between members are skipped and a closing brace ends the list.
pub fn build_struct(&mut self) -> Definition<'a> {
    let name = self.eat_expect_id();
    self.eat_expect(Token::BraceOpen);

    let mut members = Vec::new();
    loop {
        // `eat_id` hands back the non-identifier token (if any) as the error.
        let type_name = match self.eat_id() {
            Ok(id) => id,
            Err(Some(Token::Comma)) => continue,
            Err(Some(Token::BraceClose)) => break,
            _ => self.panic_message("Expected comma or closing brace"),
        };
        let field = self.eat_expect_id();
        members.push(StructMember {
            type_name,
            name: field,
        });
    }

    Definition::Struct { name, members }
}
/// Parses a function implementation and wraps it as a top-level definition.
///
/// `associated_struct` is stored unchanged in the resulting
/// `Definition::Function`; `mutable` is forwarded to `build_fn_impl`.
pub fn build_fn(&mut self, mutable: bool, associated_struct: Option<&'a str>) -> Definition<'a> {
    Definition::Function {
        associated_struct,
        implementation: self.build_fn_impl(mutable),
    }
}
/// Parses an interface definition; `build_defs` calls this right after it
/// has seen `Token::Interface`.
///
/// Reads the interface name and an opening brace, then `fn` / `mut fn`
/// definitions, each required to end with a semicolon, until a closing brace.
/// Panics (via `unwrap`) if the tokens run out first.
pub fn build_iface(&mut self) -> Definition<'a> {
    let name = self.eat_expect_id();
    self.eat_expect(Token::BraceOpen);

    let mut functions = Vec::new();
    loop {
        let is_mutable = match self.next().unwrap() {
            Token::BraceClose => break,
            Token::Function => false,
            Token::Mut => {
                self.eat_expect(Token::Function);
                true
            }
            _ => self.panic_message("Expected function definition or closing bracket"),
        };
        functions.push(self.fn_def_then(is_mutable, Token::Semicolon));
    }

    Definition::Interface { name, functions }
}
/// Parses an impl block; `build_defs` calls this right after it has seen
/// `Token::Impl`.
///
/// Reads the struct name, an optional interface name, an opening brace, and
/// then `fn` / `mut fn` implementations until a closing brace. Panics (via
/// `unwrap`) if the tokens run out first.
pub fn build_impl(&mut self) -> Definition<'a> {
    let structure = self.eat_expect_id();

    // Either `{` directly, or an interface name followed by `{`.
    let interface = match self.next().unwrap() {
        Token::BraceOpen => None,
        Token::Identifier => {
            let iface_name = self.slice();
            self.eat_expect(Token::BraceOpen);
            Some(iface_name)
        }
        _ => self.panic_message("Expected interface name or opening brace"),
    };

    let mut functions = Vec::new();
    loop {
        let is_mutable = match self.next().unwrap() {
            Token::BraceClose => break,
            Token::Function => false,
            Token::Mut => {
                self.eat_expect(Token::Function);
                true
            }
            _ => self.panic_message("Expected function implementation or closing brace"),
        };
        functions.push(self.build_fn_impl(is_mutable));
    }

    Definition::Impl {
        structure,
        interface,
        functions,
    }
}
/// Parses a function implementation: its definition followed by a body.
///
/// Called after the `fn` keyword has been consumed. The signature must be
/// followed by an opening brace, because `build_branch_body` starts *inside*
/// the block and reads statements up to the matching closing brace. (A
/// semicolon terminator is only correct for body-less interface definitions.)
pub fn build_fn_impl(&mut self, mutable: bool) -> FnImpl<'a> {
    let def = self.fn_def_then(mutable, Token::BraceOpen);
    let body = self.build_branch_body();
    FnImpl { def, body }
}
/// Parses a function name and signature.
///
/// Returns the definition together with the token that `build_fn_sig` read
/// past the end of the signature, so the caller can decide what must follow.
pub fn build_fn_def(&mut self, mutable: bool) -> (FnDef<'a>, Token) {
    let name = self.eat_expect_id();
    let (signature, tail) = self.build_fn_sig();
    (
        FnDef {
            mutable,
            name,
            signature,
        },
        tail,
    )
}
/// Parses a function definition and requires the token after it to be `then`.
///
/// Reports `Expected <then>` through `panic_message` on any other token.
pub fn fn_def_then(&mut self, mutable: bool, then: Token) -> FnDef<'a> {
    match self.build_fn_def(mutable) {
        (fn_def, tail) if tail == then => fn_def,
        _ => self.panic_message(&format!("Expected {:?}", then)),
    }
}
/// Parses a parenthesised argument list and an optional return type.
///
/// Each argument is a type name optionally followed by an argument name.
/// After the closing parenthesis, an identifier (if present) is taken as the
/// return type. The first token that is not part of the signature is returned
/// alongside it. Panics (via `unwrap`) if the tokens run out.
pub fn build_fn_sig(&mut self) -> (FnSig<'a>, Token) {
    self.eat_expect(Token::ParenOpen);

    let mut args = Vec::new();
    loop {
        let type_name = match self.eat_id() {
            Ok(id) => id,
            Err(Some(Token::Comma)) => continue,
            Err(Some(Token::ParenClose)) => break,
            _ => self.panic_message("Expected comma, type, or closing parentheses"),
        };

        // The token after the type decides whether the argument is named and
        // whether the list ends here.
        let (name, list_closed) = match self.next().unwrap() {
            Token::Identifier => (Some(self.slice()), false),
            Token::Comma => (None, false),
            Token::ParenClose => (None, true),
            _ => self.panic_message("Unexpected token"),
        };
        args.push(FnArg { type_name, name });
        if list_closed {
            break;
        }
    }

    let after_args = self.next().unwrap();
    let (return_type, tail) = if after_args == Token::Identifier {
        let type_name = self.slice();
        (Some(type_name), self.next().unwrap())
    } else {
        (None, after_args)
    };

    (FnSig { args, return_type }, tail)
}
pub fn build_branch_body(&mut self) -> BranchBody<'a> {
let mut statements = Vec::new();
let mut next = None;
let tail_expr = loop {
let tok = if let Some(tok) = next {
next = None;
tok
} else {
self.next().unwrap()
};
match tok {
Token::Let => {
let (statement, next) = self.build_let();
statements.push(statement);
match next {
Token::Semicolon => {}
Token::BraceClose => break None,
_ => self.panic_message("Unexpected token"),
}
}
Token::If => {
let (stmt, tail) = self.build_if_stmt();
if tail == Token::BraceClose {
if let Some(_) = stmt.else_body {
let expr = stmt.to_expr();
break Some(Expr::If(expr));
} else {
statements.push(Statement::If(stmt));
break None;
}
} else {
statements.push(Statement::If(stmt));
next = Some(tail);
}
}
Token::While => {
let test_expr = match self.build_expr() {
(Some(test_expr), Token::BraceOpen) => test_expr,
(Some(_), _) => self.panic_message("Expected opening brace"),
_ => self.panic_message("Expected test expression"),
};
let loop_body = self.build_branch_body();
statements.push(Statement::While {
test_expr,
loop_body,
});
}
Token::BraceClose => break None,
_ => match self.build_expr_start(tok) {
(None, Token::Semicolon | Token::BraceClose) => {}
(Some(expr), Token::Semicolon) => statements.push(Statement::Expr(expr)),
(Some(expr), Token::BraceClose) => break Some(expr),
_ => self.panic_message("Unexpected token"),
},
}
};
BranchBody {
statements,
tail_expr: tail_expr.map(|x| Box::new(x)),
}
}
/// Parses an expression whose first token, `tok`, was already read.
///
/// Returns the expression and the token that ended it. If `tok` is itself a
/// terminator (`;`, `}` or `)`), no expression is produced and `(None, tok)`
/// is returned. A unary operator that is followed directly by a terminator
/// is reported as an error instead of being silently discarded.
///
/// NOTE(review): no operator precedence is applied. A leading unary operator
/// is wrapped around the primary expression *before* member accesses and
/// calls are attached, and binary operators nest to the right because the
/// right-hand side is parsed by a recursive `build_expr` call.
///
/// Panics (via `unwrap`) if the tokens run out mid-expression.
pub fn build_expr_start(&mut self, tok: Token) -> (Option<Expr<'a>>, Token) {
    let (unary_op, tok) = match UnaryOp::from_token(tok) {
        Some(op) => (Some(op), self.next().unwrap()),
        None => (None, tok),
    };

    let lhs = match tok {
        Token::Identifier => Expr::Local(self.slice()),
        Token::Dot => Expr::SelfMember(self.eat_expect_id()),
        Token::ParenOpen => match self.build_expr() {
            (Some(inner), Token::ParenClose) => Expr::Group(Box::new(inner)),
            // `()` — previously an opaque `unwrap` panic.
            (None, Token::ParenClose) => {
                self.panic_message("Expected expression inside parentheses")
            }
            _ => self.panic_message("Expected closing parentheses"),
        },
        Token::If => Expr::If(self.build_if_expr()),
        Token::BinaryInteger => Expr::Literal(Literal::BinaryInteger(self.slice())),
        Token::OctalInteger => Expr::Literal(Literal::OctalInteger(self.slice())),
        Token::HexInteger => Expr::Literal(Literal::HexInteger(self.slice())),
        Token::DecimalInteger => Expr::Literal(Literal::DecimalInteger(self.slice())),
        Token::DecimalFloat => Expr::Literal(Literal::DecimalFloat(self.slice())),
        Token::StringLiteral => Expr::Literal(Literal::String(self.slice())),
        Token::True => Expr::Literal(Literal::Boolean(true)),
        Token::False => Expr::Literal(Literal::Boolean(false)),
        Token::Semicolon | Token::BraceClose | Token::ParenClose => {
            // An operator with no operand must not vanish without a trace.
            if unary_op.is_some() {
                self.panic_message("Expected expression after unary operator");
            }
            return (None, tok);
        }
        _ => self.panic_message("Unexpected token"),
    };

    let mut lhs = match unary_op {
        Some(op) => Expr::UnaryOp(op, Box::new(lhs)),
        None => lhs,
    };

    // Postfix member accesses and calls, then at most one binary operator
    // whose right-hand side consumes the rest of the expression.
    loop {
        let tok = self.next().unwrap();
        match tok {
            Token::Dot => {
                let member = self.eat_expect_id();
                lhs = Expr::Member(member, Box::new(lhs));
            }
            Token::ParenOpen => lhs = Expr::FnCall(Box::new(lhs), self.eat_expr_args()),
            _ => match BinaryOp::from_token(tok) {
                None => break (Some(lhs), tok),
                Some(op) => match self.build_expr() {
                    (Some(rhs), tail) => {
                        break (Some(Expr::BinaryOp(op, Box::new((lhs, rhs)))), tail)
                    }
                    _ => self.panic_message("Expected right-hand expression"),
                },
            },
        }
    }
}
/// Reads the next token and parses the expression that starts with it.
///
/// Panics (via `unwrap`) if there is no next token.
pub fn build_expr(&mut self) -> (Option<Expr<'a>>, Token) {
    let first = self.next().unwrap();
    self.build_expr_start(first)
}
/// Parses call arguments up to and including the closing parenthesis.
///
/// Called after the opening parenthesis has been consumed. Arguments are
/// separated by commas; a comma with no expression before it is an error.
pub fn eat_expr_args(&mut self) -> Vec<Expr<'a>> {
    let mut args = Vec::new();
    loop {
        let (parsed, tail) = self.build_expr();
        let list_closed = match tail {
            Token::ParenClose => true,
            Token::Comma if parsed.is_some() => false,
            _ => self.panic_message("Unexpected token"),
        };
        // `parsed` is `None` only for an empty slot right before `)`.
        args.extend(parsed);
        if list_closed {
            break;
        }
    }
    args
}
/// Parses a `let` statement after the `let` keyword has been consumed.
///
/// Accepts either `name` or `mut name`, then an assignment. Returns the
/// statement and the token that ended the assigned expression.
pub fn build_let(&mut self) -> (Statement<'a>, Token) {
    let (var, mutable) = match self.next().unwrap() {
        Token::Identifier => (self.slice(), false),
        Token::Mut => (self.eat_expect_id(), true),
        _ => self.panic_message("Unexpected token"),
    };
    let (expr, tail) = self.eat_assign();
    (Statement::Let { var, mutable, expr }, tail)
}
/// Parses an assignment operator followed by the assigned expression.
///
/// Returns the expression and the token that ended it. A missing right-hand
/// expression is reported through `panic_message`, like the other syntax
/// errors in this parser, rather than through a bare `unwrap`.
pub fn eat_assign(&mut self) -> (Expr<'a>, Token) {
    self.eat_expect(Token::OpAssign);
    match self.build_expr() {
        (Some(expr), tail) => (expr, tail),
        _ => self.panic_message("Expected expression"),
    }
}
/// Parses an `if` used as an expression, after the `if` keyword was consumed.
///
/// An `else` branch is mandatory here. `else if` is represented as an else
/// body with no statements whose tail expression is the nested `if`.
pub fn build_if_expr(&mut self) -> IfExpr<'a> {
    let condition = self.eat_test_expr();
    let then_body = self.build_branch_body();
    self.eat_expect(Token::Else);

    let else_body = match self.next().unwrap() {
        Token::If => {
            let nested = self.build_if_expr();
            BranchBody {
                statements: Vec::new(),
                tail_expr: Some(Box::new(Expr::If(nested))),
            }
        }
        Token::BraceOpen => self.build_branch_body(),
        _ => self.panic_message("Expected if or opening brace"),
    };

    IfExpr {
        test_expr: Box::new(condition),
        then_body,
        else_body,
    }
}
/// Parses a condition expression that must be followed by an opening brace.
///
/// The brace is consumed, so the caller can parse the block body directly.
pub fn eat_test_expr(&mut self) -> Expr<'a> {
    let (parsed, tail) = self.build_expr();
    match parsed {
        None => self.panic_message("Expected test expression"),
        Some(test_expr) if tail == Token::BraceOpen => test_expr,
        Some(_) => self.panic_message("Expected opening brace"),
    }
}
/// Parses an `if` in statement position, after the `if` keyword was consumed.
///
/// The `else` branch is optional. Finding that out requires reading one token
/// past the statement, so that token is returned alongside it.
///
/// NOTE(review): unlike `build_if_expr`, an `else` here must be followed by
/// an opening brace, so `else if` is not accepted — confirm this is intended.
pub fn build_if_stmt(&mut self) -> (IfStmt<'a>, Token) {
    let test_expr = self.eat_test_expr();
    let then_body = self.build_branch_body();

    let mut tail = self.next().unwrap();
    let mut else_body = None;
    if tail == Token::Else {
        self.eat_expect(Token::BraceOpen);
        else_body = Some(self.build_branch_body());
        tail = self.next().unwrap();
    }

    let stmt = IfStmt {
        test_expr,
        then_body,
        else_body,
    };
    (stmt, tail)
}
}
/// Gives shared access to the wrapped lexer, so its `&self` methods can be
/// called directly on the parser.
impl<'a> std::ops::Deref for RecursiveDescent<'a> {
    type Target = Lexer<'a>;

    fn deref(&self) -> &Lexer<'a> {
        &self.lexer
    }
}
impl<'a> std::ops::DerefMut for RecursiveDescent<'a> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.lexer
}
}
/// Builds the AST by parsing every definition the lexer yields.
impl<'a> BuildAst<'a> for RecursiveDescent<'a> {
    fn build_ast(mut self) -> Ast<'a> {
        Ast {
            defs: self.build_defs(),
        }
    }
}