sprite-rs/src/parse.rs

579 lines
16 KiB
Rust

// Copyright (c) 2022 Marceline Cramer
// SPDX-License-Identifier: GPL-3.0-or-later
use crate::lexer::{Lexer, Token};
/// Result of parsing one source text: the list of its top-level definitions.
///
/// The `'a` lifetime is that of the string slices handed out by the lexer.
#[derive(Debug)]
pub struct ParseTree<'a> {
/// Definitions in the order `ParseTree::build` pushed them.
pub declarations: Vec<Definition<'a>>,
}
impl<'a> ParseTree<'a> {
    /// Parses top-level definitions until the lexer yields no more tokens.
    ///
    /// Each definition is introduced by one of:
    /// * `Token::Struct`, `Token::Function`, `Token::Interface`, `Token::Impl`
    ///   — dispatched to the matching `Definition::build_*` constructor;
    /// * `Token::Identifier` — the identifier names a struct and must be
    ///   followed by `Token::Function`, or by `Token::Mut` then
    ///   `Token::Function`, producing a function with `associated_struct` set.
    ///
    /// Any other token is reported through `Lexer::panic_message`. This also
    /// covers an identifier that is the very last token of the input, which
    /// would otherwise be dropped without any diagnostic.
    pub fn build(lexer: &mut Lexer<'a>) -> Self {
        let mut declarations = Vec::new();

        while let Some(tok) = lexer.next() {
            let definition = match tok {
                Token::Struct => Definition::build_structure(lexer),
                Token::Function => Definition::build_function(None, lexer),
                Token::Interface => Definition::build_interface(lexer),
                Token::Impl => Definition::build_impl(lexer),
                Token::Identifier => {
                    // Grab the struct name before advancing the lexer.
                    let struct_name = lexer.slice();
                    let mutable = match lexer.next() {
                        Some(Token::Mut) => {
                            lexer.eat_expect(Token::Function);
                            true
                        }
                        Some(Token::Function) => false,
                        // Wrong token or end of input right after the name.
                        _ => lexer.panic_message("Expected fn or mut"),
                    };
                    Definition::build_function(Some((struct_name, mutable)), lexer)
                }
                _ => lexer.panic_message("Expected function, struct, impl block, or interface"),
            };
            declarations.push(definition);
        }

        Self { declarations }
    }
}
/// One top-level definition of a source file.
#[derive(Debug)]
pub enum Definition<'a> {
/// A structure: its name and its members in declaration order.
Struct {
name: &'a str,
members: Vec<StructMember<'a>>,
},
/// A function with a body.
Function {
/// `Some((struct_name, mutable))` when the function was introduced by a
/// leading struct identifier (`mutable` is true when `Token::Mut` followed
/// the identifier); `None` for a plain function.
associated_struct: Option<(&'a str, bool)>,
implementation: FnImpl<'a>,
},
/// An interface: its name and a list of `(function name, signature)` pairs.
Interface {
name: &'a str,
functions: Vec<(&'a str, FnSig<'a>)>,
},
/// An implementation block for `structure`, optionally naming the
/// `interface` it implements, with the functions it contains.
Impl {
structure: &'a str,
interface: Option<&'a str>,
functions: Vec<FnImpl<'a>>,
},
}
impl<'a> Definition<'a> {
    /// Parses a structure definition. The introducing `Token::Struct` must
    /// already have been consumed by the caller.
    ///
    /// Expected shape: a name, an opening brace, then `type name` member
    /// pairs until the closing brace. Commas between members are skipped
    /// rather than required.
    pub fn build_structure(lexer: &mut Lexer<'a>) -> Self {
        let name = lexer.eat_expect_id();
        lexer.eat_expect(Token::BraceOpen);

        let mut members = Vec::new();
        loop {
            // `eat_id` is used as: `Ok(text)` for an identifier, `Err(token)`
            // for anything else (see `crate::lexer` for the exact contract).
            match lexer.eat_id() {
                Ok(type_name) => {
                    let member_name = lexer.eat_expect_id();
                    members.push(StructMember {
                        type_name,
                        name: member_name,
                    });
                }
                Err(Some(Token::Comma)) => {}
                Err(Some(Token::BraceClose)) => break,
                _ => lexer.panic_message("Expected comma or closing brace"),
            }
        }

        Self::Struct { name, members }
    }

    /// Parses a function with a body via [`FnImpl::build`] and tags it with
    /// the optional `(struct name, mutable)` association supplied by the caller.
    pub fn build_function(
        associated_struct: Option<(&'a str, bool)>,
        lexer: &mut Lexer<'a>,
    ) -> Self {
        Self::Function {
            associated_struct,
            implementation: FnImpl::build(lexer),
        }
    }

    /// Parses an interface definition. The introducing `Token::Interface`
    /// must already have been consumed.
    ///
    /// The body is a brace-delimited list of function signatures, each
    /// terminated by `Token::Semicolon`. Running out of input inside the body
    /// is reported through `Lexer::panic_message` like any other unexpected
    /// token, instead of failing on an `Option::unwrap`.
    pub fn build_interface(lexer: &mut Lexer<'a>) -> Self {
        let name = lexer.eat_expect_id();
        lexer.eat_expect(Token::BraceOpen);

        let mut functions = Vec::new();
        loop {
            match lexer.next() {
                Some(Token::Function) => {
                    let fn_name = lexer.eat_expect_id();
                    let (signature, next) = FnSig::build(lexer);
                    if next != Token::Semicolon {
                        lexer.panic_message("Expected semicolon");
                    }
                    functions.push((fn_name, signature));
                }
                Some(Token::BraceClose) => break,
                // Unexpected token or end of input.
                _ => lexer.panic_message("Expected fn or closing bracket"),
            }
        }

        Self::Interface { name, functions }
    }

    /// Parses an implementation block. The introducing `Token::Impl` must
    /// already have been consumed.
    ///
    /// Expected shape: the structure name, an optional interface name, then a
    /// brace-delimited list of function implementations. Running out of input
    /// is reported through `Lexer::panic_message`.
    pub fn build_impl(lexer: &mut Lexer<'a>) -> Self {
        let structure = lexer.eat_expect_id();

        let interface = match lexer.next() {
            Some(Token::Identifier) => {
                // Read the slice before `eat_expect` advances the lexer.
                let interface = lexer.slice();
                lexer.eat_expect(Token::BraceOpen);
                Some(interface)
            }
            Some(Token::BraceOpen) => None,
            _ => lexer.panic_message("Expected interface name or opening brace"),
        };

        let mut functions = Vec::new();
        loop {
            match lexer.next() {
                Some(Token::BraceClose) => break,
                Some(Token::Function) => functions.push(FnImpl::build(lexer)),
                _ => lexer.panic_message("Expected function implementation or closing brace"),
            }
        }

        Self::Impl {
            structure,
            interface,
            functions,
        }
    }
}
/// A function together with its body: name, signature and parsed block.
#[derive(Debug)]
pub struct FnImpl<'a> {
/// Function name as sliced from the source by the lexer.
pub name: &'a str,
/// Argument list and optional return type.
pub signature: FnSig<'a>,
/// Statements and optional tail expression of the function block.
pub body: BranchBody<'a>,
}
impl<'a> FnImpl<'a> {
    /// Parses `name signature { body }`. The introducing `Token::Function`
    /// must already have been consumed by the caller.
    ///
    /// The token that follows the signature has to be `Token::BraceOpen`;
    /// anything else is reported through `Lexer::panic_message`.
    pub fn build(lexer: &mut Lexer<'a>) -> Self {
        let fn_name = lexer.eat_expect_id();
        let (sig, following) = FnSig::build(lexer);

        match following {
            Token::BraceOpen => Self {
                name: fn_name,
                signature: sig,
                body: BranchBody::build(lexer),
            },
            _ => lexer.panic_message("Expected open brace"),
        }
    }
}
/// One member of a structure, written in the source as `type name`.
#[derive(Debug)]
pub struct StructMember<'a> {
/// Identifier naming the member's type (the first of the two identifiers).
pub type_name: &'a str,
/// Identifier naming the member itself.
pub name: &'a str,
}
/// A function signature: its arguments and an optional return type name.
#[derive(Debug)]
pub struct FnSig<'a> {
/// Arguments in declaration order.
pub args: Vec<FnArg<'a>>,
/// Identifier following the closing parenthesis, if there was one.
pub return_type: Option<&'a str>,
}
impl<'a> FnSig<'a> {
    /// Parses a parenthesised argument list followed by an optional return
    /// type, and returns the signature together with the first token that is
    /// not part of it (the caller decides whether that token is acceptable).
    ///
    /// Each argument is a type identifier optionally followed by a name
    /// identifier. Commas between arguments are skipped rather than required.
    ///
    /// Running out of input at any point is reported through
    /// `Lexer::panic_message` instead of failing on an `Option::unwrap`.
    pub fn build(lexer: &mut Lexer<'a>) -> (Self, Token) {
        lexer.eat_expect(Token::ParanOpen);

        let mut args = Vec::new();
        loop {
            match lexer.eat_id() {
                // A type name was read; the next token decides whether the
                // argument is named and whether the list ends here.
                Ok(type_name) => match lexer.next() {
                    Some(Token::Identifier) => args.push(FnArg {
                        type_name,
                        name: Some(lexer.slice()),
                    }),
                    Some(Token::Comma) => args.push(FnArg {
                        type_name,
                        name: None,
                    }),
                    Some(Token::ParanClose) => {
                        args.push(FnArg {
                            type_name,
                            name: None,
                        });
                        break;
                    }
                    _ => lexer.panic_message("Unexpected token"),
                },
                Err(Some(Token::Comma)) => {}
                Err(Some(Token::ParanClose)) => break,
                _ => lexer.panic_message("Expected comma, type, or closing parantheses"),
            }
        }

        let after_args = match lexer.next() {
            Some(tok) => tok,
            None => lexer.panic_message("Unexpected end of input"),
        };

        // An identifier right after the argument list is the return type;
        // in that case the token after it is the one handed back.
        let (return_type, next) = if after_args == Token::Identifier {
            let type_name = lexer.slice();
            match lexer.next() {
                Some(tok) => (Some(type_name), tok),
                None => lexer.panic_message("Unexpected end of input"),
            }
        } else {
            (None, after_args)
        };

        (Self { args, return_type }, next)
    }
}
/// One argument of a function signature.
#[derive(Debug)]
pub struct FnArg<'a> {
/// Identifier naming the argument's type.
pub type_name: &'a str,
/// Argument name; `None` when only the type was written.
pub name: Option<&'a str>,
}
/// Contents of a brace-delimited block (function body or `if`/`else` branch).
#[derive(Debug)]
pub struct BranchBody<'a> {
/// Statements in source order.
pub statements: Vec<Statement<'a>>,
/// Expression directly followed by the closing brace (no semicolon), if any.
pub tail_expr: Option<Box<Expr<'a>>>,
}
impl<'a> BranchBody<'a> {
    /// Parses the inside of a block up to and including its closing brace.
    /// The opening brace must already have been consumed by the caller.
    ///
    /// * `Token::Let` starts a let statement, which must be followed by a
    ///   semicolon or by the closing brace.
    /// * Any other token starts an expression. Terminated by a semicolon it
    ///   becomes a statement; terminated by the closing brace it becomes the
    ///   block's tail expression.
    /// * A lone semicolon is skipped.
    ///
    /// Running out of input before the closing brace is reported through
    /// `Lexer::panic_message` instead of failing on an `Option::unwrap`.
    pub fn build(lexer: &mut Lexer<'a>) -> Self {
        let mut statements = Vec::new();
        let mut tail_expr = None;

        loop {
            let tok = match lexer.next() {
                Some(tok) => tok,
                None => lexer.panic_message("Unexpected end of input"),
            };

            match tok {
                Token::Let => {
                    let (statement, next) = Statement::build_let(lexer);
                    statements.push(statement);
                    match next {
                        Token::Semicolon => {}
                        Token::BraceClose => break,
                        _ => lexer.panic_message("Unexpected token"),
                    }
                }
                Token::BraceClose => break,
                _ => match Expr::build_start(tok, lexer) {
                    // No expression was produced (e.g. a stray semicolon).
                    (None, Token::Semicolon | Token::BraceClose) => {}
                    (Some(expr), Token::Semicolon) => statements.push(Statement::Expr(expr)),
                    (Some(expr), Token::BraceClose) => {
                        tail_expr = Some(Box::new(expr));
                        break;
                    }
                    _ => lexer.panic_message("Unexpected token"),
                },
            }
        }

        Self {
            statements,
            tail_expr,
        }
    }
}
/// Boxed `(left, right)` operand pair used by `Expr::BinaryOp`.
type ExprPair<'a> = Box<(Expr<'a>, Expr<'a>)>;
/// An expression node.
#[derive(Debug)]
pub enum Expr<'a> {
/// Binary operation on a boxed `(left, right)` operand pair.
BinaryOp(BinaryOp, ExprPair<'a>),
/// Integer or boolean literal.
Literal(Literal<'a>),
/// Bare identifier.
Local(&'a str),
/// Member access: the member name and the expression it is accessed on.
Member(&'a str, Box<Expr<'a>>),
/// Member access written with a leading dot and no base expression.
SelfMember(&'a str),
/// Parenthesised expression.
Group(Box<Expr<'a>>),
/// Call: the callee expression and its arguments in order.
FnCall(Box<Expr<'a>>, Vec<Expr<'a>>),
/// Conditional expression with both branches present.
If {
test_expr: Box<Expr<'a>>,
then_body: BranchBody<'a>,
else_body: BranchBody<'a>,
},
}
impl<'a> Expr<'a> {
    /// Parses an expression whose first token, `tok`, has already been read.
    ///
    /// Returns the expression (if any) and the first token that does not
    /// belong to it. When `tok` is itself a terminator (`Token::Semicolon`,
    /// `Token::BraceClose` or `Token::ParanClose`), no expression is produced
    /// and `tok` is handed back unchanged.
    ///
    /// After the leading operand, any number of `.member` accesses and call
    /// argument lists are folded onto it. If a binary operator follows, the
    /// whole remaining expression becomes its right operand; operators are
    /// therefore not ranked by precedence and `a OP1 b OP2 c` is built as
    /// `OP1(a, OP2(b, c))`.
    ///
    /// Malformed input is reported through `Lexer::panic_message`. This
    /// includes empty parentheses and running out of input, both of which
    /// previously surfaced as an `Option::unwrap` panic.
    pub fn build_start(tok: Token, lexer: &mut Lexer<'a>) -> (Option<Self>, Token) {
        let mut lhs = match tok {
            Token::Identifier => Self::Local(lexer.slice()),
            Token::Dot => Self::SelfMember(lexer.eat_expect_id()),
            Token::ParanOpen => match Self::build(lexer) {
                (Some(inner), Token::ParanClose) => Self::Group(Box::new(inner)),
                (None, Token::ParanClose) => lexer.panic_message("Expected expression"),
                _ => lexer.panic_message("Expected closing parantheses"),
            },
            Token::If => {
                let test_expr = match Self::build(lexer) {
                    (Some(test_expr), Token::BraceOpen) => Box::new(test_expr),
                    (None, Token::BraceOpen) => lexer.panic_message("Expected test expression"),
                    _ => lexer.panic_message("Expected opening brace"),
                };
                let then_body = BranchBody::build(lexer);
                // The else branch is not optional for an `if` expression.
                lexer.eat_expect(Token::Else);
                lexer.eat_expect(Token::BraceOpen);
                let else_body = BranchBody::build(lexer);
                Self::If {
                    test_expr,
                    then_body,
                    else_body,
                }
            }
            Token::BinaryInteger => Self::Literal(Literal::BinaryInteger(lexer.slice())),
            Token::OctalInteger => Self::Literal(Literal::OctalInteger(lexer.slice())),
            Token::HexInteger => Self::Literal(Literal::HexInteger(lexer.slice())),
            Token::DecimalInteger => Self::Literal(Literal::DecimalInteger(lexer.slice())),
            Token::True => Self::Literal(Literal::Boolean(true)),
            Token::False => Self::Literal(Literal::Boolean(false)),
            Token::Semicolon | Token::BraceClose | Token::ParanClose => return (None, tok),
            _ => lexer.panic_message("Unexpected token"),
        };

        loop {
            let next = match lexer.next() {
                Some(next) => next,
                None => lexer.panic_message("Unexpected end of input"),
            };

            match next {
                Token::Dot => {
                    let member = lexer.eat_expect_id();
                    lhs = Self::Member(member, Box::new(lhs));
                }
                Token::ParanOpen => lhs = Self::FnCall(Box::new(lhs), Self::eat_args(lexer)),
                _ => {
                    if let Some(op) = BinaryOp::from_token(next) {
                        // Everything up to the terminator becomes the right
                        // operand; its terminator is ours as well.
                        match Self::build(lexer) {
                            (Some(rhs), tail) => {
                                break (Some(Self::BinaryOp(op, Box::new((lhs, rhs)))), tail)
                            }
                            _ => lexer.panic_message("Expected right-hand expression"),
                        }
                    } else {
                        // Not part of the expression: hand it back to the caller.
                        break (Some(lhs), next);
                    }
                }
            }
        }
    }

    /// Reads the next token and parses the expression it starts; see
    /// [`Expr::build_start`] for the meaning of the returned pair.
    pub fn build(lexer: &mut Lexer<'a>) -> (Option<Self>, Token) {
        match lexer.next() {
            Some(tok) => Self::build_start(tok, lexer),
            None => lexer.panic_message("Unexpected end of input"),
        }
    }

    /// Parses call arguments up to and including the closing parenthesis.
    /// The opening parenthesis must already have been consumed.
    ///
    /// Arguments are separated by `Token::Comma`; an empty list and a
    /// trailing comma before the closing parenthesis are both accepted.
    pub fn eat_args(lexer: &mut Lexer<'a>) -> Vec<Self> {
        let mut args = Vec::new();
        loop {
            match Self::build(lexer) {
                (Some(arg), Token::Comma) => args.push(arg),
                (Some(arg), Token::ParanClose) => {
                    args.push(arg);
                    break;
                }
                (None, Token::ParanClose) => break,
                _ => lexer.panic_message("Unexpected token"),
            }
        }
        args
    }
}
/// Binary operators recognised by the expression parser.
///
/// The enum is a plain tag without payload, so it is `Copy` and comparable;
/// this lets consumers match, store and assert on operators freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    Add,
    Sub,
    Mul,
    Div,
    Assign,
    BoolOr,
    BoolAnd,
    BitOr,
    BitAnd,
    BitXor,
    Less,
    LessEq,
    Greater,
    GreaterEq,
    Eq,
    Neq,
}
impl BinaryOp {
pub fn from_token(tok: Token) -> Option<Self> {
use BinaryOp::*;
use Token::*;
match tok {
OpAdd => Some(Add),
OpSub => Some(Sub),
OpMul => Some(Mul),
OpDiv => Some(Div),
OpAssign => Some(Assign),
OpBoolOr => Some(BoolOr),
OpBoolAnd => Some(BoolAnd),
OpBitOr => Some(BitOr),
OpBitAnd => Some(BitAnd),
OpBitXor => Some(BitXor),
OpLess => Some(Less),
OpLessEq => Some(LessEq),
OpGreater => Some(Greater),
OpGreaterEq => Some(GreaterEq),
OpEq => Some(Eq),
OpNeq => Some(Neq),
_ => None,
}
}
}
/// A literal value as it appeared in the source.
///
/// Integer variants keep the token text handed out by the lexer; nothing is
/// converted to a number at this stage. All payloads are borrowed slices or
/// `bool`, so the type is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Literal<'a> {
    /// Text of a `Token::BinaryInteger`.
    BinaryInteger(&'a str),
    /// Text of a `Token::OctalInteger`.
    OctalInteger(&'a str),
    /// Text of a `Token::HexInteger`.
    HexInteger(&'a str),
    /// Text of a `Token::DecimalInteger`.
    DecimalInteger(&'a str),
    /// `true` or `false`.
    Boolean(bool),
}
/// A statement inside a block.
#[derive(Debug)]
pub enum Statement<'a> {
/// Expression evaluated as a statement (terminated by a semicolon).
Expr(Expr<'a>),
/// Variable binding: `var` receives the value of `expr`; `mutable` is true
/// when `Token::Mut` preceded the variable name.
Let {
var: &'a str,
mutable: bool,
expr: Expr<'a>,
},
/// Conditional statement with an optional else branch.
// NOTE(review): nothing in this file constructs this variant; the parser
// produces `Expr::If` wrapped in `Statement::Expr` instead — confirm whether
// it is still needed.
If {
test_expr: Expr<'a>,
then_body: BranchBody<'a>,
else_body: Option<BranchBody<'a>>,
},
}
impl<'a> Statement<'a> {
    /// Consumes `Token::OpAssign` followed by an expression and returns the
    /// expression with the token that terminated it.
    ///
    /// A missing right-hand side (for instance a semicolon directly after the
    /// assignment operator) is reported through `Lexer::panic_message`
    /// instead of failing on an `Option::unwrap`.
    pub fn eat_assign(lexer: &mut Lexer<'a>) -> (Expr<'a>, Token) {
        lexer.eat_expect(Token::OpAssign);
        match Expr::build(lexer) {
            (Some(expr), tail) => (expr, tail),
            (None, _) => lexer.panic_message("Expected right-hand expression"),
        }
    }

    /// Parses the remainder of a let statement. `Token::Let` must already
    /// have been consumed by the caller.
    ///
    /// Accepts a variable name, optionally preceded by `Token::Mut`, followed
    /// by an assignment. Returns the statement and the token that terminated
    /// its expression. Any other token, or the end of input, in place of the
    /// name is reported through `Lexer::panic_message`.
    pub fn build_let(lexer: &mut Lexer<'a>) -> (Self, Token) {
        let (var, mutable) = match lexer.next() {
            Some(Token::Identifier) => (lexer.slice(), false),
            Some(Token::Mut) => (lexer.eat_expect_id(), true),
            _ => lexer.panic_message("Unexpected token"),
        };

        let (expr, tail) = Self::eat_assign(lexer);
        (Self::Let { var, mutable, expr }, tail)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the whole parser over `source` and dumps the resulting tree.
    /// Nothing is asserted: a test passes when parsing does not panic.
    fn parse(source: &str) {
        let mut lexer = Lexer::new(source);
        let tree = ParseTree::build(&mut lexer);
        println!("{:#?}", tree);
    }

    #[test]
    fn function() {
        parse(include_str!("test/function.fae"));
    }

    #[test]
    fn tail_expression() {
        parse("fn add(i32 x, i32 y) i32 { x + y }");
    }

    #[test]
    fn structure() {
        parse(include_str!("test/structure.fae"));
    }

    #[test]
    fn interface() {
        parse(include_str!("test/interface.fae"));
    }

    #[test]
    fn example() {
        parse(include_str!("test/example.fae"));
    }

    #[test]
    fn clock() {
        parse(include_str!("test/clock.fae"));
    }

    /// Expression-level tests: every input is a single expression that must
    /// end at a semicolon.
    mod expr {
        use super::*;

        /// Parses one expression from `source`, prints it, and checks that
        /// the token which terminated it is `Token::Semicolon`.
        fn parse_expr(source: &str) {
            println!("Source: {}", source);
            let mut lexer = Lexer::new(source);
            let parsed = Expr::build(&mut lexer);
            println!("{:#?}", parsed);
            let (_, terminator) = parsed;
            assert_eq!(terminator, Token::Semicolon);
        }

        #[test]
        fn int_literals() {
            parse_expr("0b1 + 0x2 - 0o3 * 0x4;");
        }

        #[test]
        fn locals() {
            parse_expr("local1 + local2 - local3;");
        }

        #[test]
        fn nesting() {
            parse_expr("(1 + 2) * (3 / (4 + 5));");
        }

        #[test]
        fn members() {
            parse_expr(".member1 * .member2;");
        }

        #[test]
        fn if_expr() {
            parse_expr("if true { 1 } else { 0 };");
        }

        #[test]
        fn func_call() {
            parse_expr("func_call(1 + 2, 3, 4 * 5);");
        }

        #[test]
        fn method() {
            parse_expr(".method();");
        }

        #[test]
        fn submembers() {
            parse_expr(".member.submember1 / .member.submember2.submember3;");
        }
    }
}