Initial AST + basic struct parsing

This commit is contained in:
mars 2022-02-27 11:27:22 -07:00
parent b7678dc218
commit afbc34ebd0
3 changed files with 171 additions and 5 deletions

135
src/ast.rs Normal file
View File

@ -0,0 +1,135 @@
use crate::Token;
use logos::Lexer;
/// Advances `lexer` by one token and, if that token is an identifier, returns
/// the source text it covers.
///
/// For anything else the value produced by `lexer.next()` is handed back
/// untouched inside `Err` (`None` when the lexer yielded no token), so the
/// caller can dispatch on what was actually found.
fn eat_identifier<'a>(lexer: &mut Lexer<'a, Token>) -> Result<&'a str, Option<Token>> {
    match lexer.next() {
        Some(Token::Identifier) => Ok(lexer.slice()),
        other => Err(other),
    }
}
/// Root of the syntax tree: the list of top-level declarations collected by
/// `Ast::build`.
#[derive(Debug)]
pub struct Ast<'a> {
/// Declarations in the order `Ast::build` pushed them.
pub declarations: Vec<TopLevelDeclaration<'a>>,
}
impl<'a> Ast<'a> {
    /// Builds an [`Ast`] by consuming tokens from `lexer` until it is exhausted.
    ///
    /// Only `struct` declarations are recognised. Each one is read as
    /// `struct <name> { <type> <member> ... }` and recorded as a
    /// [`TopLevelDeclaration::Struct`]. Every other top-level token is skipped.
    ///
    /// # Panics
    ///
    /// Panics on a malformed struct declaration:
    /// - `struct` is not followed by an identifier and an opening brace,
    /// - a member type is not followed by a member name,
    /// - a token other than an identifier, comma or closing brace appears in
    ///   the member list,
    /// - the input ends before the closing brace.
    pub fn build(lexer: &mut Lexer<'a, Token>) -> Self {
        let mut declarations = Vec::new();

        while let Some(tok) = lexer.next() {
            if tok != Token::Struct {
                // Nothing but structs is parsed yet; ignore the token.
                continue;
            }

            let name = match eat_identifier(lexer) {
                Ok(name) => name,
                Err(found) => panic!("Expected struct name after `struct`, found {:?}", found),
            };
            match lexer.next() {
                Some(Token::BraceOpen) => {}
                found => panic!(
                    "Expected `{{` after struct name `{}`, found {:?}",
                    name, found
                ),
            }

            // NOTE(review): commas are skipped wherever they appear, so
            // separators between members are not actually enforced here.
            let mut members = Vec::new();
            loop {
                match eat_identifier(lexer) {
                    Ok(type_name) => {
                        let member_name = match eat_identifier(lexer) {
                            Ok(member_name) => member_name,
                            Err(found) => panic!(
                                "Expected member name after type `{}` in struct `{}`, found {:?}",
                                type_name, name, found
                            ),
                        };
                        members.push(StructMember {
                            type_name,
                            name: member_name,
                        });
                    }
                    Err(Some(Token::Comma)) => {}
                    Err(Some(Token::BraceClose)) => break,
                    Err(Some(found)) => {
                        panic!("Expected comma or closing brace, found {:?}", found)
                    }
                    // Distinguish a truncated file from a stray token.
                    Err(None) => panic!("Unexpected end of input inside struct `{}`", name),
                }
            }

            declarations.push(TopLevelDeclaration::Struct { name, members });
        }

        Self { declarations }
    }
}
/// A declaration stored at the top level of an `Ast`.
///
/// All string fields are `&'a str` slices rather than owned strings.
#[derive(Debug)]
pub enum TopLevelDeclaration<'a> {
/// A struct declaration; this is the variant `Ast::build` pushes for each
/// `struct` token it parses.
Struct {
// Identifier that followed the `struct` keyword.
name: &'a str,
// Members in the order they were parsed.
members: Vec<StructMember<'a>>,
},
/// A function declaration.
// NOTE(review): `Ast::build` does not construct this variant yet.
Function {
name: &'a str,
signature: FunctionSignature<'a>,
body: FunctionBody<'a>,
},
}
/// One member of a struct declaration, written in source as `<type> <name>`.
#[derive(Debug)]
pub struct StructMember<'a> {
/// The first identifier of the pair: the member's type.
pub type_name: &'a str,
/// The second identifier of the pair: the member's name.
pub name: &'a str,
}
impl<'a> StructMember<'a> {
pub fn build(lexer: &mut Lexer<'a, Token>) -> Result<Self, Token> {
assert_eq!(lexer.next(), Some(Token::Identifier));
let type_name = lexer.slice();
assert_eq!(lexer.next(), Some(Token::Identifier));
let name = lexer.slice();
assert_eq!(lexer.next(), Some(Token::Comma));
Ok(Self { type_name, name })
}
}
/// Signature part of a `TopLevelDeclaration::Function`.
#[derive(Debug)]
pub struct FunctionSignature<'a> {
// NOTE(review): presumably the struct this function is attached to, `None`
// for a free function — confirm once function parsing exists.
pub associated_struct: Option<&'a str>,
// NOTE(review): presumably `None` means the function returns nothing — confirm.
pub return_type: Option<&'a str>,
/// Declared arguments.
pub args: Vec<FunctionArg<'a>>,
}
/// A single entry of `FunctionSignature::args`: a type name paired with an
/// argument name, both stored as string slices.
#[derive(Debug)]
pub struct FunctionArg<'a> {
pub type_name: &'a str,
pub name: &'a str,
}
/// Body part of a `TopLevelDeclaration::Function`: a list of statements plus
/// an optional trailing expression.
#[derive(Debug)]
pub struct FunctionBody<'a> {
pub statements: Vec<Statement<'a>>,
// Optional here, unlike `BranchBody::tail_expression`, which is required.
pub tail_expression: Option<Expression<'a>>,
}
/// An expression node.
#[derive(Debug)]
pub enum Expression<'a> {
// NOTE(review): holds a bare string slice; presumably the not-yet-parsed
// source text of an arithmetic expression — confirm.
Arithmetic(&'a str),
/// An `if` used as an expression. Both branches are required here, whereas
/// `Statement::If` makes the else branch optional.
If {
// Boxed because `Expression` contains itself through this field.
test_expr: Box<Expression<'a>>,
then_body: BranchBody<'a>,
else_body: BranchBody<'a>,
},
}
/// A statement node, as stored in `FunctionBody::statements` and
/// `BranchBody::statements`.
#[derive(Debug)]
pub enum Statement<'a> {
/// Pairs a variable name with an expression.
// NOTE(review): presumably assignment to an existing variable — confirm.
Assign {
var: &'a str,
expr: Expression<'a>,
},
/// Pairs a variable name with an expression.
// NOTE(review): how `Let` differs from `Var` (e.g. mutability) is not
// visible in this file — confirm against the language design.
Let {
var: &'a str,
expr: Expression<'a>,
},
/// Pairs a variable name with an expression; see the note on `Let`.
Var {
var: &'a str,
expr: Expression<'a>,
},
/// An `if` used as a statement. The else branch is optional here, whereas
/// `Expression::If` requires it.
If {
test_expr: Expression<'a>,
then_body: BranchBody<'a>,
else_body: Option<BranchBody<'a>>,
},
}
/// Body of one branch of an `if`: a list of statements followed by a
/// mandatory tail expression.
#[derive(Debug)]
pub struct BranchBody<'a> {
pub statements: Vec<Statement<'a>>,
// Boxed: `Expression::If` holds `BranchBody` values, so storing an
// `Expression` inline here would make the two types infinitely sized.
pub tail_expression: Box<Expression<'a>>,
}

View File

@ -1,6 +1,8 @@
use console::Style;
use logos::Logos;
pub mod ast;
#[rustfmt::skip]
#[derive(Logos, Debug, PartialEq)]
pub enum Token {
@ -41,9 +43,6 @@ pub enum Token {
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
Identifier,
#[regex(r"//[^\n]*")]
SingleLineComment,
#[regex(r"0b_*[01][_01]*")]
BinaryInteger,
@ -53,7 +52,10 @@ pub enum Token {
#[regex(r"-?[0-9][_'0-9]*")]
DecimalInteger,
#[regex(r"[ \t\n\f ]+")] // , logos::skip)]
#[regex(r"//[^\n]*", logos::skip)]
SingleLineComment,
#[regex(r"[ \t\n\f ]+", logos::skip)]
Whitespace,
#[error]
@ -95,7 +97,8 @@ impl ColorTheme {
mod tests {
use super::*;
#[test]
// TODO use spans to color-code instead of raw tokens, to show original whitespace
/*#[test]
fn color_file() {
let source = include_str!("../example.ktn");
let theme = ColorTheme::new();
@ -105,5 +108,13 @@ mod tests {
let style = theme.token_style(&token);
print!("{}", style.apply_to(lex.slice()));
}
}*/
#[test]
fn generate_ast() {
    // Lex the bundled sample, build the tree, and pretty-print it for
    // inspection; nothing is asserted about the result.
    let mut lexer = Token::lexer(include_str!("struct_ast.ktn"));
    let tree = ast::Ast::build(&mut lexer);
    println!("{:#?}", tree);
}
}

20
src/struct_ast.ktn Normal file
View File

@ -0,0 +1,20 @@
// complex data is passed and manipulated in structures
struct World {
// each member is defined by name and type
u32 xsize,
u32 ysize,
BitArray current,
u32 numdots,
BitArray next,
// TODO: members can have default initialization values
// i32 xmin = 1'000'000,
// i32 xmax = -1,
// i32 ymin = 1'000'000,
// i32 ymax = -1,
i32 xmin,
i32 xmax,
i32 ymin,
i32 ymax,
}