Parse function signatures and skip over bodies

This commit is contained in:
mars 2022-02-27 12:35:45 -07:00
parent afbc34ebd0
commit f473fe324e
5 changed files with 292 additions and 110 deletions

View File

@ -1,45 +1,26 @@
use crate::Token;
use logos::Lexer;
/// Pulls the next token from `lexer` and, if it is an identifier, returns the
/// source text it matched.
///
/// Any other outcome is handed back as the error value: `Some(token)` for a
/// non-identifier token, `None` when the lexer is exhausted. The token is
/// consumed in every case.
fn eat_identifier<'a>(lexer: &mut Lexer<'a, Token>) -> Result<&'a str, Option<Token>> {
    match lexer.next() {
        Some(Token::Identifier) => Ok(lexer.slice()),
        other => Err(other),
    }
}
use crate::lexer::{Lexer, Token};
#[derive(Debug)]
pub struct Ast<'a> {
pub declarations: Vec<TopLevelDeclaration<'a>>,
pub declarations: Vec<Definition<'a>>,
}
impl<'a> Ast<'a> {
pub fn build(lexer: &mut Lexer<'a, Token>) -> Self {
pub fn build(lexer: &mut Lexer<'a>) -> Self {
let mut declarations = Vec::new();
let mut associated_struct = None;
while let Some(tok) = lexer.next() {
if tok == Token::Struct {
assert_eq!(lexer.next(), Some(Token::Identifier));
let name = lexer.slice();
assert_eq!(lexer.next(), Some(Token::BraceOpen));
let mut members = Vec::new();
loop {
match eat_identifier(lexer) {
Ok(member_type) => {
let member_name = eat_identifier(lexer).unwrap();
members.push(StructMember { type_name: member_type, name: member_name });
}
Err(Some(Token::Comma)) => {},
Err(Some(Token::BraceClose)) => break,
_ => panic!("Expected comma or closing brace"),
}
if let Some(_) = associated_struct {
assert_eq!(Token::Function, tok);
declarations.push(Definition::build_function(associated_struct, lexer));
associated_struct = None;
} else {
match tok {
Token::Struct => declarations.push(Definition::build_structure(lexer)),
Token::Function => declarations.push(Definition::build_function(None, lexer)),
Token::Identifier => associated_struct = Some(lexer.slice()),
_ => panic!("Expected associated struct identifier, fn, or struct"),
}
declarations.push(TopLevelDeclaration::Struct { name, members });
}
}
@ -48,7 +29,7 @@ impl<'a> Ast<'a> {
}
#[derive(Debug)]
pub enum TopLevelDeclaration<'a> {
pub enum Definition<'a> {
Struct {
name: &'a str,
members: Vec<StructMember<'a>>,
@ -60,29 +41,118 @@ pub enum TopLevelDeclaration<'a> {
},
}
impl<'a> Definition<'a> {
    /// Parses a struct definition whose `struct` keyword has already been consumed.
    ///
    /// Expected token shape: `Name { Type member, Type member, ... }`. Commas are
    /// simply skipped wherever they appear between members, so they are not
    /// required to separate (or terminate) members.
    ///
    /// # Panics
    ///
    /// Panics if the name is missing, the opening brace is missing, a member type
    /// is not followed by a member name, or an unexpected token (or end of input)
    /// is found before the closing brace.
    pub fn build_structure(lexer: &mut Lexer<'a>) -> Self {
        let name = lexer
            .eat_id()
            .expect("Expected struct name after `struct`");
        assert_eq!(
            lexer.next(),
            Some(Token::BraceOpen),
            "Expected `{{` after struct name"
        );

        let mut members = Vec::new();
        loop {
            match lexer.eat_id() {
                Ok(member_type) => {
                    let member_name = lexer
                        .eat_id()
                        .expect("Expected member name after member type");
                    members.push(StructMember {
                        type_name: member_type,
                        name: member_name,
                    });
                }
                Err(Some(Token::Comma)) => {}
                Err(Some(Token::BraceClose)) => break,
                // Report the offending token (`None` means end of input).
                Err(other) => panic!(
                    "Expected comma, member type, or closing brace, found {:?}",
                    other
                ),
            }
        }

        Self::Struct { name, members }
    }

    /// Parses a function definition whose `fn` keyword has already been consumed.
    ///
    /// Expected token shape: `name(Type arg, Type arg, ...) [ReturnType] { body }`.
    /// `associated_struct` is stored verbatim in the resulting signature. The body
    /// is handed to [`FunctionBody::build`] once its opening brace has been
    /// consumed here.
    ///
    /// # Panics
    ///
    /// Panics if the name or opening parenthesis is missing, an argument type is
    /// not followed by an argument name, an unexpected token (or end of input)
    /// appears in the argument list, or the argument list (and optional return
    /// type) is not followed by `{`.
    pub fn build_function(associated_struct: Option<&'a str>, lexer: &mut Lexer<'a>) -> Self {
        let name = lexer
            .eat_id()
            .expect("Expected function name after `fn`");
        assert_eq!(
            lexer.next(),
            Some(Token::ParanOpen),
            "Expected `(` after function name"
        );

        let mut args = Vec::new();
        loop {
            match lexer.eat_id() {
                Ok(member_type) => {
                    let member_name = lexer
                        .eat_id()
                        .expect("Expected argument name after argument type");
                    args.push(FunctionArg {
                        type_name: member_type,
                        name: member_name,
                    });
                }
                Err(Some(Token::Comma)) => {}
                Err(Some(Token::ParanClose)) => break,
                // Report the offending token (`None` means end of input).
                Err(other) => panic!(
                    "Expected comma, type, or closing parenthesis, found {:?}",
                    other
                ),
            }
        }

        // An identifier directly after `)` is the return type; either way the
        // opening brace of the body must be consumed before the body is parsed.
        let return_type = match lexer.next() {
            Some(Token::Identifier) => {
                let type_name = lexer.slice();
                assert_eq!(
                    Some(Token::BraceOpen),
                    lexer.next(),
                    "Expected `{{` after return type"
                );
                Some(type_name)
            }
            tok => {
                assert_eq!(
                    Some(Token::BraceOpen),
                    tok,
                    "Expected return type or `{{` after argument list"
                );
                None
            }
        };

        let signature = FunctionSignature {
            associated_struct,
            args,
            return_type,
        };
        let body = FunctionBody::build(lexer);

        Self::Function {
            name,
            signature,
            body,
        }
    }
}
/// One `Type name` entry inside a struct definition. Both fields borrow
/// identifier text from the source being parsed.
#[derive(Debug)]
pub struct StructMember<'a> {
/// Identifier text naming the member's type.
pub type_name: &'a str,
/// Identifier text naming the member itself.
pub name: &'a str,
}
impl<'a> StructMember<'a> {
    /// Reads exactly three tokens -- type identifier, name identifier, comma --
    /// and builds a member from the two identifiers.
    ///
    /// Every expectation is enforced with `assert_eq!`, so malformed input panics
    /// rather than producing an `Err`; the function only ever returns `Ok`.
    pub fn build(lexer: &mut Lexer<'a, Token>) -> Result<Self, Token> {
        // Consume one token, insist it is an identifier, and yield its text.
        let mut take_identifier = || {
            assert_eq!(lexer.next(), Some(Token::Identifier));
            lexer.slice()
        };
        let type_name = take_identifier();
        let name = take_identifier();

        assert_eq!(lexer.next(), Some(Token::Comma));
        Ok(Self { type_name, name })
    }
}
#[derive(Debug)]
pub struct FunctionSignature<'a> {
pub associated_struct: Option<&'a str>,
pub return_type: Option<&'a str>,
pub args: Vec<FunctionArg<'a>>,
pub return_type: Option<&'a str>,
}
impl<'a> FunctionSignature<'a> {
    /// Parses `(Type arg, Type arg, ...)` followed by an optional return-type
    /// identifier.
    ///
    /// Commas in the argument list are skipped wherever they appear. Panics if
    /// the opening parenthesis is missing, an argument type is not followed by a
    /// name, or an unexpected token (or end of input) shows up before `)`.
    ///
    /// Note that the token following `)` is always consumed: when it is an
    /// identifier it becomes `return_type`, otherwise it is discarded and
    /// `return_type` is `None`.
    pub fn build(associated_struct: Option<&'a str>, lexer: &mut Lexer<'a>) -> Self {
        assert_eq!(lexer.next(), Some(Token::ParanOpen));

        let mut args = Vec::new();
        loop {
            // Decide what this token means first; only a type identifier falls
            // through to reading the argument name.
            let type_name = match lexer.eat_id() {
                Ok(type_name) => type_name,
                Err(Some(Token::Comma)) => continue,
                Err(Some(Token::ParanClose)) => break,
                _ => panic!("Expected comma, type, or closing paranthases"),
            };
            let name = lexer.eat_id().unwrap();
            args.push(FunctionArg { type_name, name });
        }

        Self {
            associated_struct,
            args,
            return_type: lexer.eat_id().ok(),
        }
    }
}
#[derive(Debug)]
@ -97,6 +167,24 @@ pub struct FunctionBody<'a> {
pub tail_expression: Option<Expression<'a>>,
}
impl<'a> FunctionBody<'a> {
    /// Skips over a function body without parsing it.
    ///
    /// The opening `{` must already have been consumed. Tokens are read until the
    /// matching `}` is found, tracking nested braces along the way; the closing
    /// brace itself is consumed. The returned body is always empty (no statements,
    /// no tail expression).
    ///
    /// # Panics
    ///
    /// Panics if the input ends before the matching closing brace.
    pub fn build(lexer: &mut Lexer<'a>) -> Self {
        // Starts at 1 to account for the already-consumed opening brace.
        let mut depth = 1;
        while depth > 0 {
            let tok = lexer
                .next()
                .expect("Unexpected end of input inside function body: missing closing brace");
            match tok {
                Token::BraceOpen => depth += 1,
                Token::BraceClose => depth -= 1,
                _ => {}
            }
        }

        Self {
            statements: Vec::new(),
            tail_expression: None,
        }
    }
}
#[derive(Debug)]
pub enum Expression<'a> {
Arithmetic(&'a str),

34
src/ast_fn.ktn Normal file
View File

@ -0,0 +1,34 @@
fn free_floating(i32 arg1, i32 arg2) ReturnType {}
// associated function for the World struct
World fn set_next(i32 i, i32 j) {
// members on this World struct can be accessed with `.`
var numdots = .numdots; // mutable variables are defined with `var`
let neighbors = count_neighbors(.current, i, j);
// `if` statements are expressions
let next = if .current.get(i, j) == 0 {
if neighbors != 3 {
0
} else {
numdots++;
1
}
} else {
// Python-like `or` operator
if neighbors == 2 or neighbors == 3 {
1
} else {
numdots--;
0
}
};
if next != 0 {
// TODO: mutability rules for arguments?
if i < .xmin { .xmin = i; }
if i > .xmax { .xmax = i; }
if j < .ymin { .ymin = j; }
if j > .ymax { .ymax = j; }
}
}

91
src/lexer.rs Normal file
View File

@ -0,0 +1,91 @@
use logos::Logos;
/// Every token kind the lexer can produce; the matching rules are the
/// `#[token]` / `#[regex]` attributes consumed by the `Logos` derive.
///
/// `#[rustfmt::skip]` keeps each attribute on the same line as its variant.
#[rustfmt::skip]
#[derive(Logos, Debug, PartialEq)]
pub enum Token {
// keywords
#[token("struct")] Struct,
#[token("fn")] Function,
#[token("for")] For,
#[token("if")] If,
#[token("else")] Else,
#[token("in")] In,
#[token("let")] Let,
#[token("var")] Var,
// separators
#[token("{")] BraceOpen,
#[token("}")] BraceClose,
#[token("(")] ParanOpen,
#[token(")")] ParanClose,
#[token(";")] Semicolon,
#[token(",")] Comma,
#[token(".")] Dot,
// arithmetic operators
#[token("+")] OpAdd,
#[token("-")] OpSub,
#[token("*")] OpMul,
#[token("/")] OpDiv,
#[token("=")] OpAssign,
// comparison operators
#[token("<")] OpLess,
#[token("<=")] OpLessEq,
#[token(">")] OpGreater,
#[token(">=")] OpGreaterEq,
#[token("==")] OpEq,
#[token("!=")] OpNeq,
// identifiers: a letter or underscore, then letters, digits, or underscores
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
Identifier,
// integer literals: `0b` binary and `0o` octal allow `_` separators
#[regex(r"0b_*[01][_01]*")]
BinaryInteger,
#[regex(r"0o_*[0-7][_0-7]*")]
OctalInteger,
// decimal literals allow `_` and `'` separators and an optional leading `-`
// NOTE(review): because the `-` is part of the literal, input like `a-1`
// presumably lexes as Identifier + DecimalInteger rather than using OpSub -- confirm.
#[regex(r"-?[0-9][_'0-9]*")]
DecimalInteger,
// `//` comments (up to the end of the line) and whitespace runs are matched
// with the `logos::skip` callback
#[regex(r"//[^\n]*", logos::skip)]
SingleLineComment,
#[regex(r"[ \t\n\f ]+", logos::skip)]
Whitespace,
// variant marked with logos' `#[error]` attribute
#[error]
Error,
}
/// Thin wrapper around the `logos`-generated lexer for [`Token`].
pub struct Lexer<'a>(logos::Lexer<'a, Token>);

impl<'a> Lexer<'a> {
    /// Creates a lexer over `source`.
    pub fn new(source: &'a str) -> Self {
        Lexer(Token::lexer(source))
    }

    /// Returns the source text matched by the most recently produced token.
    pub fn slice(&self) -> &'a str {
        let Lexer(inner) = self;
        inner.slice()
    }

    /// Consumes the next token and returns its text if it is an identifier.
    ///
    /// Otherwise the consumed token is returned as the error (`None` at end of
    /// input); it is not put back.
    pub fn eat_id(&mut self) -> Result<&'a str, Option<Token>> {
        match self.next() {
            Some(Token::Identifier) => Ok(self.slice()),
            other => Err(other),
        }
    }
}
/// Yields tokens by delegating to the wrapped `logos` lexer.
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    /// Returns the next token, or `None` once the source is exhausted.
    ///
    /// Deliberately free of tracing output: callers that want to see the token
    /// stream can print each token (and `slice()`) themselves.
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }
}

View File

@ -1,66 +1,9 @@
use console::Style;
use logos::Logos;
pub mod ast;
pub mod lexer;
/// Token kinds recognised by the `Logos`-derived lexer; each variant's
/// `#[token]` / `#[regex]` attribute is its matching rule.
#[rustfmt::skip]
#[derive(Logos, Debug, PartialEq)]
pub enum Token {
// keywords
#[token("struct")] Struct,
#[token("fn")] Function,
#[token("for")] For,
#[token("if")] If,
#[token("else")] Else,
#[token("in")] In,
#[token("let")] Let,
#[token("var")] Var,
// separators
#[token("{")] BraceOpen,
#[token("}")] BraceClose,
#[token("(")] ParanOpen,
#[token(")")] ParanClose,
#[token(";")] Semicolon,
#[token(",")] Comma,
#[token(".")] Dot,
// arithmetic operators
#[token("+")] OpAdd,
#[token("-")] OpSub,
#[token("*")] OpMul,
#[token("/")] OpDiv,
#[token("=")] OpAssign,
// comparison operators
#[token("<")] OpLess,
#[token("<=")] OpLessEq,
#[token(">")] OpGreater,
#[token(">=")] OpGreaterEq,
#[token("==")] OpEq,
#[token("!=")] OpNeq,
// identifiers: a letter or underscore, then letters, digits, or underscores
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
Identifier,
// integer literals: binary (`0b`), octal (`0o`), and decimal; decimal also
// accepts `'` as a separator and an optional leading `-`
#[regex(r"0b_*[01][_01]*")]
BinaryInteger,
#[regex(r"0o_*[0-7][_0-7]*")]
OctalInteger,
#[regex(r"-?[0-9][_'0-9]*")]
DecimalInteger,
// line comments and whitespace runs use the `logos::skip` callback
#[regex(r"//[^\n]*", logos::skip)]
SingleLineComment,
#[regex(r"[ \t\n\f ]+", logos::skip)]
Whitespace,
// variant marked with logos' `#[error]` attribute
#[error]
Error,
}
use lexer::Token;
pub struct ColorTheme {
normal: Style,
@ -97,6 +40,19 @@ impl ColorTheme {
mod tests {
use super::*;
/// Lexes the `ast_fn.ktn` fixture and prints each token, styled by the color
/// theme, alongside the source text it matched. Makes no assertions.
#[test]
fn lex_file() {
    let theme = ColorTheme::new();
    let mut tokens = lexer::Lexer::new(include_str!("ast_fn.ktn"));

    // A `for` loop would hold the lexer mutably borrowed, but `slice()` is
    // needed after each token, so the iterator is advanced by hand.
    loop {
        let token = match tokens.next() {
            Some(token) => token,
            None => break,
        };
        let style = theme.token_style(&token);
        let text = format!("{:?}: {}", token, tokens.slice());
        println!("{}", style.apply_to(text));
    }
}
// TODO use spans to color-code instead of raw tokens, to show original whitespace
/*#[test]
fn color_file() {
@ -110,11 +66,24 @@ mod tests {
}
}*/
#[test]
fn generate_ast() {
let source = include_str!("struct_ast.ktn");
let mut lex = Token::lexer(source);
/// Builds an AST from `source` and pretty-prints it with `{:#?}`.
fn print_ast(source: &str) {
    let mut token_stream = lexer::Lexer::new(source);
    let tree = ast::Ast::build(&mut token_stream);
    println!("{:#?}", tree);
}
// Smoke test: builds and prints the AST for the `ast_fn.ktn` fixture.
// There are no assertions; it only fails if building the AST panics.
#[test]
fn ast_fn() {
print_ast(include_str!("ast_fn.ktn"));
}
// Smoke test: builds and prints the AST for the `ast_struct.ktn` fixture.
// There are no assertions; it only fails if building the AST panics.
#[test]
fn ast_struct() {
print_ast(include_str!("ast_struct.ktn"));
}
// Smoke test: builds and prints the AST for `../example.ktn`.
// There are no assertions; it only fails if building the AST panics.
#[test]
fn ast_example() {
print_ast(include_str!("../example.ktn"));
}
}