// Copyright (c) 2022 Marceline Cramer // SPDX-License-Identifier: GPL-3.0-or-later use logos::Logos; use std::fmt::{Display, Formatter, Result as FmtResult}; #[rustfmt::skip] #[derive(Logos, Clone, Copy, Debug, PartialEq)] #[logos(subpattern decimal = r"[0-9][_'0-9]*")] #[logos(subpattern hex = r"[0-9a-fA-F][_'0-9a-fA-F]*")] #[logos(subpattern octal = r"[0-7][_'0-7]*")] #[logos(subpattern binary = r"[01][_'01]*")] pub enum Token { // keywords #[token("struct")] Struct, #[token("fn")] Function, #[token("for")] For, #[token("in")] In, #[token("while")] While, #[token("if")] If, #[token("else")] Else, #[token("let")] Let, #[token("mut")] Mut, #[token("interface")] Interface, #[token("impl")] Impl, // separators #[token("{")] BraceOpen, #[token("}")] BraceClose, #[token("(")] ParenOpen, #[token(")")] ParenClose, #[token(";")] Semicolon, #[token(",")] Comma, #[token(".")] Dot, // arithmetic operators #[token("+")] OpAdd, #[token("-")] OpSub, #[token("*")] OpMul, #[token("/")] OpDiv, #[token("=")] OpAssign, // boolean operators #[token("or")] OpBoolOr, #[token("and")] OpBoolAnd, #[token("not")] OpBoolNot, // bitwise operators #[token("&")] OpBitAnd, #[token("|")] OpBitOr, #[token("^")] OpBitXor, #[token("~")] OpBitNot, // comparison operators #[token("<")] OpLess, #[token("<=")] OpLessEq, #[token(">")] OpGreater, #[token(">=")] OpGreaterEq, #[token("==")] OpEq, #[token("!=")] OpNeq, // boolean literals #[token("true")] True, #[token("false")] False, #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")] Identifier, #[regex(r"0[bB](?&binary)")] BinaryInteger, #[regex(r"0[oO](?&octal)")] OctalInteger, #[regex(r"0[xX](?&hex)")] HexInteger, // TODO this has a higher priority because the DecimalFloat regex matches // even without a . (please help me I don't know regex) #[regex(r"(?&decimal)", priority=2)] DecimalInteger, #[regex(r"([0-9]+([.][0-9]*)?|[.][0-9]+)")] DecimalFloat, #[regex(r#""([^"\\]|\\t|\\u|\\n|\\")*""#)] StringLiteral, #[regex(r"//[^\n]*", logos::skip)] SingleLineComment, #[regex(r"[ \t\f\r]+", logos::skip)] Whitespace, #[token("\n")] Newline, #[error] Error, } pub struct TokenInfo<'a> { pub line: &'a str, pub line_num: usize, pub token_start: usize, pub token_end: usize, } impl<'a> Display for TokenInfo<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { write!(f, "{:>4}| {}\n |", self.line_num, self.line)?; write!(f, "{} ^", " ".repeat(self.token_start))?; let token_len = self.token_end - self.token_start; if token_len > 1 { write!(f, "{}^\n", "-".repeat(token_len - 2)) } else { write!(f, "\n") } } } pub struct Lexer<'a> { inner: logos::Lexer<'a, Token>, line_num: usize, line_start: usize, } impl<'a> Lexer<'a> { pub fn new(source: &'a str) -> Self { Self { inner: Token::lexer(source), line_num: 0, line_start: 0, } } pub fn slice(&self) -> &'a str { self.inner.slice() } pub fn eat_id(&mut self) -> Result<&'a str, Option> { let tok = self.next(); if let Some(Token::Identifier) = tok { Ok(self.slice()) } else { Err(tok) } } pub fn eat_expect_id(&mut self) -> &'a str { let tok = self.next(); if let Some(Token::Identifier) = tok { self.slice() } else { self.panic_message("Expected identifier"); } } pub fn eat_expect(&mut self, expected: Token) { let tok = self.next(); if tok != Some(expected) { self.panic_message(&format!("Expected {:?}, got {:?}", expected, tok)); } } pub fn panic_message(&self, message: &str) -> ! { panic!("{}\n{}", message, self.info()); } pub fn info(&self) -> TokenInfo<'a> { let from_start = &self.inner.source()[self.line_start..]; let line_end = from_start.find("\n").unwrap_or(from_start.len()); let line = &from_start[..line_end]; let span = self.inner.span(); TokenInfo { line, line_num: self.line_num, token_start: span.start - self.line_start, token_end: span.end - self.line_start, } } } impl<'a> Iterator for Lexer<'a> { type Item = Token; fn next(&mut self) -> Option { while let Some(tok) = self.inner.next() { if tok == Token::Newline { self.line_start = self.inner.span().end; self.line_num += 1; } else { return Some(tok); } } None } } #[cfg(test)] mod tests { use super::*; use console::Style; pub struct ColorTheme { normal: Style, keyword: Style, literal: Style, comment: Style, error: Style, } impl ColorTheme { pub fn new() -> Self { Self { normal: Style::default(), keyword: Style::new().blue(), literal: Style::new().cyan(), comment: Style::new().white(), error: Style::new().black().on_red().bold(), } } pub fn token_style(&self, token: &Token) -> &Style { use Token::*; match token { Struct | Function | For | If | Else | In | Let | Mut => &self.keyword, BinaryInteger | OctalInteger | DecimalInteger => &self.literal, SingleLineComment => &self.comment, Error => &self.error, _ => &self.normal, } } } #[test] fn lex_file() { let source = include_str!("../test/clock.fae"); let theme = ColorTheme::new(); let mut lex = Lexer::new(source); while let Some(token) = lex.next() { let style = theme.token_style(&token); let styled = style.apply_to(format!("{:?}: {}", token, lex.slice())); println!("{}", styled); } } // TODO use spans to color-code instead of raw tokens, to show original whitespace /*#[test] fn color_file() { let source = include_str!("test/example.fae"); let theme = ColorTheme::new(); let mut lex = Token::lexer(source); while let Some(token) = lex.next() { let style = theme.token_style(&token); print!("{}", style.apply_to(lex.slice())); } }*/ }