// sprite-rs/src/parse/lexer.rs

// Copyright (c) 2022 Marceline Cramer
// SPDX-License-Identifier: GPL-3.0-or-later
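
//! Token definitions and a line-tracking lexer built on the `logos` crate.
//! `Lexer` wraps the generated `logos::Lexer`, skipping `Newline` tokens while
//! counting lines so that diagnostics can point at the offending token.
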
use logos::Logos;
use std::fmt::{Display, Formatter, Result as FmtResult};
#[rustfmt::skip]
#[derive(Logos, Clone, Copy, Debug, PartialEq)]
#[logos(subpattern decimal = r"[0-9][_'0-9]*")]
#[logos(subpattern hex = r"[0-9a-fA-F][_'0-9a-fA-F]*")]
#[logos(subpattern octal = r"[0-7][_'0-7]*")]
#[logos(subpattern binary = r"[01][_'01]*")]
pub enum Token {
    // keywords
    #[token("struct")] Struct,
    #[token("fn")] Function,
    #[token("for")] For,
    #[token("in")] In,
    #[token("while")] While,
    #[token("if")] If,
    #[token("else")] Else,
    #[token("let")] Let,
    #[token("mut")] Mut,
    #[token("interface")] Interface,
    #[token("impl")] Impl,

    // separators
    #[token("{")] BraceOpen,
    #[token("}")] BraceClose,
    #[token("(")] ParenOpen,
    #[token(")")] ParenClose,
    #[token(";")] Semicolon,
    #[token(",")] Comma,
    #[token(".")] Dot,

    // arithmetic operators
    #[token("+")] OpAdd,
    #[token("-")] OpSub,
    #[token("*")] OpMul,
    #[token("/")] OpDiv,
    #[token("=")] OpAssign,

    // boolean operators
    #[token("or")] OpBoolOr,
    #[token("and")] OpBoolAnd,
    #[token("not")] OpBoolNot,

    // bitwise operators
    #[token("&")] OpBitAnd,
    #[token("|")] OpBitOr,
    #[token("^")] OpBitXor,
    #[token("~")] OpBitNot,

    // comparison operators
    #[token("<")] OpLess,
    #[token("<=")] OpLessEq,
    #[token(">")] OpGreater,
    #[token(">=")] OpGreaterEq,
    #[token("==")] OpEq,
    #[token("!=")] OpNeq,

    // boolean literals
    #[token("true")] True,
    #[token("false")] False,

    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
    Identifier,
    #[regex(r"0[bB](?&binary)")]
    BinaryInteger,
    #[regex(r"0[oO](?&octal)")]
    OctalInteger,
    #[regex(r"0[xX](?&hex)")]
    HexInteger,
    // TODO: this needs an explicit higher priority because the DecimalFloat
    // regex below also matches plain integers without a `.`
    // (please help me, I don't know regex)
    #[regex(r"(?&decimal)", priority = 2)]
    DecimalInteger,
#[regex(r"([0-9]+([.][0-9]*)?|[.][0-9]+)")]
DecimalFloat,
#[regex(r#""([^"\\]|\\t|\\u|\\n|\\")*""#)]
StringLiteral,
#[regex(r"//[^\n]*", logos::skip)]
SingleLineComment,
#[regex(r"[ \t\f\r]+", logos::skip)]
Whitespace,
#[token("\n")]
Newline,
#[error]
Error,
}
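
// Location information for a single token: the source line that contains it,
// that line's number, and the token's start/end byte offsets relative to the
// start of the line. `Display` renders this as a caret diagnostic.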
pub struct TokenInfo<'a> {
    pub line: &'a str,
    pub line_num: usize,
    pub token_start: usize,
    pub token_end: usize,
}

impl<'a> Display for TokenInfo<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        // print the line number, the source line, and a gutter for the marker line
        write!(f, "{:>4}| {}\n    |", self.line_num, self.line)?;
        // point a caret at the start of the token
        write!(f, "{} ^", " ".repeat(self.token_start))?;
        let token_len = self.token_end - self.token_start;
        if token_len > 1 {
            // underline the rest of the token, ending with a second caret
            writeln!(f, "{}^", "-".repeat(token_len - 2))
        } else {
            writeln!(f)
        }
    }
}
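
// Line-tracking wrapper around the generated `logos` lexer. `line_num` and
// `line_start` record where the current source line begins so that `info()`
// can report token positions relative to that line.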
pub struct Lexer<'a> {
    inner: logos::Lexer<'a, Token>,
    line_num: usize,
    line_start: usize,
}

impl<'a> Lexer<'a> {
    pub fn new(source: &'a str) -> Self {
        Self {
            inner: Token::lexer(source),
            line_num: 0,
            line_start: 0,
        }
    }

    pub fn slice(&self) -> &'a str {
        self.inner.slice()
    }
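
    // Consumes the next token; returns its source text if it is an
    // `Identifier`, otherwise returns the unexpected token (or `None` at end
    // of input) as the error.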
    pub fn eat_id(&mut self) -> Result<&'a str, Option<Token>> {
        let tok = self.next();
        if let Some(Token::Identifier) = tok {
            Ok(self.slice())
        } else {
            Err(tok)
        }
    }
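
    // Like `eat_id`, but panics with a source-location diagnostic when the
    // next token is not an identifier.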
    pub fn eat_expect_id(&mut self) -> &'a str {
        let tok = self.next();
        if let Some(Token::Identifier) = tok {
            self.slice()
        } else {
            self.panic_message("Expected identifier");
        }
    }
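
    // Consumes the next token and panics with a diagnostic if it is not the
    // `expected` token.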
    pub fn eat_expect(&mut self, expected: Token) {
        let tok = self.next();
        if tok != Some(expected) {
            self.panic_message(&format!("Expected {:?}, got {:?}", expected, tok));
        }
    }
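
    // Panics with `message` followed by a caret diagnostic pointing at the
    // current token.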
    pub fn panic_message(&self, message: &str) -> ! {
        panic!("{}\n{}", message, self.info());
    }
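
    // Builds a `TokenInfo` for the current token: slices the current line out
    // of the source and converts the token's span into line-relative offsets.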
    pub fn info(&self) -> TokenInfo<'a> {
        let from_start = &self.inner.source()[self.line_start..];
        let line_end = from_start.find('\n').unwrap_or(from_start.len());
        let line = &from_start[..line_end];
        let span = self.inner.span();
        TokenInfo {
            line,
            line_num: self.line_num,
            token_start: span.start - self.line_start,
            token_end: span.end - self.line_start,
        }
    }
}
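
// Newline tokens are consumed here to advance the line counter; everything
// else is passed through, so callers never see `Token::Newline` directly.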
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        while let Some(tok) = self.inner.next() {
            if tok == Token::Newline {
                self.line_start = self.inner.span().end;
                self.line_num += 1;
            } else {
                return Some(tok);
            }
        }
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use console::Style;

    pub struct ColorTheme {
        normal: Style,
        keyword: Style,
        literal: Style,
        comment: Style,
        error: Style,
    }

    impl ColorTheme {
        pub fn new() -> Self {
            Self {
                normal: Style::default(),
                keyword: Style::new().blue(),
                literal: Style::new().cyan(),
                comment: Style::new().white(),
                error: Style::new().black().on_red().bold(),
            }
        }

        pub fn token_style(&self, token: &Token) -> &Style {
            use Token::*;
            match token {
                Struct | Function | For | While | If | Else | In | Let | Mut | Interface
                | Impl => &self.keyword,
                True | False | BinaryInteger | OctalInteger | HexInteger | DecimalInteger
                | DecimalFloat | StringLiteral => &self.literal,
                SingleLineComment => &self.comment,
                Error => &self.error,
                _ => &self.normal,
            }
        }
    }

    #[test]
    fn lex_file() {
        let source = include_str!("../test/clock.fae");
        let theme = ColorTheme::new();
        let mut lex = Lexer::new(source);
        while let Some(token) = lex.next() {
            let style = theme.token_style(&token);
            let styled = style.apply_to(format!("{:?}: {}", token, lex.slice()));
            println!("{}", styled);
        }
    }
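
    // A small additional check (sketch): assert the exact token stream for a
    // short snippet. This assumes plain integers resolve to `DecimalInteger`
    // rather than `DecimalFloat`, per the priority note on that token above;
    // whitespace and the newline are filtered out by the `Lexer` iterator.
    #[test]
    fn lex_snippet() {
        use Token::*;
        let tokens: Vec<Token> = Lexer::new("let x = 5;\nif x == 5 { x }").collect();
        assert_eq!(
            tokens,
            vec![
                Let, Identifier, OpAssign, DecimalInteger, Semicolon, If, Identifier, OpEq,
                DecimalInteger, BraceOpen, Identifier, BraceClose,
            ]
        );
    }
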
    // TODO: use spans to color-code instead of raw tokens, to show original whitespace
    /*
    #[test]
    fn color_file() {
        let source = include_str!("test/example.fae");
        let theme = ColorTheme::new();
        let mut lex = Token::lexer(source);
        while let Some(token) = lex.next() {
            let style = theme.token_style(&token);
            print!("{}", style.apply_to(lex.slice()));
        }
    }
    */
}