// Copyright (c) 2022 Marceline Cramer
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
use logos::Logos;
|
|
use std::fmt::{Display, Formatter, Result as FmtResult};
|
|
|
|
// All token kinds recognized by the lexer, generated by the `logos` derive.
//
// The `subpattern` attributes define named digit-run fragments (each allows
// `_` and `'` as digit separators) that the integer-literal regexes below
// reference via `(?&name)`.
#[rustfmt::skip]
#[derive(Logos, Clone, Copy, Debug, PartialEq)]
#[logos(subpattern decimal = r"[0-9][_'0-9]*")]
#[logos(subpattern hex = r"[0-9a-fA-F][_'0-9a-fA-F]*")]
#[logos(subpattern octal = r"[0-7][_'0-7]*")]
#[logos(subpattern binary = r"[01][_'01]*")]
pub enum Token {
    // keywords
    #[token("struct")] Struct,
    #[token("fn")] Function,
    #[token("for")] For,
    #[token("in")] In,
    #[token("while")] While,
    #[token("if")] If,
    #[token("else")] Else,
    #[token("let")] Let,
    #[token("mut")] Mut,
    #[token("interface")] Interface,
    #[token("impl")] Impl,

    // separators
    #[token("{")] BraceOpen,
    #[token("}")] BraceClose,
    #[token("(")] ParenOpen,
    #[token(")")] ParenClose,
    #[token(";")] Semicolon,
    #[token(",")] Comma,
    #[token(".")] Dot,

    // arithmetic operators
    #[token("+")] OpAdd,
    #[token("-")] OpSub,
    #[token("*")] OpMul,
    #[token("/")] OpDiv,
    #[token("=")] OpAssign,

    // boolean operators (word-form, like Python)
    #[token("or")] OpBoolOr,
    #[token("and")] OpBoolAnd,
    #[token("not")] OpBoolNot,

    // bitwise operators
    #[token("&")] OpBitAnd,
    #[token("|")] OpBitOr,
    #[token("^")] OpBitXor,
    #[token("~")] OpBitNot,

    // comparison operators
    #[token("<")] OpLess,
    #[token("<=")] OpLessEq,
    #[token(">")] OpGreater,
    #[token(">=")] OpGreaterEq,
    #[token("==")] OpEq,
    #[token("!=")] OpNeq,

    // boolean literals
    #[token("true")] True,
    #[token("false")] False,

    // Identifiers: ASCII letters/underscore start, then letters/digits/underscore.
    // Keyword tokens above win over this regex for exact keyword matches.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
    Identifier,

    // Integer literals with an explicit base prefix.
    #[regex(r"0[bB](?&binary)")]
    BinaryInteger,

    #[regex(r"0[oO](?&octal)")]
    OctalInteger,

    #[regex(r"0[xX](?&hex)")]
    HexInteger,

    // TODO this has a higher priority because the DecimalFloat regex matches
    // even without a . (please help me I don't know regex)
    #[regex(r"(?&decimal)", priority=2)]
    DecimalInteger,

    // Decimal floats: digits with optional fraction, or a leading-dot fraction.
    // NOTE(review): this also matches a bare integer; DecimalInteger's
    // priority=2 above is what keeps plain integers out of this variant.
    #[regex(r"([0-9]+([.][0-9]*)?|[.][0-9]+)")]
    DecimalFloat,

    // Double-quoted strings with a limited escape set (\t, \u, \n, \").
    #[regex(r#""([^"\\]|\\t|\\u|\\n|\\")*""#)]
    StringLiteral,

    // `//` comments run to end of line and are skipped by the lexer.
    #[regex(r"//[^\n]*", logos::skip)]
    SingleLineComment,

    // Horizontal whitespace is skipped; '\n' is deliberately excluded so
    // the Newline token below can drive line tracking in `Lexer::next`.
    #[regex(r"[ \t\f\r]+", logos::skip)]
    Whitespace,

    // Newlines are real tokens (consumed internally by `Lexer` to count lines).
    #[token("\n")]
    Newline,

    // Catch-all for input no other rule matches (logos 0.12-style error variant).
    #[error]
    Error,
}
|
|
|
|
/// Location of a single token within its source line, used to render a
/// caret-style diagnostic via its `Display` impl.
pub struct TokenInfo<'a> {
    /// Full text of the line containing the token (without trailing newline).
    pub line: &'a str,
    /// Zero-based line number of that line.
    pub line_num: usize,
    /// Byte offset of the token's first byte, relative to the line start.
    pub token_start: usize,
    /// Byte offset one past the token's last byte, relative to the line start.
    pub token_end: usize,
}
|
|
|
|
impl<'a> Display for TokenInfo<'a> {
|
|
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
|
|
write!(f, "{:>4}| {}\n |", self.line_num, self.line)?;
|
|
|
|
write!(f, "{} ^", " ".repeat(self.token_start))?;
|
|
|
|
let token_len = self.token_end - self.token_start;
|
|
if token_len > 1 {
|
|
write!(f, "{}^\n", "-".repeat(token_len - 2))
|
|
} else {
|
|
write!(f, "\n")
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Wrapper around the logos-generated lexer that filters out newline tokens
/// while tracking line numbers and line-start offsets for diagnostics.
pub struct Lexer<'a> {
    // The underlying logos token stream over the source text.
    inner: logos::Lexer<'a, Token>,
    // Zero-based line number of the line currently being lexed.
    line_num: usize,
    // Byte offset into the source where the current line begins.
    line_start: usize,
}
|
|
|
|
impl<'a> Lexer<'a> {
|
|
pub fn new(source: &'a str) -> Self {
|
|
Self {
|
|
inner: Token::lexer(source),
|
|
line_num: 0,
|
|
line_start: 0,
|
|
}
|
|
}
|
|
|
|
pub fn slice(&self) -> &'a str {
|
|
self.inner.slice()
|
|
}
|
|
|
|
pub fn eat_id(&mut self) -> Result<&'a str, Option<Token>> {
|
|
let tok = self.next();
|
|
if let Some(Token::Identifier) = tok {
|
|
Ok(self.slice())
|
|
} else {
|
|
Err(tok)
|
|
}
|
|
}
|
|
|
|
pub fn eat_expect_id(&mut self) -> &'a str {
|
|
let tok = self.next();
|
|
if let Some(Token::Identifier) = tok {
|
|
self.slice()
|
|
} else {
|
|
self.panic_message("Expected identifier");
|
|
}
|
|
}
|
|
|
|
pub fn eat_expect(&mut self, expected: Token) {
|
|
let tok = self.next();
|
|
if tok != Some(expected) {
|
|
self.panic_message(&format!("Expected {:?}, got {:?}", expected, tok));
|
|
}
|
|
}
|
|
|
|
pub fn panic_message(&self, message: &str) -> ! {
|
|
panic!("{}\n{}", message, self.info());
|
|
}
|
|
|
|
pub fn info(&self) -> TokenInfo<'a> {
|
|
let from_start = &self.inner.source()[self.line_start..];
|
|
let line_end = from_start.find("\n").unwrap_or(from_start.len());
|
|
let line = &from_start[..line_end];
|
|
let span = self.inner.span();
|
|
|
|
TokenInfo {
|
|
line,
|
|
line_num: self.line_num,
|
|
token_start: span.start - self.line_start,
|
|
token_end: span.end - self.line_start,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for Lexer<'a> {
|
|
type Item = Token;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
while let Some(tok) = self.inner.next() {
|
|
if tok == Token::Newline {
|
|
self.line_start = self.inner.span().end;
|
|
self.line_num += 1;
|
|
} else {
|
|
return Some(tok);
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use console::Style;
|
|
|
|
    /// Terminal styles used to colorize lexed tokens in test output.
    pub struct ColorTheme {
        // Fallback style for tokens without a dedicated category.
        normal: Style,
        keyword: Style,
        literal: Style,
        comment: Style,
        // Style for the lexer's catch-all `Error` token.
        error: Style,
    }
|
|
|
|
impl ColorTheme {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
normal: Style::default(),
|
|
keyword: Style::new().blue(),
|
|
literal: Style::new().cyan(),
|
|
comment: Style::new().white(),
|
|
error: Style::new().black().on_red().bold(),
|
|
}
|
|
}
|
|
|
|
pub fn token_style(&self, token: &Token) -> &Style {
|
|
use Token::*;
|
|
match token {
|
|
Struct | Function | For | If | Else | In | Let | Mut => &self.keyword,
|
|
BinaryInteger | OctalInteger | DecimalInteger => &self.literal,
|
|
SingleLineComment => &self.comment,
|
|
Error => &self.error,
|
|
_ => &self.normal,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn lex_file() {
|
|
let source = include_str!("../test/clock.fae");
|
|
let theme = ColorTheme::new();
|
|
let mut lex = Lexer::new(source);
|
|
|
|
while let Some(token) = lex.next() {
|
|
let style = theme.token_style(&token);
|
|
let styled = style.apply_to(format!("{:?}: {}", token, lex.slice()));
|
|
println!("{}", styled);
|
|
}
|
|
}
|
|
|
|
// TODO use spans to color-code instead of raw tokens, to show original whitespace
|
|
/*#[test]
|
|
fn color_file() {
|
|
let source = include_str!("test/example.fae");
|
|
let theme = ColorTheme::new();
|
|
let mut lex = Token::lexer(source);
|
|
|
|
while let Some(token) = lex.next() {
|
|
let style = theme.token_style(&token);
|
|
print!("{}", style.apply_to(lex.slice()));
|
|
}
|
|
}*/
|
|
}
|