From 488af74b75984bb47b4baceb3d27d00ebda2fd48 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Tue, 9 Aug 2022 20:45:06 -0400 Subject: [PATCH] Laid out basic lexer structure --- lexer/lexer.go | 83 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 7 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index ecc1250..ee59994 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -1,5 +1,6 @@ package lexer +import "io" import "github.com/sashakoshka/arf/file" // LexingOperation holds information about an ongoing lexing operataion. @@ -8,13 +9,81 @@ type LexingOperation struct { char rune } -// Tokenize converts a file into a slice of tokens (lexemes) -func Tokenize (file *file.File) (tokens []Token) { - lexer := LexingOperation { } - return lexer.tokenize(file) -} +// Tokenize converts a file into a slice of tokens (lexemes). +func Tokenize (file *file.File) (tokens []Token, err error) { + lexer := LexingOperation { file: file } + tokens, err = lexer.tokenize() -// tokenize converts a file into a slice of tokens (lexemes) -func (lexer *LexingOperation) tokenize (file *file.File) (tokens []Token) { + // if the lexing operation returned io.EOF, nothing went wrong so we + // return nil for err. + if err == io.EOF { + err = nil + } return } + +// tokenize converts a file into a slice of tokens (lexemes). It always +// returns a non-nil error; if nothing went wrong, that error is io.EOF. 
+func (lexer *LexingOperation) tokenize () (tokens []Token, err error) { + err = lexer.nextRune() + if err != nil { return } + + for { + lowercase := lexer.char >= 'a' && lexer.char <= 'z' + uppercase := lexer.char >= 'A' && lexer.char <= 'Z' + number := lexer.char >= '0' && lexer.char <= '9' + + if number { + // TODO: tokenize number + } else if lowercase || uppercase { + // TODO: tokenize multi + } else { + switch lexer.char { + case '"': + // TODO: tokenize string literal + lexer.nextRune() + case '\'': + // TODO: tokenize rune literal + lexer.nextRune() + case ':': + // TODO: colon token + case '.': + // TODO: dot token + case '[': + // TODO: left bracket token + case ']': + // TODO: right bracket token + case '{': + // TODO: left brace token + case '}': + // TODO: right brace token + // TODO: add more for things like math symbols, return + // direction operators, indentation, etc + default: + err = file.NewError ( + lexer.file.Location(), 1, + "unexpected character " + + string(lexer.char), + file.ErrorKindError) + return + } + } + + // TODO: skip whitespace + } + + return +} + +// nextRune advances the lexer to the next rune in the file. +func (lexer *LexingOperation) nextRune () (err error) { + lexer.char, _, err = lexer.file.ReadRune() + if err != nil && err != io.EOF { + return file.NewError ( + lexer.file.Location(), 1, + err.Error(), file.ErrorKindError) + } + return +} + +//