From 856d5763d3cf855c4fde86c2139f00cd40cac389 Mon Sep 17 00:00:00 2001
From: Sasha Koshka
Date: Fri, 12 Aug 2022 14:34:07 -0500
Subject: [PATCH] Lexer tokens are now created by the lexer

This is so positional information can be accurately embedded into them.
---
 file/file.go     |   4 +-
 lexer/lexer.go   | 135 +++++++++++++++++++++++++++--------------------
 lexer/numbers.go |   6 +--
 lexer/text.go    |  12 ++---
 lexer/token.go   |   2 +-
 5 files changed, 89 insertions(+), 70 deletions(-)

diff --git a/file/file.go b/file/file.go
index bbaaca8..db6c2d5 100644
--- a/file/file.go
+++ b/file/file.go
@@ -103,11 +103,11 @@ func (file *File) Close () {
 
 // Location returns a location struct describing the current position inside of
 // the file. This can be stored and used to print errors.
-func (file *File) Location () (location Location) {
+func (file *File) Location (width int) (location Location) {
 	return Location {
 		file:   file,
 		row:    file.currentLine,
 		column: file.currentColumn,
-		width:  1,
+		width:  width,
 	}
 }

diff --git a/lexer/lexer.go b/lexer/lexer.go
index fac177e..cbdc5fb 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -35,7 +35,7 @@ func (lexer *LexingOperation) tokenize () (err error) {
 		if err != nil || shebangCheck[index] != lexer.char {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"not an arf file",
 				file.ErrorKindError)
 			return
@@ -66,13 +66,18 @@ func (lexer *LexingOperation) tokenize () (err error) {
 	}
 
 	if lexer.tokens[len(lexer.tokens) - 1].kind != TokenKindNewline {
-		lexer.addToken(Token { kind: TokenKindNewline })
+		token := lexer.newToken()
+		token.kind = TokenKindNewline
+		lexer.addToken(token)
 	}
 
 	return
 }
 
 func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) {
+	token := lexer.newToken()
+	token.kind = TokenKindName
+
 	got := ""
 
 	for {
@@ -86,7 +91,7 @@ func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) {
 		lexer.nextRune()
 	}
 
-	token := Token { kind: TokenKindName, value: got }
+	token.value = got
 
 	if len(got) == 2 {
 		firstValid := got[0] == 'n' || got[0] == 'r' || got[0] == 'w'
@@ -119,11 +124,14 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			err = lexer.nextRune()
 
 			file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"tab not used as indent",
 				file.ErrorKindWarn).Print()
 			return
 		}
+
+		token := lexer.newToken()
+		token.kind = TokenKindIndent
 
 		// eat up tabs while increasing the indent level
 		indentLevel := 0
@@ -132,11 +140,9 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			err = lexer.nextRune()
 			if err != nil { return }
 		}
-
-		lexer.addToken (Token {
-			kind:  TokenKindIndent,
-			value: indentLevel,
-		})
+
+		token.value = indentLevel
+		lexer.addToken(token)
 
 	case '\n':
 		// line break
@@ -155,48 +161,49 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			lexer.tokens = lexer.tokens[:tokenIndex]
 		}
 
-		lexer.addToken (Token {
-			kind: TokenKindNewline,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindNewline
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '"':
 		err = lexer.tokenizeString(false)
 	case '\'':
 		err = lexer.tokenizeString(true)
 	case ':':
-		lexer.addToken (Token {
-			kind: TokenKindColon,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindColon
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '.':
-		lexer.addToken (Token {
-			kind: TokenKindDot,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindDot
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '[':
-		lexer.addToken (Token {
-			kind: TokenKindLBracket,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindLBracket
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case ']':
-		lexer.addToken (Token {
-			kind: TokenKindRBracket,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindRBracket
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '{':
-		lexer.addToken (Token {
-			kind: TokenKindLBrace,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindLBrace
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '}':
-		lexer.addToken (Token {
-			kind: TokenKindRBrace,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindRBrace
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '+':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindPlus }
+		token := lexer.newToken()
+		token.kind = TokenKindPlus
 		if lexer.char == '+' {
 			token.kind = TokenKindIncrement
 		}
@@ -205,39 +212,40 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '-':
 		err = lexer.tokenizeDashBeginning()
 	case '*':
-		lexer.addToken (Token {
-			kind: TokenKindAsterisk,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindAsterisk
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '/':
-		lexer.addToken (Token {
-			kind: TokenKindSlash,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindSlash
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '@':
-		lexer.addToken (Token {
-			kind: TokenKindAt,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindAt
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '!':
-		lexer.addToken (Token {
-			kind: TokenKindExclamation,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindExclamation
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '%':
-		lexer.addToken (Token {
-			kind: TokenKindPercent,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindPercent
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '~':
-		lexer.addToken (Token {
-			kind: TokenKindTilde,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindTilde
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '<':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindLessThan }
+		token := lexer.newToken()
+		token.kind = TokenKindLessThan
 		if lexer.char == '<' {
 			token.kind = TokenKindLShift
 		}
@@ -246,7 +254,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '>':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindGreaterThan }
+		token := lexer.newToken()
+		token.kind = TokenKindGreaterThan
 		if lexer.char == '>' {
 			token.kind = TokenKindRShift
 		}
@@ -255,7 +264,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '|':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindBinaryOr }
+		token := lexer.newToken()
+		token.kind = TokenKindBinaryOr
 		if lexer.char == '|' {
 			token.kind = TokenKindLogicalOr
 		}
@@ -264,7 +274,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '&':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindBinaryAnd }
+		token := lexer.newToken()
+		token.kind = TokenKindBinaryAnd
 		if lexer.char == '&' {
 			token.kind = TokenKindLogicalAnd
 		}
@@ -272,7 +283,7 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 		err = lexer.nextRune()
 	default:
 		err = file.NewError (
-			lexer.file.Location(),
+			lexer.file.Location(1),
 			"unexpected symbol character " +
 			string(lexer.char),
 			file.ErrorKindError)
@@ -287,7 +298,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 	if err != nil { return }
 
 	if lexer.char == '-' {
-		token := Token { kind: TokenKindDecrement }
+		token := lexer.newToken()
+		token.kind = TokenKindDecrement
 
 		err = lexer.nextRune()
 		if err != nil { return }
@@ -298,7 +310,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 		}
 		lexer.addToken(token)
 	} else if lexer.char == '>' {
-		token := Token { kind: TokenKindReturnDirection }
+		token := lexer.newToken()
+		token.kind = TokenKindReturnDirection
 
 		err = lexer.nextRune()
 		if err != nil { return }
@@ -307,13 +320,19 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 	} else if lexer.char >= '0' && lexer.char <= '9' {
 		lexer.tokenizeNumberBeginning(true)
 	} else {
-		token := Token { kind: TokenKindMinus }
+		token := lexer.newToken()
+		token.kind = TokenKindMinus
 		lexer.addToken(token)
 	}
 
 	return
 }
 
+// newToken creates a new token from the lexer's current position in the file.
+func (lexer *LexingOperation) newToken () (token Token) {
+	return Token { location: lexer.file.Location(1) }
+}
+
 // addToken adds a new token to the lexer's token slice.
 func (lexer *LexingOperation) addToken (token Token) {
 	lexer.tokens = append(lexer.tokens, token)
@@ -334,7 +353,7 @@ func (lexer *LexingOperation) nextRune () (err error) {
 	lexer.char, _, err = lexer.file.ReadRune()
 	if err != nil && err != io.EOF {
 		return file.NewError (
-			lexer.file.Location(),
+			lexer.file.Location(1),
 			err.Error(),
 			file.ErrorKindError)
 	}
 	return

diff --git a/lexer/numbers.go b/lexer/numbers.go
index 4414bc3..4dce92b 100644
--- a/lexer/numbers.go
+++ b/lexer/numbers.go
@@ -8,6 +8,8 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
 	var fragment float64
 	var isFloat  bool
 
+	token := lexer.newToken()
+
 	if lexer.char == '0' {
 		lexer.nextRune()
 
@@ -23,7 +25,7 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
 			number, fragment, isFloat, err = lexer.tokenizeNumber(8)
 		} else {
 			return file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"unexpected character in number literal",
 				file.ErrorKindError)
 		}
@@ -33,8 +35,6 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
 
 	if err != nil { return }
 
-	token := Token { }
-
 	if isFloat {
 		floatNumber := float64(number) + fragment

diff --git a/lexer/text.go b/lexer/text.go
index e2359e2..8d25a23 100644
--- a/lexer/text.go
+++ b/lexer/text.go
@@ -8,6 +8,8 @@ func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
 	err = lexer.nextRune()
 	if err != nil { return }
 
+	token := lexer.newToken()
+
 	got := ""
 
 	for {
@@ -38,12 +40,10 @@ func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
 	err = lexer.nextRune()
 	if err != nil { return }
 
-	token := Token { }
-
 	if isRuneLiteral {
 		if len(got) > 1 {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"excess data in rune literal",
 				file.ErrorKindError)
 			return
@@ -99,7 +99,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 
 		if len(number) < 3 {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"octal escape sequence too short",
 				file.ErrorKindError)
 			return
@@ -133,7 +133,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 
 		if len(number) < want {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"hex escape sequence too short ",
 				file.ErrorKindError)
 			return
@@ -143,7 +143,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 		result = rune(parsedNumber)
 	} else {
 		err = file.NewError (
-			lexer.file.Location(),
+			lexer.file.Location(1),
 			"unknown escape character " +
 			string(lexer.char),
 			file.ErrorKindError)
		return

diff --git a/lexer/token.go b/lexer/token.go
index 8e171ed..955eabe 100644
--- a/lexer/token.go
+++ b/lexer/token.go
@@ -78,7 +78,7 @@ func (token Token) Value () (value any) {
 
 // Equals returns whether this token is equal to another token
 func (token Token) Equals (testToken Token) (match bool) {
-	return token == testToken
+	return token.value == testToken.value && token.Is(testToken.kind)
 }
 
 // Location returns the location of the token in its file.
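-- 
The pattern this commit converges on — construct every token through one newToken() helper that stamps in the current file position, then fill in kind and value at the call site — can be shown in isolation. Below is a minimal, self-contained Go sketch of that pattern, not the repository's actual code: Lexer, Token, and Location here are hypothetical stand-ins for the unexported LexingOperation, Token, and Location types, and positions are set directly rather than tracked by a rune reader.

package main

import "fmt"

// Location identifies a span of source text. Width exists so error
// messages can underline spans longer than one character, which is
// why Location() gains a width parameter in the commit above.
type Location struct {
	Row, Column, Width int
}

// Token carries its location from the moment it is created.
type Token struct {
	Kind     string
	Value    any
	Location Location
}

// Lexer is a hypothetical stand-in for LexingOperation.
type Lexer struct {
	row, column int
	tokens      []Token
}

// newToken captures the lexer's current position immediately, before
// any further runes are consumed, so the stored location always marks
// the start of the token.
func (lexer *Lexer) newToken() Token {
	return Token{Location: Location{Row: lexer.row, Column: lexer.column, Width: 1}}
}

// addToken appends a finished token to the token slice.
func (lexer *Lexer) addToken(token Token) {
	lexer.tokens = append(lexer.tokens, token)
}

func main() {
	lexer := &Lexer{row: 3, column: 14}

	// Mirrors the new call sites in the commit: create, set kind, add.
	token := lexer.newToken()
	token.Kind = "Plus"
	lexer.addToken(token)

	fmt.Printf("%+v\n", lexer.tokens[0])
}

Because construction is centralized, a later change to what a token records at creation (for instance a real width for multi-rune tokens, which the new Location(width int) signature already allows for) touches newToken() alone rather than every case in tokenizeSymbolBeginning().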