Lexer tokens are now created by the lexer

This is so positional information can be accurately embedded into them.
Sasha Koshka 2022-08-12 14:34:07 -05:00
parent accf528869
commit 856d5763d3
5 changed files with 89 additions and 70 deletions
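The pattern this commit adopts: instead of building Token literals at each call site, the lexer constructs tokens through a newToken method that stamps the current file position into them at creation time. A minimal runnable sketch of that flow, using simplified stand-in types (the row/column fields and their tracking here are assumptions, not the real arf internals):

package main

import "fmt"

// Simplified stand-ins for the real arf types; only the token-creation
// flow mirrors the commit.
type Location struct {
    Row, Column, Width int
}

type TokenKind int

const TokenKindName TokenKind = 0

type Token struct {
    kind     TokenKind
    value    any
    location Location
}

type LexingOperation struct {
    row, column int
    tokens      []Token
}

// newToken stamps the lexer's current position into the token up front,
// so the location always marks where the token begins.
func (lexer *LexingOperation) newToken() Token {
    return Token{location: Location{Row: lexer.row, Column: lexer.column, Width: 1}}
}

// addToken appends a finished token to the lexer's token slice.
func (lexer *LexingOperation) addToken(token Token) {
    lexer.tokens = append(lexer.tokens, token)
}

func main() {
    lexer := &LexingOperation{row: 3, column: 7}
    token := lexer.newToken() // location captured here, before any runes are read
    token.kind = TokenKindName
    token.value = "example"
    lexer.addToken(token)
    fmt.Printf("%+v\n", lexer.tokens[0]) // token carries row 3, column 7
}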

View File

@@ -103,11 +103,11 @@ func (file *File) Close () {
 // Location returns a location struct describing the current position inside of
 // the file. This can be stored and used to print errors.
-func (file *File) Location () (location Location) {
+func (file *File) Location (width int) (location Location) {
 	return Location {
 		file:   file,
 		row:    file.currentLine,
 		column: file.currentColumn,
-		width:  1,
+		width:  width,
 	}
 }
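
The new width parameter lets a caller describe a span wider than one character, which an error printer could use to underline a whole token. A hedged sketch of that idea (the underline helper below is hypothetical, not part of arf):

package main

import (
    "fmt"
    "strings"
)

// Hypothetical error-underlining helper; arf's real printer differs.
type Location struct {
    Row, Column, Width int
}

// underline draws carets under the Width runes starting at Column.
func underline(line string, loc Location) string {
    return line + "\n" +
        strings.Repeat(" ", loc.Column) +
        strings.Repeat("^", loc.Width)
}

func main() {
    // A two-rune token like "<<" can now be reported with width 2.
    fmt.Println(underline("x := y << 2", Location{Row: 1, Column: 7, Width: 2}))
}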

View File

@ -35,7 +35,7 @@ func (lexer *LexingOperation) tokenize () (err error) {
if err != nil || shebangCheck[index] != lexer.char { if err != nil || shebangCheck[index] != lexer.char {
err = file.NewError ( err = file.NewError (
lexer.file.Location(), lexer.file.Location(1),
"not an arf file", "not an arf file",
file.ErrorKindError) file.ErrorKindError)
return return
@@ -66,13 +66,18 @@ func (lexer *LexingOperation) tokenize () (err error) {
 	}

 	if lexer.tokens[len(lexer.tokens) - 1].kind != TokenKindNewline {
-		lexer.addToken(Token { kind: TokenKindNewline })
+		token := lexer.newToken()
+		token.kind = TokenKindNewline
+		lexer.addToken(token)
 	}

 	return
 }

 func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) {
+	token := lexer.newToken()
+	token.kind = TokenKindName
+
 	got := ""

 	for {
@@ -86,7 +91,7 @@ func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) {
 		lexer.nextRune()
 	}

-	token := Token { kind: TokenKindName, value: got }
+	token.value = got

 	if len(got) == 2 {
 		firstValid := got[0] == 'n' || got[0] == 'r' || got[0] == 'w'
@@ -119,11 +124,14 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			err = lexer.nextRune()
 			file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"tab not used as indent",
 				file.ErrorKindWarn).Print()
 			return
 		}

+		token := lexer.newToken()
+		token.kind = TokenKindIndent
+
 		// eat up tabs while increasing the indent level
 		indentLevel := 0
@@ -132,11 +140,9 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			err = lexer.nextRune()
 			if err != nil { return }
 		}

-		lexer.addToken (Token {
-			kind:  TokenKindIndent,
-			value: indentLevel,
-		})
+		token.value = indentLevel
+		lexer.addToken(token)
 	case '\n':
 		// line break
@@ -155,48 +161,49 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			lexer.tokens = lexer.tokens[:tokenIndex]
 		}

-		lexer.addToken (Token {
-			kind: TokenKindNewline,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindNewline
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '"':
 		err = lexer.tokenizeString(false)
 	case '\'':
 		err = lexer.tokenizeString(true)
 	case ':':
-		lexer.addToken (Token {
-			kind: TokenKindColon,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindColon
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '.':
-		lexer.addToken (Token {
-			kind: TokenKindDot,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindDot
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '[':
-		lexer.addToken (Token {
-			kind: TokenKindLBracket,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindLBracket
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case ']':
-		lexer.addToken (Token {
-			kind: TokenKindRBracket,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindRBracket
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '{':
-		lexer.addToken (Token {
-			kind: TokenKindLBrace,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindLBrace
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '}':
-		lexer.addToken (Token {
-			kind: TokenKindRBrace,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindRBrace
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '+':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindPlus }
+		token := lexer.newToken()
+		token.kind = TokenKindPlus
 		if lexer.char == '+' {
 			token.kind = TokenKindIncrement
 		}
@@ -205,39 +212,40 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '-':
 		err = lexer.tokenizeDashBeginning()
 	case '*':
-		lexer.addToken (Token {
-			kind: TokenKindAsterisk,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindAsterisk
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '/':
-		lexer.addToken (Token {
-			kind: TokenKindSlash,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindSlash
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '@':
-		lexer.addToken (Token {
-			kind: TokenKindAt,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindAt
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '!':
-		lexer.addToken (Token {
-			kind: TokenKindExclamation,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindExclamation
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '%':
-		lexer.addToken (Token {
-			kind: TokenKindPercent,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindPercent
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '~':
-		lexer.addToken (Token {
-			kind: TokenKindTilde,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindTilde
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '<':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindLessThan }
+		token := lexer.newToken()
+		token.kind = TokenKindLessThan
 		if lexer.char == '<' {
 			token.kind = TokenKindLShift
 		}
@@ -246,7 +254,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '>':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindGreaterThan }
+		token := lexer.newToken()
+		token.kind = TokenKindGreaterThan
 		if lexer.char == '>' {
 			token.kind = TokenKindRShift
 		}
@@ -255,7 +264,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '|':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindBinaryOr }
+		token := lexer.newToken()
+		token.kind = TokenKindBinaryOr
 		if lexer.char == '|' {
 			token.kind = TokenKindLogicalOr
 		}
@@ -264,7 +274,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '&':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindBinaryAnd }
+		token := lexer.newToken()
+		token.kind = TokenKindBinaryAnd
 		if lexer.char == '&' {
 			token.kind = TokenKindLogicalAnd
 		}
@@ -272,7 +283,7 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 		err = lexer.nextRune()
 	default:
 		err = file.NewError (
-			lexer.file.Location(),
+			lexer.file.Location(1),
 			"unexpected symbol character " +
 			string(lexer.char),
 			file.ErrorKindError)
@@ -287,7 +298,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 	if err != nil { return }

 	if lexer.char == '-' {
-		token := Token { kind: TokenKindDecrement }
+		token := lexer.newToken()
+		token.kind = TokenKindDecrement

 		err = lexer.nextRune()
 		if err != nil { return }
@@ -298,7 +310,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 		}

 		lexer.addToken(token)
 	} else if lexer.char == '>' {
-		token := Token { kind: TokenKindReturnDirection }
+		token := lexer.newToken()
+		token.kind = TokenKindReturnDirection

 		err = lexer.nextRune()
 		if err != nil { return }
@@ -307,13 +320,19 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 	} else if lexer.char >= '0' && lexer.char <= '9' {
 		lexer.tokenizeNumberBeginning(true)
 	} else {
-		token := Token { kind: TokenKindMinus }
+		token := lexer.newToken()
+		token.kind = TokenKindMinus
 		lexer.addToken(token)
 	}

 	return
 }

+// newToken creates a new token from the lexer's current position in the file.
+func (lexer *LexingOperation) newToken () (token Token) {
+	return Token { location: lexer.file.Location(1) }
+}
+
 // addToken adds a new token to the lexer's token slice.
 func (lexer *LexingOperation) addToken (token Token) {
 	lexer.tokens = append(lexer.tokens, token)
@@ -334,7 +353,7 @@ func (lexer *LexingOperation) nextRune () (err error) {
 	lexer.char, _, err = lexer.file.ReadRune()
 	if err != nil && err != io.EOF {
 		return file.NewError (
-			lexer.file.Location(),
+			lexer.file.Location(1),
 			err.Error(), file.ErrorKindError)
 	}
 	return
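
Every single-symbol case arm above now repeats the same three steps: stamp a token, tag its kind, store it, and advance. A hypothetical further condensation, not part of this commit, sketched with stub stand-ins so it compiles on its own:

package main

import "fmt"

type TokenKind int

const (
    TokenKindColon TokenKind = iota
    TokenKindDot
)

type Token struct {
    kind TokenKind
}

type LexingOperation struct {
    tokens []Token
}

func (lexer *LexingOperation) newToken() Token { return Token{} }

func (lexer *LexingOperation) addToken(token Token) {
    lexer.tokens = append(lexer.tokens, token)
}

// nextRune is stubbed out; the real method reads from the file.
func (lexer *LexingOperation) nextRune() error { return nil }

// addSymbolToken stamps, tags, stores, and advances in one call,
// collapsing the repeated case-arm bodies.
func (lexer *LexingOperation) addSymbolToken(kind TokenKind) error {
    token := lexer.newToken()
    token.kind = kind
    lexer.addToken(token)
    return lexer.nextRune()
}

func main() {
    lexer := &LexingOperation{}
    _ = lexer.addSymbolToken(TokenKindColon)
    _ = lexer.addSymbolToken(TokenKindDot)
    fmt.Println(len(lexer.tokens)) // 2
}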

View File

@@ -8,6 +8,8 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
 	var fragment float64
 	var isFloat  bool

+	token := lexer.newToken()
+
 	if lexer.char == '0' {
 		lexer.nextRune()
@@ -23,7 +25,7 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
 			number, fragment, isFloat, err = lexer.tokenizeNumber(8)
 		} else {
 			return file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"unexpected character in number literal",
 				file.ErrorKindError)
 		}
@@ -33,8 +35,6 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
 	if err != nil { return }

-	token := Token { }
-
 	if isFloat {
 		floatNumber := float64(number) + fragment
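
Note that tokenizeNumberBeginning now constructs its token before consuming any digits, so the embedded location marks the first character of the literal rather than the position after it. A small self-contained sketch of why that ordering matters (the Lexer type and column tracking below are simplified assumptions, not the real arf code):

package main

import "fmt"

type Lexer struct {
    input  []rune
    column int
}

type Token struct {
    column int
    value  any
}

// newToken records the current column, mirroring the commit's pattern.
func (lexer *Lexer) newToken() Token { return Token{column: lexer.column} }

// tokenizeNumber stamps the location first, then consumes digits, so the
// token's column is the start of the literal rather than its end.
func (lexer *Lexer) tokenizeNumber() Token {
    token := lexer.newToken() // captured before consuming: first digit's column
    number := 0
    for lexer.column < len(lexer.input) &&
        lexer.input[lexer.column] >= '0' && lexer.input[lexer.column] <= '9' {
        number = number*10 + int(lexer.input[lexer.column]-'0')
        lexer.column++
    }
    token.value = number
    return token
}

func main() {
    lexer := &Lexer{input: []rune("417")}
    token := lexer.tokenizeNumber()
    fmt.Printf("value=%v column=%v\n", token.value, token.column) // value=417 column=0
}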

View File

@@ -8,6 +8,8 @@ func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
 	err = lexer.nextRune()
 	if err != nil { return }

+	token := lexer.newToken()
+
 	got := ""

 	for {
@@ -38,12 +40,10 @@ func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
 	err = lexer.nextRune()
 	if err != nil { return }

-	token := Token { }
-
 	if isRuneLiteral {
 		if len(got) > 1 {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"excess data in rune literal",
 				file.ErrorKindError)
 			return
@@ -99,7 +99,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 		if len(number) < 3 {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"octal escape sequence too short",
 				file.ErrorKindError)
 			return
@@ -133,7 +133,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 		if len(number) < want {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"hex escape sequence too short ",
 				file.ErrorKindError)
 			return
@@ -143,7 +143,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 			result = rune(parsedNumber)
 		} else {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"unknown escape character " +
 				string(lexer.char), file.ErrorKindError)
 			return

View File

@@ -78,7 +78,7 @@ func (token Token) Value () (value any) {
 // Equals returns whether this token is equal to another token
 func (token Token) Equals (testToken Token) (match bool) {
-	return token == testToken
+	return token.value == testToken.value && token.Is(testToken.kind)
 }

 // Location returns the location of the token in its file.
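
With a location embedded in every token, plain struct equality would treat otherwise-identical tokens at different positions as unequal, which is presumably why Equals now compares only value and kind. A minimal sketch demonstrating the difference, under assumed simplified types:

package main

import "fmt"

// Assumed simplified types; the real Token also carries a file reference.
type TokenKind int

type Location struct {
    Row, Column, Width int
}

type Token struct {
    kind     TokenKind
    value    any
    location Location
}

// Is reports whether the token has the given kind.
func (token Token) Is(kind TokenKind) bool { return token.kind == kind }

// Equals ignores location, matching the diff: two tokens match if their
// kind and value agree, wherever they appear in the file.
func (token Token) Equals(testToken Token) bool {
    return token.value == testToken.value && token.Is(testToken.kind)
}

func main() {
    a := Token{kind: 1, value: "x", location: Location{Row: 1}}
    b := Token{kind: 1, value: "x", location: Location{Row: 9}}
    fmt.Println(a == b)      // false: struct equality sees the locations
    fmt.Println(a.Equals(b)) // true: kind and value match
}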