Lexer tokens are now created by the lexer

This is so positional information can be accurately embedded into them.
Sasha Koshka 2022-08-12 14:34:07 -05:00
parent accf528869
commit 856d5763d3
5 changed files with 89 additions and 70 deletions
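
In short: call sites used to build each Token literal inline (e.g. lexer.addToken(Token { kind: TokenKindColon })), so tokens carried no positional information. The lexer now constructs every token through a single helper that stamps the current file position into it at creation time. A minimal sketch of the pattern the hunks below apply at each call site (newToken and addToken are the helpers from this diff; TokenKindColon stands in for whichever kind a given call site needs):

	// newToken stamps the lexer's current file position into a fresh token.
	func (lexer *LexingOperation) newToken () (token Token) {
		return Token { location: lexer.file.Location(1) }
	}

	// A typical call site: create the token, set its kind, then append it.
	token := lexer.newToken()
	token.kind = TokenKindColon
	lexer.addToken(token)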

View File

@@ -103,11 +103,11 @@ func (file *File) Close () {
 
 // Location returns a location struct describing the current position inside of
 // the file. This can be stored and used to print errors.
-func (file *File) Location () (location Location) {
+func (file *File) Location (width int) (location Location) {
 	return Location {
 		file:   file,
 		row:    file.currentLine,
 		column: file.currentColumn,
-		width:  1,
+		width:  width,
 	}
 }
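
Location now takes the described span's width in runes instead of hardcoding 1. Every call updated in this commit passes 1, since the lexer reports positions a single rune at a time; a caller that wanted to cover a whole lexeme could pass its rune count instead. A hypothetical example (not part of this diff; got stands for an already-lexed string):

	// Cover the full width of a lexed name rather than a single rune.
	location := lexer.file.Location(len([]rune(got)))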

View File

@@ -35,7 +35,7 @@ func (lexer *LexingOperation) tokenize () (err error) {
 		if err != nil || shebangCheck[index] != lexer.char {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"not an arf file",
 				file.ErrorKindError)
 			return
@@ -66,13 +66,18 @@ func (lexer *LexingOperation) tokenize () (err error) {
 	}
 	if lexer.tokens[len(lexer.tokens) - 1].kind != TokenKindNewline {
-		lexer.addToken(Token { kind: TokenKindNewline })
+		token := lexer.newToken()
+		token.kind = TokenKindNewline
+		lexer.addToken(token)
 	}
 	return
 }
 
 func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) {
+	token := lexer.newToken()
+	token.kind = TokenKindName
+
 	got := ""
 	for {
@@ -86,7 +91,7 @@ func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) {
 		lexer.nextRune()
 	}
 
-	token := Token { kind: TokenKindName, value: got }
+	token.value = got
 
 	if len(got) == 2 {
 		firstValid := got[0] == 'n' || got[0] == 'r' || got[0] == 'w'
@@ -119,11 +124,14 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			err = lexer.nextRune()
 			file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"tab not used as indent",
 				file.ErrorKindWarn).Print()
 			return
 		}
 
+		token := lexer.newToken()
+		token.kind = TokenKindIndent
+
 		// eat up tabs while increasing the indent level
 		indentLevel := 0
@@ -132,11 +140,9 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			err = lexer.nextRune()
 			if err != nil { return }
 		}
 
-		lexer.addToken (Token {
-			kind:  TokenKindIndent,
-			value: indentLevel,
-		})
+		token.value = indentLevel
+		lexer.addToken(token)
 
 	case '\n':
 		// line break
@@ -155,48 +161,49 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			lexer.tokens = lexer.tokens[:tokenIndex]
 		}
 
-		lexer.addToken (Token {
-			kind: TokenKindNewline,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindNewline
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '"':
 		err = lexer.tokenizeString(false)
 	case '\'':
 		err = lexer.tokenizeString(true)
 	case ':':
-		lexer.addToken (Token {
-			kind: TokenKindColon,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindColon
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '.':
-		lexer.addToken (Token {
-			kind: TokenKindDot,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindDot
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '[':
-		lexer.addToken (Token {
-			kind: TokenKindLBracket,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindLBracket
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case ']':
-		lexer.addToken (Token {
-			kind: TokenKindRBracket,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindRBracket
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '{':
-		lexer.addToken (Token {
-			kind: TokenKindLBrace,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindLBrace
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '}':
-		lexer.addToken (Token {
-			kind: TokenKindRBrace,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindRBrace
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '+':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindPlus }
+		token := lexer.newToken()
+		token.kind = TokenKindPlus
 		if lexer.char == '+' {
 			token.kind = TokenKindIncrement
 		}
@@ -205,39 +212,40 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '-':
 		err = lexer.tokenizeDashBeginning()
 	case '*':
-		lexer.addToken (Token {
-			kind: TokenKindAsterisk,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindAsterisk
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '/':
-		lexer.addToken (Token {
-			kind: TokenKindSlash,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindSlash
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '@':
-		lexer.addToken (Token {
-			kind: TokenKindAt,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindAt
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '!':
-		lexer.addToken (Token {
-			kind: TokenKindExclamation,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindExclamation
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '%':
-		lexer.addToken (Token {
-			kind: TokenKindPercent,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindPercent
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '~':
-		lexer.addToken (Token {
-			kind: TokenKindTilde,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindTilde
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '<':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindLessThan }
+		token := lexer.newToken()
+		token.kind = TokenKindLessThan
 		if lexer.char == '<' {
 			token.kind = TokenKindLShift
 		}
@@ -246,7 +254,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '>':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindGreaterThan }
+		token := lexer.newToken()
+		token.kind = TokenKindGreaterThan
 		if lexer.char == '>' {
 			token.kind = TokenKindRShift
 		}
@@ -255,7 +264,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '|':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindBinaryOr }
+		token := lexer.newToken()
+		token.kind = TokenKindBinaryOr
 		if lexer.char == '|' {
 			token.kind = TokenKindLogicalOr
 		}
@@ -264,7 +274,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '&':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindBinaryAnd }
+		token := lexer.newToken()
+		token.kind = TokenKindBinaryAnd
 		if lexer.char == '&' {
 			token.kind = TokenKindLogicalAnd
 		}
@@ -272,7 +283,7 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 		err = lexer.nextRune()
 	default:
 		err = file.NewError (
-			lexer.file.Location(),
+			lexer.file.Location(1),
 			"unexpected symbol character " +
 			string(lexer.char),
 			file.ErrorKindError)
@@ -287,7 +298,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 	if err != nil { return }
 
 	if lexer.char == '-' {
-		token := Token { kind: TokenKindDecrement }
+		token := lexer.newToken()
+		token.kind = TokenKindDecrement
 
 		err = lexer.nextRune()
 		if err != nil { return }
@@ -298,7 +310,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 		}
 		lexer.addToken(token)
 	} else if lexer.char == '>' {
-		token := Token { kind: TokenKindReturnDirection }
+		token := lexer.newToken()
+		token.kind = TokenKindReturnDirection
 
 		err = lexer.nextRune()
 		if err != nil { return }
@@ -307,13 +320,19 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 	} else if lexer.char >= '0' && lexer.char <= '9' {
 		lexer.tokenizeNumberBeginning(true)
 	} else {
-		token := Token { kind: TokenKindMinus }
+		token := lexer.newToken()
+		token.kind = TokenKindMinus
 		lexer.addToken(token)
 	}
 
 	return
 }
 
+// newToken creates a new token from the lexer's current position in the file.
+func (lexer *LexingOperation) newToken () (token Token) {
+	return Token { location: lexer.file.Location(1) }
+}
+
 // addToken adds a new token to the lexer's token slice.
 func (lexer *LexingOperation) addToken (token Token) {
 	lexer.tokens = append(lexer.tokens, token)
@@ -334,7 +353,7 @@ func (lexer *LexingOperation) nextRune () (err error) {
 	lexer.char, _, err = lexer.file.ReadRune()
 	if err != nil && err != io.EOF {
 		return file.NewError (
-			lexer.file.Location(),
+			lexer.file.Location(1),
 			err.Error(), file.ErrorKindError)
 	}
 	return

View File

@@ -8,6 +8,8 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
 	var fragment float64
 	var isFloat bool
 
+	token := lexer.newToken()
+
 	if lexer.char == '0' {
 		lexer.nextRune()
@@ -23,7 +25,7 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
 		number, fragment, isFloat, err = lexer.tokenizeNumber(8)
 	} else {
 		return file.NewError (
-			lexer.file.Location(),
+			lexer.file.Location(1),
 			"unexpected character in number literal",
 			file.ErrorKindError)
 	}
@@ -33,8 +35,6 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
 	if err != nil { return }
 
-	token := Token { }
-
 	if isFloat {
 		floatNumber := float64(number) + fragment

View File

@@ -8,6 +8,8 @@ func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
 	err = lexer.nextRune()
 	if err != nil { return }
 
+	token := lexer.newToken()
+
 	got := ""
 	for {
@@ -38,12 +40,10 @@ func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
 		err = lexer.nextRune()
 		if err != nil { return }
 
-	token := Token { }
-
 	if isRuneLiteral {
 		if len(got) > 1 {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"excess data in rune literal",
 				file.ErrorKindError)
 			return
@@ -99,7 +99,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 		if len(number) < 3 {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"octal escape sequence too short",
 				file.ErrorKindError)
 			return
@@ -133,7 +133,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 		if len(number) < want {
 			err = file.NewError (
-				lexer.file.Location(),
+				lexer.file.Location(1),
 				"hex escape sequence too short ",
 				file.ErrorKindError)
 			return
@@ -143,7 +143,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 		result = rune(parsedNumber)
 	} else {
 		err = file.NewError (
-			lexer.file.Location(),
+			lexer.file.Location(1),
 			"unknown escape character " +
 			string(lexer.char), file.ErrorKindError)
 		return

View File

@@ -78,7 +78,7 @@ func (token Token) Value () (value any) {
 
 // Equals returns whether this token is equal to another token
 func (token Token) Equals (testToken Token) (match bool) {
-	return token == testToken
+	return token.value == testToken.value && token.Is(testToken.kind)
 }
 
 // Location returns the location of the token in its file.
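
The Equals change follows from the rest of the commit: tokens now embed a location, so comparing whole structs with == would make two otherwise-identical tokens from different positions unequal. Comparing kind and value explicitly restores the intended semantics. A minimal within-package sketch (locA and locB are hypothetical names for two distinct Location values, not part of this diff):

	a := Token { kind: TokenKindName, value: "x", location: locA }
	b := Token { kind: TokenKindName, value: "x", location: locB }
	// a == b is now false because the locations differ, but:
	match := a.Equals(b) // true: kind and value both match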