This repository has been archived on 2024-02-27. You can view files and clone it, but cannot push or open issues or pull requests.
arf/lexer/text.go
2022-08-30 01:11:10 -04:00

169 lines
4.3 KiB
Go

package lexer
import "strconv"
import "git.tebibyte.media/arf/arf/infoerr"
// tokenizeString tokenizes a string or rune literal and adds the resulting
// token to the lexer. It is presumably called with the lexer positioned on the
// opening quote (TODO confirm against the caller): the first thing it does is
// advance by one rune, and the token width starts at 2.
//
// isRuneLiteral selects the closing delimiter: a single quote when true, a
// double quote otherwise. Backslash escape sequences are decoded through
// getEscapeSequence. A string literal may be empty; a rune literal must decode
// to exactly one rune, otherwise an error is returned and no token is added.
//
// Any error from advancing the lexer or from decoding an escape sequence is
// returned as-is.
func (lexer *LexingOperation) tokenizeString(isRuneLiteral bool) (err error) {
	// Step past the current rune (presumably the opening quote).
	err = lexer.nextRune()
	if err != nil {
		return err
	}

	token := lexer.newToken()
	got := ""
	// Starts at 2, presumably to account for the two delimiting quotes; it is
	// then incremented once for every rune consumed from the literal's body.
	tokenWidth := 2

	terminator := '"'
	if isRuneLiteral {
		terminator = '\''
	}

	beginning := lexer.file.Location(1)

	// The terminator is tested before anything is consumed, so that an empty
	// literal ends immediately instead of swallowing its own closing quote.
	for lexer.char != terminator {
		if lexer.char == '\\' {
			// Step past the backslash, then decode what follows it.
			err = lexer.nextRune()
			tokenWidth++
			if err != nil {
				return err
			}

			var actual rune
			var amountRead int
			actual, amountRead, err = lexer.getEscapeSequence()
			tokenWidth += amountRead
			if err != nil {
				return err
			}
			got += string(actual)
		} else {
			got += string(lexer.char)
			err = lexer.nextRune()
			tokenWidth++
			if err != nil {
				return err
			}
		}
	}

	// Step past the closing quote.
	err = lexer.nextRune()
	if err != nil {
		return err
	}

	// Measure the decoded content in runes, not bytes: a single multi-byte
	// character is still one rune.
	runes := []rune(got)
	beginning.SetWidth(len(runes))

	if isRuneLiteral {
		if len(runes) == 0 {
			return infoerr.NewError(
				beginning,
				"empty rune literal",
				infoerr.ErrorKindError)
		}
		if len(runes) > 1 {
			return infoerr.NewError(
				beginning,
				"excess data in rune literal",
				infoerr.ErrorKindError)
		}

		token.kind = TokenKindRune
		token.value = runes[0]
	} else {
		token.kind = TokenKindString
		token.value = got
	}

	token.location.SetWidth(tokenWidth)
	lexer.addToken(token)
	return nil
}
// escapeSequenceMap translates the character that follows a backslash in a
// simple (single-character) escape sequence into the rune that the sequence
// stands for.
var escapeSequenceMap = map[rune]rune{
	'a':  '\a', // alert / bell
	'b':  '\b', // backspace
	'f':  '\f', // form feed
	'n':  '\n', // line feed
	'r':  '\r', // carriage return
	't':  '\t', // horizontal tab
	'v':  '\v', // vertical tab
	'\'': '\'', // literal single quote
	'"':  '"',  // literal double quote
	'\\': '\\', // literal backslash
}
// getEscapeSequence reads an escape sequence in a string or rune literal. It
// starts decoding at the lexer's current character, so the caller must already
// have stepped past the backslash. Three forms are recognized:
//
//   - a single character listed in escapeSequenceMap;
//   - exactly three octal digits, whose value must fit in a byte (at most 377);
//   - 'x', 'u' or 'U' followed by exactly 2, 4 or 8 hexadecimal digits.
//
// It returns the decoded rune and amountRead, the number of times the lexer
// was advanced while reading the sequence. amountRead is meaningful even when
// an error is returned. An error is returned if advancing the lexer fails, if
// a numeric sequence has too few digits, if an octal sequence is out of range,
// or if the escape character is not recognized.
//
// NOTE(review): hexadecimal values are not checked against the valid Unicode
// code point range here.
func (lexer *LexingOperation) getEscapeSequence() (
	result rune,
	amountRead int,
	err error,
) {
	if mapped, exists := escapeSequenceMap[lexer.char]; exists {
		err = lexer.nextRune()
		amountRead++
		return mapped, amountRead, err
	}

	switch {
	case lexer.char >= '0' && lexer.char <= '7':
		// Octal escape sequence: collect up to three octal digits.
		number := ""
		for len(number) < 3 && lexer.char >= '0' && lexer.char <= '7' {
			number += string(lexer.char)
			err = lexer.nextRune()
			amountRead++
			if err != nil {
				return 0, amountRead, err
			}
		}

		if len(number) < 3 {
			err = infoerr.NewError(
				lexer.file.Location(1),
				"octal escape sequence too short",
				infoerr.ErrorKindError)
			return 0, amountRead, err
		}

		// Parse as an unsigned 8-bit value. A signed 8-bit parse would
		// silently clamp everything above 177 (octal) to 127. Three octal
		// digits can spell values up to 777, which do not fit in a byte,
		// so the range error is reported rather than ignored.
		parsed, parseErr := strconv.ParseUint(number, 8, 8)
		if parseErr != nil {
			err = infoerr.NewError(
				lexer.file.Location(1),
				"octal escape sequence out of range",
				infoerr.ErrorKindError)
			return 0, amountRead, err
		}
		return rune(parsed), amountRead, nil

	case lexer.char == 'x' || lexer.char == 'u' || lexer.char == 'U':
		// Hexadecimal escape sequence: the introducer decides how many
		// digits must follow.
		want := 2
		switch lexer.char {
		case 'u':
			want = 4
		case 'U':
			want = 8
		}

		// Step past the introducer.
		err = lexer.nextRune()
		amountRead++
		if err != nil {
			return 0, amountRead, err
		}

		number := ""
		for len(number) < want {
			isHexDigit := (lexer.char >= '0' && lexer.char <= '9') ||
				(lexer.char >= 'a' && lexer.char <= 'f') ||
				(lexer.char >= 'A' && lexer.char <= 'F')
			if !isHexDigit {
				break
			}

			number += string(lexer.char)
			err = lexer.nextRune()
			amountRead++
			if err != nil {
				return 0, amountRead, err
			}
		}

		if len(number) < want {
			err = infoerr.NewError(
				lexer.file.Location(1),
				"hex escape sequence too short ",
				infoerr.ErrorKindError)
			return 0, amountRead, err
		}

		// Parse as unsigned: a signed parse at this width would silently
		// clamp any value with the top bit set (e.g. FF to 7F). The error
		// is ignored because it cannot occur: number holds exactly want
		// hexadecimal digits, which always fit in want*4 bits.
		parsed, _ := strconv.ParseUint(number, 16, want*4)
		return rune(parsed), amountRead, nil

	default:
		err = infoerr.NewError(
			lexer.file.Location(1),
			"unknown escape character "+
				string(lexer.char), infoerr.ErrorKindError)
		return 0, amountRead, err
	}
}