2022-08-11 01:47:51 -06:00
|
|
|
package lexer
|
|
|
|
|
2022-08-11 11:39:49 -06:00
|
|
|
import "strconv"
|
2022-08-29 23:11:10 -06:00
|
|
|
import "git.tebibyte.media/arf/arf/infoerr"
|
2022-08-11 01:47:51 -06:00
|
|
|
|
2022-08-11 11:39:49 -06:00
|
|
|
// tokenizeString tokenizes a string or rune literal.
|
2022-10-11 22:00:34 -06:00
|
|
|
func (lexer *lexingOperation) tokenizeString () (err error) {
|
2022-08-11 01:47:51 -06:00
|
|
|
err = lexer.nextRune()
|
|
|
|
if err != nil { return }
|
|
|
|
|
2022-08-12 13:34:07 -06:00
|
|
|
token := lexer.newToken()
|
2022-08-18 09:25:40 -06:00
|
|
|
got := ""
|
|
|
|
tokenWidth := 2
|
2022-08-11 01:47:51 -06:00
|
|
|
|
|
|
|
for {
|
2022-08-11 03:05:55 -06:00
|
|
|
if lexer.char == '\\' {
|
|
|
|
err = lexer.nextRune()
|
2022-08-18 09:25:40 -06:00
|
|
|
tokenWidth ++
|
2022-08-11 03:05:55 -06:00
|
|
|
if err != nil { return }
|
2022-08-11 11:39:49 -06:00
|
|
|
|
2022-08-18 09:25:40 -06:00
|
|
|
var actual rune
|
|
|
|
var amountRead int
|
|
|
|
actual, amountRead, err = lexer.getEscapeSequence()
|
|
|
|
tokenWidth += amountRead
|
2022-08-11 11:39:49 -06:00
|
|
|
if err != nil { return }
|
|
|
|
|
|
|
|
got += string(actual)
|
2022-08-11 03:05:55 -06:00
|
|
|
} else {
|
|
|
|
got += string(lexer.char)
|
2022-08-11 11:39:49 -06:00
|
|
|
|
|
|
|
err = lexer.nextRune()
|
2022-08-18 09:25:40 -06:00
|
|
|
tokenWidth ++
|
2022-08-11 11:39:49 -06:00
|
|
|
if err != nil { return }
|
2022-08-11 03:05:55 -06:00
|
|
|
}
|
2022-08-11 01:47:51 -06:00
|
|
|
|
2022-10-04 14:35:00 -06:00
|
|
|
if lexer.char == '\'' { break }
|
2022-08-11 01:47:51 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
err = lexer.nextRune()
|
|
|
|
if err != nil { return }
|
|
|
|
|
2022-10-04 14:35:00 -06:00
|
|
|
token.kind = TokenKindString
|
|
|
|
token.value = got
|
2022-08-11 01:47:51 -06:00
|
|
|
|
2022-08-18 09:25:40 -06:00
|
|
|
token.location.SetWidth(tokenWidth)
|
2022-08-11 01:47:51 -06:00
|
|
|
lexer.addToken(token)
|
|
|
|
return
|
|
|
|
}
|
2022-08-11 11:39:49 -06:00
|
|
|
|
|
|
|
// escapeSequenceMap translates the character that follows a backslash in a
// simple, single-character escape sequence into the rune that the sequence
// stands for.
var escapeSequenceMap = map[rune]rune{
	'a':  '\a', // alert (bell)
	'b':  '\b', // backspace
	'f':  '\f', // form feed
	'n':  '\n', // line feed
	'r':  '\r', // carriage return
	't':  '\t', // horizontal tab
	'v':  '\v', // vertical tab
	'\'': '\'', // single quote
	'\\': '\\', // backslash
}
|
|
|
|
|
|
|
|
// getEscapeSequence reads an escape sequence in a string or rune literal.
|
2022-10-11 22:00:34 -06:00
|
|
|
func (lexer *lexingOperation) getEscapeSequence () (
|
2022-08-18 09:25:40 -06:00
|
|
|
result rune,
|
|
|
|
amountRead int,
|
|
|
|
err error,
|
|
|
|
) {
|
2022-08-11 11:39:49 -06:00
|
|
|
result, exists := escapeSequenceMap[lexer.char]
|
|
|
|
if exists {
|
|
|
|
err = lexer.nextRune()
|
2022-08-18 09:25:40 -06:00
|
|
|
amountRead ++
|
2022-08-11 11:39:49 -06:00
|
|
|
return
|
|
|
|
} else if lexer.char >= '0' && lexer.char <= '7' {
|
|
|
|
// octal escape sequence
|
|
|
|
number := string(lexer.char)
|
|
|
|
|
|
|
|
err = lexer.nextRune()
|
2022-08-18 09:25:40 -06:00
|
|
|
amountRead ++
|
2022-08-11 11:39:49 -06:00
|
|
|
if err != nil { return }
|
|
|
|
|
|
|
|
for len(number) < 3 {
|
|
|
|
if lexer.char < '0' || lexer.char > '7' { break }
|
|
|
|
|
|
|
|
number += string(lexer.char)
|
|
|
|
|
|
|
|
err = lexer.nextRune()
|
2022-08-18 09:25:40 -06:00
|
|
|
amountRead ++
|
2022-08-11 11:39:49 -06:00
|
|
|
if err != nil { return }
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(number) < 3 {
|
2022-08-17 22:58:40 -06:00
|
|
|
err = infoerr.NewError (
|
2022-08-12 13:34:07 -06:00
|
|
|
lexer.file.Location(1),
|
2022-08-11 11:39:49 -06:00
|
|
|
"octal escape sequence too short",
|
2022-08-17 22:58:40 -06:00
|
|
|
infoerr.ErrorKindError)
|
2022-08-11 11:39:49 -06:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
parsedNumber, _ := strconv.ParseInt(number, 8, 8)
|
|
|
|
result = rune(parsedNumber)
|
|
|
|
|
|
|
|
} else if lexer.char == 'x' || lexer.char == 'u' || lexer.char == 'U' {
|
|
|
|
// hexidecimal escape sequence
|
|
|
|
want := 2
|
|
|
|
if lexer.char == 'u' { want = 4 }
|
|
|
|
if lexer.char == 'U' { want = 8 }
|
|
|
|
|
|
|
|
number := ""
|
|
|
|
|
|
|
|
err = lexer.nextRune()
|
2022-08-18 09:25:40 -06:00
|
|
|
amountRead ++
|
2022-08-11 11:39:49 -06:00
|
|
|
if err != nil { return }
|
|
|
|
|
|
|
|
for len(number) < want {
|
|
|
|
notLower := lexer.char < 'a' || lexer.char > 'f'
|
|
|
|
notUpper := lexer.char < 'A' || lexer.char > 'F'
|
|
|
|
notNum := lexer.char < '0' || lexer.char > '9'
|
|
|
|
if notLower && notUpper && notNum { break }
|
|
|
|
|
|
|
|
number += string(lexer.char)
|
|
|
|
|
|
|
|
err = lexer.nextRune()
|
2022-08-18 09:25:40 -06:00
|
|
|
amountRead ++
|
2022-08-11 11:39:49 -06:00
|
|
|
if err != nil { return }
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(number) < want {
|
2022-08-17 22:58:40 -06:00
|
|
|
err = infoerr.NewError (
|
2022-08-12 13:34:07 -06:00
|
|
|
lexer.file.Location(1),
|
2022-08-11 11:39:49 -06:00
|
|
|
"hex escape sequence too short ",
|
2022-08-17 22:58:40 -06:00
|
|
|
infoerr.ErrorKindError)
|
2022-08-11 11:39:49 -06:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
parsedNumber, _ := strconv.ParseInt(number, 16, want * 4)
|
|
|
|
result = rune(parsedNumber)
|
|
|
|
} else {
|
2022-08-17 22:58:40 -06:00
|
|
|
err = infoerr.NewError (
|
2022-08-12 13:34:07 -06:00
|
|
|
lexer.file.Location(1),
|
2022-08-11 11:39:49 -06:00
|
|
|
"unknown escape character " +
|
2022-08-17 22:58:40 -06:00
|
|
|
string(lexer.char), infoerr.ErrorKindError)
|
2022-08-11 11:39:49 -06:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|