Lexer now uses Go's strconv number parsing functions
This improves accuracy when parsing floats, at the cost of supporting only decimal floating-point literals. The lexer now passes all tests.
This commit is contained in:
parent
ac85c2c725
commit
dd8bbf8b13
@ -1,9 +1,12 @@
|
|||||||
package lexer
|
package lexer
|
||||||
|
|
||||||
|
import "strconv"
|
||||||
|
import "git.tebibyte.media/sashakoshka/arf/file"
|
||||||
|
|
||||||
// tokenizeNumberBeginning lexes a token that starts with a number.
|
// tokenizeNumberBeginning lexes a token that starts with a number.
|
||||||
func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
|
func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
|
||||||
var number uint64
|
var intNumber uint64
|
||||||
var fragment float64
|
var floatNumber float64
|
||||||
var isFloat bool
|
var isFloat bool
|
||||||
|
|
||||||
token := lexer.newToken()
|
token := lexer.newToken()
|
||||||
@ -13,24 +16,22 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
|
|||||||
|
|
||||||
if lexer.char == 'x' {
|
if lexer.char == 'x' {
|
||||||
lexer.nextRune()
|
lexer.nextRune()
|
||||||
number, fragment, isFloat, err = lexer.tokenizeNumber(16)
|
intNumber, floatNumber, isFloat, err = lexer.tokenizeNumber(16)
|
||||||
} else if lexer.char == 'b' {
|
} else if lexer.char == 'b' {
|
||||||
lexer.nextRune()
|
lexer.nextRune()
|
||||||
number, fragment, isFloat, err = lexer.tokenizeNumber(2)
|
intNumber, floatNumber, isFloat, err = lexer.tokenizeNumber(2)
|
||||||
} else if lexer.char == '.' {
|
} else if lexer.char == '.' {
|
||||||
number, fragment, isFloat, err = lexer.tokenizeNumber(10)
|
intNumber, floatNumber, isFloat, err = lexer.tokenizeNumber(10)
|
||||||
} else if lexer.char >= '0' && lexer.char <= '9' {
|
} else if lexer.char >= '0' && lexer.char <= '9' {
|
||||||
number, fragment, isFloat, err = lexer.tokenizeNumber(8)
|
intNumber, floatNumber, isFloat, err = lexer.tokenizeNumber(8)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
number, fragment, isFloat, err = lexer.tokenizeNumber(10)
|
intNumber, floatNumber, isFloat, err = lexer.tokenizeNumber(10)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil { return }
|
if err != nil { return }
|
||||||
|
|
||||||
if isFloat {
|
if isFloat {
|
||||||
floatNumber := float64(number) + fragment
|
|
||||||
|
|
||||||
token.kind = TokenKindFloat
|
token.kind = TokenKindFloat
|
||||||
if negative {
|
if negative {
|
||||||
token.value = floatNumber * -1
|
token.value = floatNumber * -1
|
||||||
@ -40,10 +41,10 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
|
|||||||
} else {
|
} else {
|
||||||
if negative {
|
if negative {
|
||||||
token.kind = TokenKindInt
|
token.kind = TokenKindInt
|
||||||
token.value = int64(number) * -1
|
token.value = int64(intNumber) * -1
|
||||||
} else {
|
} else {
|
||||||
token.kind = TokenKindUInt
|
token.kind = TokenKindUInt
|
||||||
token.value = uint64(number)
|
token.value = uint64(intNumber)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -51,24 +52,24 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// runeToDigit converts a rune from 0-F to a corresponding digit, with a maximum
|
// runeIsDigit checks to see if the rune is a valid digit within the given
|
||||||
// radix. If the character is invalid, or the digit is too big, it will return
|
// radix, up to 16. A '.' rune will also be treated as valid.
|
||||||
// false for worked.
|
func runeIsDigit (char rune, radix uint64) (isDigit bool) {
|
||||||
func runeToDigit (char rune, radix uint64) (digit uint64, worked bool) {
|
isDigit = true
|
||||||
worked = true
|
|
||||||
|
|
||||||
|
var digit uint64
|
||||||
if char >= '0' && char <= '9' {
|
if char >= '0' && char <= '9' {
|
||||||
digit = uint64(char - '0')
|
digit = uint64(char - '0')
|
||||||
} else if char >= 'A' && char <= 'F' {
|
} else if char >= 'A' && char <= 'F' {
|
||||||
digit = uint64(char - 'A' + 10)
|
digit = uint64(char - 'A' + 10)
|
||||||
} else if char >= 'a' && char <= 'f' {
|
} else if char >= 'a' && char <= 'f' {
|
||||||
digit = uint64(char - 'a' + 10)
|
digit = uint64(char - 'a' + 10)
|
||||||
} else {
|
} else if char != '.' {
|
||||||
worked = false
|
isDigit = false
|
||||||
}
|
}
|
||||||
|
|
||||||
if digit >= radix {
|
if digit >= radix {
|
||||||
worked = false
|
isDigit = false
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
@ -78,41 +79,42 @@ func runeToDigit (char rune, radix uint64) (digit uint64, worked bool) {
|
|||||||
func (lexer *LexingOperation) tokenizeNumber (
|
func (lexer *LexingOperation) tokenizeNumber (
|
||||||
radix uint64,
|
radix uint64,
|
||||||
) (
|
) (
|
||||||
number uint64,
|
intNumber uint64,
|
||||||
fragment float64,
|
floatNumber float64,
|
||||||
isFloat bool,
|
isFloat bool,
|
||||||
err error,
|
err error,
|
||||||
) {
|
) {
|
||||||
|
got := ""
|
||||||
for {
|
for {
|
||||||
digit, worked := runeToDigit(lexer.char, radix)
|
if !runeIsDigit(lexer.char, radix) { break }
|
||||||
if !worked { break }
|
|
||||||
|
|
||||||
number *= radix
|
|
||||||
number += digit
|
|
||||||
|
|
||||||
err = lexer.nextRune()
|
|
||||||
if err != nil { return }
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: increase accuracy of this so that TestTokenizeNumbers is
|
|
||||||
// passed.
|
|
||||||
if lexer.char == '.' {
|
if lexer.char == '.' {
|
||||||
|
if radix != 10 {
|
||||||
|
err = file.NewError (
|
||||||
|
lexer.file.Location(1),
|
||||||
|
"floats must have radix of 10",
|
||||||
|
file.ErrorKindError)
|
||||||
|
return
|
||||||
|
}
|
||||||
isFloat = true
|
isFloat = true
|
||||||
err = lexer.nextRune()
|
}
|
||||||
if err != nil { return }
|
|
||||||
|
|
||||||
coef := 1 / float64(radix)
|
|
||||||
for {
|
|
||||||
digit, worked := runeToDigit(lexer.char, radix)
|
|
||||||
if !worked { break }
|
|
||||||
|
|
||||||
fragment += float64(digit) * coef
|
|
||||||
|
|
||||||
coef /= float64(radix)
|
|
||||||
|
|
||||||
|
got += string(lexer.char)
|
||||||
err = lexer.nextRune()
|
err = lexer.nextRune()
|
||||||
if err != nil { return }
|
if err != nil { return }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if isFloat {
|
||||||
|
floatNumber, err = strconv.ParseFloat(got, 64)
|
||||||
|
} else {
|
||||||
|
intNumber, err = strconv.ParseUint(got, int(radix), 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
err = file.NewError (
|
||||||
|
lexer.file.Location(1),
|
||||||
|
"could not parse number: " + err.Error(),
|
||||||
|
file.ErrorKindError)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
Reference in New Issue
Block a user