From 758b85e735aa3f46ee3b910493bfeacc00fd20a7 Mon Sep 17 00:00:00 2001
From: Sasha Koshka
Date: Thu, 11 Aug 2022 02:47:51 -0500
Subject: [PATCH] Added naive string and char literal parsing

---
 lexer/lexer.go      |  7 ++-----
 lexer/lexer_test.go |  7 +++----
 lexer/numbers.go    |  3 +++
 lexer/text.go       | 47 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 55 insertions(+), 9 deletions(-)
 create mode 100644 lexer/text.go

diff --git a/lexer/lexer.go b/lexer/lexer.go
index 57cbbb7..c17a549 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -40,7 +40,6 @@ func (lexer *LexingOperation) tokenize () (err error) {
 		number := lexer.char >= '0' && lexer.char <= '9'
 
 		if number {
-			// TODO: tokenize number begin\
 			err = lexer.tokenizeNumberBeginning(false)
 			if err != nil { return }
 		} else if lowercase || uppercase {
@@ -126,11 +125,9 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 		})
 		err = lexer.nextRune()
 	case '"':
-		// TODO: tokenize string literal
-		err = lexer.nextRune()
+		err = lexer.tokenizeString(false)
 	case '\'':
-		// TODO: tokenize rune literal
-		err = lexer.nextRune()
+		err = lexer.tokenizeString(true)
 	case ':':
 		lexer.addToken (Token {
 			kind: TokenKindColon,
diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go
index cd9cb5b..78d4f91 100644
--- a/lexer/lexer_test.go
+++ b/lexer/lexer_test.go
@@ -19,10 +19,9 @@ func TestTokenizeAll (test *testing.T) {
 		test.Log("got token:", token.Describe())
 	}
 
-	test.Log("resulting error:")
-	test.Log(err.Error())
-	if err == nil {
-		test.Log("Tokenize() should have returned an error")
+	if err != nil {
+		test.Log("returned error:")
+		test.Log(err.Error())
 		test.Fail()
 		return
 	}
diff --git a/lexer/numbers.go b/lexer/numbers.go
index 2bfec05..a08074b 100644
--- a/lexer/numbers.go
+++ b/lexer/numbers.go
@@ -58,6 +58,9 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
 	return
 }
 
+// runeToDigit converts a rune from 0-F to a corresponding digit, with a maximum
+// radix. If the character is invalid, or the digit is too big, it will return
+// false for worked.
 func runeToDigit (char rune, radix uint64) (digit uint64, worked bool) {
 	worked = true
 
diff --git a/lexer/text.go b/lexer/text.go
new file mode 100644
index 0000000..02fdc61
--- /dev/null
+++ b/lexer/text.go
@@ -0,0 +1,47 @@
+package lexer
+
+import "github.com/sashakoshka/arf/file"
+
+func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
+	err = lexer.nextRune()
+	if err != nil { return }
+
+	got := ""
+
+	for {
+		got += string(lexer.char)
+
+		err = lexer.nextRune()
+		if err != nil { return }
+
+		if isRuneLiteral {
+			if lexer.char == '\'' { break }
+		} else {
+			if lexer.char == '"' { break }
+		}
+	}
+
+	err = lexer.nextRune()
+	if err != nil { return }
+
+	token := Token { }
+
+	if isRuneLiteral {
+		if len(got) > 1 {
+			err = file.NewError (
+				lexer.file.Location(), len(got) - 1,
+				"excess data in rune literal",
+				file.ErrorKindError)
+			return
+		}
+
+		token.kind = TokenKindRune
+		token.value = rune([]rune(got)[0])
+	} else {
+		token.kind = TokenKindString
+		token.value = got
+	}
+
+	lexer.addToken(token)
+	return
+}
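
Note (outside the patch): the new tokenizeString copies characters verbatim
until it sees the closing quote, so backslash escape sequences such as \n or
\' are not decoded yet, and a literal containing its own terminating quote
cannot be written. That is presumably why the commit message calls the parsing
naive. Below is a minimal sketch of one possible follow-up; the helper name
escapeSequence and the exact set of supported escapes are assumptions for
illustration, not part of this patch or the arf codebase.

package lexer

// escapeSequence translates the character that follows a backslash into the
// rune it stands for. It returns false for ok when the character does not
// begin a recognized escape. (Hypothetical helper; the escape set shown here
// is an assumption.)
func escapeSequence (char rune) (result rune, ok bool) {
	ok = true
	switch char {
	case 'n':  result = '\n'
	case 't':  result = '\t'
	case 'r':  result = '\r'
	case '0':  result = 0
	case '\\': result = '\\'
	case '\'': result = '\''
	case '"':  result = '"'
	default:   ok = false
	}
	return
}

Inside the tokenizeString loop, a '\\' character would then trigger one extra
lexer.nextRune() call, and the translated rune would be appended to got in
place of the backslash itself.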