From 314c045e65612e39e10ec4a92f5f2f72b519e153 Mon Sep 17 00:00:00 2001
From: Sasha Koshka
Date: Wed, 10 Aug 2022 13:59:09 -0400
Subject: [PATCH 01/20] Lexer no longer hangs when encountering letters

---
 lexer/lexer.go | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/lexer/lexer.go b/lexer/lexer.go
index 040b405..00959b0 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -1,6 +1,7 @@
 package lexer
 
 import "io"
+import "fmt"
 import "github.com/sashakoshka/arf/file"
 
 // LexingOperation holds information about an ongoing lexing operation.
@@ -37,20 +38,24 @@ func (lexer *LexingOperation) tokenize () (err error) {
 
 		if number {
 			// TODO: tokenize number begin
+			lexer.nextRune()
 		} else if lowercase || uppercase {
 			// TODO: tokenize alpha begin
+			lexer.nextRune()
 		} else {
 			err = lexer.tokenizeSymbolBeginning()
-			if err != nil { return err }
+			if err != nil { return }
 		}
 
-		// TODO: skip whitespace
+		err = lexer.skipSpaces()
+		if err != nil { return }
 	}
 
 	return
}
 
 func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
+	fmt.Println(string(lexer.char))
 	switch lexer.char {
 	case '#':
 		// comment
@@ -182,10 +187,21 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	return
 }
 
+// addToken adds a new token to the lexer's token slice.
 func (lexer *LexingOperation) addToken (token Token) {
 	lexer.tokens = append(lexer.tokens, token)
 }
 
+// skipSpaces skips all space characters (not tabs or newlines)
+func (lexer *LexingOperation) skipSpaces () (err error) {
+	for lexer.char == ' ' {
+		err = lexer.nextRune()
+		if err != nil { return }
+	}
+
+	return
+}
+
 // nextRune advances the lexer to the next rune in the file.
 func (lexer *LexingOperation) nextRune () (err error) {
 	lexer.char, _, err = lexer.file.ReadRune()

From d43a2540cf3367df75f3e293090ee78b8a5671d3 Mon Sep 17 00:00:00 2001
From: Sasha Koshka
Date: Wed, 10 Aug 2022 14:08:04 -0400
Subject: [PATCH 02/20] Fixed error formatting column indicator

---
 file/error.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/file/error.go b/file/error.go
index e7f72e4..e6b4d80 100644
--- a/file/error.go
+++ b/file/error.go
@@ -58,7 +58,7 @@ func (err Error) Error () (formattedMessage string) {
 
 	// print an arrow with a tail spanning the width of the mistake
 	columnCountdown := err.Location.column
-	for columnCountdown > 0 {
+	for columnCountdown > 1 {
 		formattedMessage += " "
 		columnCountdown --
 	}
@@ -66,9 +66,9 @@ func (err Error) Error () (formattedMessage string) {
 			// TODO: for tabs, print out 8 of these instead.
 			formattedMessage += "-"
 		}
-		formattedMessage += "-\n"
+		formattedMessage += "^\n"
 	}
 
-	formattedMessage += err.message + "-\n"
+	formattedMessage += err.message + "\n"
 
 	return
 }

From 9c1baf8216d451b84f27b8b6505cdd6e9efa0fb1 Mon Sep 17 00:00:00 2001
From: Sasha Koshka
Date: Wed, 10 Aug 2022 14:18:28 -0400
Subject: [PATCH 03/20] Lexer can now tokenize minus, decrement, return direction, and separator

---
 file/error.go  |  1 +
 lexer/lexer.go | 26 +++++++++++++++++++++-----
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/file/error.go b/file/error.go
index e6b4d80..6a0d502 100644
--- a/file/error.go
+++ b/file/error.go
@@ -59,6 +59,7 @@ func (err Error) Error () (formattedMessage string) {
 	// print an arrow with a tail spanning the width of the mistake
 	columnCountdown := err.Location.column
 	for columnCountdown > 1 {
+		// TODO: for tabs, print out a tab instead.
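+		// (a tab keeps the arrow tail aligned under tab-indented source)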
formattedMessage += " " columnCountdown -- } diff --git a/lexer/lexer.go b/lexer/lexer.go index 00959b0..60795ef 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -126,13 +126,10 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { }) lexer.nextRune() case '+': - lexer.addToken (Token { - kind: TokenKindPlus, - }) + // TODO: tokenize plus begin lexer.nextRune() case '-': - // TODO: tokenize dash begin - lexer.nextRune() + lexer.tokenizeDashBeginning() case '*': lexer.addToken (Token { kind: TokenKindAsterisk, @@ -187,6 +184,25 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { return } +func (lexer *LexingOperation) tokenizeDashBeginning () (err error) { + token := Token { kind: TokenKindMinus } + lexer.nextRune() + + if lexer.char == '-' { + token.kind = TokenKindDecrement + lexer.nextRune() + } else if lexer.char == '>' { + token.kind = TokenKindReturnDirection + lexer.nextRune() + } + + if lexer.char == '-' { + token.kind = TokenKindSeparator + lexer.nextRune() + } + return +} + // addToken adds a new token to the lexer's token slice. func (lexer *LexingOperation) addToken (token Token) { lexer.tokens = append(lexer.tokens, token) From 2220b95cd277f54f2d08924ca4ea428df2bc608e Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Wed, 10 Aug 2022 14:55:26 -0400 Subject: [PATCH 04/20] Name and permission tokens are now supported --- lexer/lexer.go | 90 ++++++++++++++++++++++++++++++++------------- types/permission.go | 22 ++++++++++- 2 files changed, 84 insertions(+), 28 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 60795ef..a1e43c0 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -3,6 +3,7 @@ package lexer import "io" import "fmt" import "github.com/sashakoshka/arf/file" +import "github.com/sashakoshka/arf/types" // LexingOperation holds information about an ongoing lexing operataion. 
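+// It tracks the source file, the rune currently under the cursor, and the
+// tokens produced so far.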
type LexingOperation struct { @@ -32,6 +33,8 @@ func (lexer *LexingOperation) tokenize () (err error) { if err != nil { return } for { + fmt.Println(string(lexer.char)) + lowercase := lexer.char >= 'a' && lexer.char <= 'z' uppercase := lexer.char >= 'A' && lexer.char <= 'Z' number := lexer.char >= '0' && lexer.char <= '9' @@ -41,7 +44,8 @@ func (lexer *LexingOperation) tokenize () (err error) { lexer.nextRune() } else if lowercase || uppercase { // TODO: tokenize alpha begin - lexer.nextRune() + err = lexer.tokenizeAlphaBeginning() + if err != nil { return } } else { err = lexer.tokenizeSymbolBeginning() if err != nil { return } @@ -54,13 +58,44 @@ func (lexer *LexingOperation) tokenize () (err error) { return } +func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) { + got := "" + + for { + lowercase := lexer.char >= 'a' && lexer.char <= 'z' + uppercase := lexer.char >= 'A' && lexer.char <= 'Z' + number := lexer.char >= '0' && lexer.char <= '9' + if !lowercase && !uppercase && !number { break } + + got += string(lexer.char) + + lexer.nextRune() + } + + token := Token { kind: TokenKindName, value: got } + + if len(got) == 2 { + firstValid := got[0] == 'n' || got[0] == 'r' || got[0] == 'w' + secondValid := got[1] == 'n' || got[1] == 'r' || got[1] == 'w' + + if firstValid && secondValid { + token.kind = TokenKindPermission + token.value = types.PermissionFrom(got) + } + } + + lexer.addToken(token) + + return +} + func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { - fmt.Println(string(lexer.char)) switch lexer.char { case '#': // comment for lexer.char != '\n' { - lexer.nextRune() + err = lexer.nextRune() + if err != nil { return } } case '\t': // indent level @@ -80,7 +115,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { lexer.addToken (Token { kind: TokenKindIndent, }) - lexer.nextRune() + err = lexer.nextRune() + if err != nil { return } } case '\n': // line break @@ -88,94 +124,94 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { lexer.addToken (Token { kind: TokenKindNewline, }) - lexer.nextRune() + err = lexer.nextRune() case '"': // TODO: tokenize string literal - lexer.nextRune() + err = lexer.nextRune() case '\'': // TODO: tokenize rune literal - lexer.nextRune() + err = lexer.nextRune() case ':': lexer.addToken (Token { kind: TokenKindColon, }) - lexer.nextRune() + err = lexer.nextRune() case '.': lexer.addToken (Token { kind: TokenKindDot, }) - lexer.nextRune() + err = lexer.nextRune() case '[': lexer.addToken (Token { kind: TokenKindLBracket, }) - lexer.nextRune() + err = lexer.nextRune() case ']': lexer.addToken (Token { kind: TokenKindRBracket, }) - lexer.nextRune() + err = lexer.nextRune() case '{': lexer.addToken (Token { kind: TokenKindLBrace, }) - lexer.nextRune() + err = lexer.nextRune() case '}': lexer.addToken (Token { kind: TokenKindRBrace, }) - lexer.nextRune() + err = lexer.nextRune() case '+': // TODO: tokenize plus begin - lexer.nextRune() + err = lexer.nextRune() case '-': - lexer.tokenizeDashBeginning() + err = lexer.tokenizeDashBeginning() case '*': lexer.addToken (Token { kind: TokenKindAsterisk, }) - lexer.nextRune() + err = lexer.nextRune() case '/': lexer.addToken (Token { kind: TokenKindSlash, }) - lexer.nextRune() + err = lexer.nextRune() case '@': lexer.addToken (Token { kind: TokenKindAt, }) - lexer.nextRune() + err = lexer.nextRune() case '!': lexer.addToken (Token { kind: TokenKindExclamation, }) - lexer.nextRune() + err = lexer.nextRune() case '%': lexer.addToken (Token 
{ kind: TokenKindPercent, }) - lexer.nextRune() + err = lexer.nextRune() case '~': lexer.addToken (Token { kind: TokenKindTilde, }) - lexer.nextRune() + err = lexer.nextRune() case '<': // TODO: tokenize less than begin - lexer.nextRune() + err = lexer.nextRune() case '>': // TODO: tokenize greater than begin - lexer.nextRune() + err = lexer.nextRune() case '|': // TODO: tokenize bar begin - lexer.nextRune() + err = lexer.nextRune() case '&': // TODO: tokenize and begin - lexer.nextRune() + err = lexer.nextRune() default: err = file.NewError ( lexer.file.Location(), 1, - "unexpected character " + + "unexpected symbol character " + string(lexer.char), file.ErrorKindError) return @@ -200,6 +236,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) { token.kind = TokenKindSeparator lexer.nextRune() } + + lexer.addToken(token) return } diff --git a/types/permission.go b/types/permission.go index ec59771..22b42f0 100644 --- a/types/permission.go +++ b/types/permission.go @@ -3,12 +3,30 @@ package types type Mode int const ( - ModeRead = iota + ModeNone = iota + ModeRead ModeWrite - ModeNone ) type Permission struct { Internal Mode External Mode } + +func ModeFrom (char rune) (mode Mode) { + switch (char) { + case 'r': mode = ModeNone + case 'n': mode = ModeRead + case 'w': mode = ModeWrite + } + + return +} + +func PermissionFrom (data string) (permission Permission) { + if len(data) != 2 { return } + + permission.Internal = ModeFrom(rune(data[0])) + permission.External = ModeFrom(rune(data[1])) + return +} From 040e14d27cfd64356052c2ac171a1c36d839b3b7 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Wed, 10 Aug 2022 15:02:08 -0400 Subject: [PATCH 05/20] Test now prints out list of tokens --- lexer/lexer.go | 4 +-- lexer/lexer_test.go | 6 ++++ lexer/token.go | 84 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 2 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index a1e43c0..1807cfd 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -1,7 +1,7 @@ package lexer import "io" -import "fmt" +// import "fmt" import "github.com/sashakoshka/arf/file" import "github.com/sashakoshka/arf/types" @@ -33,7 +33,7 @@ func (lexer *LexingOperation) tokenize () (err error) { if err != nil { return } for { - fmt.Println(string(lexer.char)) + // fmt.Println(string(lexer.char)) lowercase := lexer.char >= 'a' && lexer.char <= 'z' uppercase := lexer.char >= 'A' && lexer.char <= 'Z' diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 62a5791..cd9cb5b 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -13,6 +13,12 @@ func TestTokenizeAll (test *testing.T) { } tokens, err := Tokenize(file) + + // print all tokens + for _, token := range tokens { + test.Log("got token:", token.Describe()) + } + test.Log("resulting error:") test.Log(err.Error()) if err == nil { diff --git a/lexer/token.go b/lexer/token.go index e803606..0ac8298 100644 --- a/lexer/token.go +++ b/lexer/token.go @@ -1,5 +1,6 @@ package lexer +import "fmt" import "github.com/sashakoshka/arf/file" // TokenKind is an enum represzenting what role a token has. @@ -84,3 +85,86 @@ func (token Token) Equals (testToken Token) (match bool) { func (token Token) Location () (location file.Location) { return token.location } + +// Describe generates a textual description of the token to be used in debug +// logs. 
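+// The description is the token's kind name, with its value appended if set.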
+func (token Token) Describe () (description string) { + switch token.kind { + case TokenKindNewline: + description += "Newline" + case TokenKindIndent: + description += "Indent" + case TokenKindSeparator: + description += "Separator" + case TokenKindPermission: + description += "Permission" + case TokenKindReturnDirection: + description += "ReturnDirection" + case TokenKindInt: + description += "Int" + case TokenKindUInt: + description += "UInt" + case TokenKindFloat: + description += "Float" + case TokenKindString: + description += "String" + case TokenKindRune: + description += "Rune" + case TokenKindName: + description += "Name" + case TokenKindColon: + description += "Colon" + case TokenKindDot: + description += "Dot" + case TokenKindLBracket: + description += "LBracket" + case TokenKindRBracket: + description += "RBracket" + case TokenKindLBrace: + description += "LBrace" + case TokenKindRBrace: + description += "RBrace" + case TokenKindPlus: + description += "Plus" + case TokenKindMinus: + description += "Minus" + case TokenKindIncrement: + description += "Increment" + case TokenKindDecrement: + description += "Decrement" + case TokenKindAsterisk: + description += "Asterisk" + case TokenKindSlash: + description += "Slash" + case TokenKindAt: + description += "At" + case TokenKindExclamation: + description += "Exclamation" + case TokenKindPercent: + description += "Percent" + case TokenKindTilde: + description += "Tilde" + case TokenKindLessThan: + description += "LessThan" + case TokenKindLShift: + description += "LShift" + case TokenKindGreaterThan: + description += "GreaterThan" + case TokenKindRShift: + description += "RShift" + case TokenKindBinaryOr: + description += "BinaryOr" + case TokenKindLogicalOr: + description += "LogicalOr" + case TokenKindBinaryAnd: + description += "BinaryAnd" + case TokenKindLogicalAnd: + description += "LogicalAnd" + } + + if token.value != nil { + description += fmt.Sprint(": ", token.value) + } + + return +} From 77f3c590c6471e9cec59f993b7fba0d495055d3a Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 00:14:41 -0500 Subject: [PATCH 06/20] Added lexing hexidecimal numbers --- lexer/lexer.go | 72 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 1807cfd..9f01038 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -41,12 +41,12 @@ func (lexer *LexingOperation) tokenize () (err error) { if number { // TODO: tokenize number begin - lexer.nextRune() + err = lexer.tokenizeNumberBeginning(false) + if err != nil { return } } else if lowercase || uppercase { - // TODO: tokenize alpha begin err = lexer.tokenizeAlphaBeginning() if err != nil { return } - } else { + } else if lexer.char >= '0' && lexer.char <= '9' { err = lexer.tokenizeSymbolBeginning() if err != nil { return } } @@ -58,6 +58,72 @@ func (lexer *LexingOperation) tokenize () (err error) { return } +func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) { + if lexer.char == '0' { + lexer.nextRune() + + if lexer.char == 'x' { + lexer.nextRune() + err = lexer.tokenizeHexidecimalNumber(negative) + if err != nil { return } + } else if lexer.char == 'b' { + lexer.nextRune() + err = lexer.tokenizeBinaryNumber(negative) + if err != nil { return } + } else if lexer.char == '.' 
{ + err = lexer.tokenizeDecimalNumber(negative) + if err != nil { return } + } else if lexer.char >= '0' && lexer.char <= '9' { + lexer.tokenizeOctalNumber(negative) + } else { + return file.NewError ( + lexer.file.Location(), 1, + "unexpected character in number literal", + file.ErrorKindError) + } + } else { + lexer.tokenizeDecimalNumber(negative) + } + + return +} + +// tokenizeDecimalNumber Reads and tokenizes a hexidecimal number. +func (lexer *LexingOperation) tokenizeHexidecimalNumber (negative bool) (err error) { + var number uint64 + + for { + if lexer.char >= '0' && lexer.char <= '9' { + number *= 16 + number += uint64(lexer.char - '0') + } else if lexer.char >= 'A' && lexer.char <= 'F' { + number *= 16 + number += uint64(lexer.char - 'A' + 9) + } else if lexer.char >= 'a' && lexer.char <= 'f' { + number *= 16 + number += uint64(lexer.char - 'a' + 9) + } else { + break + } + + err = lexer.nextRune() + if err != nil { return } + } + + token := Token { } + + if negative { + token.kind = TokenKindInt + token.value = int64(number) * -1 + } else { + token.kind = TokenKindUInt + token.value = uint64(number) + } + + lexer.addToken(token) + return +} + func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) { got := "" From 8f382036845a81fbc287f65fa602ad56f001af99 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 00:17:22 -0500 Subject: [PATCH 07/20] Added support for binary literals --- lexer/lexer.go | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 9f01038..4da1a92 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -58,6 +58,7 @@ func (lexer *LexingOperation) tokenize () (err error) { return } +// tokenizeSymbolBeginning lexes a token that starts with a number. func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) { if lexer.char == '0' { lexer.nextRune() @@ -88,7 +89,7 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error return } -// tokenizeDecimalNumber Reads and tokenizes a hexidecimal number. +// tokenizeHexidecimalNumber Reads and tokenizes a hexidecimal number. func (lexer *LexingOperation) tokenizeHexidecimalNumber (negative bool) (err error) { var number uint64 @@ -124,6 +125,38 @@ func (lexer *LexingOperation) tokenizeHexidecimalNumber (negative bool) (err err return } +// tokenizeBinaryNumber Reads and tokenizes a binary number. 
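+// It accepts only the digits 0 and 1, stopping at the first other rune.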
+func (lexer *LexingOperation) tokenizeBinaryNumber (negative bool) (err error) { + var number uint64 + + for { + if lexer.char == '0' { + number *= 2 + } else if lexer.char == '1' { + number *= 2 + number += 1 + } else { + break + } + + err = lexer.nextRune() + if err != nil { return } + } + + token := Token { } + + if negative { + token.kind = TokenKindInt + token.value = int64(number) * -1 + } else { + token.kind = TokenKindUInt + token.value = uint64(number) + } + + lexer.addToken(token) + return +} + func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) { got := "" From 3d610df44e568af92a0ab4ce4fb144c543d6a151 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 00:21:04 -0500 Subject: [PATCH 08/20] Added support for decimal and octal numbers --- lexer/lexer.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/lexer/lexer.go b/lexer/lexer.go index 4da1a92..c00ad59 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -157,6 +157,58 @@ func (lexer *LexingOperation) tokenizeBinaryNumber (negative bool) (err error) { return } +// tokenizeDecimalNumber Reads and tokenizes a decimal number. +func (lexer *LexingOperation) tokenizeDecimalNumber (negative bool) (err error) { + var number uint64 + + for lexer.char >= '0' && lexer.char <= '9' { + number *= 10 + number += uint64(lexer.char - '0') + + err = lexer.nextRune() + if err != nil { return } + } + + token := Token { } + + if negative { + token.kind = TokenKindInt + token.value = int64(number) * -1 + } else { + token.kind = TokenKindUInt + token.value = uint64(number) + } + + lexer.addToken(token) + return +} + +// tokenizeOctalNumber Reads and tokenizes an octal number. +func (lexer *LexingOperation) tokenizeOctalNumber (negative bool) (err error) { + var number uint64 + + for lexer.char >= '0' && lexer.char <= '7' { + number *= 8 + number += uint64(lexer.char - '0') + + err = lexer.nextRune() + if err != nil { return } + } + + token := Token { } + + if negative { + token.kind = TokenKindInt + token.value = int64(number) * -1 + } else { + token.kind = TokenKindUInt + token.value = uint64(number) + } + + lexer.addToken(token) + return +} + func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) { got := "" From 7a4522444978836867c00020439e2d98c6a6c773 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 00:57:04 -0500 Subject: [PATCH 09/20] Reduced code du0plicaion with number lexing functions --- lexer/lexer.go | 97 ++++++++++++++------------------------------------ 1 file changed, 27 insertions(+), 70 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index c00ad59..6650f9d 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -1,7 +1,7 @@ package lexer import "io" -// import "fmt" +import "fmt" import "github.com/sashakoshka/arf/file" import "github.com/sashakoshka/arf/types" @@ -33,14 +33,14 @@ func (lexer *LexingOperation) tokenize () (err error) { if err != nil { return } for { - // fmt.Println(string(lexer.char)) + fmt.Println(string(lexer.char)) lowercase := lexer.char >= 'a' && lexer.char <= 'z' uppercase := lexer.char >= 'A' && lexer.char <= 'Z' number := lexer.char >= '0' && lexer.char <= '9' if number { - // TODO: tokenize number begin + // TODO: tokenize number begin\ err = lexer.tokenizeNumberBeginning(false) if err != nil { return } } else if lowercase || uppercase { @@ -60,22 +60,21 @@ func (lexer *LexingOperation) tokenize () (err error) { // tokenizeSymbolBeginning lexes a token that starts with a number. 
func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) { + var number uint64 + if lexer.char == '0' { lexer.nextRune() if lexer.char == 'x' { lexer.nextRune() - err = lexer.tokenizeHexidecimalNumber(negative) - if err != nil { return } + number, err = lexer.tokenizeHexidecimalNumber() } else if lexer.char == 'b' { lexer.nextRune() - err = lexer.tokenizeBinaryNumber(negative) - if err != nil { return } + number, err = lexer.tokenizeBinaryNumber() } else if lexer.char == '.' { - err = lexer.tokenizeDecimalNumber(negative) - if err != nil { return } + number, err = lexer.tokenizeDecimalNumber() } else if lexer.char >= '0' && lexer.char <= '9' { - lexer.tokenizeOctalNumber(negative) + number, err = lexer.tokenizeOctalNumber() } else { return file.NewError ( lexer.file.Location(), 1, @@ -83,16 +82,27 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error file.ErrorKindError) } } else { - lexer.tokenizeDecimalNumber(negative) + number, err = lexer.tokenizeDecimalNumber() } + if err != nil { return } + + token := Token { } + + if negative { + token.kind = TokenKindInt + token.value = int64(number) * -1 + } else { + token.kind = TokenKindUInt + token.value = uint64(number) + } + + lexer.addToken(token) return } // tokenizeHexidecimalNumber Reads and tokenizes a hexidecimal number. -func (lexer *LexingOperation) tokenizeHexidecimalNumber (negative bool) (err error) { - var number uint64 - +func (lexer *LexingOperation) tokenizeHexidecimalNumber () (number uint64, err error) { for { if lexer.char >= '0' && lexer.char <= '9' { number *= 16 @@ -110,25 +120,11 @@ func (lexer *LexingOperation) tokenizeHexidecimalNumber (negative bool) (err err err = lexer.nextRune() if err != nil { return } } - - token := Token { } - - if negative { - token.kind = TokenKindInt - token.value = int64(number) * -1 - } else { - token.kind = TokenKindUInt - token.value = uint64(number) - } - - lexer.addToken(token) return } // tokenizeBinaryNumber Reads and tokenizes a binary number. -func (lexer *LexingOperation) tokenizeBinaryNumber (negative bool) (err error) { - var number uint64 - +func (lexer *LexingOperation) tokenizeBinaryNumber () (number uint64, err error) { for { if lexer.char == '0' { number *= 2 @@ -142,25 +138,11 @@ func (lexer *LexingOperation) tokenizeBinaryNumber (negative bool) (err error) { err = lexer.nextRune() if err != nil { return } } - - token := Token { } - - if negative { - token.kind = TokenKindInt - token.value = int64(number) * -1 - } else { - token.kind = TokenKindUInt - token.value = uint64(number) - } - - lexer.addToken(token) return } // tokenizeDecimalNumber Reads and tokenizes a decimal number. -func (lexer *LexingOperation) tokenizeDecimalNumber (negative bool) (err error) { - var number uint64 - +func (lexer *LexingOperation) tokenizeDecimalNumber () (number uint64, err error) { for lexer.char >= '0' && lexer.char <= '9' { number *= 10 number += uint64(lexer.char - '0') @@ -168,25 +150,12 @@ func (lexer *LexingOperation) tokenizeDecimalNumber (negative bool) (err error) err = lexer.nextRune() if err != nil { return } } - - token := Token { } - - if negative { - token.kind = TokenKindInt - token.value = int64(number) * -1 - } else { - token.kind = TokenKindUInt - token.value = uint64(number) - } - lexer.addToken(token) return } // tokenizeOctalNumber Reads and tokenizes an octal number. 
-func (lexer *LexingOperation) tokenizeOctalNumber (negative bool) (err error) { - var number uint64 - +func (lexer *LexingOperation) tokenizeOctalNumber () (number uint64, err error) { for lexer.char >= '0' && lexer.char <= '7' { number *= 8 number += uint64(lexer.char - '0') @@ -194,18 +163,6 @@ func (lexer *LexingOperation) tokenizeOctalNumber (negative bool) (err error) { err = lexer.nextRune() if err != nil { return } } - - token := Token { } - - if negative { - token.kind = TokenKindInt - token.value = int64(number) * -1 - } else { - token.kind = TokenKindUInt - token.value = uint64(number) - } - - lexer.addToken(token) return } From 0f603e3bf7bb0f1e6a3ea7bc2964ddce1b85c776 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 01:00:57 -0500 Subject: [PATCH 10/20] I am an idiot --- lexer/lexer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 6650f9d..a4fa122 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -46,7 +46,7 @@ func (lexer *LexingOperation) tokenize () (err error) { } else if lowercase || uppercase { err = lexer.tokenizeAlphaBeginning() if err != nil { return } - } else if lexer.char >= '0' && lexer.char <= '9' { + } else { err = lexer.tokenizeSymbolBeginning() if err != nil { return } } From 8813928b685ef571cca9365a34691fc9de8b82b7 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 01:10:34 -0500 Subject: [PATCH 11/20] Negative numbers are now supported --- lexer/lexer.go | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index a4fa122..8d909ab 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -329,23 +329,34 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { } func (lexer *LexingOperation) tokenizeDashBeginning () (err error) { - token := Token { kind: TokenKindMinus } - lexer.nextRune() - + err = lexer.nextRune() + if err != nil { return } + if lexer.char == '-' { - token.kind = TokenKindDecrement - lexer.nextRune() + token := Token { kind: TokenKindDecrement } + + err = lexer.nextRune() + if err != nil { return } + + if lexer.char == '-' { + token.kind = TokenKindSeparator + lexer.nextRune() + } + lexer.addToken(token) } else if lexer.char == '>' { - token.kind = TokenKindReturnDirection - lexer.nextRune() - } + token := Token { kind: TokenKindReturnDirection } - if lexer.char == '-' { - token.kind = TokenKindSeparator - lexer.nextRune() - } + err = lexer.nextRune() + if err != nil { return } - lexer.addToken(token) + lexer.addToken(token) + } else if lexer.char >= '0' && lexer.char <= '9' { + lexer.tokenizeNumberBeginning(true) + } else { + token := Token { kind: TokenKindMinus } + lexer.addToken(token) + } + return } From 9e4684dbedd0efa5ae9387e0b3b5ba5b4d449037 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 01:57:04 -0500 Subject: [PATCH 12/20] Number tokenizing functions use one digit conversion functions --- lexer/lexer.go | 108 ---------------------------------------- lexer/numbers.go | 127 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 108 deletions(-) create mode 100644 lexer/numbers.go diff --git a/lexer/lexer.go b/lexer/lexer.go index 8d909ab..57cbbb7 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -58,114 +58,6 @@ func (lexer *LexingOperation) tokenize () (err error) { return } -// tokenizeSymbolBeginning lexes a token that starts with a number. 
-func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) { - var number uint64 - - if lexer.char == '0' { - lexer.nextRune() - - if lexer.char == 'x' { - lexer.nextRune() - number, err = lexer.tokenizeHexidecimalNumber() - } else if lexer.char == 'b' { - lexer.nextRune() - number, err = lexer.tokenizeBinaryNumber() - } else if lexer.char == '.' { - number, err = lexer.tokenizeDecimalNumber() - } else if lexer.char >= '0' && lexer.char <= '9' { - number, err = lexer.tokenizeOctalNumber() - } else { - return file.NewError ( - lexer.file.Location(), 1, - "unexpected character in number literal", - file.ErrorKindError) - } - } else { - number, err = lexer.tokenizeDecimalNumber() - } - - if err != nil { return } - - token := Token { } - - if negative { - token.kind = TokenKindInt - token.value = int64(number) * -1 - } else { - token.kind = TokenKindUInt - token.value = uint64(number) - } - - lexer.addToken(token) - return -} - -// tokenizeHexidecimalNumber Reads and tokenizes a hexidecimal number. -func (lexer *LexingOperation) tokenizeHexidecimalNumber () (number uint64, err error) { - for { - if lexer.char >= '0' && lexer.char <= '9' { - number *= 16 - number += uint64(lexer.char - '0') - } else if lexer.char >= 'A' && lexer.char <= 'F' { - number *= 16 - number += uint64(lexer.char - 'A' + 9) - } else if lexer.char >= 'a' && lexer.char <= 'f' { - number *= 16 - number += uint64(lexer.char - 'a' + 9) - } else { - break - } - - err = lexer.nextRune() - if err != nil { return } - } - return -} - -// tokenizeBinaryNumber Reads and tokenizes a binary number. -func (lexer *LexingOperation) tokenizeBinaryNumber () (number uint64, err error) { - for { - if lexer.char == '0' { - number *= 2 - } else if lexer.char == '1' { - number *= 2 - number += 1 - } else { - break - } - - err = lexer.nextRune() - if err != nil { return } - } - return -} - -// tokenizeDecimalNumber Reads and tokenizes a decimal number. -func (lexer *LexingOperation) tokenizeDecimalNumber () (number uint64, err error) { - for lexer.char >= '0' && lexer.char <= '9' { - number *= 10 - number += uint64(lexer.char - '0') - - err = lexer.nextRune() - if err != nil { return } - } - - return -} - -// tokenizeOctalNumber Reads and tokenizes an octal number. -func (lexer *LexingOperation) tokenizeOctalNumber () (number uint64, err error) { - for lexer.char >= '0' && lexer.char <= '7' { - number *= 8 - number += uint64(lexer.char - '0') - - err = lexer.nextRune() - if err != nil { return } - } - return -} - func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) { got := "" diff --git a/lexer/numbers.go b/lexer/numbers.go new file mode 100644 index 0000000..df1634d --- /dev/null +++ b/lexer/numbers.go @@ -0,0 +1,127 @@ +package lexer + +import "github.com/sashakoshka/arf/file" + +// tokenizeSymbolBeginning lexes a token that starts with a number. +func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) { + var number uint64 + + if lexer.char == '0' { + lexer.nextRune() + + if lexer.char == 'x' { + lexer.nextRune() + number, err = lexer.tokenizeHexidecimalNumber() + } else if lexer.char == 'b' { + lexer.nextRune() + number, err = lexer.tokenizeBinaryNumber() + } else if lexer.char == '.' 
{ + number, err = lexer.tokenizeDecimalNumber() + } else if lexer.char >= '0' && lexer.char <= '9' { + number, err = lexer.tokenizeOctalNumber() + } else { + return file.NewError ( + lexer.file.Location(), 1, + "unexpected character in number literal", + file.ErrorKindError) + } + } else { + number, err = lexer.tokenizeDecimalNumber() + } + + if err != nil { return } + + token := Token { } + + if negative { + token.kind = TokenKindInt + token.value = int64(number) * -1 + } else { + token.kind = TokenKindUInt + token.value = uint64(number) + } + + lexer.addToken(token) + return +} + +func runeToDigit (char rune, radix uint64) (digit uint64, worked bool) { + worked = true + + if char >= '0' && char <= '9' { + digit = uint64(char - '0') + } else if char >= 'A' && char <= 'F' { + digit = uint64(char - 'A' + 9) + } else if char >= 'a' && char <= 'f' { + digit = uint64(char - 'a' + 9) + } else { + worked = false + } + + if digit >= radix { + worked = false + } + + return +} + +// tokenizeHexidecimalNumber Reads and tokenizes a hexidecimal number. +func (lexer *LexingOperation) tokenizeHexidecimalNumber () (number uint64, err error) { + for { + digit, worked := runeToDigit(lexer.char, 16) + if !worked { break } + + number *= 16 + number += digit + + err = lexer.nextRune() + if err != nil { return } + } + return +} + +// tokenizeBinaryNumber Reads and tokenizes a binary number. +func (lexer *LexingOperation) tokenizeBinaryNumber () (number uint64, err error) { + for { + digit, worked := runeToDigit(lexer.char, 2) + if !worked { break } + + number *= 2 + number += digit + + err = lexer.nextRune() + if err != nil { return } + } + return +} + +// tokenizeDecimalNumber Reads and tokenizes a decimal number. +func (lexer *LexingOperation) tokenizeDecimalNumber () (number uint64, err error) { + for { + digit, worked := runeToDigit(lexer.char, 10) + if !worked { break } + + number *= 10 + number += digit + + err = lexer.nextRune() + if err != nil { return } + } + + return +} + +// tokenizeOctalNumber Reads and tokenizes an octal number. +func (lexer *LexingOperation) tokenizeOctalNumber () (number uint64, err error) { + for { + digit, worked := runeToDigit(lexer.char, 8) + if !worked { break } + + number *= 8 + number += digit + + err = lexer.nextRune() + if err != nil { return } + } + return +} From 40ad569870fd72ba050acdee1850eedea666f918 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 02:03:54 -0500 Subject: [PATCH 13/20] Collapsed all number tokenization functions into just one --- lexer/numbers.go | 64 +++++++----------------------------------------- 1 file changed, 9 insertions(+), 55 deletions(-) diff --git a/lexer/numbers.go b/lexer/numbers.go index df1634d..3c7316c 100644 --- a/lexer/numbers.go +++ b/lexer/numbers.go @@ -11,14 +11,14 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error if lexer.char == 'x' { lexer.nextRune() - number, err = lexer.tokenizeHexidecimalNumber() + number, err = lexer.tokenizeNumber(16) } else if lexer.char == 'b' { lexer.nextRune() - number, err = lexer.tokenizeBinaryNumber() + number, err = lexer.tokenizeNumber(2) } else if lexer.char == '.' 
{ - number, err = lexer.tokenizeDecimalNumber() + number, err = lexer.tokenizeNumber(10) } else if lexer.char >= '0' && lexer.char <= '9' { - number, err = lexer.tokenizeOctalNumber() + number, err = lexer.tokenizeNumber(8) } else { return file.NewError ( lexer.file.Location(), 1, @@ -26,7 +26,7 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error file.ErrorKindError) } } else { - number, err = lexer.tokenizeDecimalNumber() + number, err = lexer.tokenizeNumber(10) } if err != nil { return } @@ -65,13 +65,13 @@ func runeToDigit (char rune, radix uint64) (digit uint64, worked bool) { return } -// tokenizeHexidecimalNumber Reads and tokenizes a hexidecimal number. -func (lexer *LexingOperation) tokenizeHexidecimalNumber () (number uint64, err error) { +// tokenizeNumber reads and tokenizes a number with the specified radix. +func (lexer *LexingOperation) tokenizeNumber (radix uint64) (number uint64, err error) { for { - digit, worked := runeToDigit(lexer.char, 16) + digit, worked := runeToDigit(lexer.char, radix) if !worked { break } - number *= 16 + number *= radix number += digit err = lexer.nextRune() @@ -79,49 +79,3 @@ func (lexer *LexingOperation) tokenizeHexidecimalNumber () (number uint64, err e } return } - -// tokenizeBinaryNumber Reads and tokenizes a binary number. -func (lexer *LexingOperation) tokenizeBinaryNumber () (number uint64, err error) { - for { - digit, worked := runeToDigit(lexer.char, 2) - if !worked { break } - - number *= 2 - number += digit - - err = lexer.nextRune() - if err != nil { return } - } - return -} - -// tokenizeDecimalNumber Reads and tokenizes a decimal number. -func (lexer *LexingOperation) tokenizeDecimalNumber () (number uint64, err error) { - for { - digit, worked := runeToDigit(lexer.char, 10) - if !worked { break } - - number *= 10 - number += digit - - err = lexer.nextRune() - if err != nil { return } - } - - return -} - -// tokenizeOctalNumber Reads and tokenizes an octal number. -func (lexer *LexingOperation) tokenizeOctalNumber () (number uint64, err error) { - for { - digit, worked := runeToDigit(lexer.char, 8) - if !worked { break } - - number *= 8 - number += digit - - err = lexer.nextRune() - if err != nil { return } - } - return -} From 3768e3454fed3342e76053df23a34540480acdc3 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 02:17:35 -0500 Subject: [PATCH 14/20] Added float support --- lexer/numbers.go | 64 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/lexer/numbers.go b/lexer/numbers.go index 3c7316c..2bfec05 100644 --- a/lexer/numbers.go +++ b/lexer/numbers.go @@ -4,21 +4,23 @@ import "github.com/sashakoshka/arf/file" // tokenizeSymbolBeginning lexes a token that starts with a number. func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) { - var number uint64 + var number uint64 + var fragment float64 + var isFloat bool if lexer.char == '0' { lexer.nextRune() if lexer.char == 'x' { lexer.nextRune() - number, err = lexer.tokenizeNumber(16) + number, fragment, isFloat, err = lexer.tokenizeNumber(16) } else if lexer.char == 'b' { lexer.nextRune() - number, err = lexer.tokenizeNumber(2) + number, fragment, isFloat, err = lexer.tokenizeNumber(2) } else if lexer.char == '.' 
{ - number, err = lexer.tokenizeNumber(10) + number, fragment, isFloat, err = lexer.tokenizeNumber(10) } else if lexer.char >= '0' && lexer.char <= '9' { - number, err = lexer.tokenizeNumber(8) + number, fragment, isFloat, err = lexer.tokenizeNumber(8) } else { return file.NewError ( lexer.file.Location(), 1, @@ -26,19 +28,30 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error file.ErrorKindError) } } else { - number, err = lexer.tokenizeNumber(10) + number, fragment, isFloat, err = lexer.tokenizeNumber(10) } if err != nil { return } token := Token { } - if negative { - token.kind = TokenKindInt - token.value = int64(number) * -1 + if isFloat { + floatNumber := float64(number) + fragment + + token.kind = TokenKindFloat + if negative { + token.value = floatNumber * -1 + } else { + token.value = floatNumber + } } else { - token.kind = TokenKindUInt - token.value = uint64(number) + if negative { + token.kind = TokenKindInt + token.value = int64(number) * -1 + } else { + token.kind = TokenKindUInt + token.value = uint64(number) + } } lexer.addToken(token) @@ -66,7 +79,14 @@ func runeToDigit (char rune, radix uint64) (digit uint64, worked bool) { } // tokenizeNumber reads and tokenizes a number with the specified radix. -func (lexer *LexingOperation) tokenizeNumber (radix uint64) (number uint64, err error) { +func (lexer *LexingOperation) tokenizeNumber ( + radix uint64, +) ( + number uint64, + fragment float64, + isFloat bool, + err error, +) { for { digit, worked := runeToDigit(lexer.char, radix) if !worked { break } @@ -77,5 +97,25 @@ func (lexer *LexingOperation) tokenizeNumber (radix uint64) (number uint64, err err = lexer.nextRune() if err != nil { return } } + + if lexer.char == '.' { + isFloat = true + err = lexer.nextRune() + if err != nil { return } + + coef := 1 / float64(radix) + for { + digit, worked := runeToDigit(lexer.char, radix) + if !worked { break } + + fragment += float64(digit) * coef + + coef /= float64(radix) + + err = lexer.nextRune() + if err != nil { return } + } + } + return } From 758b85e735aa3f46ee3b910493bfeacc00fd20a7 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 02:47:51 -0500 Subject: [PATCH 15/20] Added naieve string and char literal parsing --- lexer/lexer.go | 7 ++----- lexer/lexer_test.go | 7 +++---- lexer/numbers.go | 3 +++ lexer/text.go | 47 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 lexer/text.go diff --git a/lexer/lexer.go b/lexer/lexer.go index 57cbbb7..c17a549 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -40,7 +40,6 @@ func (lexer *LexingOperation) tokenize () (err error) { number := lexer.char >= '0' && lexer.char <= '9' if number { - // TODO: tokenize number begin\ err = lexer.tokenizeNumberBeginning(false) if err != nil { return } } else if lowercase || uppercase { @@ -126,11 +125,9 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { }) err = lexer.nextRune() case '"': - // TODO: tokenize string literal - err = lexer.nextRune() + err = lexer.tokenizeString(false) case '\'': - // TODO: tokenize rune literal - err = lexer.nextRune() + err = lexer.tokenizeString(true) case ':': lexer.addToken (Token { kind: TokenKindColon, diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index cd9cb5b..78d4f91 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -19,10 +19,9 @@ func TestTokenizeAll (test *testing.T) { test.Log("got token:", token.Describe()) } - test.Log("resulting error:") - 
test.Log(err.Error()) - if err == nil { - test.Log("Tokenize() should have returned an error") + if err != nil { + test.Log("returned error:") + test.Log(err.Error()) test.Fail() return } diff --git a/lexer/numbers.go b/lexer/numbers.go index 2bfec05..a08074b 100644 --- a/lexer/numbers.go +++ b/lexer/numbers.go @@ -58,6 +58,9 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error return } +// runeToDigit converts a rune from 0-F to a corresponding digit, with a maximum +// radix. If the character is invalid, or the digit is too big, it will return +// false for worked. func runeToDigit (char rune, radix uint64) (digit uint64, worked bool) { worked = true diff --git a/lexer/text.go b/lexer/text.go new file mode 100644 index 0000000..02fdc61 --- /dev/null +++ b/lexer/text.go @@ -0,0 +1,47 @@ +package lexer + +import "github.com/sashakoshka/arf/file" + +func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) { + err = lexer.nextRune() + if err != nil { return } + + got := "" + + for { + got += string(lexer.char) + + err = lexer.nextRune() + if err != nil { return } + + if isRuneLiteral { + if lexer.char == '\'' { break } + } else { + if lexer.char == '"' { break } + } + } + + err = lexer.nextRune() + if err != nil { return } + + token := Token { } + + if isRuneLiteral { + if len(got) > 1 { + err = file.NewError ( + lexer.file.Location(), len(got) - 1, + "excess data in rune literal", + file.ErrorKindError) + return + } + + token.kind = TokenKindRune + token.value = rune([]rune(got)[0]) + } else { + token.kind = TokenKindString + token.value = got + } + + lexer.addToken(token) + return +} From 0a31ea7bf88cdc313a757db5e047345ed514e00d Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 02:54:20 -0500 Subject: [PATCH 16/20] Support for digraphs such as &&, <<, ++ --- lexer/lexer.go | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index c17a549..1051d1b 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -159,7 +159,13 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { }) err = lexer.nextRune() case '+': - // TODO: tokenize plus begin + err = lexer.nextRune() + if err != nil { return } + token := Token { kind: TokenKindPlus } + if lexer.char == '+' { + token.kind = TokenKindIncrement + } + lexer.addToken(token) err = lexer.nextRune() case '-': err = lexer.tokenizeDashBeginning() @@ -194,16 +200,40 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { }) err = lexer.nextRune() case '<': - // TODO: tokenize less than begin + err = lexer.nextRune() + if err != nil { return } + token := Token { kind: TokenKindLessThan } + if lexer.char == '<' { + token.kind = TokenKindLShift + } + lexer.addToken(token) err = lexer.nextRune() case '>': - // TODO: tokenize greater than begin + err = lexer.nextRune() + if err != nil { return } + token := Token { kind: TokenKindGreaterThan } + if lexer.char == '>' { + token.kind = TokenKindRShift + } + lexer.addToken(token) err = lexer.nextRune() case '|': - // TODO: tokenize bar begin + err = lexer.nextRune() + if err != nil { return } + token := Token { kind: TokenKindBinaryOr } + if lexer.char == '|' { + token.kind = TokenKindLogicalOr + } + lexer.addToken(token) err = lexer.nextRune() case '&': - // TODO: tokenize and begin + err = lexer.nextRune() + if err != nil { return } + token := Token { kind: TokenKindBinaryAnd } + if lexer.char == '&' { + token.kind = 
TokenKindLogicalAnd + } + lexer.addToken(token) err = lexer.nextRune() default: err = file.NewError ( From af6f1708334cd335228bdd6a94017aed482f6eec Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 03:47:42 -0500 Subject: [PATCH 17/20] Normalized whitespace tokens --- lexer/lexer.go | 19 +++++++++++++++++++ lexer/lexer_test.go | 2 ++ 2 files changed, 21 insertions(+) diff --git a/lexer/lexer.go b/lexer/lexer.go index 1051d1b..742e981 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -54,6 +54,10 @@ func (lexer *LexingOperation) tokenize () (err error) { if err != nil { return } } + if lexer.tokens[len(lexer.tokens) - 1].kind != TokenKindNewline { + lexer.addToken(Token { kind: TokenKindNewline }) + } + return } @@ -119,6 +123,21 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { } case '\n': // line break + lastLineEmpty := true + tokenIndex := len(lexer.tokens) - 1 + for lexer.tokens[tokenIndex].kind != TokenKindNewline { + if lexer.tokens[tokenIndex].kind != TokenKindIndent { + + lastLineEmpty = false + break + } + tokenIndex -- + } + + if lastLineEmpty { + lexer.tokens = lexer.tokens[:tokenIndex] + } + // TODO: if last line was blank, (ony whitespace) discard. lexer.addToken (Token { kind: TokenKindNewline, diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 78d4f91..9865564 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -45,6 +45,7 @@ func TestTokenizeAll (test *testing.T) { Token { kind: TokenKindRBracket }, Token { kind: TokenKindLBrace }, Token { kind: TokenKindRBrace }, + Token { kind: TokenKindNewline }, Token { kind: TokenKindPlus }, Token { kind: TokenKindMinus }, Token { kind: TokenKindIncrement }, @@ -63,6 +64,7 @@ func TestTokenizeAll (test *testing.T) { Token { kind: TokenKindLogicalOr }, Token { kind: TokenKindBinaryAnd }, Token { kind: TokenKindLogicalAnd }, + Token { kind: TokenKindNewline }, } if len(tokens) != len(correct) { From bef5b0328edf4dd7ab013157c8f05271d916f9b2 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 03:52:16 -0500 Subject: [PATCH 18/20] Fixed PermissionFrom() --- lexer/token.go | 2 +- types/permission.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lexer/token.go b/lexer/token.go index 0ac8298..3ab98a8 100644 --- a/lexer/token.go +++ b/lexer/token.go @@ -78,7 +78,7 @@ func (token Token) Value () (value any) { // Equals returns whether this token is equal to another token func (token Token) Equals (testToken Token) (match bool) { - return token == testToken + return token.value == testToken.value } // Location returns the location of the token in its file. 
diff --git a/types/permission.go b/types/permission.go index 22b42f0..4421058 100644 --- a/types/permission.go +++ b/types/permission.go @@ -15,8 +15,8 @@ type Permission struct { func ModeFrom (char rune) (mode Mode) { switch (char) { - case 'r': mode = ModeNone - case 'n': mode = ModeRead + case 'n': mode = ModeNone + case 'r': mode = ModeRead case 'w': mode = ModeWrite } From 4d73fa4b83e8330d3786fb81a3693cd9555f5268 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 03:58:45 -0500 Subject: [PATCH 19/20] Fixed equality issue with int --- lexer/lexer_test.go | 11 +++++++---- lexer/token.go | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 9865564..3c7e5d1 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -15,8 +15,8 @@ func TestTokenizeAll (test *testing.T) { tokens, err := Tokenize(file) // print all tokens - for _, token := range tokens { - test.Log("got token:", token.Describe()) + for index, token := range tokens { + test.Log(index, "\tgot token:", token.Describe()) } if err != nil { @@ -33,8 +33,8 @@ func TestTokenizeAll (test *testing.T) { External: types.ModeWrite, }}, Token { kind: TokenKindReturnDirection }, - Token { kind: TokenKindInt, value: -349820394 }, - Token { kind: TokenKindUInt, value: 932748397 }, + Token { kind: TokenKindInt, value: int64(-349820394) }, + Token { kind: TokenKindUInt, value: uint64(932748397) }, Token { kind: TokenKindFloat, value: 239485.37520 }, Token { kind: TokenKindString, value: "hello world\n" }, Token { kind: TokenKindRune, value: 'E' }, @@ -77,6 +77,9 @@ func TestTokenizeAll (test *testing.T) { for index, token := range tokens { if !token.Equals(correct[index]) { test.Log("token", index, "not equal") + test.Log ( + "have", token.Describe(), + "want", correct[index].Describe()) test.Fail() return } diff --git a/lexer/token.go b/lexer/token.go index 3ab98a8..0ac8298 100644 --- a/lexer/token.go +++ b/lexer/token.go @@ -78,7 +78,7 @@ func (token Token) Value () (value any) { // Equals returns whether this token is equal to another token func (token Token) Equals (testToken Token) (match bool) { - return token.value == testToken.value + return token == testToken } // Location returns the location of the token in its file. 
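
Patches 12 through 14 above collapse the per-base number lexers into one digit
converter feeding one radix-generic reader. The stand-alone sketch below walks
a string where the real lexer walks the file rune by rune, and takes the radix
directly where tokenizeNumberBeginning picks it from the 0x/0b/0 prefix; apart
from runeToDigit, the names are hypothetical. One caveat: the patches compute
letter digits as char - 'A' + 9, which maps 'A' to 9, but hexadecimal letter
digits start at ten, so the sketch uses an offset of 10.

package main

import "fmt"

// runeToDigit mirrors lexer/numbers.go, with the letter offset corrected to
// 10 so that 'A'..'F' map to 10..15. Digits invalid for the radix are
// rejected by returning worked == false.
func runeToDigit (char rune, radix uint64) (digit uint64, worked bool) {
	worked = true

	if char >= '0' && char <= '9' {
		digit = uint64(char - '0')
	} else if char >= 'A' && char <= 'F' {
		digit = uint64(char - 'A') + 10
	} else if char >= 'a' && char <= 'f' {
		digit = uint64(char - 'a') + 10
	} else {
		worked = false
	}

	if digit >= radix {
		worked = false
	}

	return
}

// lexNumber is a hypothetical string-based stand-in for tokenizeNumber: it
// accumulates whole digits in the given radix, then, after an optional '.',
// accumulates a fractional part exactly as patch 14 does.
func lexNumber (input string, radix uint64) (number uint64, fragment float64, isFloat bool) {
	index := 0
	for index < len(input) {
		digit, worked := runeToDigit(rune(input[index]), radix)
		if !worked { break }
		number = number * radix + digit
		index ++
	}

	if index < len(input) && input[index] == '.' {
		isFloat = true
		index ++
		coef := 1 / float64(radix)
		for index < len(input) {
			digit, worked := runeToDigit(rune(input[index]), radix)
			if !worked { break }
			fragment += float64(digit) * coef
			coef /= float64(radix)
			index ++
		}
	}

	return
}

func main () {
	// the decimal float literal from lexer_test.go
	number, fragment, isFloat := lexNumber("239485.37520", 10)
	fmt.Println(number, fragment, isFloat) // 239485 0.3752 true

	// a hexadecimal literal, digits after the 0x prefix
	number, _, _ = lexNumber("ff", 16)
	fmt.Println(number) // 255
}
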
From a0d5b3a1a23ec9a9b0c4eab71ab7803f34593c4c Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 11 Aug 2022 04:05:55 -0500 Subject: [PATCH 20/20] Added basic escape sequences to string parser --- lexer/lexer_test.go | 2 +- lexer/text.go | 32 +++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 3c7e5d1..9143521 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -36,7 +36,7 @@ func TestTokenizeAll (test *testing.T) { Token { kind: TokenKindInt, value: int64(-349820394) }, Token { kind: TokenKindUInt, value: uint64(932748397) }, Token { kind: TokenKindFloat, value: 239485.37520 }, - Token { kind: TokenKindString, value: "hello world\n" }, + Token { kind: TokenKindString, value: "hello world!\n" }, Token { kind: TokenKindRune, value: 'E' }, Token { kind: TokenKindName, value: "helloWorld" }, Token { kind: TokenKindColon }, diff --git a/lexer/text.go b/lexer/text.go index 02fdc61..e349581 100644 --- a/lexer/text.go +++ b/lexer/text.go @@ -2,6 +2,19 @@ package lexer import "github.com/sashakoshka/arf/file" +var escapeSequenceMap = map[rune] rune { + 'a': '\x07', + 'b': '\x08', + 'f': '\x0c', + 'n': '\x0a', + 'r': '\x0d', + 't': '\x09', + 'v': '\x0b', + '\'': '\'', + '"': '"', + '\\': '\\', +} + func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) { err = lexer.nextRune() if err != nil { return } @@ -9,7 +22,24 @@ func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) { got := "" for { - got += string(lexer.char) + // TODO: add hexadecimal escape codes + if lexer.char == '\\' { + err = lexer.nextRune() + if err != nil { return } + + actual, exists := escapeSequenceMap[lexer.char] + if exists { + got += string(actual) + } else { + err = file.NewError ( + lexer.file.Location(), 1, + "unknown escape character " + + string(lexer.char), file.ErrorKindError) + return + } + } else { + got += string(lexer.char) + } err = lexer.nextRune() if err != nil { return }
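
The escape handling added above can be exercised on its own. The decoder below
is a hypothetical stand-alone version of the loop in tokenizeString, built
around the same escapeSequenceMap; the real lexer reads runes from the file
and reports unknown escapes through file.NewError.

package main

import (
	"errors"
	"fmt"
)

// escapeSequenceMap is copied from lexer/text.go (patch 20).
var escapeSequenceMap = map[rune] rune {
	'a': '\x07', 'b': '\x08', 'f': '\x0c', 'n': '\x0a',
	'r': '\x0d', 't': '\x09', 'v': '\x0b',
	'\'': '\'', '"': '"', '\\': '\\',
}

// decodeEscapes resolves backslash escapes in raw: a backslash makes the next
// rune an escape lookup, and an unknown escape is an error, as in the patch.
func decodeEscapes (raw string) (got string, err error) {
	runes := []rune(raw)
	for index := 0; index < len(runes); index ++ {
		if runes[index] == '\\' {
			index ++
			if index >= len(runes) {
				return "", errors.New("trailing backslash")
			}
			actual, exists := escapeSequenceMap[runes[index]]
			if !exists {
				return "", errors.New (
					"unknown escape character " +
					string(runes[index]))
			}
			got += string(actual)
		} else {
			got += string(runes[index])
		}
	}
	return
}

func main () {
	// the string literal expected by lexer_test.go after patch 20
	decoded, err := decodeEscapes(`hello world!\n`)
	fmt.Printf("%q %v\n", decoded, err) // "hello world!\n" <nil>
}
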
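Similarly, the permission mapping fixed in patch 18 ('n' none, 'r' read,
'w' write; patch 4 had the 'r' and 'n' cases swapped) can be sanity-checked
with this minimal copy of types/permission.go:

package main

import "fmt"

type Mode int

const (
	ModeNone Mode = iota
	ModeRead
	ModeWrite
)

// Permission pairs an internal mode with an external one, lexed from
// two-letter tokens such as "rw".
type Permission struct {
	Internal Mode
	External Mode
}

// ModeFrom follows types/permission.go after patch 18.
func ModeFrom (char rune) (mode Mode) {
	switch (char) {
	case 'n': mode = ModeNone
	case 'r': mode = ModeRead
	case 'w': mode = ModeWrite
	}

	return
}

// PermissionFrom converts a two-character permission string; anything of the
// wrong length yields the zero Permission.
func PermissionFrom (data string) (permission Permission) {
	if len(data) != 2 { return }

	permission.Internal = ModeFrom(rune(data[0]))
	permission.External = ModeFrom(rune(data[1]))
	return
}

func main () {
	fmt.Println(PermissionFrom("rw")) // {1 2}: internal read, external write
}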