Compare commits

...

14 Commits

Author SHA1 Message Date
714aca5196 Fix parseMeta not getting enough tokens
Parser now passes TestMeta
2022-08-12 17:12:38 -05:00
033e64fc54 Parser can now print out a list of expected token kinds 2022-08-12 17:09:37 -05:00
b3071d4ac9 Token kind values can now be described 2022-08-12 16:30:32 -05:00
f23c3a234a Added metadata parser 2022-08-12 16:22:51 -05:00
856d5763d3 Lexer tokens are now created by the lexer
This is so positional information can be accurately embedded into them.
2022-08-12 14:34:07 -05:00
accf528869 Locations and tokens are now capable of creating errors on their own 2022-08-12 13:51:38 -05:00
7914f0df45 Location now stores width instead of Error 2022-08-12 13:43:09 -05:00
050c956787 Added expect and nextToken methods to parser 2022-08-12 13:33:21 -05:00
18bd681082 Parser now understands the separation between files
This needs to be done because each file has a metadata section at the top.
2022-08-12 12:02:20 -05:00
2019c67bbb Created basic test for parser 2022-08-12 11:55:17 -05:00
f4f19a809a Lexer now eats :arf symbol at file beginning 2022-08-12 10:38:23 -05:00
c09c9860b8 Parser tests are now arf files 2022-08-12 10:26:16 -05:00
81b47f7734 Replaced all occurences of github.com with git.tebibyte.media 2022-08-12 10:21:36 -05:00
09170e390d Created base for parser
The parser now handles file opening and invokes the lexer.
2022-08-12 10:11:43 -05:00
21 changed files with 459 additions and 174 deletions

View File

@@ -2,7 +2,7 @@ package arfc
 import "os"
 import "fmt"
-import "github.com/sashakoshka/arf"
+import "git.tebibyte.media/sashakoshka/arf"
 
 func main () {
 	if len(os.Args) != 2 {

View File

@@ -12,7 +12,6 @@ const (
 type Error struct {
 	Location
-	width   int
 	message string
 	kind    ErrorKind
 }
@@ -20,15 +19,13 @@ type Error struct {
 // NewError creates a new error at the specified location.
 func NewError (
 	location Location,
-	width    int,
 	message  string,
 	kind     ErrorKind,
 ) (
-	err *Error,
+	err Error,
 ) {
-	return &Error {
+	return Error {
 		Location: location,
-		width:    width,
 		message:  message,
 		kind:     kind,
 	}

View File

@@ -103,10 +103,11 @@ func (file *File) Close () {
 // Location returns a location struct describing the current position inside of
 // the file. This can be stored and used to print errors.
-func (file *File) Location () (location Location) {
+func (file *File) Location (width int) (location Location) {
 	return Location {
 		file:   file,
 		row:    file.currentLine,
 		column: file.currentColumn,
+		width:  width,
 	}
 }

View File

@@ -6,4 +6,10 @@ type Location struct {
 	file   *File
 	row    int
 	column int
+	width  int
+}
+
+// NewError creates a new error at this location.
+func (location Location) NewError (message string, kind ErrorKind) (err Error) {
+	return NewError(location, message, kind)
 }
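Taken together, the two hunks above move the width field from Error onto Location and add a Location.NewError shortcut, so a caller can report an error from a location alone. Below is a minimal, self-contained sketch of that flow; the Location, Error, and ErrorKind types here are simplified stand-ins written for illustration, not the real file package, and the values in main are made up.

package main

import "fmt"

// Simplified stand-ins for the file package's types, just to illustrate the
// new flow: width now lives on Location, and errors are created through it.
type ErrorKind int

const (
	ErrorKindError ErrorKind = iota
	ErrorKindWarn
)

type Location struct {
	row, column, width int
}

type Error struct {
	Location
	message string
	kind    ErrorKind
}

// NewError mirrors the new signature: no width parameter, because the
// location already carries it.
func NewError (location Location, message string, kind ErrorKind) (err Error) {
	return Error { Location: location, message: message, kind: kind }
}

// NewError on Location mirrors the convenience method added in this diff.
func (location Location) NewError (message string, kind ErrorKind) (err Error) {
	return NewError(location, message, kind)
}

func main () {
	// A lexer would obtain this from file.Location(1); here it is hard-coded.
	loc := Location { row: 3, column: 7, width: 1 }
	err := loc.NewError("unexpected symbol character", ErrorKindError)
	fmt.Printf("row %d, column %d (width %d): %s\n",
		err.row, err.column, err.width, err.message)
}

The lexer changes below follow this chain: file.Location(1) stamps a one-rune-wide location, and NewError attaches a message and severity to it.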

go.mod (2 changed lines)
View File

@@ -1,3 +1,3 @@
-module github.com/sashakoshka/arf
+module git.tebibyte.media/sashakoshka/arf
 
 go 1.18

View File

@@ -1,8 +1,8 @@
 package lexer
 
 import "io"
-import "github.com/sashakoshka/arf/file"
-import "github.com/sashakoshka/arf/types"
+import "git.tebibyte.media/sashakoshka/arf/file"
+import "git.tebibyte.media/sashakoshka/arf/types"
 
 // LexingOperation holds information about an ongoing lexing operataion.
 type LexingOperation struct {
@@ -28,6 +28,20 @@ func Tokenize (file *file.File) (tokens []Token, err error) {
 // tokenize converts a file into a slice of tokens (lexemes). It will always
 // return a non-nil error, but if nothing went wrong it will return io.EOF.
 func (lexer *LexingOperation) tokenize () (err error) {
+	// check to see if the beginning of the file says :arf
+	var shebangCheck = []rune(":arf\n")
+	for index := 0; index < 5; index ++ {
+		err = lexer.nextRune()
+		if err != nil || shebangCheck[index] != lexer.char {
+			err = file.NewError (
+				lexer.file.Location(1),
+				"not an arf file",
+				file.ErrorKindError)
+			return
+		}
+	}
+
 	err = lexer.nextRune()
 	if err != nil { return }
@@ -52,13 +66,18 @@ func (lexer *LexingOperation) tokenize () (err error) {
 	}
 
 	if lexer.tokens[len(lexer.tokens) - 1].kind != TokenKindNewline {
-		lexer.addToken(Token { kind: TokenKindNewline })
+		token := lexer.newToken()
+		token.kind = TokenKindNewline
+		lexer.addToken(token)
 	}
 	return
 }
 
 func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) {
+	token := lexer.newToken()
+	token.kind = TokenKindName
+
 	got := ""
 
 	for {
@@ -72,7 +91,7 @@ func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) {
 		lexer.nextRune()
 	}
 
-	token := Token { kind: TokenKindName, value: got }
+	token.value = got
 
 	if len(got) == 2 {
 		firstValid := got[0] == 'n' || got[0] == 'r' || got[0] == 'w'
@@ -105,11 +124,14 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			err = lexer.nextRune()
 			file.NewError (
-				lexer.file.Location(), 1,
+				lexer.file.Location(1),
 				"tab not used as indent",
 				file.ErrorKindWarn).Print()
 			return
 		}
 
+		token := lexer.newToken()
+		token.kind = TokenKindIndent
+
 		// eat up tabs while increasing the indent level
 		indentLevel := 0
@@ -118,11 +140,9 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			err = lexer.nextRune()
 			if err != nil { return }
 		}
 
-		lexer.addToken (Token {
-			kind:  TokenKindIndent,
-			value: indentLevel,
-		})
+		token.value = indentLevel
+		lexer.addToken(token)
 
 	case '\n':
 		// line break
@@ -141,48 +161,49 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 			lexer.tokens = lexer.tokens[:tokenIndex]
 		}
 
-		lexer.addToken (Token {
-			kind: TokenKindNewline,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindNewline
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '"':
 		err = lexer.tokenizeString(false)
 	case '\'':
 		err = lexer.tokenizeString(true)
 	case ':':
-		lexer.addToken (Token {
-			kind: TokenKindColon,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindColon
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '.':
-		lexer.addToken (Token {
-			kind: TokenKindDot,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindDot
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '[':
-		lexer.addToken (Token {
-			kind: TokenKindLBracket,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindLBracket
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case ']':
-		lexer.addToken (Token {
-			kind: TokenKindRBracket,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindRBracket
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '{':
-		lexer.addToken (Token {
-			kind: TokenKindLBrace,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindLBrace
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '}':
-		lexer.addToken (Token {
-			kind: TokenKindRBrace,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindRBrace
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '+':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindPlus }
+		token := lexer.newToken()
+		token.kind = TokenKindPlus
 		if lexer.char == '+' {
 			token.kind = TokenKindIncrement
 		}
@@ -191,39 +212,40 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '-':
 		err = lexer.tokenizeDashBeginning()
 	case '*':
-		lexer.addToken (Token {
-			kind: TokenKindAsterisk,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindAsterisk
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '/':
-		lexer.addToken (Token {
-			kind: TokenKindSlash,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindSlash
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '@':
-		lexer.addToken (Token {
-			kind: TokenKindAt,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindAt
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '!':
-		lexer.addToken (Token {
-			kind: TokenKindExclamation,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindExclamation
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '%':
-		lexer.addToken (Token {
-			kind: TokenKindPercent,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindPercent
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '~':
-		lexer.addToken (Token {
-			kind: TokenKindTilde,
-		})
+		token := lexer.newToken()
+		token.kind = TokenKindTilde
+		lexer.addToken(token)
 		err = lexer.nextRune()
 	case '<':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindLessThan }
+		token := lexer.newToken()
+		token.kind = TokenKindLessThan
 		if lexer.char == '<' {
 			token.kind = TokenKindLShift
 		}
@@ -232,7 +254,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '>':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindGreaterThan }
+		token := lexer.newToken()
+		token.kind = TokenKindGreaterThan
 		if lexer.char == '>' {
 			token.kind = TokenKindRShift
 		}
@@ -241,7 +264,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '|':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindBinaryOr }
+		token := lexer.newToken()
+		token.kind = TokenKindBinaryOr
 		if lexer.char == '|' {
 			token.kind = TokenKindLogicalOr
 		}
@@ -250,7 +274,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 	case '&':
 		err = lexer.nextRune()
 		if err != nil { return }
-		token := Token { kind: TokenKindBinaryAnd }
+		token := lexer.newToken()
+		token.kind = TokenKindBinaryAnd
 		if lexer.char == '&' {
 			token.kind = TokenKindLogicalAnd
 		}
@@ -258,7 +283,7 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
 		err = lexer.nextRune()
 	default:
 		err = file.NewError (
-			lexer.file.Location(), 1,
+			lexer.file.Location(1),
 			"unexpected symbol character " +
 			string(lexer.char),
 			file.ErrorKindError)
@@ -273,7 +298,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 	if err != nil { return }
 
 	if lexer.char == '-' {
-		token := Token { kind: TokenKindDecrement }
+		token := lexer.newToken()
+		token.kind = TokenKindDecrement
 
 		err = lexer.nextRune()
 		if err != nil { return }
@@ -284,7 +310,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 		}
 		lexer.addToken(token)
 	} else if lexer.char == '>' {
-		token := Token { kind: TokenKindReturnDirection }
+		token := lexer.newToken()
+		token.kind = TokenKindReturnDirection
 
 		err = lexer.nextRune()
 		if err != nil { return }
@@ -293,13 +320,19 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
 	} else if lexer.char >= '0' && lexer.char <= '9' {
 		lexer.tokenizeNumberBeginning(true)
 	} else {
-		token := Token { kind: TokenKindMinus }
+		token := lexer.newToken()
+		token.kind = TokenKindMinus
 		lexer.addToken(token)
 	}
 
 	return
 }
 
+// newToken creates a new token from the lexer's current position in the file.
+func (lexer *LexingOperation) newToken () (token Token) {
+	return Token { location: lexer.file.Location(1) }
+}
+
 // addToken adds a new token to the lexer's token slice.
 func (lexer *LexingOperation) addToken (token Token) {
 	lexer.tokens = append(lexer.tokens, token)
@@ -320,7 +353,7 @@ func (lexer *LexingOperation) nextRune () (err error) {
 	lexer.char, _, err = lexer.file.ReadRune()
 	if err != nil && err != io.EOF {
 		return file.NewError (
-			lexer.file.Location(), 1,
+			lexer.file.Location(1),
 			err.Error(), file.ErrorKindError)
 	}
 	return
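Nearly every hunk in this file makes the same substitution: instead of appending a bare Token literal, the lexer asks newToken for a token pre-stamped with the current file location, sets its kind (and value, if any), and hands it to addToken. Here is a minimal, self-contained sketch of that pattern; the Token, Location, and LexingOperation types below are simplified stand-ins for illustration and do not match the real structs field-for-field.

package main

import "fmt"

// Simplified stand-ins for the lexer's types; the real Token and Location
// live in the lexer and file packages.
type TokenKind int

const (
	TokenKindNewline TokenKind = iota
	TokenKindColon
)

type Location struct {
	row, column, width int
}

type Token struct {
	location Location
	kind     TokenKind
	value    any
}

type LexingOperation struct {
	row, column int
	tokens      []Token
}

// newToken mirrors the helper added in this diff: every token starts out
// stamped with the lexer's current location.
func (lexer *LexingOperation) newToken () (token Token) {
	return Token { location: Location { lexer.row, lexer.column, 1 } }
}

// addToken appends a finished token to the lexer's token slice.
func (lexer *LexingOperation) addToken (token Token) {
	lexer.tokens = append(lexer.tokens, token)
}

func main () {
	lexer := LexingOperation { row: 0, column: 4 }

	// The pattern the diff substitutes for bare Token literals.
	token := lexer.newToken()
	token.kind = TokenKindColon
	lexer.addToken(token)

	fmt.Printf("%+v\n", lexer.tokens[0])
}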

View File

@@ -1,8 +1,8 @@
 package lexer
 
 import "testing"
-import "github.com/sashakoshka/arf/file"
-import "github.com/sashakoshka/arf/types"
+import "git.tebibyte.media/sashakoshka/arf/file"
+import "git.tebibyte.media/sashakoshka/arf/types"
 
 func checkTokenSlice (filePath string, correct []Token, test *testing.T) {
 	file, err := file.Open(filePath)
@@ -47,7 +47,7 @@ func checkTokenSlice (filePath string, correct []Token, test *testing.T) {
 }
 
 func TestTokenizeAll (test *testing.T) {
-	checkTokenSlice("../tests/lexer/all", []Token {
+	checkTokenSlice("../tests/lexer/all.arf", []Token {
 		Token { kind: TokenKindSeparator },
 		Token { kind: TokenKindPermission, value: types.Permission {
 			Internal: types.ModeRead,
@@ -90,7 +90,7 @@ func TestTokenizeAll (test *testing.T) {
 }
 
 func TestTokenizeNumbers (test *testing.T) {
-	checkTokenSlice("../tests/lexer/numbers", []Token {
+	checkTokenSlice("../tests/lexer/numbers.arf", []Token {
 		Token { kind: TokenKindUInt, value: uint64(83628266) },
 		Token { kind: TokenKindNewline },
 		Token { kind: TokenKindUInt, value: uint64(83628266) },
@@ -119,7 +119,7 @@ func TestTokenizeNumbers (test *testing.T) {
 }
 
 func TestTokenizeText (test *testing.T) {
-	checkTokenSlice("../tests/lexer/text", []Token {
+	checkTokenSlice("../tests/lexer/text.arf", []Token {
 		Token { kind: TokenKindString, value: "hello world!\a\b\f\n\r\t\v'\"\\" },
 		Token { kind: TokenKindNewline },
 		Token { kind: TokenKindRune, value: '\a' },
@@ -139,7 +139,7 @@ func TestTokenizeText (test *testing.T) {
 }
 
 func TestTokenizeIndent (test *testing.T) {
-	checkTokenSlice("../tests/lexer/indent", []Token {
+	checkTokenSlice("../tests/lexer/indent.arf", []Token {
		Token { kind: TokenKindName, value: "line1" },
 		Token { kind: TokenKindNewline },
 		Token { kind: TokenKindIndent, value: 1 },

View File

@@ -1,6 +1,6 @@
 package lexer
 
-import "github.com/sashakoshka/arf/file"
+import "git.tebibyte.media/sashakoshka/arf/file"
 
 // tokenizeSymbolBeginning lexes a token that starts with a number.
 func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
@@ -8,6 +8,8 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
 	var fragment float64
 	var isFloat bool
 
+	token := lexer.newToken()
+
 	if lexer.char == '0' {
 		lexer.nextRune()
@@ -23,7 +25,7 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
 			number, fragment, isFloat, err = lexer.tokenizeNumber(8)
 		} else {
 			return file.NewError (
-				lexer.file.Location(), 1,
+				lexer.file.Location(1),
 				"unexpected character in number literal",
 				file.ErrorKindError)
 		}
@@ -33,8 +35,6 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
 	if err != nil { return }
 
-	token := Token { }
-
 	if isFloat {
 		floatNumber := float64(number) + fragment

View File

@@ -1,13 +1,15 @@
 package lexer
 
 import "strconv"
-import "github.com/sashakoshka/arf/file"
+import "git.tebibyte.media/sashakoshka/arf/file"
 
 // tokenizeString tokenizes a string or rune literal.
 func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
 	err = lexer.nextRune()
 	if err != nil { return }
 
+	token := lexer.newToken()
+
 	got := ""
 
 	for {
@@ -38,12 +40,10 @@ func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
 	err = lexer.nextRune()
 	if err != nil { return }
 
-	token := Token { }
-
 	if isRuneLiteral {
 		if len(got) > 1 {
 			err = file.NewError (
-				lexer.file.Location(), len(got) - 1,
+				lexer.file.Location(1),
 				"excess data in rune literal",
 				file.ErrorKindError)
 			return
@@ -99,7 +99,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 		if len(number) < 3 {
 			err = file.NewError (
-				lexer.file.Location(), 1,
+				lexer.file.Location(1),
 				"octal escape sequence too short",
 				file.ErrorKindError)
 			return
@@ -133,7 +133,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 		if len(number) < want {
 			err = file.NewError (
-				lexer.file.Location(), 1,
+				lexer.file.Location(1),
 				"hex escape sequence too short ",
 				file.ErrorKindError)
 			return
@@ -143,7 +143,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
 			result = rune(parsedNumber)
 		} else {
 			err = file.NewError (
-				lexer.file.Location(), 1,
+				lexer.file.Location(1),
 				"unknown escape character " +
 				string(lexer.char), file.ErrorKindError)
 			return

View File

@@ -1,7 +1,7 @@
 package lexer
 
 import "fmt"
-import "github.com/sashakoshka/arf/file"
+import "git.tebibyte.media/sashakoshka/arf/file"
 
 // TokenKind is an enum represzenting what role a token has.
 type TokenKind int
@@ -78,7 +78,7 @@ func (token Token) Value () (value any) {
 
 // Equals returns whether this token is equal to another token
 func (token Token) Equals (testToken Token) (match bool) {
-	return token == testToken
+	return token.value == testToken.value && token.Is(testToken.kind)
 }
 
 // Location returns the location of the token in its file.
@@ -86,81 +86,15 @@ func (token Token) Location () (location file.Location) {
 	return token.location
 }
 
+// NewError creates a new error at this token's location.
+func (token Token) NewError (message string, kind file.ErrorKind) (err file.Error) {
+	return token.location.NewError(message, kind)
+}
+
 // Describe generates a textual description of the token to be used in debug
 // logs.
 func (token Token) Describe () (description string) {
-	switch token.kind {
-	case TokenKindNewline:
-		description += "Newline"
-	case TokenKindIndent:
-		description += "Indent"
-	case TokenKindSeparator:
-		description += "Separator"
-	case TokenKindPermission:
-		description += "Permission"
-	case TokenKindReturnDirection:
-		description += "ReturnDirection"
-	case TokenKindInt:
-		description += "Int"
-	case TokenKindUInt:
-		description += "UInt"
-	case TokenKindFloat:
-		description += "Float"
-	case TokenKindString:
-		description += "String"
-	case TokenKindRune:
-		description += "Rune"
-	case TokenKindName:
-		description += "Name"
-	case TokenKindColon:
-		description += "Colon"
-	case TokenKindDot:
-		description += "Dot"
-	case TokenKindLBracket:
-		description += "LBracket"
-	case TokenKindRBracket:
-		description += "RBracket"
-	case TokenKindLBrace:
-		description += "LBrace"
-	case TokenKindRBrace:
-		description += "RBrace"
-	case TokenKindPlus:
-		description += "Plus"
-	case TokenKindMinus:
-		description += "Minus"
-	case TokenKindIncrement:
-		description += "Increment"
-	case TokenKindDecrement:
-		description += "Decrement"
-	case TokenKindAsterisk:
-		description += "Asterisk"
-	case TokenKindSlash:
-		description += "Slash"
-	case TokenKindAt:
-		description += "At"
-	case TokenKindExclamation:
-		description += "Exclamation"
-	case TokenKindPercent:
-		description += "Percent"
-	case TokenKindTilde:
-		description += "Tilde"
-	case TokenKindLessThan:
-		description += "LessThan"
-	case TokenKindLShift:
-		description += "LShift"
-	case TokenKindGreaterThan:
-		description += "GreaterThan"
-	case TokenKindRShift:
-		description += "RShift"
-	case TokenKindBinaryOr:
-		description += "BinaryOr"
-	case TokenKindLogicalOr:
-		description += "LogicalOr"
-	case TokenKindBinaryAnd:
-		description += "BinaryAnd"
-	case TokenKindLogicalAnd:
-		description += "LogicalAnd"
-	}
+	description = token.kind.Describe()
 
 	if token.value != nil {
 		description += fmt.Sprint(": ", token.value)
@@ -168,3 +102,82 @@ func (token Token) Describe () (description string) {
 	return
 }
+
+// Describe generates a textual description of the token kind to be used in
+// debug logs.
+func (tokenKind TokenKind) Describe () (description string) {
+	switch tokenKind {
+	case TokenKindNewline:
+		description = "Newline"
+	case TokenKindIndent:
+		description = "Indent"
+	case TokenKindSeparator:
+		description = "Separator"
+	case TokenKindPermission:
+		description = "Permission"
+	case TokenKindReturnDirection:
+		description = "ReturnDirection"
+	case TokenKindInt:
+		description = "Int"
+	case TokenKindUInt:
+		description = "UInt"
+	case TokenKindFloat:
+		description = "Float"
+	case TokenKindString:
+		description = "String"
+	case TokenKindRune:
+		description = "Rune"
+	case TokenKindName:
+		description = "Name"
+	case TokenKindColon:
+		description = "Colon"
+	case TokenKindDot:
+		description = "Dot"
+	case TokenKindLBracket:
+		description = "LBracket"
+	case TokenKindRBracket:
+		description = "RBracket"
+	case TokenKindLBrace:
+		description = "LBrace"
+	case TokenKindRBrace:
+		description = "RBrace"
+	case TokenKindPlus:
+		description = "Plus"
+	case TokenKindMinus:
+		description = "Minus"
+	case TokenKindIncrement:
+		description = "Increment"
+	case TokenKindDecrement:
+		description = "Decrement"
+	case TokenKindAsterisk:
+		description = "Asterisk"
+	case TokenKindSlash:
+		description = "Slash"
+	case TokenKindAt:
+		description = "At"
+	case TokenKindExclamation:
+		description = "Exclamation"
+	case TokenKindPercent:
+		description = "Percent"
+	case TokenKindTilde:
+		description = "Tilde"
+	case TokenKindLessThan:
+		description = "LessThan"
+	case TokenKindLShift:
+		description = "LShift"
+	case TokenKindGreaterThan:
+		description = "GreaterThan"
+	case TokenKindRShift:
+		description = "RShift"
+	case TokenKindBinaryOr:
+		description = "BinaryOr"
+	case TokenKindLogicalOr:
+		description = "LogicalOr"
+	case TokenKindBinaryAnd:
+		description = "BinaryAnd"
+	case TokenKindLogicalAnd:
+		description = "LogicalAnd"
+	}
+
+	return
+}

main.go (18 changed lines)
View File

@@ -1,23 +1,9 @@
 package arf
 
-import "os"
 import "io"
-import "path/filepath"
-// import "github.com/sashakoshka/arf/lexer"
+import "git.tebibyte.media/sashakoshka/arf/parser"
 
 func CompileModule (modulePath string, output io.Writer) (err error) {
-	moduleFiles, err := os.ReadDir(modulePath)
-	if err != nil { return err }
-
-	// var moduleTokens []lexer.Token
-
-	for _, entry := range moduleFiles {
-		if filepath.Ext(entry.Name()) != ".arf" || entry.IsDir() {
-			continue
-		}
-
-		// tokens, err := lexer.Tokenize()
-		// if err != nil { return err }
-	}
-
+	_, err = parser.Parse(modulePath)
 	return
 }

parser/meta.go (new file, 44 lines)
View File

@@ -0,0 +1,44 @@
package parser

import "git.tebibyte.media/sashakoshka/arf/file"
import "git.tebibyte.media/sashakoshka/arf/lexer"

// parseMeta parsese the metadata header at the top of an arf file.
func (parser *ParsingOperation) parseMeta () (err error) {
	for {
		err = parser.expect (
			lexer.TokenKindName,
			lexer.TokenKindSeparator)
		if err != nil { return }

		if parser.token.Is(lexer.TokenKindSeparator) {
			err = parser.nextToken()
			return
		}

		field := parser.token.Value().(string)

		err = parser.nextToken(lexer.TokenKindString)
		if err != nil { return }
		value := parser.token.Value().(string)

		switch field {
		case "author":
			parser.tree.author = value
		case "license":
			parser.tree.license = value
		case "require":
			parser.tree.requires = append(parser.tree.requires, value)
		default:
			parser.token.NewError (
				"unrecognized metadata field: " + field,
				file.ErrorKindError)
		}

		err = parser.nextToken(lexer.TokenKindNewline)
		if err != nil { return }
		err = parser.nextToken()
		if err != nil { return }
	}
}

parser/parser.go (new file, 110 lines)
View File

@@ -0,0 +1,110 @@
package parser

import "io"
import "os"
import "path/filepath"
import "git.tebibyte.media/sashakoshka/arf/file"
import "git.tebibyte.media/sashakoshka/arf/lexer"

// ParsingOperation holds information about an ongoing parsing operation.
type ParsingOperation struct {
	modulePath string
	token      lexer.Token
	tokens     []lexer.Token
	tokenIndex int
	tree       *SyntaxTree
}

// Parse reads the files located in the module specified by modulePath, and
// converts them into an abstract syntax tree.
func Parse (modulePath string) (tree *SyntaxTree, err error) {
	parser := ParsingOperation { modulePath: modulePath }

	if parser.modulePath[len(parser.modulePath) - 1] != '/' {
		parser.modulePath += "/"
	}

	var moduleFiles []os.DirEntry
	moduleFiles, err = os.ReadDir(parser.modulePath)
	if err != nil { return }

	for _, entry := range moduleFiles {
		if filepath.Ext(entry.Name()) != ".arf" || entry.IsDir() {
			continue
		}

		var sourceFile *file.File
		sourceFile, err = file.Open(parser.modulePath + entry.Name())
		if err != nil { return }

		// parse the tokens into the module
		err = parser.parse(sourceFile)
	}

	tree = parser.tree
	return
}

// parse parses a file and adds it to the syntax tree.
func (parser *ParsingOperation) parse (sourceFile *file.File) (err error) {
	var tokens []lexer.Token
	tokens, err = lexer.Tokenize(sourceFile)
	if err != nil { return }

	// reset the parser
	if parser.tree == nil {
		parser.tree = &SyntaxTree { }
	}
	if len(tokens) == 0 { return }
	parser.tokens = tokens
	parser.token = tokens[0]
	parser.tokenIndex = 0

	err = parser.parseMeta()
	if err != nil { return }

	return
}

// expect takes in a list of allowed token kinds, and returns an error if the
// current token isn't one of them. If the length of allowed is zero, this
// function will not return an error.
func (parser *ParsingOperation) expect (allowed ...lexer.TokenKind) (err error) {
	if len(allowed) == 0 { return }
	for _, kind := range allowed {
		if parser.token.Is(kind) { return }
	}

	message :=
		"unexpected " + parser.token.Kind().Describe() +
		" token, expected "

	for index, allowedItem := range allowed {
		if index > 0 {
			if index == len(allowed) - 1 {
				message += " or "
			} else {
				message += ", "
			}
		}

		message += allowedItem.Describe()
	}

	err = file.NewError (
		parser.token.Location(),
		message, file.ErrorKindError)
	return
}

// nextToken is the same as expect, but it advances to the next token first.
func (parser *ParsingOperation) nextToken (allowed ...lexer.TokenKind) (err error) {
	parser.tokenIndex ++
	if parser.tokenIndex >= len(parser.tokens) { return io.EOF }
	parser.token = parser.tokens[parser.tokenIndex]

	err = parser.expect(allowed...)
	return
}
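expect is what produces the "list of expected token kinds" mentioned in the commit log: it concatenates the Describe text of every allowed kind into one message. The sketch below pulls just that formatting logic into a hypothetical standalone expectMessage helper so the output can be seen in isolation; in the real parser this string is built inside ParsingOperation.expect and wrapped in file.NewError at the current token's location, and the TokenKind values here are simplified stand-ins.

package main

import "fmt"

// Simplified stand-ins for the lexer's TokenKind; the real Describe method
// lives in lexer/token.go (shown earlier in this diff).
type TokenKind int

const (
	TokenKindNewline TokenKind = iota
	TokenKindString
	TokenKindName
	TokenKindSeparator
)

func (kind TokenKind) Describe () (description string) {
	switch kind {
	case TokenKindNewline:
		description = "Newline"
	case TokenKindString:
		description = "String"
	case TokenKindName:
		description = "Name"
	case TokenKindSeparator:
		description = "Separator"
	}
	return
}

// expectMessage rebuilds the message format used by ParsingOperation.expect:
// "unexpected X token, expected A, B or C".
func expectMessage (got TokenKind, allowed ...TokenKind) (message string) {
	message = "unexpected " + got.Describe() + " token, expected "
	for index, allowedItem := range allowed {
		if index > 0 {
			if index == len(allowed) - 1 {
				message += " or "
			} else {
				message += ", "
			}
		}
		message += allowedItem.Describe()
	}
	return
}

func main () {
	// Prints: unexpected Name token, expected String, Newline or Separator
	fmt.Println(expectMessage(
		TokenKindName,
		TokenKindString, TokenKindNewline, TokenKindSeparator))
}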

parser/parser_test.go (new file, 33 lines)
View File

@@ -0,0 +1,33 @@
package parser

import "reflect"
import "testing"

func checkTree (modulePath string, correct *SyntaxTree, test *testing.T) {
	tree, err := Parse(modulePath)

	if err != nil {
		test.Log("returned error:")
		test.Log(err.Error())
		test.Fail()
		return
	}

	if !reflect.DeepEqual(tree, correct) {
		test.Log("trees not equal")
		test.Fail()
		return
	}
}

func TestMeta (test *testing.T) {
	checkTree("../tests/parser/meta", &SyntaxTree {
		license: "GPLv3",
		author:  "Sasha Koshka",

		requires: []string {
			"someModule",
			"otherModule",
		},
	}, test)
}

parser/tree.go (new file, 11 lines)
View File

@@ -0,0 +1,11 @@
package parser

// SyntaxTree represents an abstract syntax tree. It covers an entire module. It
// can be expected to be syntactically correct, but it might not be semantically
// correct (because it has not been analyzed yet.)
type SyntaxTree struct {
	license string
	author  string

	requires []string
}

View File

@@ -1,2 +1,3 @@
+:arf
 --- rw -> -349820394 932748397 239485.37520 "hello world!\n" 'E' helloWorld:.[]{}
 + - ++ -- * / @ ! % ~ < << > >> | || & &&

View File

@@ -1,3 +1,4 @@
+:arf
 line1
 	line2
 		line3

View File

@@ -1,3 +1,4 @@
+:arf
 83628266
 0b100111111000001000011101010
 0x4Fc10Ea

View File

@@ -1,3 +1,4 @@
+:arf
 "hello world!\a\b\f\n\r\t\v\'\"\\"
 '\a' '\b' '\f' '\n' '\r' '\t' '\v' '\'' '\"' '\\'
 "hello world \x40\u0040\U00000040!"

View File

@@ -0,0 +1,41 @@
:arf
author "Sasha Koshka"
license "GPLv3"
require "io"
---
# this is a global variable
data wn helloText:String "Hello, world!"
# this is a struct definition
type rr Greeter:Obj
# "Hi." is a string constant. all Greeters will be initialized with a
# pointer to it. I don't know really it depends on what I decide that
# a String type even is.
wr text:String "Hi."
"sdfdsf" "ahh"
"asdf"
# this is a function
func rr main
> argc:Int
> argv:{String}
< status:Int 0
---
let greeter:Greeter:mut
greeter.setText helloText
greeter.greet
# this is a member function
func rr greet
@ greeter:{Greeter}
---
io.println greeter.text
# this is mutator member function
func rr setText
@ greeter:{Greeter}
> text:String
---
greeter.text.set text

View File

@@ -0,0 +1,6 @@
:arf
author "Sasha Koshka"
license "GPLv3"
require "someModule"
require "otherModule"
---