Compare commits

...

14 Commits

Author SHA1 Message Date
714aca5196 Fix parseMeta not getting enough tokens
Parser now passes TestMeta
2022-08-12 17:12:38 -05:00
033e64fc54 Parser can now print out a list of expected token kinds 2022-08-12 17:09:37 -05:00
b3071d4ac9 Token kind values can now be described 2022-08-12 16:30:32 -05:00
f23c3a234a Added metadata parser 2022-08-12 16:22:51 -05:00
856d5763d3 Lexer tokens are now created by the lexer
This is so positional information can be accurately embedded into them.
2022-08-12 14:34:07 -05:00
accf528869 Locations and tokens are now capable of creating errors on their own 2022-08-12 13:51:38 -05:00
7914f0df45 Location now stores width instead of Error 2022-08-12 13:43:09 -05:00
050c956787 Added expect and nextToken methods to parser 2022-08-12 13:33:21 -05:00
18bd681082 Parser now understands the separation between files
This needs to be done because each file has a metadata section at the top.
2022-08-12 12:02:20 -05:00
2019c67bbb Created basic test for parser 2022-08-12 11:55:17 -05:00
f4f19a809a Lexer now eats :arf symbol at file beginning 2022-08-12 10:38:23 -05:00
c09c9860b8 Parser tests are now arf files 2022-08-12 10:26:16 -05:00
81b47f7734 Replaced all occurences of github.com with git.tebibyte.media 2022-08-12 10:21:36 -05:00
09170e390d Created base for parser
The parser now handles file opening and invokes the lexer.
2022-08-12 10:11:43 -05:00
21 changed files with 459 additions and 174 deletions

View File

@ -2,7 +2,7 @@ package arfc
import "os"
import "fmt"
import "github.com/sashakoshka/arf"
import "git.tebibyte.media/sashakoshka/arf"
func main () {
if len(os.Args) != 2 {

View File

@ -12,7 +12,6 @@ const (
type Error struct {
Location
width int
message string
kind ErrorKind
}
@ -20,15 +19,13 @@ type Error struct {
// NewError creates a new error at the specified location.
func NewError (
location Location,
width int,
message string,
kind ErrorKind,
) (
err *Error,
err Error,
) {
return &Error {
return Error {
Location: location,
width: width,
message: message,
kind: kind,
}

View File

@ -103,10 +103,11 @@ func (file *File) Close () {
// Location returns a location struct describing the current position inside of
// the file. This can be stored and used to print errors.
func (file *File) Location () (location Location) {
func (file *File) Location (width int) (location Location) {
return Location {
file: file,
row: file.currentLine,
column: file.currentColumn,
width: width,
}
}

View File

@ -6,4 +6,10 @@ type Location struct {
file *File
row int
column int
width int
}
// NewError creates a new error at this location.
func (location Location) NewError (message string, kind ErrorKind) (err Error) {
return NewError(location, message, kind)
}
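The net effect of these two hunks is that positional width now travels with Location, and errors are created from a location instead of carrying their own width. Below is a minimal, self-contained sketch of that refactored API, assuming simplified stand-in types; the names follow the diff, but the example values are illustrative and this is not the actual arf file package.

package main

import "fmt"

// Simplified stand-ins mirroring the refactored file package above.
type ErrorKind int

const ErrorKindError ErrorKind = iota

type Location struct {
	row    int
	column int
	width  int // width now lives on Location instead of Error
}

type Error struct {
	Location
	message string
	kind    ErrorKind
}

// NewError no longer takes a width and returns an Error by value.
func NewError (location Location, message string, kind ErrorKind) Error {
	return Error { Location: location, message: message, kind: kind }
}

// NewError on Location lets callers create an error straight from a position.
func (location Location) NewError (message string, kind ErrorKind) Error {
	return NewError(location, message, kind)
}

func main () {
	// illustrative position; in the lexer this comes from file.Location(1)
	location := Location { row: 3, column: 7, width: 1 }
	err := location.NewError("not an arf file", ErrorKindError)
	fmt.Println(err.message, err.row, err.column, err.width)
}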

2
go.mod
View File

@ -1,3 +1,3 @@
module github.com/sashakoshka/arf
module git.tebibyte.media/sashakoshka/arf
go 1.18

View File

@ -1,8 +1,8 @@
package lexer
import "io"
import "github.com/sashakoshka/arf/file"
import "github.com/sashakoshka/arf/types"
import "git.tebibyte.media/sashakoshka/arf/file"
import "git.tebibyte.media/sashakoshka/arf/types"
// LexingOperation holds information about an ongoing lexing operation.
type LexingOperation struct {
@ -28,6 +28,20 @@ func Tokenize (file *file.File) (tokens []Token, err error) {
// tokenize converts a file into a slice of tokens (lexemes). It will always
// return a non-nil error, but if nothing went wrong it will return io.EOF.
func (lexer *LexingOperation) tokenize () (err error) {
// check to see if the beginning of the file says :arf
var shebangCheck = []rune(":arf\n")
for index := 0; index < 5; index ++ {
err = lexer.nextRune()
if err != nil || shebangCheck[index] != lexer.char {
err = file.NewError (
lexer.file.Location(1),
"not an arf file",
file.ErrorKindError)
return
}
}
err = lexer.nextRune()
if err != nil { return }
@ -52,13 +66,18 @@ func (lexer *LexingOperation) tokenize () (err error) {
}
if lexer.tokens[len(lexer.tokens) - 1].kind != TokenKindNewline {
lexer.addToken(Token { kind: TokenKindNewline })
token := lexer.newToken()
token.kind = TokenKindNewline
lexer.addToken(token)
}
return
}
func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) {
token := lexer.newToken()
token.kind = TokenKindName
got := ""
for {
@ -72,7 +91,7 @@ func (lexer *LexingOperation) tokenizeAlphaBeginning () (err error) {
lexer.nextRune()
}
token := Token { kind: TokenKindName, value: got }
token.value = got
if len(got) == 2 {
firstValid := got[0] == 'n' || got[0] == 'r' || got[0] == 'w'
@ -105,11 +124,14 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
err = lexer.nextRune()
file.NewError (
lexer.file.Location(), 1,
lexer.file.Location(1),
"tab not used as indent",
file.ErrorKindWarn).Print()
return
}
token := lexer.newToken()
token.kind = TokenKindIndent
// eat up tabs while increasing the indent level
indentLevel := 0
@ -118,11 +140,9 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
err = lexer.nextRune()
if err != nil { return }
}
lexer.addToken (Token {
kind: TokenKindIndent,
value: indentLevel,
})
token.value = indentLevel
lexer.addToken(token)
case '\n':
// line break
@ -141,48 +161,49 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
lexer.tokens = lexer.tokens[:tokenIndex]
}
lexer.addToken (Token {
kind: TokenKindNewline,
})
token := lexer.newToken()
token.kind = TokenKindNewline
lexer.addToken(token)
err = lexer.nextRune()
case '"':
err = lexer.tokenizeString(false)
case '\'':
err = lexer.tokenizeString(true)
case ':':
lexer.addToken (Token {
kind: TokenKindColon,
})
token := lexer.newToken()
token.kind = TokenKindColon
lexer.addToken(token)
err = lexer.nextRune()
case '.':
lexer.addToken (Token {
kind: TokenKindDot,
})
token := lexer.newToken()
token.kind = TokenKindDot
lexer.addToken(token)
err = lexer.nextRune()
case '[':
lexer.addToken (Token {
kind: TokenKindLBracket,
})
token := lexer.newToken()
token.kind = TokenKindLBracket
lexer.addToken(token)
err = lexer.nextRune()
case ']':
lexer.addToken (Token {
kind: TokenKindRBracket,
})
token := lexer.newToken()
token.kind = TokenKindRBracket
lexer.addToken(token)
err = lexer.nextRune()
case '{':
lexer.addToken (Token {
kind: TokenKindLBrace,
})
token := lexer.newToken()
token.kind = TokenKindLBrace
lexer.addToken(token)
err = lexer.nextRune()
case '}':
lexer.addToken (Token {
kind: TokenKindRBrace,
})
token := lexer.newToken()
token.kind = TokenKindRBrace
lexer.addToken(token)
err = lexer.nextRune()
case '+':
err = lexer.nextRune()
if err != nil { return }
token := Token { kind: TokenKindPlus }
token := lexer.newToken()
token.kind = TokenKindPlus
if lexer.char == '+' {
token.kind = TokenKindIncrement
}
@ -191,39 +212,40 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
case '-':
err = lexer.tokenizeDashBeginning()
case '*':
lexer.addToken (Token {
kind: TokenKindAsterisk,
})
token := lexer.newToken()
token.kind = TokenKindAsterisk
lexer.addToken(token)
err = lexer.nextRune()
case '/':
lexer.addToken (Token {
kind: TokenKindSlash,
})
token := lexer.newToken()
token.kind = TokenKindSlash
lexer.addToken(token)
err = lexer.nextRune()
case '@':
lexer.addToken (Token {
kind: TokenKindAt,
})
token := lexer.newToken()
token.kind = TokenKindAt
lexer.addToken(token)
err = lexer.nextRune()
case '!':
lexer.addToken (Token {
kind: TokenKindExclamation,
})
token := lexer.newToken()
token.kind = TokenKindExclamation
lexer.addToken(token)
err = lexer.nextRune()
case '%':
lexer.addToken (Token {
kind: TokenKindPercent,
})
token := lexer.newToken()
token.kind = TokenKindPercent
lexer.addToken(token)
err = lexer.nextRune()
case '~':
lexer.addToken (Token {
kind: TokenKindTilde,
})
token := lexer.newToken()
token.kind = TokenKindTilde
lexer.addToken(token)
err = lexer.nextRune()
case '<':
err = lexer.nextRune()
if err != nil { return }
token := Token { kind: TokenKindLessThan }
token := lexer.newToken()
token.kind = TokenKindLessThan
if lexer.char == '<' {
token.kind = TokenKindLShift
}
@ -232,7 +254,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
case '>':
err = lexer.nextRune()
if err != nil { return }
token := Token { kind: TokenKindGreaterThan }
token := lexer.newToken()
token.kind = TokenKindGreaterThan
if lexer.char == '>' {
token.kind = TokenKindRShift
}
@ -241,7 +264,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
case '|':
err = lexer.nextRune()
if err != nil { return }
token := Token { kind: TokenKindBinaryOr }
token := lexer.newToken()
token.kind = TokenKindBinaryOr
if lexer.char == '|' {
token.kind = TokenKindLogicalOr
}
@ -250,7 +274,8 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
case '&':
err = lexer.nextRune()
if err != nil { return }
token := Token { kind: TokenKindBinaryAnd }
token := lexer.newToken()
token.kind = TokenKindBinaryAnd
if lexer.char == '&' {
token.kind = TokenKindLogicalAnd
}
@ -258,7 +283,7 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
err = lexer.nextRune()
default:
err = file.NewError (
lexer.file.Location(), 1,
lexer.file.Location(1),
"unexpected symbol character " +
string(lexer.char),
file.ErrorKindError)
@ -273,7 +298,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
if err != nil { return }
if lexer.char == '-' {
token := Token { kind: TokenKindDecrement }
token := lexer.newToken()
token.kind = TokenKindDecrement
err = lexer.nextRune()
if err != nil { return }
@ -284,7 +310,8 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
}
lexer.addToken(token)
} else if lexer.char == '>' {
token := Token { kind: TokenKindReturnDirection }
token := lexer.newToken()
token.kind = TokenKindReturnDirection
err = lexer.nextRune()
if err != nil { return }
@ -293,13 +320,19 @@ func (lexer *LexingOperation) tokenizeDashBeginning () (err error) {
} else if lexer.char >= '0' && lexer.char <= '9' {
lexer.tokenizeNumberBeginning(true)
} else {
token := Token { kind: TokenKindMinus }
token := lexer.newToken()
token.kind = TokenKindMinus
lexer.addToken(token)
}
return
}
// newToken creates a new token from the lexer's current position in the file.
func (lexer *LexingOperation) newToken () (token Token) {
return Token { location: lexer.file.Location(1) }
}
// addToken adds a new token to the lexer's token slice.
func (lexer *LexingOperation) addToken (token Token) {
lexer.tokens = append(lexer.tokens, token)
@ -320,7 +353,7 @@ func (lexer *LexingOperation) nextRune () (err error) {
lexer.char, _, err = lexer.file.ReadRune()
if err != nil && err != io.EOF {
return file.NewError (
lexer.file.Location(), 1,
lexer.file.Location(1),
err.Error(), file.ErrorKindError)
}
return
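Nearly every hunk in this file swaps an inline Token literal for lexer.newToken(), so each token is stamped with the position it was read from (per the "Lexer tokens are now created by the lexer" commit). Here is a self-contained sketch of that pattern, using simplified stand-ins for Token, Location, and LexingOperation rather than the real lexer types.

package main

import "fmt"

type Location struct { row, column, width int }

type TokenKind int

const (
	TokenKindNewline TokenKind = iota
	TokenKindIndent
)

type Token struct {
	location Location
	kind     TokenKind
	value    any
}

type LexingOperation struct {
	currentLine   int
	currentColumn int
	tokens        []Token
}

// newToken stamps the lexer's current position onto a fresh token.
func (lexer *LexingOperation) newToken () Token {
	return Token { location: Location {
		row:    lexer.currentLine,
		column: lexer.currentColumn,
		width:  1,
	}}
}

// addToken appends a finished token to the lexer's token slice.
func (lexer *LexingOperation) addToken (token Token) {
	lexer.tokens = append(lexer.tokens, token)
}

func main () {
	lexer := &LexingOperation { currentLine: 2, currentColumn: 1 }
	token := lexer.newToken()
	token.kind  = TokenKindIndent
	token.value = 3
	lexer.addToken(token)
	fmt.Printf("%+v\n", lexer.tokens[0])
}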

View File

@ -1,8 +1,8 @@
package lexer
import "testing"
import "github.com/sashakoshka/arf/file"
import "github.com/sashakoshka/arf/types"
import "git.tebibyte.media/sashakoshka/arf/file"
import "git.tebibyte.media/sashakoshka/arf/types"
func checkTokenSlice (filePath string, correct []Token, test *testing.T) {
file, err := file.Open(filePath)
@ -47,7 +47,7 @@ func checkTokenSlice (filePath string, correct []Token, test *testing.T) {
}
func TestTokenizeAll (test *testing.T) {
checkTokenSlice("../tests/lexer/all", []Token {
checkTokenSlice("../tests/lexer/all.arf", []Token {
Token { kind: TokenKindSeparator },
Token { kind: TokenKindPermission, value: types.Permission {
Internal: types.ModeRead,
@ -90,7 +90,7 @@ func TestTokenizeAll (test *testing.T) {
}
func TestTokenizeNumbers (test *testing.T) {
checkTokenSlice("../tests/lexer/numbers", []Token {
checkTokenSlice("../tests/lexer/numbers.arf", []Token {
Token { kind: TokenKindUInt, value: uint64(83628266) },
Token { kind: TokenKindNewline },
Token { kind: TokenKindUInt, value: uint64(83628266) },
@ -119,7 +119,7 @@ func TestTokenizeNumbers (test *testing.T) {
}
func TestTokenizeText (test *testing.T) {
checkTokenSlice("../tests/lexer/text", []Token {
checkTokenSlice("../tests/lexer/text.arf", []Token {
Token { kind: TokenKindString, value: "hello world!\a\b\f\n\r\t\v'\"\\" },
Token { kind: TokenKindNewline },
Token { kind: TokenKindRune, value: '\a' },
@ -139,7 +139,7 @@ func TestTokenizeText (test *testing.T) {
}
func TestTokenizeIndent (test *testing.T) {
checkTokenSlice("../tests/lexer/indent", []Token {
checkTokenSlice("../tests/lexer/indent.arf", []Token {
Token { kind: TokenKindName, value: "line1" },
Token { kind: TokenKindNewline },
Token { kind: TokenKindIndent, value: 1 },

View File

@ -1,6 +1,6 @@
package lexer
import "github.com/sashakoshka/arf/file"
import "git.tebibyte.media/sashakoshka/arf/file"
// tokenizeNumberBeginning lexes a token that starts with a number.
func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
@ -8,6 +8,8 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
var fragment float64
var isFloat bool
token := lexer.newToken()
if lexer.char == '0' {
lexer.nextRune()
@ -23,7 +25,7 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
number, fragment, isFloat, err = lexer.tokenizeNumber(8)
} else {
return file.NewError (
lexer.file.Location(), 1,
lexer.file.Location(1),
"unexpected character in number literal",
file.ErrorKindError)
}
@ -33,8 +35,6 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
if err != nil { return }
token := Token { }
if isFloat {
floatNumber := float64(number) + fragment

View File

@ -1,13 +1,15 @@
package lexer
import "strconv"
import "github.com/sashakoshka/arf/file"
import "git.tebibyte.media/sashakoshka/arf/file"
// tokenizeString tokenizes a string or rune literal.
func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
err = lexer.nextRune()
if err != nil { return }
token := lexer.newToken()
got := ""
for {
@ -38,12 +40,10 @@ func (lexer *LexingOperation) tokenizeString (isRuneLiteral bool) (err error) {
err = lexer.nextRune()
if err != nil { return }
token := Token { }
if isRuneLiteral {
if len(got) > 1 {
err = file.NewError (
lexer.file.Location(), len(got) - 1,
lexer.file.Location(1),
"excess data in rune literal",
file.ErrorKindError)
return
@ -99,7 +99,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
if len(number) < 3 {
err = file.NewError (
lexer.file.Location(), 1,
lexer.file.Location(1),
"octal escape sequence too short",
file.ErrorKindError)
return
@ -133,7 +133,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
if len(number) < want {
err = file.NewError (
lexer.file.Location(), 1,
lexer.file.Location(1),
"hex escape sequence too short ",
file.ErrorKindError)
return
@ -143,7 +143,7 @@ func (lexer *LexingOperation) getEscapeSequence () (result rune, err error) {
result = rune(parsedNumber)
} else {
err = file.NewError (
lexer.file.Location(), 1,
lexer.file.Location(1),
"unknown escape character " +
string(lexer.char), file.ErrorKindError)
return

View File

@ -1,7 +1,7 @@
package lexer
import "fmt"
import "github.com/sashakoshka/arf/file"
import "git.tebibyte.media/sashakoshka/arf/file"
// TokenKind is an enum representing what role a token has.
type TokenKind int
@ -78,7 +78,7 @@ func (token Token) Value () (value any) {
// Equals returns whether this token is equal to another token
func (token Token) Equals (testToken Token) (match bool) {
return token == testToken
return token.value == testToken.value && token.Is(testToken.kind)
}
// Location returns the location of the token in its file.
@ -86,81 +86,15 @@ func (token Token) Location () (location file.Location) {
return token.location
}
// NewError creates a new error at this token's location.
func (token Token) NewError (message string, kind file.ErrorKind) (err file.Error) {
return token.location.NewError(message, kind)
}
// Describe generates a textual description of the token to be used in debug
// logs.
func (token Token) Describe () (description string) {
switch token.kind {
case TokenKindNewline:
description += "Newline"
case TokenKindIndent:
description += "Indent"
case TokenKindSeparator:
description += "Separator"
case TokenKindPermission:
description += "Permission"
case TokenKindReturnDirection:
description += "ReturnDirection"
case TokenKindInt:
description += "Int"
case TokenKindUInt:
description += "UInt"
case TokenKindFloat:
description += "Float"
case TokenKindString:
description += "String"
case TokenKindRune:
description += "Rune"
case TokenKindName:
description += "Name"
case TokenKindColon:
description += "Colon"
case TokenKindDot:
description += "Dot"
case TokenKindLBracket:
description += "LBracket"
case TokenKindRBracket:
description += "RBracket"
case TokenKindLBrace:
description += "LBrace"
case TokenKindRBrace:
description += "RBrace"
case TokenKindPlus:
description += "Plus"
case TokenKindMinus:
description += "Minus"
case TokenKindIncrement:
description += "Increment"
case TokenKindDecrement:
description += "Decrement"
case TokenKindAsterisk:
description += "Asterisk"
case TokenKindSlash:
description += "Slash"
case TokenKindAt:
description += "At"
case TokenKindExclamation:
description += "Exclamation"
case TokenKindPercent:
description += "Percent"
case TokenKindTilde:
description += "Tilde"
case TokenKindLessThan:
description += "LessThan"
case TokenKindLShift:
description += "LShift"
case TokenKindGreaterThan:
description += "GreaterThan"
case TokenKindRShift:
description += "RShift"
case TokenKindBinaryOr:
description += "BinaryOr"
case TokenKindLogicalOr:
description += "LogicalOr"
case TokenKindBinaryAnd:
description += "BinaryAnd"
case TokenKindLogicalAnd:
description += "LogicalAnd"
}
description = token.kind.Describe()
if token.value != nil {
description += fmt.Sprint(": ", token.value)
@ -168,3 +102,82 @@ func (token Token) Describe () (description string) {
return
}
// Describe generates a textual description of the token kind to be used in
// debug logs.
func (tokenKind TokenKind) Describe () (description string) {
switch tokenKind {
case TokenKindNewline:
description = "Newline"
case TokenKindIndent:
description = "Indent"
case TokenKindSeparator:
description = "Separator"
case TokenKindPermission:
description = "Permission"
case TokenKindReturnDirection:
description = "ReturnDirection"
case TokenKindInt:
description = "Int"
case TokenKindUInt:
description = "UInt"
case TokenKindFloat:
description = "Float"
case TokenKindString:
description = "String"
case TokenKindRune:
description = "Rune"
case TokenKindName:
description = "Name"
case TokenKindColon:
description = "Colon"
case TokenKindDot:
description = "Dot"
case TokenKindLBracket:
description = "LBracket"
case TokenKindRBracket:
description = "RBracket"
case TokenKindLBrace:
description = "LBrace"
case TokenKindRBrace:
description = "RBrace"
case TokenKindPlus:
description = "Plus"
case TokenKindMinus:
description = "Minus"
case TokenKindIncrement:
description = "Increment"
case TokenKindDecrement:
description = "Decrement"
case TokenKindAsterisk:
description = "Asterisk"
case TokenKindSlash:
description = "Slash"
case TokenKindAt:
description = "At"
case TokenKindExclamation:
description = "Exclamation"
case TokenKindPercent:
description = "Percent"
case TokenKindTilde:
description = "Tilde"
case TokenKindLessThan:
description = "LessThan"
case TokenKindLShift:
description = "LShift"
case TokenKindGreaterThan:
description = "GreaterThan"
case TokenKindRShift:
description = "RShift"
case TokenKindBinaryOr:
description = "BinaryOr"
case TokenKindLogicalOr:
description = "LogicalOr"
case TokenKindBinaryAnd:
description = "BinaryAnd"
case TokenKindLogicalAnd:
description = "LogicalAnd"
}
return
}
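Two behavioral changes in this file are easy to miss: Equals now compares only value and kind (tokens carry a location now, so whole-struct comparison would break the lexer tests), and Describe moved onto TokenKind so the parser can name expected kinds in its error messages. A rough stand-alone illustration of the new Equals semantics, with cut-down stand-in types:

package main

import "fmt"

type Location struct { row, column, width int }

type TokenKind int

const TokenKindName TokenKind = iota

type Token struct {
	location Location
	kind     TokenKind
	value    any
}

func (token Token) Is (kind TokenKind) bool { return token.kind == kind }

// Equals ignores location so tokens produced by the lexer still match the
// bare expected tokens written out in the tests.
func (token Token) Equals (testToken Token) bool {
	return token.value == testToken.value && token.Is(testToken.kind)
}

func main () {
	a := Token { location: Location { row: 1, column: 4 }, kind: TokenKindName, value: "x" }
	b := Token { kind: TokenKindName, value: "x" } // no location, as in test expectations
	fmt.Println(a == b)      // false: struct comparison now includes location
	fmt.Println(a.Equals(b)) // true: kind and value match
}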

18
main.go
View File

@ -1,23 +1,9 @@
package arf
import "os"
import "io"
import "path/filepath"
// import "github.com/sashakoshka/arf/lexer"
import "git.tebibyte.media/sashakoshka/arf/parser"
func CompileModule (modulePath string, output io.Writer) (err error) {
moduleFiles, err := os.ReadDir(modulePath)
if err != nil { return err }
// var moduleTokens []lexer.Token
for _, entry := range moduleFiles {
if filepath.Ext(entry.Name()) != ".arf" || entry.IsDir() {
continue
}
// tokens, err := lexer.Tokenize()
// if err != nil { return err }
}
_, err = parser.Parse(modulePath)
return
}

44
parser/meta.go Normal file
View File

@ -0,0 +1,44 @@
package parser
import "git.tebibyte.media/sashakoshka/arf/file"
import "git.tebibyte.media/sashakoshka/arf/lexer"
// parseMeta parses the metadata header at the top of an arf file.
func (parser *ParsingOperation) parseMeta () (err error) {
for {
err = parser.expect (
lexer.TokenKindName,
lexer.TokenKindSeparator)
if err != nil { return }
if parser.token.Is(lexer.TokenKindSeparator) {
err = parser.nextToken()
return
}
field := parser.token.Value().(string)
err = parser.nextToken(lexer.TokenKindString)
if err != nil { return }
value := parser.token.Value().(string)
switch field {
case "author":
parser.tree.author = value
case "license":
parser.tree.license = value
case "require":
parser.tree.requires = append(parser.tree.requires, value)
default:
parser.token.NewError (
"unrecognized metadata field: " + field,
file.ErrorKindError)
}
err = parser.nextToken(lexer.TokenKindNewline)
if err != nil { return }
err = parser.nextToken()
if err != nil { return }
}
}
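parseMeta loops over name/value pairs until it reaches the separator, mapping each recognized field onto the syntax tree. The sketch below models only that field-to-tree mapping, using plain string pairs in place of lexer tokens; this parseMeta is a stand-in with a different signature, not the real parser method.

package main

import "fmt"

type SyntaxTree struct {
	license  string
	author   string
	requires []string
}

// parseMeta maps metadata fields onto the tree, collecting repeated
// "require" fields into a slice.
func parseMeta (pairs [][2]string) SyntaxTree {
	var tree SyntaxTree
	for _, pair := range pairs {
		field, value := pair[0], pair[1]
		switch field {
		case "author":
			tree.author = value
		case "license":
			tree.license = value
		case "require":
			tree.requires = append(tree.requires, value)
		default:
			fmt.Println("unrecognized metadata field:", field)
		}
	}
	return tree
}

func main () {
	// mirrors tests/parser/meta shown at the bottom of this compare view
	tree := parseMeta([][2]string {
		{ "author", "Sasha Koshka" },
		{ "license", "GPLv3" },
		{ "require", "someModule" },
		{ "require", "otherModule" },
	})
	fmt.Printf("%+v\n", tree)
}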

110
parser/parser.go Normal file
View File

@ -0,0 +1,110 @@
package parser
import "io"
import "os"
import "path/filepath"
import "git.tebibyte.media/sashakoshka/arf/file"
import "git.tebibyte.media/sashakoshka/arf/lexer"
// ParsingOperation holds information about an ongoing parsing operation.
type ParsingOperation struct {
modulePath string
token lexer.Token
tokens []lexer.Token
tokenIndex int
tree *SyntaxTree
}
// Parse reads the files located in the module specified by modulePath, and
// converts them into an abstract syntax tree.
func Parse (modulePath string) (tree *SyntaxTree, err error) {
parser := ParsingOperation { modulePath: modulePath }
if parser.modulePath[len(parser.modulePath) - 1] != '/' {
parser.modulePath += "/"
}
var moduleFiles []os.DirEntry
moduleFiles, err = os.ReadDir(parser.modulePath)
if err != nil { return }
for _, entry := range moduleFiles {
if filepath.Ext(entry.Name()) != ".arf" || entry.IsDir() {
continue
}
var sourceFile *file.File
sourceFile, err = file.Open(parser.modulePath + entry.Name())
if err != nil { return }
// parse the tokens into the module
err = parser.parse(sourceFile)
}
tree = parser.tree
return
}
// parse parses a file and adds it to the syntax tree.
func (parser *ParsingOperation) parse (sourceFile *file.File) (err error) {
var tokens []lexer.Token
tokens, err = lexer.Tokenize(sourceFile)
if err != nil { return }
// reset the parser
if parser.tree == nil {
parser.tree = &SyntaxTree { }
}
if len(tokens) == 0 { return }
parser.tokens = tokens
parser.token = tokens[0]
parser.tokenIndex = 0
err = parser.parseMeta()
if err != nil { return }
return
}
// expect takes in a list of allowed token kinds, and returns an error if the
// current token isn't one of them. If the length of allowed is zero, this
// function will not return an error.
func (parser *ParsingOperation) expect (allowed ...lexer.TokenKind) (err error) {
if len(allowed) == 0 { return }
for _, kind := range allowed {
if parser.token.Is(kind) { return }
}
message :=
"unexpected " + parser.token.Kind().Describe() +
" token, expected "
for index, allowedItem := range allowed {
if index > 0 {
if index == len(allowed) - 1 {
message += " or "
} else {
message += ", "
}
}
message += allowedItem.Describe()
}
err = file.NewError (
parser.token.Location(),
message, file.ErrorKindError)
return
}
// nextToken is the same as expect, but it advances to the next token first.
func (parser *ParsingOperation) nextToken (allowed ...lexer.TokenKind) (err error) {
parser.tokenIndex ++
if parser.tokenIndex >= len(parser.tokens) { return io.EOF }
parser.token = parser.tokens[parser.tokenIndex]
err = parser.expect(allowed...)
return
}
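expect is the parser's central validation step: callers such as parseMeta pass the kinds they will accept, and on a mismatch the kind descriptions are joined with commas and a trailing "or". A stand-alone sketch of just that message assembly, with a cut-down TokenKind standing in for the lexer's:

package main

import "fmt"

type TokenKind int

const (
	TokenKindName TokenKind = iota
	TokenKindSeparator
	TokenKindString
)

func (kind TokenKind) Describe () string {
	switch kind {
	case TokenKindName:      return "Name"
	case TokenKindSeparator: return "Separator"
	case TokenKindString:    return "String"
	}
	return "Unknown"
}

// expectMessage builds the error text the same way the expect hunk above does.
func expectMessage (got TokenKind, allowed ...TokenKind) string {
	message := "unexpected " + got.Describe() + " token, expected "
	for index, allowedItem := range allowed {
		if index > 0 {
			if index == len(allowed) - 1 {
				message += " or "
			} else {
				message += ", "
			}
		}
		message += allowedItem.Describe()
	}
	return message
}

func main () {
	// e.g. parser.expect(lexer.TokenKindName, lexer.TokenKindSeparator)
	fmt.Println(expectMessage(TokenKindString, TokenKindName, TokenKindSeparator))
	// prints: unexpected String token, expected Name or Separator
}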

33
parser/parser_test.go Normal file
View File

@ -0,0 +1,33 @@
package parser
import "reflect"
import "testing"
func checkTree (modulePath string, correct *SyntaxTree, test *testing.T) {
tree, err := Parse(modulePath)
if err != nil {
test.Log("returned error:")
test.Log(err.Error())
test.Fail()
return
}
if !reflect.DeepEqual(tree, correct) {
test.Log("trees not equal")
test.Fail()
return
}
}
func TestMeta (test *testing.T) {
checkTree("../tests/parser/meta",&SyntaxTree {
license: "GPLv3",
author: "Sasha Koshka",
requires: []string {
"someModule",
"otherModule",
},
}, test)
}

11
parser/tree.go Normal file
View File

@ -0,0 +1,11 @@
package parser
// SyntaxTree represents an abstract syntax tree. It covers an entire module. It
// can be expected to be syntactically correct, but it might not be semantically
// correct (because it has not been analyzed yet.)
type SyntaxTree struct {
license string
author string
requires []string
}

View File

@ -1,2 +1,3 @@
:arf
--- rw -> -349820394 932748397 239485.37520 "hello world!\n" 'E' helloWorld:.[]{}
+ - ++ -- * / @ ! % ~ < << > >> | || & &&

View File

@ -1,3 +1,4 @@
:arf
line1
line2
line3

View File

@ -1,3 +1,4 @@
:arf
83628266
0b100111111000001000011101010
0x4Fc10Ea

View File

@ -1,3 +1,4 @@
:arf
"hello world!\a\b\f\n\r\t\v\'\"\\"
'\a' '\b' '\f' '\n' '\r' '\t' '\v' '\'' '\"' '\\'
"hello world \x40\u0040\U00000040!"

View File

@ -0,0 +1,41 @@
:arf
author "Sasha Koshka"
license "GPLv3"
require "io"
---
# this is a global variable
data wn helloText:String "Hello, world!"
# this is a struct definition
type rr Greeter:Obj
# "Hi." is a string constant. all Greeters will be initialized with a
# pointer to it. I don't know really it depends on what I decide that
# a String type even is.
wr text:String "Hi."
"sdfdsf" "ahh"
"asdf"
# this is a function
func rr main
> argc:Int
> argv:{String}
< status:Int 0
---
let greeter:Greeter:mut
greeter.setText helloText
greeter.greet
# this is a member function
func rr greet
@ greeter:{Greeter}
---
io.println greeter.text
# this is mutator member function
func rr setText
@ greeter:{Greeter}
> text:String
---
greeter.text.set text

View File

@ -0,0 +1,6 @@
:arf
author "Sasha Koshka"
license "GPLv3"
require "someModule"
require "otherModule"
---