// Package parser parses arf module source files into abstract syntax trees.
package parser
import (
	"io"
	"os"
	"path/filepath"

	"git.tebibyte.media/arf/arf/file"
	"git.tebibyte.media/arf/arf/infoerr"
	"git.tebibyte.media/arf/arf/lexer"
)
// ParsingOperation holds information about an ongoing parsing operation.
type ParsingOperation struct {
	modulePath string // path of the module directory (Fetch appends a trailing slash)
	token lexer.Token // current token, always tokens[tokenIndex]
	tokens []lexer.Token // token stream of the file currently being parsed
	tokenIndex int // index of token within tokens
	skimming bool // true when only skimming rather than fully parsing

	tree SyntaxTree // the syntax tree being built up across the module's files
}
2022-09-05 17:31:38 +00:00
// Fetch returns the parsed module located at the specified path, and returns an
// abstract syntax tree. If the module has not yet been parsed, it parses it
// first.
2022-09-05 17:46:10 +00:00
func Fetch (modulePath string, skim bool) (tree SyntaxTree, err error) {
2022-09-05 17:31:38 +00:00
if modulePath[0] != '/' {
panic("module path did not begin at filesystem root")
}
// try to hit cache
cached, exists := cache[modulePath]
2022-09-05 17:46:10 +00:00
if exists && !(!skim && cached.skimmed){
2022-09-05 17:31:38 +00:00
tree = cached.tree
return
}
// miss, so parse the module.
parser := ParsingOperation {
modulePath: modulePath,
skimming: skim,
tree: SyntaxTree {
requires: make(map[string] string),
sections: make(map[string] Section),
},
}
2022-08-12 16:55:17 +00:00
if parser.modulePath[len(parser.modulePath) - 1] != '/' {
parser.modulePath += "/"
}
var moduleFiles []os.DirEntry
moduleFiles, err = os.ReadDir(parser.modulePath)
if err != nil { return }
for _, entry := range moduleFiles {
if filepath.Ext(entry.Name()) != ".arf" || entry.IsDir() {
continue
}
var sourceFile *file.File
sourceFile, err = file.Open(parser.modulePath + entry.Name())
if err != nil { return }
// parse the tokens into the module
err = parser.parse(sourceFile)
}
tree = parser.tree
2022-09-05 17:31:38 +00:00
// cache tree
cache[modulePath] = cacheItem {
tree: tree,
skimmed: false,
}
return
}
// parse parses a file and adds it to the syntax tree.
func (parser *ParsingOperation) parse (sourceFile *file.File) (err error) {
var tokens []lexer.Token
tokens, err = lexer.Tokenize(sourceFile)
if err != nil { return }
// reset the parser
if len(tokens) == 0 { return }
parser.tokens = tokens
parser.token = tokens[0]
parser.tokenIndex = 0
err = parser.parseMeta()
if err != nil { return }
2022-08-15 02:38:57 +00:00
err = parser.parseBody()
if err != nil { return }
return
}
// expect takes in a list of allowed token kinds, and returns an error if the
// current token isn't one of them. If the length of allowed is zero, this
// function will not return an error.
2022-08-12 21:22:51 +00:00
func (parser *ParsingOperation) expect (allowed ...lexer.TokenKind) (err error) {
if len(allowed) == 0 { return }
for _, kind := range allowed {
if parser.token.Is(kind) { return }
}
message :=
"unexpected " + parser.token.Kind().Describe() +
" token, expected "
for index, allowedItem := range allowed {
if index > 0 {
if index == len(allowed) - 1 {
message += " or "
} else {
message += ", "
}
}
message += allowedItem.Describe()
}
err = infoerr.NewError (
2022-08-12 21:22:51 +00:00
parser.token.Location(),
message, infoerr.ErrorKindError)
return
}
// nextToken is the same as expect, but it advances to the next token first.
2022-08-12 21:22:51 +00:00
func (parser *ParsingOperation) nextToken (allowed ...lexer.TokenKind) (err error) {
parser.tokenIndex ++
if parser.tokenIndex >= len(parser.tokens) { return io.EOF }
parser.token = parser.tokens[parser.tokenIndex]
2022-08-12 21:22:51 +00:00
err = parser.expect(allowed...)
return
}
2022-08-17 16:39:26 +00:00
// previousToken goes back one token. If the parser is already at the beginning,
// this does nothing.
func (parser *ParsingOperation) previousToken () {
parser.tokenIndex --
if parser.tokenIndex < 0 { parser.tokenIndex = 0 }
parser.token = parser.tokens[parser.tokenIndex]
return
}
// skipIndentLevel advances the parser, ignoring every line with an indentation
// equal to or greater than the specified indent. Lines inside an unclosed
// brace, paren, or bracket pair are skipped regardless of their indentation.
// Returns the error (possibly io.EOF) from advancing the token stream.
func (parser *ParsingOperation) skipIndentLevel (indent int) (err error) {
	// nesting depth of each bracket kind encountered while skipping
	braceLevel := 0
	parenLevel := 0
	bracketLevel := 0

	for {
		if parser.token.Is(lexer.TokenKindNewline) {
			err = parser.nextToken()
			if err != nil { return }

			// a line ends the skip when it does not start with an
			// indent token of at least the target depth (indent
			// tokens carry their depth as an int value) ...
			shouldBreak :=
				!parser.token.Is(lexer.TokenKindIndent) ||
				parser.token.Value().(int) < indent

			// ... but only while no bracket pair is left open
			shouldBreak =
				shouldBreak &&
				braceLevel < 1 &&
				parenLevel < 1 &&
				bracketLevel < 1

			if shouldBreak { return }
		}

		// track bracket nesting so multi-line bracketed constructs
		// are skipped whole
		switch parser.token.Kind() {
		case lexer.TokenKindLBrace: braceLevel ++
		case lexer.TokenKindRBrace: braceLevel --
		case lexer.TokenKindLParen: parenLevel ++
		case lexer.TokenKindRParen: parenLevel --
		case lexer.TokenKindLBracket: bracketLevel ++
		case lexer.TokenKindRBracket: bracketLevel --
		}

		err = parser.nextToken()
		if err != nil { return }
	}
}
2022-09-16 16:27:13 +00:00
// skipWhitespace skips over newlines and indent tokens.
func (parser *ParsingOperation) skipWhitespace () (err error) {
for {
isWhitespace :=
parser.token.Is(lexer.TokenKindIndent) ||
parser.token.Is(lexer.TokenKindNewline)
if !isWhitespace {
break
}
err = parser.nextToken()
if err != nil { return }
}
return
}