This repository has been archived on 2024-02-27. You can view files and clone it, but cannot push or open issues or pull requests.
arf/parser/parser.go

218 lines
5.5 KiB
Go
Raw Normal View History

2022-10-11 22:48:55 -06:00
/*
Package parser implements a parser for the ARF language. It contains an abstract
syntax tree (SyntaxTree), various tree nodes, and a function called Fetch that
returns a SyntaxTree for the module located at the given path. Internally, the
parser caches parsing results so Fetch may be called frequently.
Trees returned by this package can be expected to be internally consistent and
syntactically correct, but not semantically correct. Ensuring the semantic
integrity of ARF code is the job of the analyzer package.
This package automatically invokes lexer before parsing module files.
*/
package parser
import "io"
import "os"
import "path/filepath"
2022-08-29 23:11:10 -06:00
import "git.tebibyte.media/arf/arf/file"
import "git.tebibyte.media/arf/arf/lexer"
import "git.tebibyte.media/arf/arf/infoerr"
2022-10-11 21:57:27 -06:00
// parsingOperation holds information about an ongoing parsing operation.
type parsingOperation struct {
modulePath string
token lexer.Token
tokens []lexer.Token
tokenIndex int
skimming bool
2022-08-12 10:55:17 -06:00
tree SyntaxTree
}
2022-10-11 22:48:55 -06:00
// Fetch returns the parsed module located at the specified path as a
// SyntaxTree. If the module has not yet been parsed, it parses it first. If it
// has, it grabs it out of a cache. This function can be called frequently.
2022-09-05 11:46:10 -06:00
func Fetch (modulePath string, skim bool) (tree SyntaxTree, err error) {
2022-09-05 11:31:38 -06:00
if modulePath[0] != '/' {
panic("module path did not begin at filesystem root")
}
// try to hit cache
cached, exists := cache[modulePath]
2022-09-05 11:46:10 -06:00
if exists && !(!skim && cached.skimmed){
2022-09-05 11:31:38 -06:00
tree = cached.tree
return
}
// miss, so parse the module.
2022-10-11 21:57:27 -06:00
parser := parsingOperation {
modulePath: modulePath,
skimming: skim,
tree: SyntaxTree {
requires: make(map[string] string),
sections: make(map[string] Section),
},
}
2022-08-12 10:55:17 -06:00
if parser.modulePath[len(parser.modulePath) - 1] != '/' {
parser.modulePath += "/"
}
var moduleFiles []os.DirEntry
moduleFiles, err = os.ReadDir(parser.modulePath)
if err != nil { return }
for _, entry := range moduleFiles {
if filepath.Ext(entry.Name()) != ".arf" || entry.IsDir() {
continue
}
var sourceFile *file.File
sourceFile, err = file.Open(parser.modulePath + entry.Name())
if err != nil { return }
// parse the tokens into the module
err = parser.parse(sourceFile)
}
tree = parser.tree
2022-09-05 11:31:38 -06:00
// cache tree
cache[modulePath] = cacheItem {
tree: tree,
skimmed: false,
}
return
}
// parse parses a file and adds it to the syntax tree.
2022-10-11 21:57:27 -06:00
func (parser *parsingOperation) parse (sourceFile *file.File) (err error) {
var tokens []lexer.Token
tokens, err = lexer.Tokenize(sourceFile)
if err != nil { return }
// reset the parser
if len(tokens) == 0 { return }
parser.tokens = tokens
parser.token = tokens[0]
parser.tokenIndex = 0
err = parser.parseMeta()
if err != nil { return }
2022-08-14 20:38:57 -06:00
err = parser.parseBody()
if err != nil { return }
return
}
// expect takes in a list of allowed token kinds, and returns an error if the
// current token isn't one of them. If the length of allowed is zero, this
// function will not return an error.
2022-10-11 21:57:27 -06:00
func (parser *parsingOperation) expect (allowed ...lexer.TokenKind) (err error) {
if len(allowed) == 0 { return }
for _, kind := range allowed {
if parser.token.Is(kind) { return }
}
message :=
"unexpected " + parser.token.Kind().Describe() +
" token, expected "
for index, allowedItem := range allowed {
if index > 0 {
if index == len(allowed) - 1 {
message += " or "
} else {
message += ", "
}
}
message += allowedItem.Describe()
}
err = infoerr.NewError (
2022-08-12 15:22:51 -06:00
parser.token.Location(),
message, infoerr.ErrorKindError)
return
}
// nextToken is the same as expect, but it advances to the next token first.
2022-10-11 21:57:27 -06:00
func (parser *parsingOperation) nextToken (allowed ...lexer.TokenKind) (err error) {
parser.tokenIndex ++
if parser.tokenIndex >= len(parser.tokens) { return io.EOF }
parser.token = parser.tokens[parser.tokenIndex]
2022-08-12 15:22:51 -06:00
err = parser.expect(allowed...)
return
}
2022-08-17 10:39:26 -06:00
// previousToken goes back one token. If the parser is already at the beginning,
// this does nothing.
2022-10-11 21:57:27 -06:00
func (parser *parsingOperation) previousToken () {
2022-08-17 10:39:26 -06:00
parser.tokenIndex --
if parser.tokenIndex < 0 { parser.tokenIndex = 0 }
parser.token = parser.tokens[parser.tokenIndex]
return
}
2022-09-05 12:56:35 -06:00
// skipIndentLevel advances the parser, ignoring every line with an indentation
// equal to or greater than the specified indent.
2022-10-11 21:57:27 -06:00
func (parser *parsingOperation) skipIndentLevel (indent int) (err error) {
2022-09-29 09:15:58 -06:00
braceLevel := 0
parenLevel := 0
bracketLevel := 0
2022-09-05 12:56:35 -06:00
for {
if parser.token.Is(lexer.TokenKindNewline) {
err = parser.nextToken()
if err != nil { return }
2022-09-05 12:56:35 -06:00
2022-09-29 09:15:58 -06:00
shouldBreak :=
!parser.token.Is(lexer.TokenKindIndent) ||
parser.token.Value().(int) < indent
shouldBreak =
shouldBreak &&
braceLevel < 1 &&
parenLevel < 1 &&
bracketLevel < 1
if shouldBreak { return }
}
2022-09-05 12:56:35 -06:00
2022-09-29 09:15:58 -06:00
switch parser.token.Kind() {
case lexer.TokenKindLBrace: braceLevel ++
case lexer.TokenKindRBrace: braceLevel --
case lexer.TokenKindLParen: parenLevel ++
case lexer.TokenKindRParen: parenLevel --
case lexer.TokenKindLBracket: bracketLevel ++
case lexer.TokenKindRBracket: bracketLevel --
2022-09-05 12:56:35 -06:00
}
err = parser.nextToken()
if err != nil { return }
2022-09-05 12:56:35 -06:00
}
}
2022-09-16 10:27:13 -06:00
// skipWhitespace skips over newlines and indent tokens.
2022-10-11 21:57:27 -06:00
func (parser *parsingOperation) skipWhitespace () (err error) {
2022-09-16 10:27:13 -06:00
for {
isWhitespace :=
parser.token.Is(lexer.TokenKindIndent) ||
parser.token.Is(lexer.TokenKindNewline)
if !isWhitespace {
break
}
err = parser.nextToken()
if err != nil { return }
}
return
}