/*
Package parser implements a parser for the ARF language. It contains an abstract
syntax tree (SyntaxTree), various tree nodes, and a function called Fetch that
returns a SyntaxTree for the module located at the given path. Internally, the
parser caches parsing results, so Fetch may be called frequently.

Trees returned by this package can be expected to be internally consistent and
syntactically correct, but not semantically correct. Ensuring the semantic
integrity of ARF code is the job of the analyzer package.

This package automatically invokes the lexer before parsing module files.
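
For example, a caller outside this package might fetch and fully parse a
module like this (the module path shown is hypothetical; Fetch only accepts
absolute paths):

	tree, err := parser.Fetch("/home/user/project/someModule", false)
	if err != nil {
		// handle the error
	}
	// use tree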
*/
package parser

import "io"
|
|
import "os"
|
|
import "path/filepath"
|
|
import "git.tebibyte.media/arf/arf/file"
|
|
import "git.tebibyte.media/arf/arf/lexer"
|
|
import "git.tebibyte.media/arf/arf/infoerr"
|
|
|
|
// parsingOperation holds information about an ongoing parsing operation.
type parsingOperation struct {
	modulePath string        // absolute path of the module being parsed
	token      lexer.Token   // token currently under examination
	tokens     []lexer.Token // all tokens of the file currently being parsed
	tokenIndex int           // index of token within tokens
	skimming   bool          // whether this operation is a skim pass

	tree SyntaxTree // the tree being built from the module's files
}

// Fetch returns the parsed module located at the specified path as a
// SyntaxTree. If the module has not yet been parsed, it parses it first. If it
// has, it grabs it out of a cache. This function can be called frequently.
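// Passing skim as true performs a skim pass; a tree cached by a skim pass is
// not reused to satisfy a later full parse.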
func Fetch (modulePath string, skim bool) (tree SyntaxTree, err error) {
	if modulePath[0] != '/' {
		panic("module path did not begin at filesystem root")
	}

	// try to hit cache. a skimmed tree only satisfies another skim request;
	// a full parse request forces a re-parse.
	cached, exists := cache[modulePath]
	if exists && !(!skim && cached.skimmed) {
		tree = cached.tree
		return
	}

	// miss, so parse the module.
	parser := parsingOperation {
		modulePath: modulePath,
		skimming:   skim,
		tree: SyntaxTree {
			requires: make(map[string] string),
			sections: make(map[string] Section),
		},
	}

	if parser.modulePath[len(parser.modulePath) - 1] != '/' {
		parser.modulePath += "/"
	}

	var moduleFiles []os.DirEntry
	moduleFiles, err = os.ReadDir(parser.modulePath)
	if err != nil { return }

	for _, entry := range moduleFiles {
		if filepath.Ext(entry.Name()) != ".arf" || entry.IsDir() {
			continue
		}

		var sourceFile *file.File
		sourceFile, err = file.Open(parser.modulePath + entry.Name())
		if err != nil { return }

		// parse the tokens into the module
		err = parser.parse(sourceFile)
		if err != nil { return }
	}

	tree = parser.tree

	// cache tree, recording whether it was produced by a skim pass so a
	// later full parse will not reuse it.
	cache[modulePath] = cacheItem {
		tree:    tree,
		skimmed: skim,
	}

	return
}

// parse parses a file and adds it to the syntax tree.
func (parser *parsingOperation) parse (sourceFile *file.File) (err error) {
	var tokens []lexer.Token
	tokens, err = lexer.Tokenize(sourceFile)
	if err != nil { return }

	// reset the parser
	if len(tokens) == 0 { return }
	parser.tokens = tokens
	parser.token = tokens[0]
	parser.tokenIndex = 0

	err = parser.parseMeta()
	if err != nil { return }

	err = parser.parseBody()
	if err != nil { return }

	return
}

// expect takes in a list of allowed token kinds, and returns an error if the
// current token isn't one of them. If the length of allowed is zero, this
// function will not return an error.
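//
// For example, to require that the current token be a newline or an indent
// (two of the token kinds used elsewhere in this file):
//
//	err = parser.expect(lexer.TokenKindNewline, lexer.TokenKindIndent)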
func (parser *parsingOperation) expect (allowed ...lexer.TokenKind) (err error) {
	if len(allowed) == 0 { return }

	for _, kind := range allowed {
		if parser.token.Is(kind) { return }
	}

	message :=
		"unexpected " + parser.token.Kind().Describe() +
		" token, expected "

	for index, allowedItem := range allowed {
		if index > 0 {
			if index == len(allowed) - 1 {
				message += " or "
			} else {
				message += ", "
			}
		}

		message += allowedItem.Describe()
	}

	err = infoerr.NewError (
		parser.token.Location(),
		message, infoerr.ErrorKindError)
	return
}

// nextToken is the same as expect, but it advances to the next token first.
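// If there are no more tokens, it returns io.EOF.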
func (parser *parsingOperation) nextToken (allowed ...lexer.TokenKind) (err error) {
	parser.tokenIndex ++
	if parser.tokenIndex >= len(parser.tokens) { return io.EOF }
	parser.token = parser.tokens[parser.tokenIndex]

	err = parser.expect(allowed...)
	return
}

// previousToken goes back one token. If the parser is already at the beginning,
// this does nothing.
func (parser *parsingOperation) previousToken () {
	parser.tokenIndex --
	if parser.tokenIndex < 0 { parser.tokenIndex = 0 }
	parser.token = parser.tokens[parser.tokenIndex]
	return
}

// skipIndentLevel advances the parser, ignoring every line with an indentation
// equal to or greater than the specified indent.
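// Lines inside braces, parentheses, or brackets are skipped regardless of
// their indentation.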
func (parser *parsingOperation) skipIndentLevel (indent int) (err error) {
	braceLevel := 0
	parenLevel := 0
	bracketLevel := 0

	for {
		if parser.token.Is(lexer.TokenKindNewline) {
			err = parser.nextToken()
			if err != nil { return }

			shouldBreak :=
				!parser.token.Is(lexer.TokenKindIndent) ||
				parser.token.Value().(int) < indent

			shouldBreak =
				shouldBreak &&
				braceLevel < 1 &&
				parenLevel < 1 &&
				bracketLevel < 1

			if shouldBreak { return }
		}

		switch parser.token.Kind() {
		case lexer.TokenKindLBrace: braceLevel ++
		case lexer.TokenKindRBrace: braceLevel --
		case lexer.TokenKindLParen: parenLevel ++
		case lexer.TokenKindRParen: parenLevel --
		case lexer.TokenKindLBracket: bracketLevel ++
		case lexer.TokenKindRBracket: bracketLevel --
		}

		err = parser.nextToken()
		if err != nil { return }
	}
}

// skipWhitespace skips over newlines and indent tokens.
func (parser *parsingOperation) skipWhitespace () (err error) {
	for {
		isWhitespace :=
			parser.token.Is(lexer.TokenKindIndent) ||
			parser.token.Is(lexer.TokenKindNewline)

		if !isWhitespace {
			break
		}

		err = parser.nextToken()
		if err != nil { return }
	}

	return
}