// arf/parser/parser.go

package parser

import "io"
import "os"
import "path/filepath"
import "git.tebibyte.media/arf/arf/file"
import "git.tebibyte.media/arf/arf/lexer"
import "git.tebibyte.media/arf/arf/infoerr"

// ParsingOperation holds information about an ongoing parsing operation. One
// operation parses every source file of a single module into a shared syntax
// tree; the token related fields are reset each time a new file is started.
type ParsingOperation struct {
	// modulePath is the directory of the module being parsed. Fetch makes
	// sure it ends with a '/' so that file names can be appended directly.
	modulePath string
	// token is the token the parser is currently looking at.
	token      lexer.Token
	// tokens is the token list of the file currently being parsed. It is
	// replaced by parse for every source file.
	tokens     []lexer.Token
	// tokenIndex is the index of token within tokens.
	tokenIndex int
	// skimming is copied from the skim argument of Fetch.
	// NOTE(review): presumably this tells the section parsers to skip over
	// details such as initialization values - the code that reads it is not
	// in this part of the file, confirm there.
	skimming   bool

	// tree is the syntax tree being built. Fetch returns and caches it once
	// all files have been parsed.
	tree SyntaxTree
}

// Fetch returns the abstract syntax tree of the module located at modulePath.
// If a suitable tree is already in the cache it is returned as is, otherwise
// the module is parsed first and the result is cached.
//
// modulePath must be an absolute path (it must begin with '/'), and Fetch
// panics if it is not. If skim is true, the parser is put into skimming mode
// and the resulting tree is cached as skimmed. A skimmed tree in the cache can
// only satisfy another skim request: asking for a full tree causes the module
// to be parsed again.
//
// Every non-directory entry of the module directory that has the .arf
// extension is parsed into the same tree. If reading the directory, opening a
// file, or parsing a file fails, the error is returned immediately, tree is
// left at its zero value and nothing is cached. The one exception is io.EOF,
// which is how nextToken reports running out of tokens: it does not stop the
// remaining files from being parsed, and if the last file ends with it, it is
// passed on to the caller alongside the finished tree.
func Fetch (modulePath string, skim bool) (tree SyntaxTree, err error) {
	// the length is checked first so that an empty path produces the
	// message below instead of an index out of range panic.
	if len(modulePath) == 0 || modulePath[0] != '/' {
		panic("module path did not begin at filesystem root")
	}

	// try to hit cache. a cached tree is usable unless it was skimmed and
	// the caller wants a full one.
	cached, exists := cache[modulePath]
	if exists && (skim || !cached.skimmed) {
		tree = cached.tree
		return
	}

	// miss, so parse the module.
	parser := ParsingOperation {
		modulePath: modulePath,
		skimming:   skim,
		tree: SyntaxTree {
			requires: make(map[string] string),
			sections: make(map[string] Section),
		},
	}

	// file names are appended directly to the module path further down, so
	// it needs to end with a separator.
	if parser.modulePath[len(parser.modulePath) - 1] != '/' {
		parser.modulePath += "/"
	}

	var moduleFiles []os.DirEntry
	moduleFiles, err = os.ReadDir(parser.modulePath)
	if err != nil { return }

	for _, entry := range moduleFiles {
		if entry.IsDir() || filepath.Ext(entry.Name()) != ".arf" {
			continue
		}

		var sourceFile *file.File
		sourceFile, err = file.Open(parser.modulePath + entry.Name())
		if err != nil { return }

		// parse the tokens into the module. a real error must stop
		// here, otherwise the next file would overwrite it and a broken
		// tree would end up in the cache.
		// NOTE(review): io.EOF is let through because nextToken returns
		// it at the end of the token list, so it may be the normal
		// result of parsing a file - confirm against parseBody.
		err = parser.parse(sourceFile)
		if err != nil && err != io.EOF { return }
	}

	tree = parser.tree

	// cache tree, recording whether it was skimmed so that the check at
	// the top can tell if it is good enough for a later request.
	cache[modulePath] = cacheItem {
		tree:    tree,
		skimmed: skim,
	}

	return
}

// parse tokenizes a single source file and parses its metadata and body into
// the syntax tree of this operation. A file that produces no tokens is
// skipped without error. Errors from the lexer, from parseMeta and from
// parseBody are returned unchanged.
func (parser *ParsingOperation) parse (sourceFile *file.File) (err error) {
	lexed, err := lexer.Tokenize(sourceFile)
	if err != nil || len(lexed) == 0 { return }

	// point the parser at the start of the new token list
	parser.tokenIndex = 0
	parser.tokens     = lexed
	parser.token      = lexed[0]

	if err = parser.parseMeta(); err != nil { return }
	return parser.parseBody()
}

// expect checks the current token against a list of allowed token kinds. It
// returns nil if the token is of one of those kinds, or if the list is empty.
// Otherwise it returns an error located at the current token that names the
// kind that was found and lists the kinds that were allowed.
func (parser *ParsingOperation) expect (allowed ...lexer.TokenKind) (err error) {
	// an empty list means that anything goes
	if len(allowed) == 0 { return nil }

	for index := range allowed {
		if parser.token.Is(allowed[index]) { return nil }
	}

	// nothing matched. list the allowed kinds in the form "a, b or c".
	lastIndex   := len(allowed) - 1
	description := "unexpected " + parser.token.Kind().Describe() +
		" token, expected "

	for index, kind := range allowed {
		switch {
		case index == 0:
			// the first item has nothing to be separated from
		case index == lastIndex:
			description += " or "
		default:
			description += ", "
		}

		description += kind.Describe()
	}

	return infoerr.NewError (
		parser.token.Location(),
		description, infoerr.ErrorKindError)
}

// nextToken advances the parser by one token and then checks the new current
// token against the allowed kinds, exactly like expect does. If there are no
// more tokens, it returns io.EOF and leaves the current token as it was. The
// token index is still incremented in that case.
func (parser *ParsingOperation) nextToken (allowed ...lexer.TokenKind) (err error) {
	next := parser.tokenIndex + 1
	parser.tokenIndex = next

	if next >= len(parser.tokens) {
		err = io.EOF
		return
	}

	parser.token = parser.tokens[next]
	return parser.expect(allowed...)
}

// previousToken steps the parser back by one token. If the parser is already
// on the first token, it stays there.
func (parser *ParsingOperation) previousToken () {
	target := parser.tokenIndex - 1
	if target < 0 { target = 0 }

	parser.tokenIndex = target
	parser.token      = parser.tokens[target]
}

// skipIndentLevel moves the parser forward past every line that is indented
// at least as deeply as indent. It stops on the first token of the first line
// that either does not begin with an indent token, or begins with one whose
// level is lower than indent. Lines are never considered finished while a
// brace, parenthesis or bracket that was opened during the skip is still
// unclosed. Any error from nextToken, including io.EOF at the end of the
// tokens, is returned as is.
func (parser *ParsingOperation) skipIndentLevel (indent int) (err error) {
	// how many of each delimiter are currently open
	var braces, parens, brackets int

	for {
		if parser.token.Is(lexer.TokenKindNewline) {
			// look at the token that begins the next line
			if err = parser.nextToken(); err != nil { return }

			// the value of the token is only read when it is
			// known to be an indent.
			stillIndented :=
				parser.token.Is(lexer.TokenKindIndent) &&
				parser.token.Value().(int) >= indent

			insideDelimiters :=
				braces   >= 1 ||
				parens   >= 1 ||
				brackets >= 1

			if !stillIndented && !insideDelimiters { return }
		}

		// keep track of delimiters so that a value spanning several
		// lines is skipped as a whole.
		switch parser.token.Kind() {
		case lexer.TokenKindLBrace:
			braces ++
		case lexer.TokenKindRBrace:
			braces --
		case lexer.TokenKindLParen:
			parens ++
		case lexer.TokenKindRParen:
			parens --
		case lexer.TokenKindLBracket:
			brackets ++
		case lexer.TokenKindRBracket:
			brackets --
		}

		if err = parser.nextToken(); err != nil { return }
	}
}

// skipWhitespace advances the parser until the current token is neither a
// newline nor an indent. If the current token is already something else, the
// parser does not move. Any error from nextToken, including io.EOF at the end
// of the tokens, is returned as is.
func (parser *ParsingOperation) skipWhitespace () (err error) {
	for parser.token.Is(lexer.TokenKindIndent) ||
		parser.token.Is(lexer.TokenKindNewline) {

		if err = parser.nextToken(); err != nil { return }
	}

	return nil
}
Created base for parser The parser now handles file opening and invokes the lexer. 2022-08-12 09:11:43 -06:00			`package parser`

Added expect and nextToken methods to parser 2022-08-12 12:33:21 -06:00			`import "io"`
Created base for parser The parser now handles file opening and invokes the lexer. 2022-08-12 09:11:43 -06:00			`import "os"`
			`import "path/filepath"`
Fixed import paths 2022-08-29 23:11:10 -06:00			`import "git.tebibyte.media/arf/arf/file"`
			`import "git.tebibyte.media/arf/arf/lexer"`
			`import "git.tebibyte.media/arf/arf/infoerr"`
Created base for parser The parser now handles file opening and invokes the lexer. 2022-08-12 09:11:43 -06:00
			`// ParsingOperation holds information about an ongoing parsing operation.`
			`type ParsingOperation struct {`
			`modulePath string`
			`token lexer.Token`
			`tokens []lexer.Token`
			`tokenIndex int`
Data section initialization values are now skimmed over 2022-09-05 13:04:39 -06:00			`skimming bool`
Created basic test for parser 2022-08-12 10:55:17 -06:00
The syntax tree is no longer passed by reference 2022-09-04 20:30:14 -06:00			`tree SyntaxTree`
Created base for parser The parser now handles file opening and invokes the lexer. 2022-08-12 09:11:43 -06:00			`}`

Parse -> Fetch, now tries to hit cache 2022-09-05 11:31:38 -06:00			`// Fetch returns the parsed module located at the specified path, and returns an`
			`// abstract syntax tree. If the module has not yet been parsed, it parses it`
			`// first.`
Added skim boolean that does nothing 2022-09-05 11:46:10 -06:00			`func Fetch (modulePath string, skim bool) (tree SyntaxTree, err error) {`
Parse -> Fetch, now tries to hit cache 2022-09-05 11:31:38 -06:00			`if modulePath[0] != '/' {`
			`panic("module path did not begin at filesystem root")`
			`}`

			`// try to hit cache`
			`cached, exists := cache[modulePath]`
Added skim boolean that does nothing 2022-09-05 11:46:10 -06:00			`if exists && !(!skim && cached.skimmed){`
Parse -> Fetch, now tries to hit cache 2022-09-05 11:31:38 -06:00			`tree = cached.tree`
			`return`
			`}`

			`// miss, so parse the module.`
The section kind specific maps are gone I've REPLACED IT with the unified sections map. Interfaces, baby! 2022-09-04 17:30:59 -06:00			`parser := ParsingOperation {`
			`modulePath: modulePath,`
Data section initialization values are now skimmed over 2022-09-05 13:04:39 -06:00			`skimming: skim,`
The syntax tree is no longer passed by reference 2022-09-04 20:30:14 -06:00			`tree: SyntaxTree {`
Syntax tree now stores map of require names -> full paths 2022-09-07 15:12:46 -06:00			`requires: make(map[string] string),`
The section kind specific maps are gone I've REPLACED IT with the unified sections map. Interfaces, baby! 2022-09-04 17:30:59 -06:00			`sections: make(map[string] Section),`
			`},`
			`}`
Created basic test for parser 2022-08-12 10:55:17 -06:00
Created base for parser The parser now handles file opening and invokes the lexer. 2022-08-12 09:11:43 -06:00			`if parser.modulePath[len(parser.modulePath) - 1] != '/' {`
			`parser.modulePath += "/"`
			`}`

			`var moduleFiles []os.DirEntry`
			`moduleFiles, err = os.ReadDir(parser.modulePath)`
			`if err != nil { return }`

			`for _, entry := range moduleFiles {`
			`if filepath.Ext(entry.Name()) != ".arf" \|\| entry.IsDir() {`
			`continue`
			`}`

			`var sourceFile *file.File`
			`sourceFile, err = file.Open(parser.modulePath + entry.Name())`
			`if err != nil { return }`

Parser now understands the separation between files This needs to be done because each file has a metadata section at the top. 2022-08-12 11:02:20 -06:00			`// parse the tokens into the module`
			`err = parser.parse(sourceFile)`
Created base for parser The parser now handles file opening and invokes the lexer. 2022-08-12 09:11:43 -06:00			`}`
Parser now understands the separation between files This needs to be done because each file has a metadata section at the top. 2022-08-12 11:02:20 -06:00
			`tree = parser.tree`
Parse -> Fetch, now tries to hit cache 2022-09-05 11:31:38 -06:00
			`// cache tree`
			`cache[modulePath] = cacheItem {`
			`tree: tree,`
			`skimmed: false,`
			`}`

Parser now understands the separation between files This needs to be done because each file has a metadata section at the top. 2022-08-12 11:02:20 -06:00			`return`
			`}`

			`// parse parses a file and adds it to the syntax tree.`
			`func (parser *ParsingOperation) parse (sourceFile *file.File) (err error) {`
			`var tokens []lexer.Token`
			`tokens, err = lexer.Tokenize(sourceFile)`
			`if err != nil { return }`

Added expect and nextToken methods to parser 2022-08-12 12:33:21 -06:00			`// reset the parser`
Parser now understands the separation between files This needs to be done because each file has a metadata section at the top. 2022-08-12 11:02:20 -06:00			`if len(tokens) == 0 { return }`
			`parser.tokens = tokens`
			`parser.token = tokens[0]`
			`parser.tokenIndex = 0`

Added expect and nextToken methods to parser 2022-08-12 12:33:21 -06:00			`err = parser.parseMeta()`
			`if err != nil { return }`

Added base parse body function 2022-08-14 20:38:57 -06:00			`err = parser.parseBody()`
			`if err != nil { return }`

Added expect and nextToken methods to parser 2022-08-12 12:33:21 -06:00			`return`
			`}`

			`// expect takes in a list of allowed token kinds, and returns an error if the`
			`// current token isn't one of them. If the length of allowed is zero, this`
			`// function will not return an error.`
Added metadata parser 2022-08-12 15:22:51 -06:00			`func (parser *ParsingOperation) expect (allowed ...lexer.TokenKind) (err error) {`
Added expect and nextToken methods to parser 2022-08-12 12:33:21 -06:00			`if len(allowed) == 0 { return }`

			`for _, kind := range allowed {`
			`if parser.token.Is(kind) { return }`
			`}`

Parser can now print out a list of expected token kinds 2022-08-12 16:09:37 -06:00			`message :=`
			`"unexpected " + parser.token.Kind().Describe() +`
			`" token, expected "`

			`for index, allowedItem := range allowed {`
			`if index > 0 {`
			`if index == len(allowed) - 1 {`
			`message += " or "`
			`} else {`
			`message += ", "`
			`}`
			`}`

			`message += allowedItem.Describe()`
			`}`

Replaced references to file.Error with infoerr.Error 2022-08-17 22:58:40 -06:00			`err = infoerr.NewError (`
Added metadata parser 2022-08-12 15:22:51 -06:00			`parser.token.Location(),`
Replaced references to file.Error with infoerr.Error 2022-08-17 22:58:40 -06:00			`message, infoerr.ErrorKindError)`
Added expect and nextToken methods to parser 2022-08-12 12:33:21 -06:00			`return`
			`}`

			`// nextToken is the same as expect, but it advances to the next token first.`
Added metadata parser 2022-08-12 15:22:51 -06:00			`func (parser *ParsingOperation) nextToken (allowed ...lexer.TokenKind) (err error) {`
Added expect and nextToken methods to parser 2022-08-12 12:33:21 -06:00			`parser.tokenIndex ++`
			`if parser.tokenIndex >= len(parser.tokens) { return io.EOF }`
			`parser.token = parser.tokens[parser.tokenIndex]`
Added metadata parser 2022-08-12 15:22:51 -06:00
			`err = parser.expect(allowed...)`
Created base for parser The parser now handles file opening and invokes the lexer. 2022-08-12 09:11:43 -06:00			`return`
			`}`
Added previousToken method to parser 2022-08-17 10:39:26 -06:00
			`// previousToken goes back one token. If the parser is already at the beginning,`
			`// this does nothing.`
			`func (parser *ParsingOperation) previousToken () {`
			`parser.tokenIndex --`
			`if parser.tokenIndex < 0 { parser.tokenIndex = 0 }`
			`parser.token = parser.tokens[parser.tokenIndex]`
			`return`
			`}`
Add skipIndentLevel function 2022-09-05 12:56:35 -06:00
			`// skipIndentLevel advances the parser, ignoring every line with an indentation`
			`// equal to or greater than the specified indent.`
			`func (parser *ParsingOperation) skipIndentLevel (indent int) (err error) {`
Pass skim test 2022-09-29 09:15:58 -06:00			`braceLevel := 0`
			`parenLevel := 0`
			`bracketLevel := 0`

Add skipIndentLevel function 2022-09-05 12:56:35 -06:00			`for {`
Data section initialization values are now skimmed over 2022-09-05 13:04:39 -06:00			`if parser.token.Is(lexer.TokenKindNewline) {`
			`err = parser.nextToken()`
			`if err != nil { return }`
Add skipIndentLevel function 2022-09-05 12:56:35 -06:00
Pass skim test 2022-09-29 09:15:58 -06:00			`shouldBreak :=`
			`!parser.token.Is(lexer.TokenKindIndent) \|\|`
			`parser.token.Value().(int) < indent`

			`shouldBreak =`
			`shouldBreak &&`
			`braceLevel < 1 &&`
			`parenLevel < 1 &&`
			`bracketLevel < 1`

			`if shouldBreak { return }`
			`}`
Add skipIndentLevel function 2022-09-05 12:56:35 -06:00
Pass skim test 2022-09-29 09:15:58 -06:00			`switch parser.token.Kind() {`
			`case lexer.TokenKindLBrace: braceLevel ++`
			`case lexer.TokenKindRBrace: braceLevel --`
			`case lexer.TokenKindLParen: parenLevel ++`
			`case lexer.TokenKindRParen: parenLevel --`
			`case lexer.TokenKindLBracket: bracketLevel ++`
			`case lexer.TokenKindRBracket: bracketLevel --`
Add skipIndentLevel function 2022-09-05 12:56:35 -06:00			`}`
Data section initialization values are now skimmed over 2022-09-05 13:04:39 -06:00
			`err = parser.nextToken()`
			`if err != nil { return }`
Add skipIndentLevel function 2022-09-05 12:56:35 -06:00			`}`
			`}`
Fixed the hanging thing 2022-09-16 10:27:13 -06:00
			`// skipWhitespace skips over newlines and indent tokens.`
			`func (parser *ParsingOperation) skipWhitespace () (err error) {`
			`for {`
			`isWhitespace :=`
			`parser.token.Is(lexer.TokenKindIndent) \|\|`
			`parser.token.Is(lexer.TokenKindNewline)`

			`if !isWhitespace {`
			`break`
			`}`

			`err = parser.nextToken()`
			`if err != nil { return }`
			`}`

			`return`
			`}`