fspl/parser/parser.go

package parser

import "io"
import "fmt"
import "git.tebibyte.media/sashakoshka/fspl/lexer"
import "git.tebibyte.media/sashakoshka/fspl/errors"

// When writing a parsing method on Parser, follow this flow:
//   - Start with the token already present in Parser.token. Do not get the
//     token after it.
//   - Use Parser.expect(), Parser.expectValue(), etc. to test whether the token
//     is a valid start for the entity
//   - If starting by calling another parsing method, trust that method to do
//     this instead.
//   - When getting new tokens, use Parser.expectNext(),
//     Parser.expectNextDesc(), etc. Only use Parser.next() when getting a token
//     *right before* calling another parsing method, or at the *very end* of
//     the current method.
//   - To terminate the method, get the next token and do nothing with it.
//   - If terminating by calling another parsing method, trust that method to do
//     this instead.
//
// Remember that parsing methods always start with the current token, and end by
// getting a trailing token for the next method to start with. This makes it
// possible to reliably switch between parsing methods depending on the type or
// value of a token.
//
// The parser must never backtrack or look ahead, but it may revise previous
// data it has output upon receiving a new token that comes directly after the
// last token of said previous data. For example:
//
//   X in XYZ may not be converted to A once the parser has seen Z, but
//   X in XYZ may be converted to A once the parser has seen Y.
//
// This disallows complex and ambiguous syntax, but should allow things such as
// the very occasional infix operator (like . and =).

// Parser parses tokens from a lexer into syntax entities, which it places into
// a tree.
type Parser struct {
	// token is the current token, i.e. the one most recently read from the
	// lexer by next.
	token lexer.Token
	// lexer is the token source; NewParser fills it in.
	lexer lexer.Lexer
	// tree is the syntax tree being populated; ParseInto sets it before
	// parsing starts.
	tree  *Tree
}

// NewParser builds a Parser that reads tokens from a lexer created over file.
// The name is handed to the lexer together with the reader. If the lexer
// cannot be created, its error is returned and the parser is nil.
func NewParser (name string, file io.Reader) (*Parser, error) {
	source, err := lexer.NewLexer(name, file)
	if err != nil {
		return nil, err
	}

	// The current token and destination tree are left at their zero values;
	// they are filled in once parsing begins.
	parser := new(Parser)
	parser.lexer = source
	return parser, nil
}

// ParseInto records tree as the parser's destination tree and then parses the
// parser's file into it. An io.EOF coming back from parsing is not reported to
// the caller: it is translated to nil. Any other error is returned unchanged.
func (this *Parser) ParseInto (tree *Tree) error {
	this.tree = tree

	if err := this.parse(); err != nil && err != io.EOF {
		return err
	}
	return nil
}

// expect verifies that the current token is of one of the allowed kinds. It
// returns nil when it is; otherwise it returns an error located at the token's
// position that names the offending token and lists the kinds that would have
// been accepted.
func (this *Parser) expect (allowed ...lexer.TokenKind) error {
	if this.token.Is(allowed...) {
		return nil
	}

	return errors.Errorf(
		this.token.Position,
		"unexpected %v; expected %s",
		this.token,
		commaList(allowed...))
}

// expectDesc behaves like expect, except that the error message uses the
// caller-supplied description of what was expected instead of a list of token
// kinds. This is useful when a large syntactical entity is expected and its
// first token(s) alone would make for an uninformative message.
func (this *Parser) expectDesc (description string, allowed ...lexer.TokenKind) error {
	if this.token.Is(allowed...) {
		return nil
	}

	return errors.Errorf(
		this.token.Position,
		"unexpected %v; expected %s",
		this.token,
		description)
}

// expectNext advances to the next token and then applies expect to it. An
// error from advancing is returned as-is, without checking the token.
func (this *Parser) expectNext (allowed ...lexer.TokenKind) error {
	if err := this.next(); err != nil {
		return err
	}
	return this.expect(allowed...)
}

// expectNextDesc advances to the next token and then applies expectDesc to it.
// An error from advancing is returned as-is, without checking the token.
func (this *Parser) expectNextDesc (description string, allowed ...lexer.TokenKind) error {
	if err := this.next(); err != nil {
		return err
	}
	return this.expectDesc(description, allowed...)
}

// expectValue returns an error if the current token does not have the given
// kind, or if its value is not one of the allowed values. A kind of 0 acts as
// a wildcard: the token's kind is not checked and only its value is compared.
//
// On failure the error is located at the token's position, names the offending
// token, and lists the allowed values.
func (this *Parser) expectValue (kind lexer.TokenKind, allowed ...string) error {
	kindMatches := kind == 0 || this.token.Is(kind)
	if kindMatches && this.token.ValueIs(allowed...) {
		return nil
	}

	// The values must be spread into commaList. Passing the slice itself
	// makes it a single list element, which is rendered as "[a b]" instead
	// of "a or b".
	return errors.Errorf(
		this.token.Position,
		"unexpected %v; expected %s",
		this.token,
		commaList(allowed...))
}

// expectValueDesc checks that the current token has the given kind and that
// its value is one of the allowed values. When either check fails, it returns
// an error located at the token's position whose message uses the
// caller-supplied description of what was expected.
//
// NOTE(review): unlike expectValue, a kind of 0 gets no special treatment
// here; confirm whether that difference is intentional.
func (this *Parser) expectValueDesc (description string, kind lexer.TokenKind, allowed ...string) error {
	if this.token.Is(kind) && this.token.ValueIs(allowed...) {
		return nil
	}

	return errors.Errorf(
		this.token.Position,
		"unexpected %v; expected %s",
		this.token,
		description)
}

// next reads one token from the lexer and makes it the current token. If the
// lexer reports an error, that error is returned and the current token is left
// as it was.
func (this *Parser) next () error {
	upcoming, err := this.lexer.Next()
	if err != nil {
		return err
	}

	this.token = upcoming
	return nil
}

// bug builds the message used to report an internal parser error. The text
// explains that an unexpected control path was taken, points at the issue
// tracker, and ends with the token that was being parsed.
func (this *Parser) bug () string {
	headline :=
		"Bug detected in the compiler!\n" +
		"The parser has taken an unexpected control path."
	instructions :=
		"This could be due to an un-implemented feature.\n" +
		"Please submit a report with this info and stack trace to:"
	tracker :=
		"https://git.tebibyte.media/sashakoshka/fspl/issues\n" +
		"The token being parsed was:"

	// Sprintln separates the operands with single spaces and terminates
	// the message with a newline.
	return fmt.Sprintln(headline, instructions, tracker, this.token)
}

// kind returns the kind of the current token.
func (this *Parser) kind () lexer.TokenKind {
	return this.token.Kind
}

// value returns the value of the current token.
func (this *Parser) value () string {
	return this.token.Value
}

// pos returns the position of the current token.
func (this *Parser) pos () errors.Position {
	return this.token.Position
}

// parse drives the whole parse: it fetches the first token and then keeps
// calling parseTopLevel until the current token is of kind EOF. The first
// error from either step stops parsing and is returned.
func (this *Parser) parse () error {
	// Parsing methods start with a token already in place, so prime the
	// parser with the first one.
	if err := this.next(); err != nil {
		return err
	}

	for this.token.Kind != lexer.EOF {
		if err := this.parseTopLevel(); err != nil {
			return err
		}
	}
	return nil
}

// commaList formats items as a human-readable list of alternatives, rendering
// each item the way fmt.Sprint would:
//
//   - no items:            ""
//   - one item:            "a"
//   - two items:           "a or b"
//   - three or more items: "a, b, or c"
func commaList[ELEMENT any] (items ...ELEMENT) string {
	last := len(items) - 1
	list := ""

	for index, item := range items {
		switch {
		case index == 0:
			// The first item has nothing before it.
		case last == 1:
			// A list of exactly two items is joined without a comma.
			list += " or "
		case index == last:
			// The comma and the conjunction share one space; writing
			// ", " followed by " or " would produce a double space.
			list += ", or "
		default:
			list += ", "
		}
		list += fmt.Sprint(item)
	}
	return list
}

// prependCopy returns a newly allocated slice holding item followed by the
// elements of array. The input slice is only read, never written to.
func prependCopy[ELEMENT any] (item ELEMENT, array []ELEMENT) []ELEMENT {
	result := make([]ELEMENT, len(array) + 1)
	result[0] = item
	copy(result[1:], array)
	return result
}

// appendCopy returns a newly allocated slice holding the elements of array
// followed by items. Unlike the built-in append, it never writes into the
// backing storage of array, even when array has spare capacity.
func appendCopy[ELEMENT any] (array []ELEMENT, items ...ELEMENT) []ELEMENT {
	result := make([]ELEMENT, 0, len(array) + len(items))
	result = append(result, array...)
	return append(result, items...)
}