fspl/parser/parser.go

package parser

import "io"
import "fmt"
import "git.tebibyte.media/sashakoshka/fspl/lexer"
import "git.tebibyte.media/sashakoshka/fspl/errors"

// When writing a parsing method on Parser, follow this flow:
//
//   - Start with the token already present in Parser.token. Do not get the
//     token after it.
//   - Use Parser.expect(), Parser.expectValue(), etc. to test whether the
//     token is a valid start for the entity.
//   - If starting by calling another parsing method, trust that method to do
//     this instead.
//   - When getting new tokens, use Parser.expectNext(),
//     Parser.expectNextDesc(), etc. Only use Parser.next() when getting a
//     token *right before* calling another parsing method, or at the *very
//     end* of the current method.
//   - To terminate the method, get the next token and do nothing with it.
//   - If terminating by calling another parsing method, trust that method to
//     do this instead.
//
// Remember that parsing methods always start with the current token, and end
// by getting a trailing token for the next method to start with. This makes
// it possible to reliably switch between parsing methods depending on the
// type or value of a token.
//
// The parser must never backtrack or look ahead, but it may revise previous
// data it has output upon receiving a new token that comes directly after the
// last token of said previous data. For example:
//
//	X in XYZ may not be converted to A once the parser has seen Z, but
//	X in XYZ may be converted to A once the parser has seen Y.
//
// This disallows complex and ambiguous syntax, but should allow things such
// as the very occasional infix operator (like . and =).
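//
// As a rough illustration, a parsing method following this flow might look
// like the sketch below. The Example entity, its fields, and the lexer.Ident
// token kind are hypothetical stand-ins, not names from this package:
//
//	func (this *Parser) parseExample () (*Example, error) {
//		// start with the token already present in this.token
//		err := this.expect(lexer.Ident)
//		if err != nil { return nil, err }
//		entity := &Example {
//			Position: this.pos(),
//			Name:     this.value(),
//		}
//		// terminate by getting a trailing token and doing nothing with it
//		err = this.next()
//		if err != nil { return nil, err }
//		return entity, nil
//	}
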
// Parser parses tokens from a lexer into syntax entities, which it places into
// a tree.
type Parser struct {
	token lexer.Token
	lexer lexer.Lexer
	tree  *Tree
}

// NewParser creates a new parser that parses the given file.
func NewParser (name string, file io.Reader) (*Parser, error) {
	lx, err := lexer.NewLexer(name, file)
	if err != nil { return nil, err }
	return &Parser {
		lexer: lx,
	}, nil
}

// ParseInto parses the parser's file into the given syntax tree.
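//
// A minimal usage sketch (the file name, reader setup, and tree construction
// here are illustrative assumptions, not part of this package's API):
//
//	file, err := os.Open("main.fspl")
//	if err != nil { /* handle the error */ }
//	parser, err := NewParser("main.fspl", file)
//	if err != nil { /* handle the error */ }
//	tree := new(Tree)
//	err = parser.ParseInto(tree)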
func (this *Parser) ParseInto (tree *Tree) error {
	this.tree = tree
	err := this.parse()
	if err == io.EOF { err = nil }
	return err
}

// expect checks the current token to see if it matches a list of token kind(s),
// else it returns an error describing what it expected.
func (this *Parser) expect (allowed ...lexer.TokenKind) error {
// fmt.Println("expect", this.token, allowed)
if !this.token.Is(allowed...) {
return errors.Errorf (
this.token.Position, "unexpected %v; expected %s",
this.token, commaList(allowed...))
}
return nil
}
// expectDesc is like expect, but the expected entity(ies) are described
// manually. This can be helpful when a large syntactical entity is expected
// and the first token(s) of it offer insufficient information.
func (this *Parser) expectDesc (description string, allowed ...lexer.TokenKind) error {
// fmt.Println("expectDesc", this.token, description, allowed)
if !this.token.Is(allowed...) {
return errors.Errorf (
this.token.Position, "unexpected %v; expected %s",
this.token, description)
}
return nil
}
// expectNext is like expect, but gets the next token first.
func (this *Parser) expectNext (allowed ...lexer.TokenKind) error {
	err := this.next()
	if err != nil { return err }
	// fmt.Println("expectNext", this.token, allowed)
	return this.expect(allowed...)
}

// expectNextDesc is like expectDesc, but gets the next token first.
func (this *Parser) expectNextDesc (description string, allowed ...lexer.TokenKind) error {
	err := this.next()
	if err != nil { return err }
	// fmt.Println("expectNextDesc", this.token, description, allowed)
	return this.expectDesc(description, allowed...)
}

// expectValue returns an error if the current token does not match the given
// kind (or any kind, if kind is zero) and one of the allowed values.
func (this *Parser) expectValue (kind lexer.TokenKind, allowed ...string) error {
// fmt.Println("expectValue", this.token, kind, allowed)
if !((this.token.Is(kind) || kind == 0) && this.token.ValueIs(allowed...)) {
return errors.Errorf (
this.token.Position, "unexpected %v; expected %s",
this.token, commaList(allowed))
}
return nil
}
// expectValueDesc is like expectValue, but the expected value(s) are described
// manually.
func (this *Parser) expectValueDesc (description string, kind lexer.TokenKind, allowed ...string) error {
// fmt.Println("expectValueDesc", this.token, description, kind, allowed)
if !this.token.Is(kind) || !this.token.ValueIs(allowed...) {
return errors.Errorf (
this.token.Position, "unexpected %v; expected %s",
this.token, description)
}
return nil
}
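// next advances the parser to the next token from the lexer, storing it in
// Parser.token. It returns any error encountered by the lexer.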
func (this *Parser) next () error {
	token, err := this.lexer.Next()
	if err != nil { return err }
	this.token = token
	return nil
}

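// bug returns a message reporting that the parser has taken an unexpected
// control path, including the token that was being parsed when it happened.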
func (this *Parser) bug () string {
	return fmt.Sprintln (
		"Bug detected in the compiler!\n" +
		"The parser has taken an unexpected control path.",
		"This could be due to an un-implemented feature.\n" +
		"Please submit a report with this info and stack trace to:",
		"https://git.tebibyte.media/sashakoshka/fspl/issues\n" +
		"The token being parsed was:", this.token)
}

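// kind returns the kind of the current token.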
func (this *Parser) kind () lexer.TokenKind {
	return this.token.Kind
}

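// value returns the value of the current token.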
func (this *Parser) value () string {
	return this.token.Value
}

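// pos returns the position of the current token.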
func (this *Parser) pos () errors.Position {
	return this.token.Position
}

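// parse is the parser's main loop. It gets the first token, then parses
// top-level entities until the lexer reports EOF.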
func (this *Parser) parse () error {
	err := this.next()
	if err != nil { return err }
	for this.token.Kind != lexer.EOF {
		err = this.parseTopLevel()
		if err != nil { return err }
	}
	return nil
}

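// commaList formats the given items as a human-readable list for use in error
// messages, such as "a or b" or "a, b, or c".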
func commaList[ELEMENT any] (items ...ELEMENT) string {
list := ""
switch {
case len(items) == 1: list = fmt.Sprint(items[0])
case len(items) == 2: list = fmt.Sprint(items[0], " or ", items[1])
default:
for index, item := range items {
if index > 0 {
list += ", "
if index == len(items) - 1 {
list += " or "
}
}
list += fmt.Sprintf("%v", item)
}
}
return list
}
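// prependCopy returns a new slice consisting of item followed by the elements
// of array, leaving the original slice unmodified.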
func prependCopy[ELEMENT any] (item ELEMENT, array []ELEMENT) []ELEMENT {
	return append([]ELEMENT { item }, array...)
}

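// appendCopy returns a new slice consisting of the elements of array followed
// by items, leaving the original slice unmodified.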
func appendCopy[ELEMENT any] (array []ELEMENT, items ...ELEMENT) []ELEMENT {
	newArray := make([]ELEMENT, len(array) + len(items))
	copy(newArray[copy(newArray, array):], items)
	return newArray
}