197 lines
6.2 KiB
Go
197 lines
6.2 KiB
Go
package parser
|
|
|
|
import "io"
|
|
import "fmt"
|
|
import "git.tebibyte.media/sashakoshka/fspl/lexer"
|
|
import "git.tebibyte.media/sashakoshka/fspl/errors"
|
|
|
|
// When writing a parsing method on Parser, follow this flow:
|
|
// - Start with the token already present in Parser.token. Do not get the
|
|
// token after it.
|
|
// - Use Parser.expect(), Parser.expectValue(), etc. to test whether the token
|
|
// is a valid start for the entity
|
|
// - If starting by calling another parsing method, trust that method to do
|
|
// this instead.
|
|
// - When getting new tokens, use Parser.expectNext(),
|
|
// Parser.expectNextDesc(), etc. Only use Parser.next() when getting a token
|
|
// *right before* calling another parsing method, or at the *very end* of
|
|
// the current method.
|
|
// - To terminate the method, get the next token and do nothing with it.
|
|
// - If terminating by calling another parsing method, trust that method to do
|
|
// this instead.
|
|
//
|
|
// Remember that parsing methods always start with the current token, and end by
|
|
// getting a trailing token for the next method to start with. This makes it
|
|
// possible to reliably switch between parsing methods depending on the type or
|
|
// value of a token.
|
|
//
|
|
// The parser must never backtrack or look ahead, but it may revise previous
|
|
// data it has output upon receiving a new token that comes directly after the
|
|
// last token of said previous data. For example:
|
|
//
|
|
// X in XYZ may not be converted to A once the parser has seen Z, but
|
|
// X in XYZ may be converted to A once the parser has seen Y.
|
|
//
|
|
// This disallows complex and ambiguous syntax, but should allow things such as
// the very occasional infix operator (like . and =).
|
|
|
|
// Parser parses tokens from a lexer into syntax entities, which it places into
|
|
// a tree.
|
|
type Parser struct {
|
|
token lexer.Token
|
|
lexer lexer.Lexer
|
|
tree *Tree
|
|
}
|
|
|
|
// NewParser creates a new parser that parses the given file.
|
|
func NewParser (name string, file io.Reader) (*Parser, error) {
|
|
lx, err := lexer.NewLexer(name, file)
|
|
if err != nil { return nil, err }
|
|
|
|
return &Parser {
|
|
lexer: lx,
|
|
}, nil
|
|
}
|
|
|
|
// ParseInto parses the parser's file into the given syntax tree.
|
|
func (this *Parser) ParseInto (tree *Tree) error {
|
|
this.tree = tree
|
|
err := this.parse()
|
|
if err == io.EOF { err = nil }
|
|
return err
|
|
}
|
|
|
|
// expect checks the current token to see if it matches a list of token kind(s),
|
|
// else it returns an error describing what it expected.
|
|
func (this *Parser) expect (allowed ...lexer.TokenKind) error {
|
|
// fmt.Println("expect", this.token, allowed)
|
|
if !this.token.Is(allowed...) {
|
|
return errors.Errorf (
|
|
this.token.Position, "unexpected %v; expected %s",
|
|
this.token, commaList(allowed...))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// expectDesc is like expect, but the expected entitie(s) are described
|
|
// manually. This can be helpful when a large syntactical entity is expected and
|
|
// the first token(s) of it offer insufficient information.
|
|
func (this *Parser) expectDesc (description string, allowed ...lexer.TokenKind) error {
|
|
// fmt.Println("expectDesc", this.token, description, allowed)
|
|
if !this.token.Is(allowed...) {
|
|
return errors.Errorf (
|
|
this.token.Position, "unexpected %v; expected %s",
|
|
this.token, description)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// expectNext is like expect, but gets the next token first.
|
|
func (this *Parser) expectNext (allowed ...lexer.TokenKind) error {
|
|
err := this.next()
|
|
if err != nil { return err }
|
|
// fmt.Println("expectNext", this.token, allowed)
|
|
return this.expect(allowed...)
|
|
}
|
|
|
|
// expectNextDesc is like expectDesc, but gets the next token first.
|
|
func (this *Parser) expectNextDesc (description string, allowed ...lexer.TokenKind) error {
|
|
err := this.next()
|
|
if err != nil { return err }
|
|
// fmt.Println("expectNextDesc", this.token, description, allowed)
|
|
return this.expectDesc(description, allowed...)
|
|
}
|
|
|
|
// expectValue returns an error if the current token's value does not match the
|
|
// allowed values.
|
|
func (this *Parser) expectValue (kind lexer.TokenKind, allowed ...string) error {
|
|
// fmt.Println("expectValue", this.token, kind, allowed)
|
|
if !((this.token.Is(kind) || kind == 0) && this.token.ValueIs(allowed...)) {
|
|
return errors.Errorf (
|
|
this.token.Position, "unexpected %v; expected %s",
|
|
this.token, commaList(allowed))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// expectValueDesc is like expectValue, but the expected value(s) are described
|
|
// manually.
|
|
func (this *Parser) expectValueDesc (description string, kind lexer.TokenKind, allowed ...string) error {
|
|
// fmt.Println("expectValueDesc", this.token, description, kind, allowed)
|
|
if !this.token.Is(kind) || !this.token.ValueIs(allowed...) {
|
|
return errors.Errorf (
|
|
this.token.Position, "unexpected %v; expected %s",
|
|
this.token, description)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (this *Parser) next () error {
|
|
token, err := this.lexer.Next()
|
|
if err != nil { return err }
|
|
this.token = token
|
|
return nil
|
|
}
|
|
|
|
func (this *Parser) bug () string {
|
|
return fmt.Sprintln (
|
|
"Bug detected in the compiler!\n" +
|
|
"The parser has taken an unexpected control path.",
|
|
"This could be due to an un-implemented feature.\n" +
|
|
"Please submit a report with this info and stack trace to:",
|
|
"https://git.tebibyte.media/sashakoshka/fspl/issues\n" +
|
|
"The token being parsed was:", this.token)
|
|
}
|
|
|
|
func (this *Parser) kind () lexer.TokenKind {
|
|
return this.token.Kind
|
|
}
|
|
|
|
func (this *Parser) value () string {
|
|
return this.token.Value
|
|
}
|
|
|
|
func (this *Parser) pos () errors.Position {
|
|
return this.token.Position
|
|
}
|
|
|
|
func (this *Parser) parse () error {
|
|
err := this.next()
|
|
if err != nil { return err }
|
|
for this.token.Kind != lexer.EOF {
|
|
err = this.parseTopLevel()
|
|
if err != nil { return err }
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// commaList formats items as a human-readable list of alternatives, for use in
// "expected ..." error messages. Each item is formatted with fmt's default
// (%v) formatting.
//
//	no items:      ""
//	one item:      "a"
//	two items:     "a or b"
//	three or more: "a, b, or c"
func commaList[ELEMENT any](items ...ELEMENT) string {
	switch len(items) {
	case 0:
		return ""
	case 1:
		return fmt.Sprint(items[0])
	case 2:
		return fmt.Sprintf("%v or %v", items[0], items[1])
	}

	list := ""
	last := len(items) - 1
	for index, item := range items {
		switch {
		case index == 0:
			// The first item takes no separator.
		case index == last:
			// A single separator before the final item. Emitting ", "
			// followed by " or " would leave a double space.
			list += ", or "
		default:
			list += ", "
		}
		list += fmt.Sprint(item)
	}
	return list
}
|
|
|
|
// prependCopy returns a newly allocated slice consisting of item followed by
// the elements of array. The input slice is not modified.
func prependCopy[ELEMENT any](item ELEMENT, array []ELEMENT) []ELEMENT {
	result := make([]ELEMENT, len(array)+1)
	result[0] = item
	copy(result[1:], array)
	return result
}
|
|
|
|
// appendCopy returns a newly allocated slice holding the elements of array
// followed by items. Neither input is modified, and the result never shares
// storage with array.
func appendCopy[ELEMENT any](array []ELEMENT, items ...ELEMENT) []ELEMENT {
	combined := make([]ELEMENT, 0, len(array)+len(items))
	combined = append(combined, array...)
	combined = append(combined, items...)
	return combined
}
|