435 lines
9.7 KiB
Go
435 lines
9.7 KiB
Go
package lexer
|
|
|
|
import "os"
|
|
import "io"
|
|
import "fmt"
|
|
import "bufio"
|
|
import "unicode"
|
|
import "git.tebibyte.media/sashakoshka/fspl/errors"
|
|
|
|
// TokenKind identifies which category of lexeme a Token holds. Every kind the
// FSPL compiler recognizes is declared in the constant block below.
type TokenKind int

// Kinds are numbered downward from -1 in declaration order.
const (
	EOF TokenKind = -(iota + 1)

	// Name      Rough regex-ish description
	Ident     // [a-z][a-zA-Z0-9]*
	TypeIdent // [A-Z][a-zA-Z0-9]*
	Int       // (0b|0x)?[0-9a-fA-F]+
	Float     // [0-9]*\.[0-9]+
	String    // \'.*\'

	Symbol      // [~!@#$%^&*-_=+\\|;,<>/?]+
	LParen      // \(
	LBrace      // \{
	LBracket    // \[
	RParen      // \)
	RBrace      // \}
	RBracket    // \]
	Colon       // :
	DoubleColon // ::
	Dot         // .
	DoubleDot   // ..
	Star        // \*
)

// String gives the textual name of the kind, which is spelled exactly like the
// constant that declares it. A value that matches no declared constant is
// rendered as TokenKind(N), where N is its numeric value.
func (kind TokenKind) String() string {
	switch kind {
	case EOF:
		return "EOF"
	case Ident:
		return "Ident"
	case TypeIdent:
		return "TypeIdent"
	case Int:
		return "Int"
	case Float:
		return "Float"
	case String:
		return "String"
	case Symbol:
		return "Symbol"
	case LParen:
		return "LParen"
	case LBrace:
		return "LBrace"
	case LBracket:
		return "LBracket"
	case RParen:
		return "RParen"
	case RBrace:
		return "RBrace"
	case RBracket:
		return "RBracket"
	case Colon:
		return "Colon"
	case DoubleColon:
		return "DoubleColon"
	case Dot:
		return "Dot"
	case DoubleDot:
		return "DoubleDot"
	case Star:
		return "Star"
	}
	return fmt.Sprintf("TokenKind(%d)", kind)
}
|
|
|
|
// Token represents a single lexeme of an FSPL file.
|
|
type Token struct {
|
|
Position errors.Position // The position of the token in its file
|
|
Kind TokenKind // Which kind of token it is
|
|
Value string // The token's value
|
|
}
|
|
|
|
// String returns a string representation of the token, which is of the form:
|
|
// KIND 'VALUE'
|
|
// or if the value is empty:
|
|
// KIND
|
|
func (tok Token) String () string {
|
|
output := tok.Kind.String()
|
|
if tok.Value != "" {
|
|
output += fmt.Sprintf(" '%s'", tok.Value)
|
|
}
|
|
return output
|
|
}
|
|
|
|
// EOF returns whether or not the token is an EOF token.
|
|
func (tok Token) EOF () bool {
|
|
return tok.Kind == EOF
|
|
}
|
|
|
|
// Is returns whether or not the token kind matches any of the given kinds.
|
|
func (tok Token) Is (kinds ...TokenKind) bool {
|
|
for _, kind := range kinds {
|
|
if tok.Kind == kind { return true }
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Is returns whether or not the token value matches any of the given values.
|
|
func (tok Token) ValueIs (values ...string) bool {
|
|
for _, value := range values {
|
|
if tok.Value == value { return true }
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Lexer is an object capable of producing tokens.
|
|
type Lexer interface {
|
|
// Next returns the next token. If there are no more tokens, it returns
|
|
// an EOF token. It only returns an error on EOF if the file terminated
|
|
// unexpectedly.
|
|
Next () (Token, error)
|
|
}
|
|
|
|
// LexReader creates a new default lexer that reads from the given reader. The
|
|
// filename parameter is used for token locations and error messages.
|
|
func LexReader (filename string, reader io.Reader) (Lexer, error) {
|
|
lexer := &fsplLexer {
|
|
filename: filename,
|
|
lineScanner: bufio.NewScanner(reader),
|
|
}
|
|
lexer.nextRune()
|
|
return lexer, nil
|
|
}
|
|
|
|
// LexFile creates a new default lexer that reads from the given file.
|
|
func LexFile (filename string) (Lexer, error) {
|
|
file, err := os.Open(filename)
|
|
if err != nil { return nil, err }
|
|
|
|
lexer := &fsplLexer {
|
|
filename: filename,
|
|
lineScanner: bufio.NewScanner(file),
|
|
}
|
|
lexer.nextRune()
|
|
return lexer, nil
|
|
}
|
|
|
|
// fsplLexer is the default Lexer. It pulls its input one line at a time from a
// bufio.Scanner and walks through each line rune by rune.
type fsplLexer struct {
	// filename is the file name placed in every position this lexer
	// produces.
	filename string
	// lineScanner supplies the input lines.
	lineScanner *bufio.Scanner
	// rune is the rune currently being examined. It is set to '\n' each
	// time a new line is fetched.
	rune rune
	// runeLine holds the runes of the line currently being walked.
	runeLine []rune

	// offset is not read or written by any of the lexer code in this part
	// of the file.
	offset int
	// row counts the lines fetched so far.
	row int
	// column is the index in runeLine of the next rune to hand out.
	column int

	// eof is set once the scanner has run out of input without an error.
	eof bool
}
|
|
|
|
func (this *fsplLexer) Next () (Token, error) {
|
|
token, err := this.nextInternal()
|
|
if err == io.EOF { err = this.errUnexpectedEOF() }
|
|
return token, err
|
|
}
|
|
|
|
func (this *fsplLexer) nextInternal () (token Token, err error) {
|
|
err = this.skipWhitespace()
|
|
token.Position = this.pos()
|
|
if this.eof {
|
|
token.Kind = EOF
|
|
err = nil
|
|
return
|
|
}
|
|
if err != nil { return }
|
|
|
|
appendRune := func () {
|
|
token.Value += string(this.rune)
|
|
err = this.nextRune()
|
|
}
|
|
|
|
doNumber := func () {
|
|
token.Kind = Int
|
|
for isDigit(this.rune) {
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
if err != nil { return }
|
|
}
|
|
if this.rune == '.' {
|
|
token.Kind = Float
|
|
appendRune()
|
|
for isDigit(this.rune) {
|
|
appendRune()
|
|
if err != nil { return }
|
|
}
|
|
}
|
|
}
|
|
|
|
doSymbol := func () {
|
|
token.Kind = Symbol
|
|
for isSymbol(this.rune) {
|
|
appendRune()
|
|
if err != nil { return }
|
|
}
|
|
}
|
|
|
|
defer func () {
|
|
newPos := this.pos()
|
|
newPos.End -- // TODO figure out why tf we have to do this
|
|
token.Position = token.Position.Union(newPos)
|
|
} ()
|
|
|
|
switch {
|
|
// Ident
|
|
case unicode.IsLower(this.rune):
|
|
token.Kind = Ident
|
|
for unicode.IsLetter(this.rune) || isDigit(this.rune) {
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
if err != nil { return }
|
|
}
|
|
// TypeIdent
|
|
case unicode.IsUpper(this.rune):
|
|
token.Kind = TypeIdent
|
|
for unicode.IsLetter(this.rune) || isDigit(this.rune) {
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
if err != nil { return }
|
|
}
|
|
// Int, Float
|
|
case isDigit(this.rune):
|
|
doNumber()
|
|
if this.eof { err = nil; return }
|
|
// String
|
|
case this.rune == '\'':
|
|
token.Kind = String
|
|
err = this.nextRune()
|
|
if err != nil { return }
|
|
|
|
for this.rune != '\'' {
|
|
if this.rune == '\\' {
|
|
var result rune
|
|
result, err = this.escapeSequence()
|
|
if err != nil { return }
|
|
token.Value += string(result)
|
|
} else {
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
if err != nil { return }
|
|
}
|
|
}
|
|
err = this.nextRune()
|
|
if this.eof { err = nil; return }
|
|
if err != nil { return }
|
|
// Symbol, Int, Float
|
|
case this.rune == '-':
|
|
token.Kind = Symbol
|
|
appendRune()
|
|
if err != nil { return }
|
|
if isDigit(this.rune) {
|
|
doNumber()
|
|
if this.eof { err = nil; return }
|
|
} else if isSymbol(this.rune) {
|
|
doSymbol()
|
|
if this.eof { err = nil; return }
|
|
}
|
|
// Symbol
|
|
case isSymbol(this.rune):
|
|
doSymbol()
|
|
if this.eof { err = nil; return }
|
|
case this.rune == '(':
|
|
token.Kind = LParen
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
case this.rune == '{':
|
|
token.Kind = LBrace
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
case this.rune == '[':
|
|
token.Kind = LBracket
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
case this.rune == ')':
|
|
token.Kind = RParen
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
case this.rune == '}':
|
|
token.Kind = RBrace
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
case this.rune == ']':
|
|
token.Kind = RBracket
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
// Colon, DoubleColon
|
|
case this.rune == ':':
|
|
token.Kind = Colon
|
|
appendRune()
|
|
if this.rune == ':' {
|
|
token.Kind = DoubleColon
|
|
appendRune()
|
|
}
|
|
if this.eof { err = nil; return }
|
|
// Dot, DoubleDot
|
|
case this.rune == '.':
|
|
token.Kind = Dot
|
|
appendRune()
|
|
if this.rune == '.' {
|
|
token.Kind = DoubleDot
|
|
appendRune()
|
|
}
|
|
if this.eof { err = nil; return }
|
|
// Star
|
|
case this.rune == '*':
|
|
token.Kind = Star
|
|
appendRune()
|
|
if this.eof { err = nil; return }
|
|
case unicode.IsPrint(this.rune):
|
|
err = errors.Errorf (
|
|
this.pos(), "unexpected rune '%c'",
|
|
this.rune)
|
|
default:
|
|
err = errors.Errorf (
|
|
this.pos(), "unexpected rune %U",
|
|
this.rune)
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func (this *fsplLexer) nextRune () error {
|
|
if this.column >= len(this.runeLine) {
|
|
ok := this.lineScanner.Scan()
|
|
if ok {
|
|
this.runeLine = []rune(this.lineScanner.Text())
|
|
this.rune = '\n'
|
|
this.column = 0
|
|
this.row ++
|
|
} else {
|
|
err := this.lineScanner.Err()
|
|
if err == nil {
|
|
this.eof = true
|
|
return io.EOF
|
|
} else {
|
|
return err
|
|
}
|
|
}
|
|
} else {
|
|
this.rune = this.runeLine[this.column]
|
|
this.column ++
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (this *fsplLexer) escapeSequence () (rune, error) {
|
|
err := this.nextRune()
|
|
if err != nil { return 0, err }
|
|
|
|
if isDigit(this.rune) {
|
|
var number rune
|
|
for index := 0; index < 3; index ++ {
|
|
if !isDigit(this.rune) { break }
|
|
|
|
number *= 8
|
|
number += this.rune - '0'
|
|
|
|
err = this.nextRune()
|
|
if err != nil { return 0, err }
|
|
}
|
|
return number, nil
|
|
}
|
|
|
|
defer this.nextRune()
|
|
switch this.rune {
|
|
case '\\', '\n', '\'':
|
|
return this.rune, nil
|
|
case 'a': return '\a', nil
|
|
case 'b': return '\b', nil
|
|
case 't': return '\t', nil
|
|
case 'n': return '\n', nil
|
|
case 'v': return '\v', nil
|
|
case 'f': return '\f', nil
|
|
case 'r': return '\r', nil
|
|
default: return 0, this.errBadEscapeSequence()
|
|
}
|
|
}
|
|
|
|
func (this *fsplLexer) skipWhitespace () error {
|
|
err := this.skipComment()
|
|
if err != nil { return err }
|
|
for isWhitespace(this.rune) {
|
|
err := this.nextRune()
|
|
if err != nil { return err }
|
|
err = this.skipComment()
|
|
if err != nil { return err }
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (this *fsplLexer) skipComment () error {
|
|
if this.rune == ';' {
|
|
for this.rune != '\n' {
|
|
err := this.nextRune()
|
|
if err != nil { return err }
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (this *fsplLexer) pos () errors.Position {
|
|
return errors.Position {
|
|
File: this.filename,
|
|
Line: this.lineScanner.Text(),
|
|
Row: this.row - 1,
|
|
Start: this.column - 1,
|
|
End: this.column,
|
|
}
|
|
}
|
|
|
|
func (this *fsplLexer) errUnexpectedEOF () error {
|
|
return errors.Errorf(this.pos(), "unexpected EOF")
|
|
}
|
|
|
|
func (this *fsplLexer) errBadEscapeSequence () error {
|
|
return errors.Errorf(this.pos(), "bad escape sequence")
|
|
}
|
|
|
|
// isWhitespace reports whether char is a space, tab, carriage return, or line
// feed.
func isWhitespace(char rune) bool {
	return char == ' ' || char == '\t' || char == '\r' || char == '\n'
}
|
|
|
|
// isSymbol reports whether char is one of the characters that may appear in a
// Symbol token: ~ ! @ # $ % ^ & - _ = + \ | ; , < > / ?
func isSymbol(char rune) bool {
	const symbolRunes = "~!@#$%^&-_=+\\|;,<>/?"
	for _, candidate := range symbolRunes {
		if candidate == char {
			return true
		}
	}
	return false
}
|
|
|
|
// isDigit reports whether char is one of the ASCII digits '0' through '9'.
func isDigit(char rune) bool {
	if char < '0' {
		return false
	}
	return char <= '9'
}
|