fspl/lexer/lexer.go

444 lines
9.9 KiB
Go

package lexer
import "os"
import "io"
import "fmt"
import "bufio"
import "unicode"
import "unicode/utf8"
import "git.tebibyte.media/fspl/fspl/errors"
// TokenKind is an enumeration of all tokens the FSPL compiler recognizes.
type TokenKind int

// The token kinds. Every kind is a distinct negative number: EOF is -1 and each
// following kind is one less than the kind before it. The trailing comments
// give a rough, regex-ish description of each kind.
const (
	EOF TokenKind = -(iota + 1)

	Ident       // [a-z][a-zA-Z0-9]*
	TypeIdent   // [A-Z][a-zA-Z0-9]*
	Int         // (0b|0x)?[0-9a-fA-F]+
	Float       // [0-9]*\.[0-9]+
	String      // \'.*\'
	Symbol      // [~!@#$%^&*-_=+\\|;,<>/?]+
	LParen      // \(
	LBrace      // \{
	LBracket    // \[
	RParen      // \)
	RBrace      // \}
	RBracket    // \]
	Colon       // :
	DoubleColon // ::
	Dot         // .
	DoubleDot   // ..
	Star        // \*
)
// tokenKindNames maps every known token kind to the name of the constant that
// defines it.
var tokenKindNames = map[TokenKind]string{
	EOF:         "EOF",
	Ident:       "Ident",
	TypeIdent:   "TypeIdent",
	Int:         "Int",
	Float:       "Float",
	String:      "String",
	Symbol:      "Symbol",
	LParen:      "LParen",
	LBrace:      "LBrace",
	LBracket:    "LBracket",
	RParen:      "RParen",
	RBrace:      "RBrace",
	RBracket:    "RBracket",
	Colon:       "Colon",
	DoubleColon: "DoubleColon",
	Dot:         "Dot",
	DoubleDot:   "DoubleDot",
	Star:        "Star",
}

// String returns the name of the constant that defines the token kind. A value
// that does not correspond to any known kind is rendered as "TokenKind(N)",
// where N is its numeric value.
func (kind TokenKind) String() string {
	if name, known := tokenKindNames[kind]; known {
		return name
	}
	return fmt.Sprintf("TokenKind(%d)", kind)
}
// Token represents a single lexeme of an FSPL file.
type Token struct {
	// Position is the position of the token in its file.
	Position errors.Position
	// Kind tells which kind of token it is.
	Kind TokenKind
	// Value is the token's value.
	Value string
}
// String returns a string representation of the token. A token with a
// non-empty value is rendered as:
//
//	KIND 'VALUE'
//
// and a token with an empty value is rendered as just:
//
//	KIND
func (tok Token) String() string {
	name := tok.Kind.String()
	if tok.Value == "" {
		return name
	}
	return fmt.Sprintf("%s '%s'", name, tok.Value)
}
// EOF reports whether the token's kind is EOF.
func (tok Token) EOF() bool {
	switch tok.Kind {
	case EOF:
		return true
	default:
		return false
	}
}
// Is reports whether the token's kind is equal to at least one of the given
// kinds. It returns false when no kinds are given.
func (tok Token) Is(kinds ...TokenKind) bool {
	for index := range kinds {
		if kinds[index] == tok.Kind {
			return true
		}
	}
	return false
}
// ValueIs reports whether the token's value is equal to at least one of the
// given values. It returns false when no values are given.
func (tok Token) ValueIs(values ...string) bool {
	for index := range values {
		if values[index] == tok.Value {
			return true
		}
	}
	return false
}
// Lexer is an object capable of producing tokens. Tokens are obtained one at a
// time by calling Next repeatedly.
type Lexer interface {
// Next returns the next token. If there are no more tokens, it returns
// an EOF token. It only returns an error on EOF if the file terminated
// unexpectedly.
Next () (Token, error)
}
// LexReader creates a new default lexer that reads from the given reader. The
// filename parameter is used for token locations and error messages.
//
// The first rune is read from the reader before LexReader returns. If that
// read fails for any reason other than the input being empty, the error is
// returned and no lexer is created.
func LexReader(filename string, reader io.Reader) (Lexer, error) {
	lexer := &fsplLexer{
		filename:    filename,
		lineScanner: bufio.NewScanner(reader),
	}

	// Prime the lexer with its first rune. Running out of input here only
	// means the source is empty: the eof flag is set and the first call to
	// Next produces an EOF token. Anything else is a genuine read failure
	// that must not be silently dropped.
	if err := lexer.nextRune(); err != nil && err != io.EOF {
		return nil, err
	}
	return lexer, nil
}
// selfClosingFile is an io.Reader over a file that closes the file as soon as
// a read reports an error, including io.EOF. Every later read returns that
// same error again without touching the file.
type selfClosingFile struct {
	file *os.File
	err  error
}

// Read reads from the underlying file, closing it when the read fails or the
// end of the file is reached.
func (this *selfClosingFile) Read(buffer []byte) (int, error) {
	if this.err != nil {
		return 0, this.err
	}
	count, err := this.file.Read(buffer)
	if err != nil {
		this.err = err
		// The read error is what gets reported; a failure to close a
		// file that was only ever read from adds nothing useful.
		this.file.Close()
	}
	return count, err
}

// LexFile creates a new default lexer that reads from the given file. The
// filename is also used for token locations and error messages.
//
// The file is opened immediately and read lazily as tokens are requested. It
// is closed once it has been read to the end or a read has failed. It is also
// closed if the lexer cannot be created.
func LexFile(filename string) (Lexer, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}

	lexer, err := LexReader(filename, &selfClosingFile{file: file})
	if err != nil {
		// Closing twice is harmless; the second close merely reports
		// that the file is already closed.
		file.Close()
		return nil, err
	}
	return lexer, nil
}
// fsplLexer is the default Lexer implementation. It pulls its input one line
// at a time from a bufio.Scanner and hands out the runes of that line one by
// one.
type fsplLexer struct {
	// filename is the name reported in token positions and errors.
	filename string
	// lineScanner supplies the input, one line per Scan.
	lineScanner *bufio.Scanner

	// rune is the rune currently being looked at.
	rune rune
	// line is the text of the most recently scanned line, and lineFood is
	// the part of it that has not been turned into runes yet.
	line, lineFood string

	// row counts the lines scanned so far and column counts the runes
	// taken from the current line. offset is not used by any of the code
	// in this part of the file.
	offset, row, column int

	// eof is set once the scanner has run out of input without an error.
	eof bool
}
// Next returns the next token. An io.EOF coming out of the internal lexing
// routine means the input ended in the middle of something, so it is replaced
// with an "unexpected EOF" error carrying the current position. Any other
// result is passed through unchanged.
func (this *fsplLexer) Next() (Token, error) {
	token, err := this.nextInternal()
	if err != io.EOF {
		return token, err
	}
	return token, this.errUnexpectedEOF()
}
// nextInternal skips whitespace and comments and then lexes a single token
// starting at the current rune.
//
// When the input is already exhausted it returns an EOF token and no error. A
// token that is cut short by the end of the input in a place where it may
// legally end (after a name, number, symbol, bracket and so on) is returned
// without an error. If the input ends inside a string literal, io.EOF is
// returned, which Next reports as an unexpected EOF. Runes that cannot start
// any token produce an "unexpected rune" error.
func (this *fsplLexer) nextInternal() (token Token, err error) {
	err = this.skipWhitespace()

	token.Position = this.pos()
	if this.eof {
		// Running out of input between tokens is the normal way for a
		// file to end, so whatever skipWhitespace reported is dropped.
		token.Kind = EOF
		err = nil
		return
	}
	if err != nil {
		return
	}

	// appendRune adds the current rune to the token value and advances to
	// the next rune, leaving the outcome of the advance in err.
	appendRune := func() {
		token.Value += string(this.rune)
		err = this.nextRune()
	}

	// finished reports whether the input has run out. It also clears err,
	// so it must only be used where the token may legally end at EOF.
	//
	// nextRune leaves the current rune untouched when it hits the end of
	// the input, so after an advance this has to be checked before the
	// rune is looked at again.
	finished := func() bool {
		if this.eof {
			err = nil
		}
		return this.eof
	}

	// doName lexes an identifier of the given kind.
	doName := func(kind TokenKind) {
		token.Kind = kind
		for unicode.IsLetter(this.rune) || isDigit(this.rune) {
			appendRune()
			if finished() || err != nil {
				return
			}
		}
	}

	// doNumber lexes a run of digits as an Int, or as a Float if the
	// digits are followed by a dot and, optionally, more digits. The
	// caller is responsible for calling finished afterwards.
	doNumber := func() {
		token.Kind = Int
		for isDigit(this.rune) {
			appendRune()
			if finished() || err != nil {
				return
			}
		}

		if this.rune != '.' {
			return
		}
		token.Kind = Float
		appendRune()
		for err == nil && isDigit(this.rune) {
			appendRune()
		}
	}

	// doSymbol lexes a run of symbol runes. The caller is responsible for
	// calling finished afterwards.
	doSymbol := func() {
		token.Kind = Symbol
		for isSymbol(this.rune) {
			appendRune()
			if err != nil {
				return
			}
		}
	}

	// doSingle lexes a token that is exactly one rune long.
	doSingle := func(kind TokenKind) {
		token.Kind = kind
		appendRune()
		finished()
	}

	// doPair lexes a token that is either the current rune on its own
	// (single) or the current rune repeated twice (double).
	doPair := func(single, double TokenKind) {
		first := this.rune
		token.Kind = single
		appendRune()
		// Without this check a lone rune at the very end of the input
		// would be seen a second time and turned into the double form.
		if finished() || err != nil {
			return
		}
		if this.rune == first {
			token.Kind = double
			appendRune()
			finished()
		}
	}

	// Once the token has been read, stretch its position so that it covers
	// everything from its first rune to its last.
	defer func() {
		newPos := this.pos()
		newPos.End-- // TODO figure out why tf we have to do this
		token.Position = token.Position.Union(newPos)
	}()

	switch {
	// Ident
	case unicode.IsLower(this.rune):
		doName(Ident)

	// TypeIdent
	case unicode.IsUpper(this.rune):
		doName(TypeIdent)

	// Int, Float
	case isDigit(this.rune):
		doNumber()
		finished()

	// String
	case this.rune == '\'':
		token.Kind = String
		err = this.nextRune()
		if err != nil {
			return
		}

		for this.rune != '\'' {
			if this.rune == '\\' {
				var result rune
				result, err = this.escapeSequence()
				if err != nil {
					return
				}
				token.Value += string(result)
				// escapeSequence does not report an EOF that it
				// hits while stepping past the escape, so look
				// at the flag instead.
				if this.eof {
					err = io.EOF
					return
				}
			} else {
				appendRune()
				// This includes io.EOF: the closing quote has
				// not been seen yet, so the string is
				// unterminated.
				if err != nil {
					return
				}
			}
		}

		// Step past the closing quote. The string is complete, so the
		// input is allowed to end here.
		err = this.nextRune()
		finished()

	// Symbol, Int, Float
	case this.rune == '-':
		token.Kind = Symbol
		appendRune()
		// A lone '-' at the very end of the input is a complete symbol.
		if finished() || err != nil {
			return
		}

		if isDigit(this.rune) {
			doNumber()
		} else if isSymbol(this.rune) {
			doSymbol()
		}
		finished()

	// Symbol
	case isSymbol(this.rune):
		doSymbol()
		finished()

	case this.rune == '(':
		doSingle(LParen)
	case this.rune == '{':
		doSingle(LBrace)
	case this.rune == '[':
		doSingle(LBracket)
	case this.rune == ')':
		doSingle(RParen)
	case this.rune == '}':
		doSingle(RBrace)
	case this.rune == ']':
		doSingle(RBracket)

	// Colon, DoubleColon
	case this.rune == ':':
		doPair(Colon, DoubleColon)

	// Dot, DoubleDot
	case this.rune == '.':
		doPair(Dot, DoubleDot)

	// Star
	case this.rune == '*':
		doSingle(Star)

	case unicode.IsPrint(this.rune):
		err = errors.Errorf(
			this.pos(), "unexpected rune '%c'",
			this.rune)

	default:
		err = errors.Errorf(
			this.pos(), "unexpected rune %U",
			this.rune)
	}

	return
}
// nextRune advances the lexer by one rune.
//
// While the current line still has undecoded text, the next rune is decoded
// from it and the column is incremented. Runes with the value zero are passed
// over as long as more text follows them on the line.
//
// When the current line is used up, the next line is scanned. The current rune
// becomes '\n', the column is reset to zero and the row is incremented. If
// there is no next line, the scanner's error is returned. If the scanner has
// no error, the eof flag is set and io.EOF is returned. In both of these cases
// the current rune is left unchanged.
func (this *fsplLexer) nextRune() error {
	if this.lineFood != "" {
		var (
			decoded rune
			width   int
		)
		for decoded == 0 && this.lineFood != "" {
			decoded, width = utf8.DecodeRuneInString(this.lineFood)
			this.lineFood = this.lineFood[width:]
		}
		this.rune = decoded
		this.column++
		return nil
	}

	if this.lineScanner.Scan() {
		this.line = this.lineScanner.Text()
		this.lineFood = this.line
		this.rune = '\n'
		this.column = 0
		this.row++
		return nil
	}

	if err := this.lineScanner.Err(); err != nil {
		return err
	}
	this.eof = true
	return io.EOF
}
// escapeSequence reads an escape sequence and returns the rune it stands for.
// It expects the current rune to be the backslash that introduces the
// sequence, and leaves the lexer on the first rune after the sequence.
//
// A sequence that starts with a digit is read as a number of up to three
// digits in base eight. Otherwise the sequence is a single rune: a backslash,
// a line break or a single quote stand for themselves, and a, b, t, n, v, f
// and r stand for the corresponding control characters. Anything else is
// reported as a bad escape sequence.
func (this *fsplLexer) escapeSequence() (rune, error) {
	if err := this.nextRune(); err != nil {
		return 0, err
	}

	if isDigit(this.rune) {
		var value rune
		for count := 0; count < 3 && isDigit(this.rune); count++ {
			value = value*8 + (this.rune - '0')
			if err := this.nextRune(); err != nil {
				return 0, err
			}
		}
		return value, nil
	}

	var (
		result  rune
		failure error
	)
	switch this.rune {
	case '\\', '\n', '\'':
		result = this.rune
	case 'a':
		result = '\a'
	case 'b':
		result = '\b'
	case 't':
		result = '\t'
	case 'n':
		result = '\n'
	case 'v':
		result = '\v'
	case 'f':
		result = '\f'
	case 'r':
		result = '\r'
	default:
		// The error has to be created before advancing so that it
		// points at the offending rune.
		failure = this.errBadEscapeSequence()
	}

	// Step past the escaped rune. The outcome of this advance is not
	// reported to the caller.
	_ = this.nextRune()
	return result, failure
}
// skipWhitespace advances past any mix of whitespace and comments, stopping at
// the first rune that belongs to neither. It returns the first error produced
// while advancing.
func (this *fsplLexer) skipWhitespace() error {
	if err := this.skipComment(); err != nil {
		return err
	}

	for isWhitespace(this.rune) {
		if err := this.nextRune(); err != nil {
			return err
		}
		if err := this.skipComment(); err != nil {
			return err
		}
	}
	return nil
}
// skipComment does nothing unless the current rune is a semicolon. If it is,
// the lexer is advanced until the current rune is a line break, and the first
// error produced while advancing is returned.
func (this *fsplLexer) skipComment() error {
	if this.rune != ';' {
		return nil
	}

	for this.rune != '\n' {
		if err := this.nextRune(); err != nil {
			return err
		}
	}
	return nil
}
// pos returns a position describing the current rune. The line text is taken
// from the scanner, the row is the lexer's row minus one, and the span runs
// from the lexer's column minus one up to the lexer's column.
func (this *fsplLexer) pos() errors.Position {
	position := errors.Position{
		File: this.filename,
		Line: this.lineScanner.Text(),
	}
	position.Row = this.row - 1
	position.End = this.column
	position.Start = position.End - 1
	return position
}
// errUnexpectedEOF creates an "unexpected EOF" error located at the current
// position.
func (this *fsplLexer) errUnexpectedEOF() error {
	where := this.pos()
	return errors.Errorf(where, "unexpected EOF")
}
// errBadEscapeSequence creates a "bad escape sequence" error located at the
// current position.
func (this *fsplLexer) errBadEscapeSequence() error {
	where := this.pos()
	return errors.Errorf(where, "bad escape sequence")
}
// isWhitespace reports whether char is a space, a tab, a carriage return or a
// line feed.
func isWhitespace(char rune) bool {
	return char == ' ' || char == '\t' || char == '\r' || char == '\n'
}
// isSymbol reports whether char is one of the runes a symbol token is made of.
func isSymbol(char rune) bool {
	const symbolRunes = "~!@#$%^&-_=+\\|;,<>/?"
	for _, candidate := range symbolRunes {
		if candidate == char {
			return true
		}
	}
	return false
}
// isDigit reports whether char is one of the ASCII digits 0 through 9.
func isDigit(char rune) bool {
	return !(char < '0' || char > '9')
}