package lexer

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"unicode"
	"unicode/utf8"

	"git.tebibyte.media/fspl/fspl/errors"
)

// TokenKind identifies which class of lexeme a Token belongs to. Every kind
// the FSPL compiler recognizes has a constant below.
type TokenKind int

// Token kinds are negative and count downward from EOF, which is -1.
const (
	EOF TokenKind = -(iota + 1)

	// Name      Rough regex-ish description
	Ident       // [a-z][a-zA-Z0-9]*
	TypeIdent   // [A-Z][a-zA-Z0-9]*
	Int         // (0b|0x)?[0-9a-fA-F]+
	Float       // [0-9]*\.[0-9]+
	String      // \'.*\'
	Symbol      // [~!@#$%^&*-_=+\\|;,<>/?]+
	LParen      // \(
	LBrace      // \{
	LBracket    // \[
	RParen      // \)
	RBrace      // \}
	RBracket    // \]
	Colon       // :
	DoubleColon // ::
	Dot         // .
	DoubleDot   // ..
	Star        // \*
)

// String returns the name of the constant that defines the token kind. Values
// that do not correspond to any declared kind are rendered as
// "TokenKind(N)", where N is the numeric value.
func (kind TokenKind) String() string {
	// This table mirrors the order of the constant block above. Because
	// kinds count downward from -1, negating a kind and subtracting one
	// yields its zero-based position in the table.
	names := [...]string{
		"EOF",
		"Ident",
		"TypeIdent",
		"Int",
		"Float",
		"String",
		"Symbol",
		"LParen",
		"LBrace",
		"LBracket",
		"RParen",
		"RBrace",
		"RBracket",
		"Colon",
		"DoubleColon",
		"Dot",
		"DoubleDot",
		"Star",
	}

	index := -int(kind) - 1
	if index < 0 || index >= len(names) {
		return fmt.Sprintf("TokenKind(%d)", kind)
	}
	return names[index]
}

// Token represents a single lexeme of an FSPL file.
type Token struct {
	Position errors.Position // The position of the token in its file
	Kind     TokenKind       // Which kind of token it is
	Value    string          // The token's value
}

// String returns a string representation of the token. A token with a value
// is rendered as:
//
//	KIND 'VALUE'
//
// and a token whose value is empty is rendered as just:
//
//	KIND
func (tok Token) String() string {
	name := tok.Kind.String()
	if tok.Value == "" {
		return name
	}
	return name + " '" + tok.Value + "'"
}

// EOF returns whether or not the token is an EOF token.
func (tok Token) EOF() bool {
	return tok.Kind == EOF
}

// Is returns whether or not the token kind matches any of the given kinds.
// With no kinds given, it returns false.
func (tok Token) Is(kinds ...TokenKind) bool {
	for _, kind := range kinds {
		if tok.Kind == kind {
			return true
		}
	}
	return false
}

// ValueIs returns whether or not the token value matches any of the given
// values. With no values given, it returns false.
func (tok Token) ValueIs(values ...string) bool {
	for _, value := range values {
		if tok.Value == value {
			return true
		}
	}
	return false
}

// Lexer is an object capable of producing tokens.
type Lexer interface {
	// Next returns the next token. If there are no more tokens, it returns
	// an EOF token. It only returns an error on EOF if the file terminated
	// unexpectedly.
	Next() (Token, error)
}

// LexReader creates a new default lexer that reads from the given reader. The
// filename parameter is used for token locations and error messages.
//
// An error is returned if reading the first line of input fails. Empty input
// is not an error: the returned lexer simply produces an EOF token.
func LexReader(filename string, reader io.Reader) (Lexer, error) {
	lexer := &fsplLexer{
		filename:    filename,
		lineScanner: bufio.NewScanner(reader),
	}

	// Prime the lexer with its first rune. io.EOF here only means there is
	// nothing to lex, which the lexer records and later reports as an EOF
	// token. Anything else is a genuine read failure that must not be
	// dropped, or it would resurface later as a confusing lexing error.
	if err := lexer.nextRune(); err != nil && err != io.EOF {
		return nil, err
	}
	return lexer, nil
}

// LexFile creates a new default lexer that reads from the given file.
func LexFile (filename string) (Lexer, error) { file, err := os.Open(filename) if err != nil { return nil, err } lexer := &fsplLexer { filename: filename, lineScanner: bufio.NewScanner(file), } lexer.nextRune() return lexer, nil } type fsplLexer struct { filename string lineScanner *bufio.Scanner rune rune line string lineFood string offset int row int column int eof bool } func (this *fsplLexer) Next () (Token, error) { token, err := this.nextInternal() if err == io.EOF { err = this.errUnexpectedEOF() } return token, err } func (this *fsplLexer) nextInternal () (token Token, err error) { err = this.skipWhitespace() token.Position = this.pos() if this.eof { token.Kind = EOF err = nil return } if err != nil { return } appendRune := func () { token.Value += string(this.rune) err = this.nextRune() } doNumber := func () { token.Kind = Int for isDigit(this.rune) { appendRune() if this.eof { err = nil; return } if err != nil { return } } if this.rune == '.' { token.Kind = Float appendRune() for isDigit(this.rune) { appendRune() if err != nil { return } } } } doSymbol := func () { token.Kind = Symbol for isSymbol(this.rune) { appendRune() if err != nil { return } } } defer func () { newPos := this.pos() newPos.End -- // TODO figure out why tf we have to do this token.Position = token.Position.Union(newPos) } () switch { // Ident case unicode.IsLower(this.rune): token.Kind = Ident for unicode.IsLetter(this.rune) || isDigit(this.rune) { appendRune() if this.eof { err = nil; return } if err != nil { return } } // TypeIdent case unicode.IsUpper(this.rune): token.Kind = TypeIdent for unicode.IsLetter(this.rune) || isDigit(this.rune) { appendRune() if this.eof { err = nil; return } if err != nil { return } } // Int, Float case isDigit(this.rune): doNumber() if this.eof { err = nil; return } // String case this.rune == '\'': token.Kind = String err = this.nextRune() if err != nil { return } for this.rune != '\'' { if this.rune == '\\' { var result rune result, err = 
this.escapeSequence() if err != nil { return } token.Value += string(result) } else { appendRune() if this.eof { err = nil; return } if err != nil { return } } } err = this.nextRune() if this.eof { err = nil; return } if err != nil { return } // Symbol, Int, Float case this.rune == '-': token.Kind = Symbol appendRune() if err != nil { return } if isDigit(this.rune) { doNumber() if this.eof { err = nil; return } } else if isSymbol(this.rune) { doSymbol() if this.eof { err = nil; return } } // Symbol case isSymbol(this.rune): doSymbol() if this.eof { err = nil; return } case this.rune == '(': token.Kind = LParen appendRune() if this.eof { err = nil; return } case this.rune == '{': token.Kind = LBrace appendRune() if this.eof { err = nil; return } case this.rune == '[': token.Kind = LBracket appendRune() if this.eof { err = nil; return } case this.rune == ')': token.Kind = RParen appendRune() if this.eof { err = nil; return } case this.rune == '}': token.Kind = RBrace appendRune() if this.eof { err = nil; return } case this.rune == ']': token.Kind = RBracket appendRune() if this.eof { err = nil; return } // Colon, DoubleColon case this.rune == ':': token.Kind = Colon appendRune() if this.rune == ':' { token.Kind = DoubleColon appendRune() } if this.eof { err = nil; return } // Dot, DoubleDot case this.rune == '.': token.Kind = Dot appendRune() if this.rune == '.' 
{ token.Kind = DoubleDot appendRune() } if this.eof { err = nil; return } // Star case this.rune == '*': token.Kind = Star appendRune() if this.eof { err = nil; return } case unicode.IsPrint(this.rune): err = errors.Errorf ( this.pos(), "unexpected rune '%c'", this.rune) default: err = errors.Errorf ( this.pos(), "unexpected rune %U", this.rune) } return } func (this *fsplLexer) nextRune () error { if this.lineFood == "" { ok := this.lineScanner.Scan() if ok { this.line = this.lineScanner.Text() this.lineFood = this.line this.rune = '\n' this.column = 0 this.row ++ } else { err := this.lineScanner.Err() if err == nil { this.eof = true return io.EOF } else { return err } } } else { var ch rune var size int for ch == 0 && this.lineFood != "" { ch, size = utf8.DecodeRuneInString(this.lineFood) this.lineFood = this.lineFood[size:] } this.rune = ch this.column ++ } return nil } func (this *fsplLexer) escapeSequence () (rune, error) { err := this.nextRune() if err != nil { return 0, err } if isDigit(this.rune) { var number rune for index := 0; index < 3; index ++ { if !isDigit(this.rune) { break } number *= 8 number += this.rune - '0' err = this.nextRune() if err != nil { return 0, err } } return number, nil } defer this.nextRune() switch this.rune { case '\\', '\n', '\'': return this.rune, nil case 'a': return '\a', nil case 'b': return '\b', nil case 't': return '\t', nil case 'n': return '\n', nil case 'v': return '\v', nil case 'f': return '\f', nil case 'r': return '\r', nil default: return 0, this.errBadEscapeSequence() } } func (this *fsplLexer) skipWhitespace () error { err := this.skipComment() if err != nil { return err } for isWhitespace(this.rune) { err := this.nextRune() if err != nil { return err } err = this.skipComment() if err != nil { return err } } return nil } func (this *fsplLexer) skipComment () error { if this.rune == ';' { for this.rune != '\n' { err := this.nextRune() if err != nil { return err } } } return nil } func (this *fsplLexer) pos () 
errors.Position { return errors.Position { File: this.filename, Line: this.lineScanner.Text(), Row: this.row - 1, Start: this.column - 1, End: this.column, } } func (this *fsplLexer) errUnexpectedEOF () error { return errors.Errorf(this.pos(), "unexpected EOF") } func (this *fsplLexer) errBadEscapeSequence () error { return errors.Errorf(this.pos(), "bad escape sequence") } func isWhitespace (char rune) bool { switch char { case ' ', '\t', '\r', '\n': return true default: return false } } func isSymbol (char rune) bool { switch char { case '~', '!', '@', '#', '$', '%', '^', '&', '-', '_', '=', '+', '\\', '|', ';', ',', '<', '>', '/', '?': return true default: return false } } func isDigit (char rune) bool { return char >= '0' && char <= '9' }