package generate

import (
	"bufio"
	"io"
	"unicode"
	"unicode/utf8"

	"git.tebibyte.media/sashakoshka/goparse"
)

// Token kinds produced by the lexer in this file.
const (
	// TokenMethod is an 'M' followed by decimal digits. The token value
	// holds only the digits.
	TokenMethod parse.TokenKind = iota
	// TokenKey is a run of decimal digits.
	TokenKey
	// TokenIdent starts with an uppercase letter and continues with letters
	// and decimal digits.
	TokenIdent
	// TokenComma is a single ','.
	TokenComma
	// TokenLBrace is a single '{'.
	TokenLBrace
	// TokenRBrace is a single '}'.
	TokenRBrace
	// TokenLBracket is a single '['.
	TokenLBracket
	// TokenRBracket is a single ']'.
	TokenRBracket
)

// tokenNames maps each token kind declared above to a printable name.
var tokenNames = map[parse.TokenKind]string{
	TokenMethod:   "Method",
	TokenKey:      "Key",
	TokenIdent:    "Ident",
	TokenComma:    "Comma",
	TokenLBrace:   "LBrace",
	TokenRBrace:   "RBrace",
	TokenLBracket: "LBracket",
	TokenRBracket: "RBracket",
}

// Lex creates a lexer that reads from reader. fileName is stored in the
// position attached to every token and error.
//
// The lexer is primed by reading the first line before it is returned. If that
// read fails with anything other than end of input, the error is returned
// instead of being silently dropped. Empty input is not an error: the returned
// lexer simply yields an EOF token.
func Lex(fileName string, reader io.Reader) (parse.Lexer, error) {
	lex := &lexer{
		fileName:    fileName,
		lineScanner: bufio.NewScanner(reader),
	}
	if err := lex.nextRune(); err != nil && err != io.EOF {
		return nil, err
	}
	return lex, nil
}

// lexer holds the scanning state: the input is consumed line by line through
// lineScanner, and each line is then consumed rune by rune.
type lexer struct {
	fileName    string
	lineScanner *bufio.Scanner
	rune        rune   // current rune under examination
	line        string // full text of the current line
	lineFood    string // part of the current line not yet consumed

	offset int // not referenced in this file
	row    int // number of lines scanned so far
	column int // number of runes consumed from the current line

	eof bool // set once lineScanner is exhausted without error
}

// Next returns the next token. A bare io.EOF coming out of the internal
// tokenizer is converted into a positioned "unexpected EOF" error.
func (this *lexer) Next() (parse.Token, error) {
	token, err := this.nextInternal()
	if err == io.EOF {
		err = this.errUnexpectedEOF()
	}
	return token, err
}

// nextInternal skips whitespace and comments, then lexes a single token.
// When the input is exhausted it returns a token of kind parse.EOF and a nil
// error.
func (this *lexer) nextInternal() (token parse.Token, err error) {
	err = this.skipWhitespace()
	token.Position = this.pos()
	// The EOF flag is checked before err, because hitting the end of input
	// while skipping whitespace is the normal way for the stream to end.
	if this.eof {
		token.Kind = parse.EOF
		err = nil
		return
	}
	if err != nil {
		return
	}

	// appendRune adds the current rune to the token value and advances.
	appendRune := func() {
		token.Value += string(this.rune)
		err = this.nextRune()
	}

	// doNumber consumes a run of decimal digits into the token value.
	doNumber := func() {
		for isDigit(this.rune) {
			appendRune()
			if this.eof {
				err = nil
				return
			}
			if err != nil {
				return
			}
		}
	}

	// single lexes a one-rune token of the given kind. Reaching the end of
	// input right after the rune is not an error.
	single := func(kind parse.TokenKind) {
		token.Kind = kind
		appendRune()
		if this.eof {
			err = nil
		}
	}

	// Extend the token position to cover everything consumed, on every
	// return path.
	defer func() {
		newPos := this.pos()
		newPos.End-- // TODO figure out why this adjustment is needed
		token.Position = token.Position.Union(newPos)
	}()

	switch {
	// Method
	case this.rune == 'M':
		token.Kind = TokenMethod
		// The leading 'M' is consumed but not stored in the value.
		err = this.nextRune()
		if err != nil {
			return
		}
		doNumber()
		if this.eof {
			err = nil
			return
		}

	// Key
	case isDigit(this.rune):
		token.Kind = TokenKey
		doNumber()
		if this.eof {
			err = nil
			return
		}

	// Ident
	case unicode.IsUpper(this.rune):
		token.Kind = TokenIdent
		for unicode.IsLetter(this.rune) || isDigit(this.rune) {
			appendRune()
			if this.eof {
				err = nil
				return
			}
			if err != nil {
				return
			}
		}

	// Comma
	case this.rune == ',':
		single(TokenComma)

	// LBrace
	case this.rune == '{':
		single(TokenLBrace)

	// RBrace
	case this.rune == '}':
		single(TokenRBrace)

	// LBracket
	case this.rune == '[':
		single(TokenLBracket)

	// RBracket
	case this.rune == ']':
		single(TokenRBracket)

	case unicode.IsPrint(this.rune):
		err = parse.Errorf(
			this.pos(), "unexpected rune '%c'",
			this.rune)

	default:
		err = parse.Errorf(
			this.pos(), "unexpected rune %U",
			this.rune)
	}

	return
}

// nextRune advances to the next rune. When the current line is used up, it
// scans the next line and reports a synthetic '\n' as the current rune, so a
// newline is seen before the first rune of every line. It returns io.EOF and
// sets the eof flag when the scanner is exhausted without error, or the
// scanner's own error otherwise.
func (this *lexer) nextRune() error {
	if this.lineFood == "" {
		if !this.lineScanner.Scan() {
			if err := this.lineScanner.Err(); err != nil {
				return err
			}
			this.eof = true
			return io.EOF
		}
		this.line = this.lineScanner.Text()
		this.lineFood = this.line
		this.rune = '\n'
		this.column = 0
		this.row++
		return nil
	}

	// Decode the next rune, skipping over NUL runes while input remains on
	// the line.
	var ch rune
	for ch == 0 && this.lineFood != "" {
		var size int
		ch, size = utf8.DecodeRuneInString(this.lineFood)
		this.lineFood = this.lineFood[size:]
	}
	this.rune = ch
	this.column++
	return nil
}

// skipWhitespace advances past any mix of whitespace runes and ';' comments.
func (this *lexer) skipWhitespace() error {
	if err := this.skipComment(); err != nil {
		return err
	}
	for isWhitespace(this.rune) {
		if err := this.nextRune(); err != nil {
			return err
		}
		if err := this.skipComment(); err != nil {
			return err
		}
	}
	return nil
}

// skipComment advances to the next newline if the current rune is ';', which
// starts a comment running to the end of the line.
func (this *lexer) skipComment() error {
	if this.rune != ';' {
		return nil
	}
	for this.rune != '\n' {
		if err := this.nextRune(); err != nil {
			return err
		}
	}
	return nil
}

// pos returns the position of the current rune, derived from the row and
// column counters.
func (this *lexer) pos() parse.Position {
	return parse.Position{
		File:  this.fileName,
		Line:  this.lineScanner.Text(),
		Row:   this.row - 1,
		Start: this.column - 1,
		End:   this.column,
	}
}

// errUnexpectedEOF returns an "unexpected EOF" error at the current position.
func (this *lexer) errUnexpectedEOF() error {
	return parse.Errorf(this.pos(), "unexpected EOF")
}

// isWhitespace reports whether char is a space, tab, carriage return or
// newline.
func isWhitespace(char rune) bool {
	switch char {
	case ' ', '\t', '\r', '\n':
		return true
	default:
		return false
	}
}

// isDigit reports whether char is an ASCII decimal digit.
func isDigit(char rune) bool {
	return char >= '0' && char <= '9'
}

// isHexDigit reports whether char is an ASCII hexadecimal digit of either
// case.
func isHexDigit(char rune) bool {
	return isDigit(char) ||
		(char >= 'a' && char <= 'f') ||
		(char >= 'A' && char <= 'F')
}