package tss import "io" import "bufio" import "unicode" import "unicode/utf8" import "git.tebibyte.media/sashakoshka/goparse" const ( Comment parse.TokenKind = iota LBrace RBrace LBracket RBracket Equals Colon Comma Semicolon Star Dot Dollar Slash Color Ident Number String ) var tokenNames = map[parse.TokenKind] string { parse.EOF: "EOF", Comment: "Comment", LBrace: "LBrace", RBrace: "RBrace", LBracket: "LBracket", RBracket: "RBracket", Equals: "Equals", Colon: "Colon", Comma: "Comma", Semicolon: "Semicolon", Star: "Star", Dot: "Dot", Dollar: "Dollar", Slash: "Slash", Color: "Color", Ident: "Ident", Number: "Number", String: "String", } type lexer struct { filename string lineScanner *bufio.Scanner rune rune line string lineFood string offset int row int column int eof bool } func Lex (filename string, reader io.Reader) parse.Lexer { lex := &lexer { filename: filename, lineScanner: bufio.NewScanner(reader), } lex.nextRune() return lex } func (this *lexer) Next () (parse.Token, error) { for { token, err := this.next() if err == io.EOF { return token, this.errUnexpectedEOF() } if err != nil { return token, err } if !token.Is(Comment) { return token, err } } } func (this *lexer) next () (token parse.Token, err error) { err = this.skipWhitespace() token.Position = this.pos() if this.eof { token.Kind = parse.EOF err = nil return } if err != nil { return } appendRune := func () { token.Value += string(this.rune) err = this.nextRune() } skipRune := func () { err = this.nextRune() } defer func () { newPos := this.pos() newPos.End -- token.Position = token.Position.Union(newPos) } () switch { case this.rune == '/': token.Kind = Comment skipRune() if err != nil { return } if this.rune == '/' { for this.rune != '\n' { skipRune() if err != nil { return } } } else { token.Kind = Slash } if this.eof { err = nil; return } case this.rune == '{': token.Kind = LBrace appendRune() if this.eof { err = nil; return } case this.rune == '}': token.Kind = RBrace appendRune() if this.eof { err = nil; return } case this.rune == '[': token.Kind = LBracket appendRune() if this.eof { err = nil; return } case this.rune == ']': token.Kind = RBracket appendRune() if this.eof { err = nil; return } case this.rune == '=': token.Kind = Equals appendRune() if this.eof { err = nil; return } case this.rune == ':': token.Kind = Colon appendRune() if this.eof { err = nil; return } case this.rune == ',': token.Kind = Comma appendRune() if this.eof { err = nil; return } case this.rune == ';': token.Kind = Semicolon appendRune() if this.eof { err = nil; return } case this.rune == '*': token.Kind = Star appendRune() if this.eof { err = nil; return } case this.rune == '.': token.Kind = Dot appendRune() if this.eof { err = nil; return } case this.rune == '$': token.Kind = Dollar appendRune() if this.eof { err = nil; return } case this.rune == '#': token.Kind = Color skipRune() if err != nil { return } for isHexDigit(this.rune) { appendRune() if this.eof { err = nil; return } } if this.eof { err = nil; return } case unicode.IsLetter(this.rune): token.Kind = Ident for unicode.IsLetter(this.rune) || unicode.IsNumber(this.rune) { appendRune() if this.eof { err = nil; return } } if this.eof { err = nil; return } case this.rune == '-': token.Kind = Number appendRune() for isDigit(this.rune) { appendRune() if this.eof { err = nil; return } } if this.eof { err = nil; return } case isDigit(this.rune): token.Kind = Number for isDigit(this.rune) { appendRune() if this.eof { err = nil; return } } if this.eof { err = nil; return } case this.rune == '\'', this.rune == '"': stringDelimiter := this.rune token.Kind = String err = this.nextRune() if err != nil { return } for this.rune != stringDelimiter { if this.rune == '\\' { var result rune result, err = this.escapeSequence(stringDelimiter) if err != nil { return } token.Value += string(result) } else { appendRune() if this.eof { err = nil; return } if err != nil { return } } } err = this.nextRune() if this.eof { err = nil; return } if err != nil { return } default: err = parse.Errorf ( this.pos(), "unexpected rune %U", this.rune) } return } func (this *lexer) nextRune () error { if this.lineFood == "" { ok := this.lineScanner.Scan() if ok { this.line = this.lineScanner.Text() this.lineFood = this.line this.rune = '\n' this.column = 0 this.row ++ } else { err := this.lineScanner.Err() if err == nil { this.eof = true return io.EOF } else { return err } } } else { var ch rune var size int for ch == 0 && this.lineFood != "" { ch, size = utf8.DecodeRuneInString(this.lineFood) this.lineFood = this.lineFood[size:] } this.rune = ch this.column ++ } return nil } func (this *lexer) escapeSequence (stringDelimiter rune) (rune, error) { err := this.nextRune() if err != nil { return 0, err } if isDigit(this.rune) { var number rune for index := 0; index < 3; index ++ { if !isDigit(this.rune) { break } number *= 8 number += this.rune - '0' err = this.nextRune() if err != nil { return 0, err } } return number, nil } defer this.nextRune() switch this.rune { case '\\', '\n', stringDelimiter: return this.rune, nil case 'a': return '\a', nil case 'b': return '\b', nil case 't': return '\t', nil case 'n': return '\n', nil case 'v': return '\v', nil case 'f': return '\f', nil case 'r': return '\r', nil default: return 0, this.errBadEscapeSequence() } } func (this *lexer) skipWhitespace () error { for isWhitespace(this.rune) { err := this.nextRune() if err != nil { return err } } return nil } func (this *lexer) pos () parse.Position { return parse.Position { File: this.filename, Line: this.lineScanner.Text(), Row: this.row - 1, Start: this.column - 1, End: this.column, } } func (this *lexer) errUnexpectedEOF () error { return parse.Errorf(this.pos(), "unexpected EOF") } func (this *lexer) errBadEscapeSequence () error { return parse.Errorf(this.pos(), "bad escape sequence") } func isWhitespace (char rune) bool { switch char { case ' ', '\t', '\r', '\n': return true default: return false } } func isSymbol (char rune) bool { switch char { case '~', '!', '@', '#', '$', '%', '^', '&', '-', '_', '=', '+', '\\', '|', ';', ',', '<', '>', '/', '?': return true default: return false } } func isDigit (char rune) bool { return char >= '0' && char <= '9' } func isHexDigit (char rune) bool { return isDigit(char) || char >= 'a' && char <= 'f' || char >= 'A' && char <= 'F' }