hopp/generate/lex.go

231 lines
4.5 KiB
Go

package generate
import "io"
import "bufio"
import "unicode"
import "unicode/utf8"
import "git.tebibyte.media/sashakoshka/goparse"
// Token kinds produced by the lexer in this file.
const (
// TokenMethod is the letter 'M' followed by zero or more decimal digits.
// The token value holds only the digits; the 'M' is not included.
TokenMethod parse.TokenKind = iota
// TokenKey is a run of one or more decimal digits.
TokenKey
// TokenIdent starts with an uppercase letter and continues with letters
// or decimal digits.
TokenIdent
// TokenComma is the rune ','.
TokenComma
// TokenLBrace is the rune '{'.
TokenLBrace
// TokenRBrace is the rune '}'.
TokenRBrace
// TokenLBracket is the rune '['.
TokenLBracket
// TokenRBracket is the rune ']'.
TokenRBracket
)
// tokenNames maps every token kind declared in this file to a short,
// human-readable name.
// NOTE(review): nothing in this file reads the map; presumably it is used
// elsewhere in the package for diagnostics — confirm.
var tokenNames = map[parse.TokenKind] string {
TokenMethod: "Method",
TokenKey: "Key",
TokenIdent: "Ident",
TokenComma: "Comma",
TokenLBrace: "LBrace",
TokenRBrace: "RBrace",
TokenLBracket: "LBracket",
TokenRBracket: "RBracket",
}
// Lex creates a lexer that reads tokens from reader. fileName is recorded in
// the position of every token and error the lexer produces.
//
// An error is returned when the very first read from reader fails. Empty
// input is not an error: the returned lexer simply yields a token of kind
// parse.EOF on its first call to Next.
func Lex(fileName string, reader io.Reader) (parse.Lexer, error) {
	lex := &lexer{
		fileName:    fileName,
		lineScanner: bufio.NewScanner(reader),
	}
	// Prime the lexer with its first rune. io.EOF here only means the input
	// is empty, which the lexer has already recorded in its eof flag; any
	// other error is a genuine read failure and must reach the caller
	// instead of being silently dropped.
	if err := lex.nextRune(); err != nil && err != io.EOF {
		return nil, err
	}
	return lex, nil
}
// lexer turns the lines delivered by a bufio.Scanner into tokens. It keeps a
// single rune of lookahead in the rune field.
type lexer struct {
// fileName is copied into every position the lexer reports.
fileName string
// lineScanner supplies the input, one scanned token (line) at a time.
lineScanner *bufio.Scanner
// rune is the current lookahead rune. It is set to '\n' whenever a new
// line has just been fetched from lineScanner.
rune rune
// line is the full text of the most recently scanned line.
line string
// lineFood is the part of line that has not been consumed yet.
lineFood string
// offset is not read or written anywhere in this file.
offset int
// row counts the lines scanned so far; positions report row - 1.
row int
// column counts the advances made within the current line and is reset to
// 0 when a new line is fetched; positions report column - 1 as the start.
column int
// eof is set once lineScanner is exhausted without reporting an error.
eof bool
}
// Next returns the next token from the input. A regular end of input is
// delivered as a token of kind parse.EOF with a nil error. A bare io.EOF
// coming out of the internal lexer is replaced by a positioned
// "unexpected EOF" error.
func (this *lexer) Next() (parse.Token, error) {
	token, err := this.nextInternal()
	if err != io.EOF {
		return token, err
	}
	return token, this.errUnexpectedEOF()
}
// nextInternal lexes one token, starting at the current lookahead rune.
//
// Whitespace and comments are skipped first. If the input is exhausted at
// that point, a token of kind parse.EOF is returned with a nil error. Running
// out of input while a token's runes are being collected ends the token
// without an error. The one exception is input that ends directly after the
// 'M' of a method token: there the io.EOF from nextRune is returned as is.
// Runes that cannot start a token produce a positioned error and are not
// consumed.
func (this *lexer) nextInternal() (token parse.Token, err error) {
	err = this.skipWhitespace()
	token.Position = this.pos()
	switch {
	case this.eof:
		// End of input between tokens is a clean EOF; whatever error was
		// reported while skipping is discarded.
		token.Kind = parse.EOF
		err = nil
		return
	case err != nil:
		return
	}

	// From here on a token (or an error) is being produced, so on the way
	// out stretch the token's position over everything consumed since its
	// first rune.
	defer func() {
		end := this.pos()
		end.End-- // TODO: work out why this adjustment is necessary
		token.Position = token.Position.Union(end)
	}()

	// take appends the lookahead rune to the token value and advances. It
	// reports whether more runes may be consumed; reaching EOF stops the
	// token and clears the error.
	take := func() bool {
		token.Value += string(this.rune)
		err = this.nextRune()
		if this.eof {
			err = nil
			return false
		}
		return err == nil
	}
	// takeWhile consumes runes for as long as accept approves of the
	// lookahead rune and take succeeds.
	takeWhile := func(accept func(rune) bool) {
		for accept(this.rune) && take() {
		}
	}

	switch start := this.rune; {
	case start == 'M':
		// Method: the leading 'M' is dropped, only the digits are kept.
		token.Kind = TokenMethod
		if err = this.nextRune(); err != nil {
			return
		}
		takeWhile(isDigit)

	case isDigit(start):
		// Key
		token.Kind = TokenKey
		takeWhile(isDigit)

	case unicode.IsUpper(start):
		// Ident
		token.Kind = TokenIdent
		takeWhile(func(r rune) bool {
			return unicode.IsLetter(r) || isDigit(r)
		})

	case start == ',':
		token.Kind = TokenComma
		take()

	case start == '{':
		token.Kind = TokenLBrace
		take()

	case start == '}':
		token.Kind = TokenRBrace
		take()

	case start == '[':
		token.Kind = TokenLBracket
		take()

	case start == ']':
		token.Kind = TokenRBracket
		take()

	case unicode.IsPrint(start):
		err = parse.Errorf(this.pos(), "unexpected rune '%c'", start)

	default:
		err = parse.Errorf(this.pos(), "unexpected rune %U", start)
	}
	return
}
// nextRune advances the lookahead rune by one step.
//
// While the current line still has unconsumed text, the next rune is decoded
// from it (NUL runes are skipped as long as text remains) and column is
// incremented. Otherwise a new line is fetched from the scanner: the
// lookahead becomes '\n', column is reset to 0 and row is incremented. When
// the scanner has nothing left, eof is set and io.EOF is returned; a scanner
// failure is returned unchanged.
func (this *lexer) nextRune() error {
	if this.lineFood != "" {
		var (
			decoded rune
			width   int
		)
		for decoded == 0 && this.lineFood != "" {
			decoded, width = utf8.DecodeRuneInString(this.lineFood)
			this.lineFood = this.lineFood[width:]
		}
		this.rune = decoded
		this.column++
		return nil
	}

	// The current line is used up; move on to the next one.
	if !this.lineScanner.Scan() {
		if err := this.lineScanner.Err(); err != nil {
			return err
		}
		this.eof = true
		return io.EOF
	}
	this.line = this.lineScanner.Text()
	this.lineFood = this.line
	this.rune = '\n'
	this.column = 0
	this.row++
	return nil
}
// skipWhitespace advances past any mix of whitespace runes and comments,
// leaving the lookahead on the first rune that is neither. The first error
// from nextRune or skipComment is returned immediately.
func (this *lexer) skipWhitespace() error {
	if err := this.skipComment(); err != nil {
		return err
	}
	for isWhitespace(this.rune) {
		if err := this.nextRune(); err != nil {
			return err
		}
		// A comment may begin right after the whitespace just consumed.
		if err := this.skipComment(); err != nil {
			return err
		}
	}
	return nil
}
// skipComment consumes a comment if the lookahead rune is ';'. The comment
// runs until the lookahead becomes '\n', which is left in place. Any error
// from nextRune is returned as is.
func (this *lexer) skipComment() error {
	if this.rune != ';' {
		return nil
	}
	for this.rune != '\n' {
		if err := this.nextRune(); err != nil {
			return err
		}
	}
	return nil
}
// pos returns the position of the current lookahead rune.
//
// Row is row - 1 and Start is column - 1, with End equal to column. While the
// lookahead is the '\n' set at the beginning of a freshly fetched line,
// column is 0, so Start is -1.
func (this *lexer) pos() parse.Position {
	return parse.Position{
		File: this.fileName,
		// Report the cached line instead of lineScanner.Text(). After Scan
		// has returned false the scanner no longer holds a token, so Text()
		// would give an empty line for positions at the end of input. The
		// cached copy also avoids building a new string on every call.
		Line:  this.line,
		Row:   this.row - 1,
		Start: this.column - 1,
		End:   this.column,
	}
}
// errUnexpectedEOF builds an "unexpected EOF" error located at the lexer's
// current position.
func (this *lexer) errUnexpectedEOF() error {
	where := this.pos()
	return parse.Errorf(where, "unexpected EOF")
}
// isWhitespace reports whether char is a space, tab, carriage return or
// line feed.
func isWhitespace(char rune) bool {
	return char == ' ' || char == '\t' || char == '\r' || char == '\n'
}
// isDigit reports whether char is one of the ASCII decimal digits '0'
// through '9'.
func isDigit(char rune) bool {
	return '0' <= char && char <= '9'
}
// isHexDigit reports whether char is an ASCII hexadecimal digit: '0'-'9',
// 'a'-'f' or 'A'-'F'.
func isHexDigit(char rune) bool {
	switch {
	case isDigit(char):
		return true
	case 'a' <= char && char <= 'f':
		return true
	case 'A' <= char && char <= 'F':
		return true
	}
	return false
}