fspl/lexer/lexer.go
2023-12-05 22:04:12 -05:00

332 lines
6.4 KiB
Go

package lexer
import "io"
import "bufio"
import "unicode"
import "github.com/alecthomas/participle/v2"
import "github.com/alecthomas/participle/v2/lexer"
// Token types produced by the FSPL lexer. EOF is -1 and every following name
// is assigned the next lower value through iota, so the order of the names in
// this block determines their numeric values.
//
// The pattern beside each name is a rough, regex-like sketch and not an exact
// grammar. NOTE(review): the scanner in this file only consumes decimal digits
// for Int (no 0b/0x prefix or hex digits), and it emits ':' and '*' as Colon
// and Star instead of folding them into a Symbol — confirm whether the
// patterns or the scanner are authoritative.
const (
EOF lexer.TokenType = -(iota + 1)
// Name Rough regex-ish description
Ident // [a-z][a-zA-Z0-9]*
TypeIdent // [A-Z][a-zA-Z0-9]*
Int // (0b|0x)?[0-9a-fA-F]+
Float // [0-9]*\.[0-9]+
String // \'.*\'
Symbol // [~!@#$%^&*-_=+\\|;:,<.>/?]+
LParen // \(
LBrace // \{
LBracket // \[
RParen // \)
RBrace // \}
RBracket // \]
Colon // :
DoubleColon // ::
Star // \*
)
// definition is the stateless value that carries the Symbols and Lex methods
// of this package's lexer definition.
type definition struct{}
// NewDefinition returns a lexer definition. The returned value carries no
// state of its own.
func NewDefinition() lexer.Definition {
	var def definition
	return def
}
// Symbols returns the table that maps the name of every token type to its
// numeric value. A new map is built on each call.
func (definition) Symbols() map[string]lexer.TokenType {
	table := make(map[string]lexer.TokenType, 16)

	table["EOF"] = EOF

	// Identifiers and literals.
	table["Ident"] = Ident
	table["TypeIdent"] = TypeIdent
	table["Int"] = Int
	table["Float"] = Float
	table["String"] = String
	table["Symbol"] = Symbol

	// Opening and closing brackets.
	table["LParen"] = LParen
	table["LBrace"] = LBrace
	table["LBracket"] = LBracket
	table["RParen"] = RParen
	table["RBrace"] = RBrace
	table["RBracket"] = RBracket

	// Dedicated punctuation.
	table["Colon"] = Colon
	table["DoubleColon"] = DoubleColon
	table["Star"] = Star

	return table
}
// Lex creates a lexer that reads source text from reader. The filename is
// only recorded so that it can be attached to token and error positions.
//
// The first rune is read immediately so the lexer always has a current rune
// to inspect. Running out of input at that point is not an error (the lexer
// is returned with its eof flag set), but any other read failure is returned
// to the caller instead of being discarded.
func (definition) Lex(filename string, reader io.Reader) (lexer.Lexer, error) {
	// Named lex rather than lexer so the imported lexer package is not
	// shadowed inside this function.
	lex := &fsplLexer{
		filename: filename,
		reader:   bufio.NewReader(reader),
		line:     1,
	}

	if err := lex.nextRune(); err != nil && err != io.EOF {
		return nil, err
	}
	return lex, nil
}
// fsplLexer holds the scanning state for one input: where runes come from,
// the rune currently being examined, and the bookkeeping used to build
// positions.
type fsplLexer struct {
	// filename is copied into every position the lexer reports.
	filename string
	// reader supplies the input one rune at a time.
	reader *bufio.Reader
	// rune is the most recently read rune, i.e. the one being examined.
	rune rune
	// offset is incremented once for every rune read.
	offset int
	// line is incremented each time a '\n' is read.
	line int
	// column is reset to 0 when a '\n' is read and incremented for any
	// other rune.
	column int
	// eof is set once the reader has reported io.EOF.
	eof bool
}
// Next returns the next token from the input. A bare io.EOF coming out of
// nextInternal is replaced by an "unexpected EOF" error located at the lexer's
// current position; every other result is passed through unchanged.
func (this *fsplLexer) Next() (lexer.Token, error) {
	token, err := this.nextInternal()
	if err != io.EOF {
		return token, err
	}
	return token, this.errUnexpectedEOF()
}
// nextInternal scans and returns the next token. Leading whitespace and
// comments are skipped first, and the token's position is that of its first
// rune.
//
// Once the input is exhausted an EOF token is returned with a nil error.
// Running out of input directly after a complete token is not an error: the
// token is returned normally and the following call yields the EOF token. A
// raw io.EOF is only returned when the input ends inside a string literal,
// before its closing quote. A rune that cannot start any token produces an
// "unexpected rune" error, and other read errors are returned as they are.
func (this *fsplLexer) nextInternal() (token lexer.Token, err error) {
	err = this.skipWhitespace()

	token.Pos = this.pos()

	// The eof flag is checked before err: when skipping runs off the end of
	// the input the resulting io.EOF just means that there are no more tokens.
	if this.eof {
		token.Type = EOF
		return token, nil
	}
	if err != nil {
		return token, err
	}

	// appendRune adds the current rune to the token and advances the input.
	appendRune := func() {
		token.Value += string(this.rune)
		err = this.nextRune()
	}

	// appendWhile consumes runes for as long as accept matches the current
	// rune and reading succeeds.
	appendWhile := func(accept func(rune) bool) {
		for err == nil && accept(this.rune) {
			appendRune()
		}
	}

	// isIdentRune reports whether char may continue an identifier.
	isIdentRune := func(char rune) bool {
		return unicode.IsLetter(char) || isDigit(char)
	}

	// doNumber scans a run of digits, optionally followed by a '.' and a
	// further run of digits, which turns the token into a Float.
	doNumber := func() {
		token.Type = Int
		appendWhile(isDigit)
		if err == nil && this.rune == '.' {
			token.Type = Float
			appendRune()
			appendWhile(isDigit)
		}
	}

	// doSymbol scans a run of symbol runes.
	doSymbol := func() {
		token.Type = Symbol
		appendWhile(isSymbol)
	}

	switch {
	// Ident
	case unicode.IsLower(this.rune):
		token.Type = Ident
		appendWhile(isIdentRune)

	// TypeIdent
	case unicode.IsUpper(this.rune):
		token.Type = TypeIdent
		appendWhile(isIdentRune)

	// Int, Float
	case isDigit(this.rune):
		doNumber()

	// String
	case this.rune == '\'':
		token.Type = String

		// Until the closing quote has been seen the literal is incomplete,
		// so every error in here (io.EOF included) is returned directly and
		// bypasses the end-of-input handling after the switch.
		if err = this.nextRune(); err != nil {
			return token, err
		}
		for this.rune != '\'' {
			if this.rune == '\\' {
				var result rune
				result, err = this.escapeSequence()
				if err != nil {
					return token, err
				}
				token.Value += string(result)
			} else {
				appendRune()
				if err != nil {
					return token, err
				}
			}
		}

		// Step past the closing quote. The literal is complete now, so
		// reaching the end of the input here is fine.
		err = this.nextRune()

	// Symbol, Int, Float
	case this.rune == '-':
		// A leading '-' either starts a negative number or a symbol run, or
		// stands alone as a one-rune Symbol.
		token.Type = Symbol
		appendRune()
		if err == nil {
			if isDigit(this.rune) {
				doNumber()
			} else if isSymbol(this.rune) {
				doSymbol()
			}
		}

	// Symbol
	case isSymbol(this.rune):
		doSymbol()

	case this.rune == '(':
		token.Type = LParen
		appendRune()
	case this.rune == '{':
		token.Type = LBrace
		appendRune()
	case this.rune == '[':
		token.Type = LBracket
		appendRune()
	case this.rune == ')':
		token.Type = RParen
		appendRune()
	case this.rune == '}':
		token.Type = RBrace
		appendRune()
	case this.rune == ']':
		token.Type = RBracket
		appendRune()

	// Colon, DoubleColon
	case this.rune == ':':
		token.Type = Colon
		appendRune()
		if this.rune == ':' {
			token.Type = DoubleColon
			appendRune()
		}

	// Star
	case this.rune == '*':
		token.Type = Star
		appendRune()

	case unicode.IsPrint(this.rune):
		err = participle.Errorf(
			this.pos(), "unexpected rune \"%c\"",
			this.rune)

	default:
		err = participle.Errorf(
			this.pos(), "unexpected rune %U",
			this.rune)
	}

	// Every path that reaches this point with io.EOF has already consumed a
	// whole token; the input merely ended right behind it. The eof flag is
	// set, so the next call reports the EOF token.
	if err == io.EOF {
		err = nil
	}
	return token, err
}
// nextRune reads one rune from the reader and makes it the current rune.
// Every call bumps the offset by one; reading '\n' moves to the next line and
// resets the column to 0, while anything else bumps the column. When the
// reader reports io.EOF the eof flag is set. The reader's error, if any, is
// returned unchanged.
func (this *fsplLexer) nextRune() error {
	read, _, err := this.reader.ReadRune()

	this.rune = read
	this.offset++

	switch read {
	case '\n':
		this.line++
		this.column = 0
	default:
		this.column++
	}

	// The flag is only ever raised here, never lowered.
	this.eof = this.eof || err == io.EOF
	return err
}
// escapeSequence decodes one escape sequence and returns the rune it stands
// for. It first advances past the current rune (the one that introduced the
// escape) and, on success, leaves the lexer on the first rune after the
// sequence.
//
// Two forms are understood:
//   - one to three octal digits (0-7), giving the rune with that value;
//   - a single rune: a backslash, a quote or a literal newline stands for
//     itself, and a, b, t, n, v, f, r stand for the usual control characters.
//
// Anything else, including the digits 8 and 9, yields a "bad escape sequence"
// error. Errors from reading the input are returned unchanged.
func (this *fsplLexer) escapeSequence() (rune, error) {
	if err := this.nextRune(); err != nil {
		return 0, err
	}

	// The value is accumulated in base 8, so only 0-7 can count as digits.
	isOctal := func(char rune) bool {
		return char >= '0' && char <= '7'
	}

	if isOctal(this.rune) {
		var number rune
		for count := 0; count < 3 && isOctal(this.rune); count++ {
			number = number*8 + (this.rune - '0')
			if err := this.nextRune(); err != nil {
				return 0, err
			}
		}
		return number, nil
	}

	var result rune
	switch this.rune {
	case '\\', '\n', '\'':
		result = this.rune
	case 'a':
		result = '\a'
	case 'b':
		result = '\b'
	case 't':
		result = '\t'
	case 'n':
		result = '\n'
	case 'v':
		result = '\v'
	case 'f':
		result = '\f'
	case 'r':
		result = '\r'
	default:
		// Build the error first so that it points at the offending rune,
		// then step past that rune. A read failure at this point is
		// deliberately ignored: the bad escape is the error being reported.
		badEscape := this.errBadEscapeSequence()
		_ = this.nextRune()
		return 0, badEscape
	}

	// Move past the escape character, reporting a failed read instead of
	// silently dropping it.
	if err := this.nextRune(); err != nil {
		return 0, err
	}
	return result, nil
}
// skipWhitespace advances the lexer past any mix of whitespace and comments,
// stopping on the first rune that is neither. The first error hit while
// reading is returned.
func (this *fsplLexer) skipWhitespace() error {
	// A comment may begin right at the current rune.
	if err := this.skipComment(); err != nil {
		return err
	}

	for {
		if !isWhitespace(this.rune) {
			return nil
		}
		if err := this.nextRune(); err != nil {
			return err
		}
		// The rune that follows the whitespace may open another comment.
		if err := this.skipComment(); err != nil {
			return err
		}
	}
}
// skipComment does nothing unless the current rune is ';'. In that case it
// reads forward until the current rune is '\n', leaving the newline itself
// unconsumed. The first error hit while reading is returned.
func (this *fsplLexer) skipComment() error {
	if this.rune != ';' {
		return nil
	}

	for this.rune != '\n' {
		if err := this.nextRune(); err != nil {
			return err
		}
	}
	return nil
}
// pos returns a snapshot of the lexer's filename, offset, line and column as
// a position value.
func (this *fsplLexer) pos() lexer.Position {
	var position lexer.Position
	position.Filename = this.filename
	position.Offset = this.offset
	position.Line = this.line
	position.Column = this.column
	return position
}
// errUnexpectedEOF builds an "unexpected EOF" error located at the lexer's
// current position.
func (this *fsplLexer) errUnexpectedEOF() error {
	position := this.pos()
	return participle.Errorf(position, "unexpected EOF")
}
// errBadEscapeSequence builds a "bad escape sequence" error located at the
// lexer's current position.
func (this *fsplLexer) errBadEscapeSequence() error {
	position := this.pos()
	return participle.Errorf(position, "bad escape sequence")
}
// isWhitespace reports whether char is a space, tab, carriage return or
// newline.
func isWhitespace(char rune) bool {
	return char == ' ' || char == '\t' || char == '\r' || char == '\n'
}
// isSymbol reports whether char is one of the runes that can form a symbol
// token. ':' and '*' are not in the set.
func isSymbol(char rune) bool {
	const symbolRunes = "~!@#$%^&-_=+\\|;,<.>/?"

	for _, candidate := range symbolRunes {
		if candidate == char {
			return true
		}
	}
	return false
}
// isDigit reports whether char is one of the ASCII decimal digits '0' to '9'.
func isDigit(char rune) bool {
	switch {
	case char < '0':
		return false
	case char > '9':
		return false
	default:
		return true
	}
}