Lexer has no dependency on participle now
This commit is contained in:
parent
6bfc073608
commit
d2ecd7a266
101
lexer/lexer.go
101
lexer/lexer.go
|
@ -3,11 +3,10 @@ package lexer
|
|||
import "io"
|
||||
import "bufio"
|
||||
import "unicode"
|
||||
import "github.com/alecthomas/participle/v2"
|
||||
import "github.com/alecthomas/participle/v2/lexer"
|
||||
import "git.tebibyte.media/sashakoshka/fspl/errors"
|
||||
|
||||
const (
|
||||
EOF lexer.TokenType = -(iota + 1)
|
||||
type TokenKind int; const (
|
||||
EOF TokenKind = -(iota + 1)
|
||||
|
||||
// Name Rough regex-ish description
|
||||
Ident // [a-z][a-zA-Z0-9]*
|
||||
|
@ -28,15 +27,8 @@ const (
|
|||
Star // \*
|
||||
)
|
||||
|
||||
type definition struct { }
|
||||
|
||||
// NewDefinition returns a lexer definition.
|
||||
func NewDefinition () lexer.Definition {
|
||||
return definition { }
|
||||
}
|
||||
|
||||
func (definition) Symbols () map[string] lexer.TokenType {
|
||||
return map[string] lexer.TokenType {
|
||||
func Symbols () map[string] TokenKind {
|
||||
return map[string] TokenKind {
|
||||
"EOF": EOF,
|
||||
"Ident": Ident,
|
||||
"TypeIdent": TypeIdent,
|
||||
|
@ -56,11 +48,25 @@ func (definition) Symbols () map[string] lexer.TokenType {
|
|||
}
|
||||
}
|
||||
|
||||
func (definition) Lex (filename string, reader io.Reader) (lexer.Lexer, error) {
|
||||
type Token struct {
|
||||
Kind TokenKind
|
||||
Value string
|
||||
Position errors.Position
|
||||
}
|
||||
|
||||
func (tok Token) EOF () bool {
|
||||
return tok.Kind == EOF
|
||||
}
|
||||
|
||||
type Lexer interface {
|
||||
Next () (Token, error)
|
||||
}
|
||||
|
||||
func NewLexer (filename string, reader io.Reader) (Lexer, error) {
|
||||
lexer := &fsplLexer {
|
||||
filename: filename,
|
||||
reader: bufio.NewReader(reader),
|
||||
line: 1,
|
||||
row: 1,
|
||||
}
|
||||
lexer.nextRune()
|
||||
return lexer, nil
|
||||
|
@ -72,25 +78,25 @@ type fsplLexer struct {
|
|||
rune rune
|
||||
|
||||
offset int
|
||||
line int
|
||||
row int
|
||||
column int
|
||||
|
||||
eof bool
|
||||
}
|
||||
|
||||
func (this *fsplLexer) Next () (lexer.Token, error) {
|
||||
func (this *fsplLexer) Next () (Token, error) {
|
||||
token, err := this.nextInternal()
|
||||
if err == io.EOF { err = this.errUnexpectedEOF() }
|
||||
return token, err
|
||||
}
|
||||
|
||||
func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
||||
func (this *fsplLexer) nextInternal () (token Token, err error) {
|
||||
err = this.skipWhitespace()
|
||||
|
||||
token.Pos = this.pos()
|
||||
token.Position = this.pos()
|
||||
|
||||
if this.eof {
|
||||
token.Type = EOF
|
||||
token.Kind = EOF
|
||||
err = nil
|
||||
return
|
||||
}
|
||||
|
@ -102,14 +108,14 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||
}
|
||||
|
||||
doNumber := func () {
|
||||
token.Type = Int
|
||||
token.Kind = Int
|
||||
for isDigit(this.rune) {
|
||||
appendRune()
|
||||
if this.eof { err = nil; return }
|
||||
if err != nil { return }
|
||||
}
|
||||
if this.rune == '.' {
|
||||
token.Type = Float
|
||||
token.Kind = Float
|
||||
appendRune()
|
||||
for isDigit(this.rune) {
|
||||
appendRune()
|
||||
|
@ -119,7 +125,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||
}
|
||||
|
||||
doSymbol := func () {
|
||||
token.Type = Symbol
|
||||
token.Kind = Symbol
|
||||
for isSymbol(this.rune) {
|
||||
appendRune()
|
||||
if err != nil { return }
|
||||
|
@ -129,7 +135,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||
switch {
|
||||
// Ident
|
||||
case unicode.IsLower(this.rune):
|
||||
token.Type = Ident
|
||||
token.Kind = Ident
|
||||
for unicode.IsLetter(this.rune) || isDigit(this.rune) {
|
||||
appendRune()
|
||||
if this.eof { err = nil; return }
|
||||
|
@ -137,7 +143,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||
}
|
||||
// TypeIdent
|
||||
case unicode.IsUpper(this.rune):
|
||||
token.Type = TypeIdent
|
||||
token.Kind = TypeIdent
|
||||
for unicode.IsLetter(this.rune) || isDigit(this.rune) {
|
||||
appendRune()
|
||||
if this.eof { err = nil; return }
|
||||
|
@ -148,7 +154,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||
doNumber()
|
||||
// String
|
||||
case this.rune == '\'':
|
||||
token.Type = String
|
||||
token.Kind = String
|
||||
err = this.nextRune()
|
||||
if err != nil { return }
|
||||
|
||||
|
@ -169,7 +175,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||
if err != nil { return }
|
||||
// Symbol, Int, Float
|
||||
case this.rune == '-':
|
||||
token.Type = Symbol
|
||||
token.Kind = Symbol
|
||||
appendRune()
|
||||
if err != nil { return }
|
||||
if isDigit(this.rune) {
|
||||
|
@ -184,41 +190,41 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||
doSymbol()
|
||||
if this.eof { err = nil; return }
|
||||
case this.rune == '(':
|
||||
token.Type = LParen
|
||||
token.Kind = LParen
|
||||
appendRune()
|
||||
case this.rune == '{':
|
||||
token.Type = LBrace
|
||||
token.Kind = LBrace
|
||||
appendRune()
|
||||
case this.rune == '[':
|
||||
token.Type = LBracket
|
||||
token.Kind = LBracket
|
||||
appendRune()
|
||||
case this.rune == ')':
|
||||
token.Type = RParen
|
||||
token.Kind = RParen
|
||||
appendRune()
|
||||
case this.rune == '}':
|
||||
token.Type = RBrace
|
||||
token.Kind = RBrace
|
||||
appendRune()
|
||||
case this.rune == ']':
|
||||
token.Type = RBracket
|
||||
token.Kind = RBracket
|
||||
appendRune()
|
||||
// Colon, DoubleColon
|
||||
case this.rune == ':':
|
||||
token.Type = Colon
|
||||
token.Kind = Colon
|
||||
appendRune()
|
||||
if this.rune == ':' {
|
||||
token.Type = DoubleColon
|
||||
token.Kind = DoubleColon
|
||||
appendRune()
|
||||
}
|
||||
// Star
|
||||
case this.rune == '*':
|
||||
token.Type = Star
|
||||
token.Kind = Star
|
||||
appendRune()
|
||||
case unicode.IsPrint(this.rune):
|
||||
err = participle.Errorf (
|
||||
err = errors.Errorf (
|
||||
this.pos(), "unexpected rune \"%c\"",
|
||||
this.rune)
|
||||
default:
|
||||
err = participle.Errorf (
|
||||
err = errors.Errorf (
|
||||
this.pos(), "unexpected rune %U",
|
||||
this.rune)
|
||||
}
|
||||
|
@ -232,7 +238,7 @@ func (this *fsplLexer) nextRune () error {
|
|||
|
||||
this.offset ++
|
||||
if char == '\n' {
|
||||
this.line ++
|
||||
this.row ++
|
||||
this.column = 0
|
||||
} else {
|
||||
this.column ++
|
||||
|
@ -299,21 +305,22 @@ func (this *fsplLexer) skipComment () error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (this *fsplLexer) pos () lexer.Position {
|
||||
return lexer.Position {
|
||||
Filename: this.filename,
|
||||
Offset: this.offset,
|
||||
Line: this.line,
|
||||
Column: this.column,
|
||||
func (this *fsplLexer) pos () errors.Position {
|
||||
return errors.Position {
|
||||
File: this.filename,
|
||||
Line: "",
|
||||
Row: this.row,
|
||||
Start: this.column,
|
||||
End: this.column,
|
||||
}
|
||||
}
|
||||
|
||||
func (this *fsplLexer) errUnexpectedEOF () error {
|
||||
return participle.Errorf(this.pos(), "unexpected EOF")
|
||||
return errors.Errorf(this.pos(), "unexpected EOF")
|
||||
}
|
||||
|
||||
func (this *fsplLexer) errBadEscapeSequence () error {
|
||||
return participle.Errorf(this.pos(), "bad escape sequence")
|
||||
return errors.Errorf(this.pos(), "bad escape sequence")
|
||||
}
|
||||
|
||||
func isWhitespace (char rune) bool {
|
||||
|
|
|
@ -3,12 +3,11 @@ package lexer
|
|||
import "fmt"
|
||||
import "testing"
|
||||
import "strings"
|
||||
import "github.com/alecthomas/participle/v2"
|
||||
import "github.com/alecthomas/participle/v2/lexer"
|
||||
import "git.tebibyte.media/sashakoshka/fspl/errors"
|
||||
|
||||
func tok (ty lexer.TokenType, value string) lexer.Token {
|
||||
return lexer.Token {
|
||||
Type: ty,
|
||||
func tok (kind TokenKind, value string) Token {
|
||||
return Token {
|
||||
Kind: kind,
|
||||
Value: value,
|
||||
}
|
||||
}
|
||||
|
@ -16,7 +15,7 @@ func tok (ty lexer.TokenType, value string) lexer.Token {
|
|||
func testString (
|
||||
test *testing.T,
|
||||
input string,
|
||||
correct ...lexer.Token,
|
||||
correct ...Token,
|
||||
) {
|
||||
testStringErr(test, "", 0, 0, input, correct...)
|
||||
}
|
||||
|
@ -24,31 +23,31 @@ func testString (
|
|||
func testStringErr (
|
||||
test *testing.T,
|
||||
errMessage string,
|
||||
errLine int,
|
||||
errColumn int,
|
||||
errRow int,
|
||||
errStart int,
|
||||
input string,
|
||||
correct ...lexer.Token,
|
||||
correct ...Token,
|
||||
) {
|
||||
testError := func (err error) bool {
|
||||
got := err.(participle.Error)
|
||||
gotMessage := got.Message()
|
||||
gotLine := got.Position().Line
|
||||
gotColumn := got.Position().Column
|
||||
got := err.(*errors.Error)
|
||||
gotMessage := got.Message
|
||||
gotRow := got.Position.Row
|
||||
gotStart := got.Position.Start
|
||||
correct :=
|
||||
gotMessage == errMessage &&
|
||||
gotLine == errLine &&
|
||||
gotColumn == errColumn
|
||||
gotRow == errRow &&
|
||||
gotStart == errStart
|
||||
if !correct {
|
||||
test.Log("errors do not match")
|
||||
test.Logf("got:\n%v:%v: %v", gotLine, gotColumn, gotMessage)
|
||||
test.Logf("correct:\n%v:%v: %v", errLine, errColumn, errMessage)
|
||||
test.Logf("got:\n%v:%v: %v", gotRow, gotStart, gotMessage)
|
||||
test.Logf("correct:\n%v:%v: %v", errRow, errStart, errMessage)
|
||||
test.Fail()
|
||||
}
|
||||
return correct
|
||||
}
|
||||
|
||||
reader := strings.NewReader(input)
|
||||
lx, err := NewDefinition().Lex("stream0.fspl", reader)
|
||||
lx, err := NewLexer("stream0.fspl", reader)
|
||||
if err != nil {
|
||||
if errMessage == "" {
|
||||
test.Error("lexer returned error: ", err)
|
||||
|
@ -58,7 +57,7 @@ func testStringErr (
|
|||
}
|
||||
}
|
||||
|
||||
var tokens []lexer.Token
|
||||
var tokens []Token
|
||||
for {
|
||||
got, err := lx.Next()
|
||||
tokens = append(tokens, got)
|
||||
|
@ -78,8 +77,8 @@ func testStringErr (
|
|||
test.Logf("%-40v | %-40v", left, right)
|
||||
}
|
||||
|
||||
dumpToken := func (token lexer.Token) string {
|
||||
return fmt.Sprintf("%4v: \"%s\"", token.Type, token.Value)
|
||||
dumpToken := func (token Token) string {
|
||||
return fmt.Sprintf("%4v: \"%s\"", token.Kind, token.Value)
|
||||
}
|
||||
|
||||
compareTokens := func () {
|
||||
|
@ -113,7 +112,7 @@ func testStringErr (
|
|||
|
||||
for index, token := range correct {
|
||||
gotToken := tokens[index]
|
||||
if token.Type != gotToken.Type || token.Value != gotToken.Value {
|
||||
if token.Kind != gotToken.Kind || token.Value != gotToken.Value {
|
||||
test.Logf("correct and got do not match at %v", index)
|
||||
compareTokens()
|
||||
test.Fail()
|
||||
|
|
Loading…
Reference in New Issue