Lexer has no dependency on participle now
This commit is contained in:
parent
0eea2b61a3
commit
fc88e27abf
101
lexer/lexer.go
101
lexer/lexer.go
@ -3,11 +3,10 @@ package lexer
|
|||||||
import "io"
|
import "io"
|
||||||
import "bufio"
|
import "bufio"
|
||||||
import "unicode"
|
import "unicode"
|
||||||
import "github.com/alecthomas/participle/v2"
|
import "git.tebibyte.media/sashakoshka/fspl/errors"
|
||||||
import "github.com/alecthomas/participle/v2/lexer"
|
|
||||||
|
|
||||||
const (
|
type TokenKind int; const (
|
||||||
EOF lexer.TokenType = -(iota + 1)
|
EOF TokenKind = -(iota + 1)
|
||||||
|
|
||||||
// Name Rough regex-ish description
|
// Name Rough regex-ish description
|
||||||
Ident // [a-z][a-zA-Z0-9]*
|
Ident // [a-z][a-zA-Z0-9]*
|
||||||
@ -28,15 +27,8 @@ const (
|
|||||||
Star // \*
|
Star // \*
|
||||||
)
|
)
|
||||||
|
|
||||||
type definition struct { }
|
func Symbols () map[string] TokenKind {
|
||||||
|
return map[string] TokenKind {
|
||||||
// NewDefinition returns a lexer definition.
|
|
||||||
func NewDefinition () lexer.Definition {
|
|
||||||
return definition { }
|
|
||||||
}
|
|
||||||
|
|
||||||
func (definition) Symbols () map[string] lexer.TokenType {
|
|
||||||
return map[string] lexer.TokenType {
|
|
||||||
"EOF": EOF,
|
"EOF": EOF,
|
||||||
"Ident": Ident,
|
"Ident": Ident,
|
||||||
"TypeIdent": TypeIdent,
|
"TypeIdent": TypeIdent,
|
||||||
@ -56,11 +48,25 @@ func (definition) Symbols () map[string] lexer.TokenType {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (definition) Lex (filename string, reader io.Reader) (lexer.Lexer, error) {
|
type Token struct {
|
||||||
|
Kind TokenKind
|
||||||
|
Value string
|
||||||
|
Position errors.Position
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tok Token) EOF () bool {
|
||||||
|
return tok.Kind == EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
type Lexer interface {
|
||||||
|
Next () (Token, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewLexer (filename string, reader io.Reader) (Lexer, error) {
|
||||||
lexer := &fsplLexer {
|
lexer := &fsplLexer {
|
||||||
filename: filename,
|
filename: filename,
|
||||||
reader: bufio.NewReader(reader),
|
reader: bufio.NewReader(reader),
|
||||||
line: 1,
|
row: 1,
|
||||||
}
|
}
|
||||||
lexer.nextRune()
|
lexer.nextRune()
|
||||||
return lexer, nil
|
return lexer, nil
|
||||||
@ -72,25 +78,25 @@ type fsplLexer struct {
|
|||||||
rune rune
|
rune rune
|
||||||
|
|
||||||
offset int
|
offset int
|
||||||
line int
|
row int
|
||||||
column int
|
column int
|
||||||
|
|
||||||
eof bool
|
eof bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (this *fsplLexer) Next () (lexer.Token, error) {
|
func (this *fsplLexer) Next () (Token, error) {
|
||||||
token, err := this.nextInternal()
|
token, err := this.nextInternal()
|
||||||
if err == io.EOF { err = this.errUnexpectedEOF() }
|
if err == io.EOF { err = this.errUnexpectedEOF() }
|
||||||
return token, err
|
return token, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
func (this *fsplLexer) nextInternal () (token Token, err error) {
|
||||||
err = this.skipWhitespace()
|
err = this.skipWhitespace()
|
||||||
|
|
||||||
token.Pos = this.pos()
|
token.Position = this.pos()
|
||||||
|
|
||||||
if this.eof {
|
if this.eof {
|
||||||
token.Type = EOF
|
token.Kind = EOF
|
||||||
err = nil
|
err = nil
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -102,14 +108,14 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
doNumber := func () {
|
doNumber := func () {
|
||||||
token.Type = Int
|
token.Kind = Int
|
||||||
for isDigit(this.rune) {
|
for isDigit(this.rune) {
|
||||||
appendRune()
|
appendRune()
|
||||||
if this.eof { err = nil; return }
|
if this.eof { err = nil; return }
|
||||||
if err != nil { return }
|
if err != nil { return }
|
||||||
}
|
}
|
||||||
if this.rune == '.' {
|
if this.rune == '.' {
|
||||||
token.Type = Float
|
token.Kind = Float
|
||||||
appendRune()
|
appendRune()
|
||||||
for isDigit(this.rune) {
|
for isDigit(this.rune) {
|
||||||
appendRune()
|
appendRune()
|
||||||
@ -119,7 +125,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
doSymbol := func () {
|
doSymbol := func () {
|
||||||
token.Type = Symbol
|
token.Kind = Symbol
|
||||||
for isSymbol(this.rune) {
|
for isSymbol(this.rune) {
|
||||||
appendRune()
|
appendRune()
|
||||||
if err != nil { return }
|
if err != nil { return }
|
||||||
@ -129,7 +135,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||||||
switch {
|
switch {
|
||||||
// Ident
|
// Ident
|
||||||
case unicode.IsLower(this.rune):
|
case unicode.IsLower(this.rune):
|
||||||
token.Type = Ident
|
token.Kind = Ident
|
||||||
for unicode.IsLetter(this.rune) || isDigit(this.rune) {
|
for unicode.IsLetter(this.rune) || isDigit(this.rune) {
|
||||||
appendRune()
|
appendRune()
|
||||||
if this.eof { err = nil; return }
|
if this.eof { err = nil; return }
|
||||||
@ -137,7 +143,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||||||
}
|
}
|
||||||
// TypeIdent
|
// TypeIdent
|
||||||
case unicode.IsUpper(this.rune):
|
case unicode.IsUpper(this.rune):
|
||||||
token.Type = TypeIdent
|
token.Kind = TypeIdent
|
||||||
for unicode.IsLetter(this.rune) || isDigit(this.rune) {
|
for unicode.IsLetter(this.rune) || isDigit(this.rune) {
|
||||||
appendRune()
|
appendRune()
|
||||||
if this.eof { err = nil; return }
|
if this.eof { err = nil; return }
|
||||||
@ -148,7 +154,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||||||
doNumber()
|
doNumber()
|
||||||
// String
|
// String
|
||||||
case this.rune == '\'':
|
case this.rune == '\'':
|
||||||
token.Type = String
|
token.Kind = String
|
||||||
err = this.nextRune()
|
err = this.nextRune()
|
||||||
if err != nil { return }
|
if err != nil { return }
|
||||||
|
|
||||||
@ -169,7 +175,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||||||
if err != nil { return }
|
if err != nil { return }
|
||||||
// Symbol, Int, Float
|
// Symbol, Int, Float
|
||||||
case this.rune == '-':
|
case this.rune == '-':
|
||||||
token.Type = Symbol
|
token.Kind = Symbol
|
||||||
appendRune()
|
appendRune()
|
||||||
if err != nil { return }
|
if err != nil { return }
|
||||||
if isDigit(this.rune) {
|
if isDigit(this.rune) {
|
||||||
@ -184,41 +190,41 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
|
|||||||
doSymbol()
|
doSymbol()
|
||||||
if this.eof { err = nil; return }
|
if this.eof { err = nil; return }
|
||||||
case this.rune == '(':
|
case this.rune == '(':
|
||||||
token.Type = LParen
|
token.Kind = LParen
|
||||||
appendRune()
|
appendRune()
|
||||||
case this.rune == '{':
|
case this.rune == '{':
|
||||||
token.Type = LBrace
|
token.Kind = LBrace
|
||||||
appendRune()
|
appendRune()
|
||||||
case this.rune == '[':
|
case this.rune == '[':
|
||||||
token.Type = LBracket
|
token.Kind = LBracket
|
||||||
appendRune()
|
appendRune()
|
||||||
case this.rune == ')':
|
case this.rune == ')':
|
||||||
token.Type = RParen
|
token.Kind = RParen
|
||||||
appendRune()
|
appendRune()
|
||||||
case this.rune == '}':
|
case this.rune == '}':
|
||||||
token.Type = RBrace
|
token.Kind = RBrace
|
||||||
appendRune()
|
appendRune()
|
||||||
case this.rune == ']':
|
case this.rune == ']':
|
||||||
token.Type = RBracket
|
token.Kind = RBracket
|
||||||
appendRune()
|
appendRune()
|
||||||
// Colon, DoubleColon
|
// Colon, DoubleColon
|
||||||
case this.rune == ':':
|
case this.rune == ':':
|
||||||
token.Type = Colon
|
token.Kind = Colon
|
||||||
appendRune()
|
appendRune()
|
||||||
if this.rune == ':' {
|
if this.rune == ':' {
|
||||||
token.Type = DoubleColon
|
token.Kind = DoubleColon
|
||||||
appendRune()
|
appendRune()
|
||||||
}
|
}
|
||||||
// Star
|
// Star
|
||||||
case this.rune == '*':
|
case this.rune == '*':
|
||||||
token.Type = Star
|
token.Kind = Star
|
||||||
appendRune()
|
appendRune()
|
||||||
case unicode.IsPrint(this.rune):
|
case unicode.IsPrint(this.rune):
|
||||||
err = participle.Errorf (
|
err = errors.Errorf (
|
||||||
this.pos(), "unexpected rune \"%c\"",
|
this.pos(), "unexpected rune \"%c\"",
|
||||||
this.rune)
|
this.rune)
|
||||||
default:
|
default:
|
||||||
err = participle.Errorf (
|
err = errors.Errorf (
|
||||||
this.pos(), "unexpected rune %U",
|
this.pos(), "unexpected rune %U",
|
||||||
this.rune)
|
this.rune)
|
||||||
}
|
}
|
||||||
@ -232,7 +238,7 @@ func (this *fsplLexer) nextRune () error {
|
|||||||
|
|
||||||
this.offset ++
|
this.offset ++
|
||||||
if char == '\n' {
|
if char == '\n' {
|
||||||
this.line ++
|
this.row ++
|
||||||
this.column = 0
|
this.column = 0
|
||||||
} else {
|
} else {
|
||||||
this.column ++
|
this.column ++
|
||||||
@ -299,21 +305,22 @@ func (this *fsplLexer) skipComment () error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (this *fsplLexer) pos () lexer.Position {
|
func (this *fsplLexer) pos () errors.Position {
|
||||||
return lexer.Position {
|
return errors.Position {
|
||||||
Filename: this.filename,
|
File: this.filename,
|
||||||
Offset: this.offset,
|
Line: "",
|
||||||
Line: this.line,
|
Row: this.row,
|
||||||
Column: this.column,
|
Start: this.column,
|
||||||
|
End: this.column,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (this *fsplLexer) errUnexpectedEOF () error {
|
func (this *fsplLexer) errUnexpectedEOF () error {
|
||||||
return participle.Errorf(this.pos(), "unexpected EOF")
|
return errors.Errorf(this.pos(), "unexpected EOF")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (this *fsplLexer) errBadEscapeSequence () error {
|
func (this *fsplLexer) errBadEscapeSequence () error {
|
||||||
return participle.Errorf(this.pos(), "bad escape sequence")
|
return errors.Errorf(this.pos(), "bad escape sequence")
|
||||||
}
|
}
|
||||||
|
|
||||||
func isWhitespace (char rune) bool {
|
func isWhitespace (char rune) bool {
|
||||||
|
@ -3,12 +3,11 @@ package lexer
|
|||||||
import "fmt"
|
import "fmt"
|
||||||
import "testing"
|
import "testing"
|
||||||
import "strings"
|
import "strings"
|
||||||
import "github.com/alecthomas/participle/v2"
|
import "git.tebibyte.media/sashakoshka/fspl/errors"
|
||||||
import "github.com/alecthomas/participle/v2/lexer"
|
|
||||||
|
|
||||||
func tok (ty lexer.TokenType, value string) lexer.Token {
|
func tok (kind TokenKind, value string) Token {
|
||||||
return lexer.Token {
|
return Token {
|
||||||
Type: ty,
|
Kind: kind,
|
||||||
Value: value,
|
Value: value,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -16,7 +15,7 @@ func tok (ty lexer.TokenType, value string) lexer.Token {
|
|||||||
func testString (
|
func testString (
|
||||||
test *testing.T,
|
test *testing.T,
|
||||||
input string,
|
input string,
|
||||||
correct ...lexer.Token,
|
correct ...Token,
|
||||||
) {
|
) {
|
||||||
testStringErr(test, "", 0, 0, input, correct...)
|
testStringErr(test, "", 0, 0, input, correct...)
|
||||||
}
|
}
|
||||||
@ -24,31 +23,31 @@ func testString (
|
|||||||
func testStringErr (
|
func testStringErr (
|
||||||
test *testing.T,
|
test *testing.T,
|
||||||
errMessage string,
|
errMessage string,
|
||||||
errLine int,
|
errRow int,
|
||||||
errColumn int,
|
errStart int,
|
||||||
input string,
|
input string,
|
||||||
correct ...lexer.Token,
|
correct ...Token,
|
||||||
) {
|
) {
|
||||||
testError := func (err error) bool {
|
testError := func (err error) bool {
|
||||||
got := err.(participle.Error)
|
got := err.(*errors.Error)
|
||||||
gotMessage := got.Message()
|
gotMessage := got.Message
|
||||||
gotLine := got.Position().Line
|
gotRow := got.Position.Row
|
||||||
gotColumn := got.Position().Column
|
gotStart := got.Position.Start
|
||||||
correct :=
|
correct :=
|
||||||
gotMessage == errMessage &&
|
gotMessage == errMessage &&
|
||||||
gotLine == errLine &&
|
gotRow == errRow &&
|
||||||
gotColumn == errColumn
|
gotStart == errStart
|
||||||
if !correct {
|
if !correct {
|
||||||
test.Log("errors do not match")
|
test.Log("errors do not match")
|
||||||
test.Logf("got:\n%v:%v: %v", gotLine, gotColumn, gotMessage)
|
test.Logf("got:\n%v:%v: %v", gotRow, gotStart, gotMessage)
|
||||||
test.Logf("correct:\n%v:%v: %v", errLine, errColumn, errMessage)
|
test.Logf("correct:\n%v:%v: %v", errRow, errStart, errMessage)
|
||||||
test.Fail()
|
test.Fail()
|
||||||
}
|
}
|
||||||
return correct
|
return correct
|
||||||
}
|
}
|
||||||
|
|
||||||
reader := strings.NewReader(input)
|
reader := strings.NewReader(input)
|
||||||
lx, err := NewDefinition().Lex("stream0.fspl", reader)
|
lx, err := NewLexer("stream0.fspl", reader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errMessage == "" {
|
if errMessage == "" {
|
||||||
test.Error("lexer returned error: ", err)
|
test.Error("lexer returned error: ", err)
|
||||||
@ -58,7 +57,7 @@ func testStringErr (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var tokens []lexer.Token
|
var tokens []Token
|
||||||
for {
|
for {
|
||||||
got, err := lx.Next()
|
got, err := lx.Next()
|
||||||
tokens = append(tokens, got)
|
tokens = append(tokens, got)
|
||||||
@ -78,8 +77,8 @@ func testStringErr (
|
|||||||
test.Logf("%-40v | %-40v", left, right)
|
test.Logf("%-40v | %-40v", left, right)
|
||||||
}
|
}
|
||||||
|
|
||||||
dumpToken := func (token lexer.Token) string {
|
dumpToken := func (token Token) string {
|
||||||
return fmt.Sprintf("%4v: \"%s\"", token.Type, token.Value)
|
return fmt.Sprintf("%4v: \"%s\"", token.Kind, token.Value)
|
||||||
}
|
}
|
||||||
|
|
||||||
compareTokens := func () {
|
compareTokens := func () {
|
||||||
@ -113,7 +112,7 @@ func testStringErr (
|
|||||||
|
|
||||||
for index, token := range correct {
|
for index, token := range correct {
|
||||||
gotToken := tokens[index]
|
gotToken := tokens[index]
|
||||||
if token.Type != gotToken.Type || token.Value != gotToken.Value {
|
if token.Kind != gotToken.Kind || token.Value != gotToken.Value {
|
||||||
test.Logf("correct and got do not match at %v", index)
|
test.Logf("correct and got do not match at %v", index)
|
||||||
compareTokens()
|
compareTokens()
|
||||||
test.Fail()
|
test.Fail()
|
||||||
|
Loading…
Reference in New Issue
Block a user