Lexer has no dependency on participle now

Sasha Koshka 2024-02-05 15:04:54 -05:00
parent 6bfc073608
commit d2ecd7a266
2 changed files with 75 additions and 69 deletions

View File

@@ -3,11 +3,10 @@ package lexer
 import "io"
 import "bufio"
 import "unicode"
-import "github.com/alecthomas/participle/v2"
-import "github.com/alecthomas/participle/v2/lexer"
+import "git.tebibyte.media/sashakoshka/fspl/errors"
-const (
-	EOF lexer.TokenType = -(iota + 1)
+type TokenKind int; const (
+	EOF TokenKind = -(iota + 1)
 	// Name Rough regex-ish description
 	Ident // [a-z][a-zA-Z0-9]*
@@ -28,15 +27,8 @@ const (
 	Star // \*
 )
-type definition struct { }
-// NewDefinition returns a lexer definition.
-func NewDefinition () lexer.Definition {
-	return definition { }
-}
-func (definition) Symbols () map[string] lexer.TokenType {
-	return map[string] lexer.TokenType {
+func Symbols () map[string] TokenKind {
+	return map[string] TokenKind {
 		"EOF": EOF,
 		"Ident": Ident,
 		"TypeIdent": TypeIdent,
@@ -56,11 +48,25 @@ func (definition) Symbols () map[string] lexer.TokenType {
 	}
 }
-func (definition) Lex (filename string, reader io.Reader) (lexer.Lexer, error) {
+type Token struct {
+	Kind TokenKind
+	Value string
+	Position errors.Position
+}
+func (tok Token) EOF () bool {
+	return tok.Kind == EOF
+}
+type Lexer interface {
+	Next () (Token, error)
+}
+func NewLexer (filename string, reader io.Reader) (Lexer, error) {
 	lexer := &fsplLexer {
 		filename: filename,
 		reader: bufio.NewReader(reader),
-		line: 1,
+		row: 1,
 	}
 	lexer.nextRune()
 	return lexer, nil
@@ -72,25 +78,25 @@ type fsplLexer struct {
 	rune rune
 	offset int
-	line int
+	row int
 	column int
 	eof bool
 }
-func (this *fsplLexer) Next () (lexer.Token, error) {
+func (this *fsplLexer) Next () (Token, error) {
 	token, err := this.nextInternal()
 	if err == io.EOF { err = this.errUnexpectedEOF() }
 	return token, err
 }
-func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
+func (this *fsplLexer) nextInternal () (token Token, err error) {
 	err = this.skipWhitespace()
-	token.Pos = this.pos()
+	token.Position = this.pos()
 	if this.eof {
-		token.Type = EOF
+		token.Kind = EOF
 		err = nil
 		return
 	}
@@ -102,14 +108,14 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
 	}
 	doNumber := func () {
-		token.Type = Int
+		token.Kind = Int
 		for isDigit(this.rune) {
 			appendRune()
 			if this.eof { err = nil; return }
 			if err != nil { return }
 		}
 		if this.rune == '.' {
-			token.Type = Float
+			token.Kind = Float
 			appendRune()
 			for isDigit(this.rune) {
 				appendRune()
@@ -119,7 +125,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
 	}
 	doSymbol := func () {
-		token.Type = Symbol
+		token.Kind = Symbol
 		for isSymbol(this.rune) {
 			appendRune()
 			if err != nil { return }
@@ -129,7 +135,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
 	switch {
 	// Ident
 	case unicode.IsLower(this.rune):
-		token.Type = Ident
+		token.Kind = Ident
 		for unicode.IsLetter(this.rune) || isDigit(this.rune) {
 			appendRune()
 			if this.eof { err = nil; return }
@@ -137,7 +143,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
 		}
 	// TypeIdent
 	case unicode.IsUpper(this.rune):
-		token.Type = TypeIdent
+		token.Kind = TypeIdent
 		for unicode.IsLetter(this.rune) || isDigit(this.rune) {
 			appendRune()
 			if this.eof { err = nil; return }
@@ -148,7 +154,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
 		doNumber()
 	// String
 	case this.rune == '\'':
-		token.Type = String
+		token.Kind = String
 		err = this.nextRune()
 		if err != nil { return }
@@ -169,7 +175,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
 		if err != nil { return }
 	// Symbol, Int, Float
 	case this.rune == '-':
-		token.Type = Symbol
+		token.Kind = Symbol
 		appendRune()
 		if err != nil { return }
 		if isDigit(this.rune) {
@@ -184,41 +190,41 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
 		doSymbol()
 		if this.eof { err = nil; return }
 	case this.rune == '(':
-		token.Type = LParen
+		token.Kind = LParen
 		appendRune()
 	case this.rune == '{':
-		token.Type = LBrace
+		token.Kind = LBrace
 		appendRune()
 	case this.rune == '[':
-		token.Type = LBracket
+		token.Kind = LBracket
 		appendRune()
 	case this.rune == ')':
-		token.Type = RParen
+		token.Kind = RParen
 		appendRune()
 	case this.rune == '}':
-		token.Type = RBrace
+		token.Kind = RBrace
 		appendRune()
 	case this.rune == ']':
-		token.Type = RBracket
+		token.Kind = RBracket
 		appendRune()
 	// Colon, DoubleColon
 	case this.rune == ':':
-		token.Type = Colon
+		token.Kind = Colon
 		appendRune()
 		if this.rune == ':' {
-			token.Type = DoubleColon
+			token.Kind = DoubleColon
 			appendRune()
 		}
 	// Star
 	case this.rune == '*':
-		token.Type = Star
+		token.Kind = Star
 		appendRune()
 	case unicode.IsPrint(this.rune):
-		err = participle.Errorf (
+		err = errors.Errorf (
 			this.pos(), "unexpected rune \"%c\"",
 			this.rune)
 	default:
-		err = participle.Errorf (
+		err = errors.Errorf (
 			this.pos(), "unexpected rune %U",
 			this.rune)
 	}
@@ -232,7 +238,7 @@ func (this *fsplLexer) nextRune () error {
 	this.offset ++
 	if char == '\n' {
-		this.line ++
+		this.row ++
 		this.column = 0
 	} else {
 		this.column ++
@@ -299,21 +305,22 @@ func (this *fsplLexer) skipComment () error {
 	return nil
 }
-func (this *fsplLexer) pos () lexer.Position {
-	return lexer.Position {
-		Filename: this.filename,
-		Offset: this.offset,
-		Line: this.line,
-		Column: this.column,
+func (this *fsplLexer) pos () errors.Position {
+	return errors.Position {
+		File: this.filename,
+		Line: "",
+		Row: this.row,
+		Start: this.column,
+		End: this.column,
 	}
 }
 func (this *fsplLexer) errUnexpectedEOF () error {
-	return participle.Errorf(this.pos(), "unexpected EOF")
+	return errors.Errorf(this.pos(), "unexpected EOF")
 }
 func (this *fsplLexer) errBadEscapeSequence () error {
-	return participle.Errorf(this.pos(), "bad escape sequence")
+	return errors.Errorf(this.pos(), "bad escape sequence")
 }
 func isWhitespace (char rune) bool {
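
With the participle types gone, the lexer's public surface is just TokenKind, Token, the Lexer interface, and NewLexer. A rough sketch of how a caller might drive it, based only on the signatures added above; the lexer package's import path, the file name, and the sample input text are assumptions for illustration, not taken from the repository:

package main

import "fmt"
import "strings"
import "git.tebibyte.media/sashakoshka/fspl/lexer" // assumed import path for the package above

func main () {
	// NewLexer takes a filename (used only for positions) and any io.Reader.
	// "example.fspl" and the input text are made up for this sketch.
	lx, err := lexer.NewLexer("example.fspl", strings.NewReader("x: Int"))
	if err != nil { panic(err) }

	// Pull tokens until the lexer produces its EOF kind.
	for {
		tok, err := lx.Next()
		if err != nil { panic(err) }
		if tok.EOF() { break }
		fmt.Printf("kind %v, value %q, row %v\n", tok.Kind, tok.Value, tok.Position.Row)
	}
}

Returning the Lexer interface from NewLexer keeps fsplLexer unexported while still letting a parser accept any token source.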

View File

@@ -3,12 +3,11 @@ package lexer
 import "fmt"
 import "testing"
 import "strings"
-import "github.com/alecthomas/participle/v2"
-import "github.com/alecthomas/participle/v2/lexer"
+import "git.tebibyte.media/sashakoshka/fspl/errors"
-func tok (ty lexer.TokenType, value string) lexer.Token {
-	return lexer.Token {
-		Type: ty,
+func tok (kind TokenKind, value string) Token {
+	return Token {
+		Kind: kind,
 		Value: value,
 	}
 }
@@ -16,7 +15,7 @@ func tok (ty lexer.TokenType, value string) lexer.Token {
 func testString (
 	test *testing.T,
 	input string,
-	correct ...lexer.Token,
+	correct ...Token,
 ) {
 	testStringErr(test, "", 0, 0, input, correct...)
 }
@@ -24,31 +23,31 @@ func testString (
 func testStringErr (
 	test *testing.T,
 	errMessage string,
-	errLine int,
-	errColumn int,
+	errRow int,
+	errStart int,
 	input string,
-	correct ...lexer.Token,
+	correct ...Token,
 ) {
 	testError := func (err error) bool {
-		got := err.(participle.Error)
-		gotMessage := got.Message()
-		gotLine := got.Position().Line
-		gotColumn := got.Position().Column
+		got := err.(*errors.Error)
+		gotMessage := got.Message
+		gotRow := got.Position.Row
+		gotStart := got.Position.Start
 		correct :=
 			gotMessage == errMessage &&
-			gotLine == errLine &&
-			gotColumn == errColumn
+			gotRow == errRow &&
+			gotStart == errStart
 		if !correct {
 			test.Log("errors do not match")
-			test.Logf("got:\n%v:%v: %v", gotLine, gotColumn, gotMessage)
-			test.Logf("correct:\n%v:%v: %v", errLine, errColumn, errMessage)
+			test.Logf("got:\n%v:%v: %v", gotRow, gotStart, gotMessage)
+			test.Logf("correct:\n%v:%v: %v", errRow, errStart, errMessage)
 			test.Fail()
 		}
 		return correct
 	}
 	reader := strings.NewReader(input)
-	lx, err := NewDefinition().Lex("stream0.fspl", reader)
+	lx, err := NewLexer("stream0.fspl", reader)
 	if err != nil {
 		if errMessage == "" {
 			test.Error("lexer returned error: ", err)
@@ -58,7 +57,7 @@ func testStringErr (
 		}
 	}
-	var tokens []lexer.Token
+	var tokens []Token
 	for {
 		got, err := lx.Next()
 		tokens = append(tokens, got)
@@ -78,8 +77,8 @@ func testStringErr (
 		test.Logf("%-40v | %-40v", left, right)
 	}
-	dumpToken := func (token lexer.Token) string {
-		return fmt.Sprintf("%4v: \"%s\"", token.Type, token.Value)
+	dumpToken := func (token Token) string {
+		return fmt.Sprintf("%4v: \"%s\"", token.Kind, token.Value)
 	}
 	compareTokens := func () {
@@ -113,7 +112,7 @@ func testStringErr (
 	for index, token := range correct {
 		gotToken := tokens[index]
-		if token.Type != gotToken.Type || token.Value != gotToken.Value {
+		if token.Kind != gotToken.Kind || token.Value != gotToken.Value {
 			test.Logf("correct and got do not match at %v", index)
 			compareTokens()
 			test.Fail()
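
Both files now lean on git.tebibyte.media/sashakoshka/fspl/errors instead of participle for positions and error values. The diff only shows how that package is used, not its definition; the following is a minimal sketch consistent with that usage, with every field and signature inferred from the calls above rather than copied from the real package:

package errors

import "fmt"

// Position describes a location in a source file. Line presumably holds the
// source line's text (the lexer above passes ""); Row is the line number and
// Start/End the column range.
type Position struct {
	File  string
	Line  string
	Row   int
	Start int
	End   int
}

// Error pairs a message with a position, matching the err.(*errors.Error)
// assertion and the .Message / .Position accesses in the test above.
type Error struct {
	Position Position
	Message  string
}

func (err *Error) Error () string {
	return fmt.Sprintf("%v:%v: %v", err.Position.Row, err.Position.Start, err.Message)
}

// Errorf builds an *Error the way participle.Errorf used to; this is how the
// lexer reports unexpected runes and unexpected EOF.
func Errorf (pos Position, format string, args ...any) error {
	return &Error {
		Position: pos,
		Message:  fmt.Sprintf(format, args...),
	}
}

Under this shape, the lexer's errors.Errorf(this.pos(), ...) calls and the test's type assertion both work unchanged; the real package may differ in details.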