Lexer has no dependency on participle now

This commit is contained in:
Sasha Koshka 2024-02-05 15:04:54 -05:00
parent 0eea2b61a3
commit fc88e27abf
2 changed files with 75 additions and 69 deletions

View File

@ -3,11 +3,10 @@ package lexer
import "io" import "io"
import "bufio" import "bufio"
import "unicode" import "unicode"
import "github.com/alecthomas/participle/v2" import "git.tebibyte.media/sashakoshka/fspl/errors"
import "github.com/alecthomas/participle/v2/lexer"
const ( type TokenKind int; const (
EOF lexer.TokenType = -(iota + 1) EOF TokenKind = -(iota + 1)
// Name Rough regex-ish description // Name Rough regex-ish description
Ident // [a-z][a-zA-Z0-9]* Ident // [a-z][a-zA-Z0-9]*
@ -28,15 +27,8 @@ const (
Star // \* Star // \*
) )
type definition struct { } func Symbols () map[string] TokenKind {
return map[string] TokenKind {
// NewDefinition returns a lexer definition.
func NewDefinition () lexer.Definition {
return definition { }
}
func (definition) Symbols () map[string] lexer.TokenType {
return map[string] lexer.TokenType {
"EOF": EOF, "EOF": EOF,
"Ident": Ident, "Ident": Ident,
"TypeIdent": TypeIdent, "TypeIdent": TypeIdent,
@ -56,11 +48,25 @@ func (definition) Symbols () map[string] lexer.TokenType {
} }
} }
func (definition) Lex (filename string, reader io.Reader) (lexer.Lexer, error) { type Token struct {
Kind TokenKind
Value string
Position errors.Position
}
func (tok Token) EOF () bool {
return tok.Kind == EOF
}
type Lexer interface {
Next () (Token, error)
}
func NewLexer (filename string, reader io.Reader) (Lexer, error) {
lexer := &fsplLexer { lexer := &fsplLexer {
filename: filename, filename: filename,
reader: bufio.NewReader(reader), reader: bufio.NewReader(reader),
line: 1, row: 1,
} }
lexer.nextRune() lexer.nextRune()
return lexer, nil return lexer, nil
@ -72,25 +78,25 @@ type fsplLexer struct {
rune rune rune rune
offset int offset int
line int row int
column int column int
eof bool eof bool
} }
func (this *fsplLexer) Next () (lexer.Token, error) { func (this *fsplLexer) Next () (Token, error) {
token, err := this.nextInternal() token, err := this.nextInternal()
if err == io.EOF { err = this.errUnexpectedEOF() } if err == io.EOF { err = this.errUnexpectedEOF() }
return token, err return token, err
} }
func (this *fsplLexer) nextInternal () (token lexer.Token, err error) { func (this *fsplLexer) nextInternal () (token Token, err error) {
err = this.skipWhitespace() err = this.skipWhitespace()
token.Pos = this.pos() token.Position = this.pos()
if this.eof { if this.eof {
token.Type = EOF token.Kind = EOF
err = nil err = nil
return return
} }
@ -102,14 +108,14 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
} }
doNumber := func () { doNumber := func () {
token.Type = Int token.Kind = Int
for isDigit(this.rune) { for isDigit(this.rune) {
appendRune() appendRune()
if this.eof { err = nil; return } if this.eof { err = nil; return }
if err != nil { return } if err != nil { return }
} }
if this.rune == '.' { if this.rune == '.' {
token.Type = Float token.Kind = Float
appendRune() appendRune()
for isDigit(this.rune) { for isDigit(this.rune) {
appendRune() appendRune()
@ -119,7 +125,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
} }
doSymbol := func () { doSymbol := func () {
token.Type = Symbol token.Kind = Symbol
for isSymbol(this.rune) { for isSymbol(this.rune) {
appendRune() appendRune()
if err != nil { return } if err != nil { return }
@ -129,7 +135,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
switch { switch {
// Ident // Ident
case unicode.IsLower(this.rune): case unicode.IsLower(this.rune):
token.Type = Ident token.Kind = Ident
for unicode.IsLetter(this.rune) || isDigit(this.rune) { for unicode.IsLetter(this.rune) || isDigit(this.rune) {
appendRune() appendRune()
if this.eof { err = nil; return } if this.eof { err = nil; return }
@ -137,7 +143,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
} }
// TypeIdent // TypeIdent
case unicode.IsUpper(this.rune): case unicode.IsUpper(this.rune):
token.Type = TypeIdent token.Kind = TypeIdent
for unicode.IsLetter(this.rune) || isDigit(this.rune) { for unicode.IsLetter(this.rune) || isDigit(this.rune) {
appendRune() appendRune()
if this.eof { err = nil; return } if this.eof { err = nil; return }
@ -148,7 +154,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
doNumber() doNumber()
// String // String
case this.rune == '\'': case this.rune == '\'':
token.Type = String token.Kind = String
err = this.nextRune() err = this.nextRune()
if err != nil { return } if err != nil { return }
@ -169,7 +175,7 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
if err != nil { return } if err != nil { return }
// Symbol, Int, Float // Symbol, Int, Float
case this.rune == '-': case this.rune == '-':
token.Type = Symbol token.Kind = Symbol
appendRune() appendRune()
if err != nil { return } if err != nil { return }
if isDigit(this.rune) { if isDigit(this.rune) {
@ -184,41 +190,41 @@ func (this *fsplLexer) nextInternal () (token lexer.Token, err error) {
doSymbol() doSymbol()
if this.eof { err = nil; return } if this.eof { err = nil; return }
case this.rune == '(': case this.rune == '(':
token.Type = LParen token.Kind = LParen
appendRune() appendRune()
case this.rune == '{': case this.rune == '{':
token.Type = LBrace token.Kind = LBrace
appendRune() appendRune()
case this.rune == '[': case this.rune == '[':
token.Type = LBracket token.Kind = LBracket
appendRune() appendRune()
case this.rune == ')': case this.rune == ')':
token.Type = RParen token.Kind = RParen
appendRune() appendRune()
case this.rune == '}': case this.rune == '}':
token.Type = RBrace token.Kind = RBrace
appendRune() appendRune()
case this.rune == ']': case this.rune == ']':
token.Type = RBracket token.Kind = RBracket
appendRune() appendRune()
// Colon, DoubleColon // Colon, DoubleColon
case this.rune == ':': case this.rune == ':':
token.Type = Colon token.Kind = Colon
appendRune() appendRune()
if this.rune == ':' { if this.rune == ':' {
token.Type = DoubleColon token.Kind = DoubleColon
appendRune() appendRune()
} }
// Star // Star
case this.rune == '*': case this.rune == '*':
token.Type = Star token.Kind = Star
appendRune() appendRune()
case unicode.IsPrint(this.rune): case unicode.IsPrint(this.rune):
err = participle.Errorf ( err = errors.Errorf (
this.pos(), "unexpected rune \"%c\"", this.pos(), "unexpected rune \"%c\"",
this.rune) this.rune)
default: default:
err = participle.Errorf ( err = errors.Errorf (
this.pos(), "unexpected rune %U", this.pos(), "unexpected rune %U",
this.rune) this.rune)
} }
@ -232,7 +238,7 @@ func (this *fsplLexer) nextRune () error {
this.offset ++ this.offset ++
if char == '\n' { if char == '\n' {
this.line ++ this.row ++
this.column = 0 this.column = 0
} else { } else {
this.column ++ this.column ++
@ -299,21 +305,22 @@ func (this *fsplLexer) skipComment () error {
return nil return nil
} }
func (this *fsplLexer) pos () lexer.Position { func (this *fsplLexer) pos () errors.Position {
return lexer.Position { return errors.Position {
Filename: this.filename, File: this.filename,
Offset: this.offset, Line: "",
Line: this.line, Row: this.row,
Column: this.column, Start: this.column,
End: this.column,
} }
} }
func (this *fsplLexer) errUnexpectedEOF () error { func (this *fsplLexer) errUnexpectedEOF () error {
return participle.Errorf(this.pos(), "unexpected EOF") return errors.Errorf(this.pos(), "unexpected EOF")
} }
func (this *fsplLexer) errBadEscapeSequence () error { func (this *fsplLexer) errBadEscapeSequence () error {
return participle.Errorf(this.pos(), "bad escape sequence") return errors.Errorf(this.pos(), "bad escape sequence")
} }
func isWhitespace (char rune) bool { func isWhitespace (char rune) bool {

View File

@ -3,12 +3,11 @@ package lexer
import "fmt" import "fmt"
import "testing" import "testing"
import "strings" import "strings"
import "github.com/alecthomas/participle/v2" import "git.tebibyte.media/sashakoshka/fspl/errors"
import "github.com/alecthomas/participle/v2/lexer"
func tok (ty lexer.TokenType, value string) lexer.Token { func tok (kind TokenKind, value string) Token {
return lexer.Token { return Token {
Type: ty, Kind: kind,
Value: value, Value: value,
} }
} }
@ -16,7 +15,7 @@ func tok (ty lexer.TokenType, value string) lexer.Token {
func testString ( func testString (
test *testing.T, test *testing.T,
input string, input string,
correct ...lexer.Token, correct ...Token,
) { ) {
testStringErr(test, "", 0, 0, input, correct...) testStringErr(test, "", 0, 0, input, correct...)
} }
@ -24,31 +23,31 @@ func testString (
func testStringErr ( func testStringErr (
test *testing.T, test *testing.T,
errMessage string, errMessage string,
errLine int, errRow int,
errColumn int, errStart int,
input string, input string,
correct ...lexer.Token, correct ...Token,
) { ) {
testError := func (err error) bool { testError := func (err error) bool {
got := err.(participle.Error) got := err.(*errors.Error)
gotMessage := got.Message() gotMessage := got.Message
gotLine := got.Position().Line gotRow := got.Position.Row
gotColumn := got.Position().Column gotStart := got.Position.Start
correct := correct :=
gotMessage == errMessage && gotMessage == errMessage &&
gotLine == errLine && gotRow == errRow &&
gotColumn == errColumn gotStart == errStart
if !correct { if !correct {
test.Log("errors do not match") test.Log("errors do not match")
test.Logf("got:\n%v:%v: %v", gotLine, gotColumn, gotMessage) test.Logf("got:\n%v:%v: %v", gotRow, gotStart, gotMessage)
test.Logf("correct:\n%v:%v: %v", errLine, errColumn, errMessage) test.Logf("correct:\n%v:%v: %v", errRow, errStart, errMessage)
test.Fail() test.Fail()
} }
return correct return correct
} }
reader := strings.NewReader(input) reader := strings.NewReader(input)
lx, err := NewDefinition().Lex("stream0.fspl", reader) lx, err := NewLexer("stream0.fspl", reader)
if err != nil { if err != nil {
if errMessage == "" { if errMessage == "" {
test.Error("lexer returned error: ", err) test.Error("lexer returned error: ", err)
@ -58,7 +57,7 @@ func testStringErr (
} }
} }
var tokens []lexer.Token var tokens []Token
for { for {
got, err := lx.Next() got, err := lx.Next()
tokens = append(tokens, got) tokens = append(tokens, got)
@ -78,8 +77,8 @@ func testStringErr (
test.Logf("%-40v | %-40v", left, right) test.Logf("%-40v | %-40v", left, right)
} }
dumpToken := func (token lexer.Token) string { dumpToken := func (token Token) string {
return fmt.Sprintf("%4v: \"%s\"", token.Type, token.Value) return fmt.Sprintf("%4v: \"%s\"", token.Kind, token.Value)
} }
compareTokens := func () { compareTokens := func () {
@ -113,7 +112,7 @@ func testStringErr (
for index, token := range correct { for index, token := range correct {
gotToken := tokens[index] gotToken := tokens[index]
if token.Type != gotToken.Type || token.Value != gotToken.Value { if token.Kind != gotToken.Kind || token.Value != gotToken.Value {
test.Logf("correct and got do not match at %v", index) test.Logf("correct and got do not match at %v", index)
compareTokens() compareTokens()
test.Fail() test.Fail()