generate: Add PDL lexer

This commit is contained in:
Sasha Koshka 2025-06-05 22:06:22 -04:00
parent bb5fc89cc5
commit 127aa23a61
2 changed files with 284 additions and 0 deletions

230
generate/lex.go Normal file
View File

@ -0,0 +1,230 @@
package generate
import "io"
import "bufio"
import "unicode"
import "unicode/utf8"
import "git.tebibyte.media/sashakoshka/goparse"
const (
TokenMethod parse.TokenKind = iota
TokenKey
TokenIdent
TokenComma
TokenLBrace
TokenRBrace
TokenLBracket
TokenRBracket
)
var tokenNames = map[parse.TokenKind] string {
TokenMethod: "Method",
TokenKey: "Key",
TokenIdent: "Ident",
TokenComma: "Comma",
TokenLBrace: "LBrace",
TokenRBrace: "RBrace",
TokenLBracket: "LBracket",
TokenRBracket: "RBracket",
}
func Lex(fileName string, reader io.Reader) (parse.Lexer, error) {
lex := &lexer {
fileName: fileName,
lineScanner: bufio.NewScanner(reader),
}
lex.nextRune()
return lex, nil
}
type lexer struct {
fileName string
lineScanner *bufio.Scanner
rune rune
line string
lineFood string
offset int
row int
column int
eof bool
}
func (this *lexer) Next() (parse.Token, error) {
token, err := this.nextInternal()
if err == io.EOF { err = this.errUnexpectedEOF() }
return token, err
}
func (this *lexer) nextInternal() (token parse.Token, err error) {
err = this.skipWhitespace()
token.Position = this.pos()
if this.eof {
token.Kind = parse.EOF
err = nil
return
}
if err != nil { return }
appendRune := func () {
token.Value += string(this.rune)
err = this.nextRune()
}
doNumber := func () {
for isDigit(this.rune) {
appendRune()
if this.eof { err = nil; return }
if err != nil { return }
}
}
defer func () {
newPos := this.pos()
newPos.End -- // TODO figure out why tf we have to do this
token.Position = token.Position.Union(newPos)
} ()
switch {
// Method
case this.rune == 'M':
token.Kind = TokenMethod
err = this.nextRune()
if err != nil { return }
doNumber()
if this.eof { err = nil; return }
// Key
case isDigit(this.rune):
token.Kind = TokenKey
doNumber()
if this.eof { err = nil; return }
// Ident
case unicode.IsUpper(this.rune):
token.Kind = TokenIdent
for unicode.IsLetter(this.rune) || isDigit(this.rune) {
appendRune()
if this.eof { err = nil; return }
if err != nil { return }
}
// Comma
case this.rune == ',':
token.Kind = TokenComma
appendRune()
if this.eof { err = nil; return }
// LBrace
case this.rune == '{':
token.Kind = TokenLBrace
appendRune()
if this.eof { err = nil; return }
// RBrace
case this.rune == '}':
token.Kind = TokenRBrace
appendRune()
if this.eof { err = nil; return }
// LBracket
case this.rune == '[':
token.Kind = TokenLBracket
appendRune()
if this.eof { err = nil; return }
// RBracket
case this.rune == ']':
token.Kind = TokenRBracket
appendRune()
if this.eof { err = nil; return }
case unicode.IsPrint(this.rune):
err = parse.Errorf (
this.pos(), "unexpected rune '%c'",
this.rune)
default:
err = parse.Errorf (
this.pos(), "unexpected rune %U",
this.rune)
}
return
}
func (this *lexer) nextRune() error {
if this.lineFood == "" {
ok := this.lineScanner.Scan()
if ok {
this.line = this.lineScanner.Text()
this.lineFood = this.line
this.rune = '\n'
this.column = 0
this.row ++
} else {
err := this.lineScanner.Err()
if err == nil {
this.eof = true
return io.EOF
} else {
return err
}
}
} else {
var ch rune
var size int
for ch == 0 && this.lineFood != "" {
ch, size = utf8.DecodeRuneInString(this.lineFood)
this.lineFood = this.lineFood[size:]
}
this.rune = ch
this.column ++
}
return nil
}
func (this *lexer) skipWhitespace() error {
err := this.skipComment()
if err != nil { return err }
for isWhitespace(this.rune) {
err := this.nextRune()
if err != nil { return err }
err = this.skipComment()
if err != nil { return err }
}
return nil
}
func (this *lexer) skipComment() error {
if this.rune == ';' {
for this.rune != '\n' {
err := this.nextRune()
if err != nil { return err }
}
}
return nil
}
func (this *lexer) pos() parse.Position {
return parse.Position {
File: this.fileName,
Line: this.lineScanner.Text(),
Row: this.row - 1,
Start: this.column - 1,
End: this.column,
}
}
func (this *lexer) errUnexpectedEOF() error {
return parse.Errorf(this.pos(), "unexpected EOF")
}
func isWhitespace(char rune) bool {
switch char {
case ' ', '\t', '\r', '\n': return true
default: return false
}
}
func isDigit(char rune) bool {
return char >= '0' && char <= '9'
}
func isHexDigit(char rune) bool {
return isDigit(char) || char >= 'a' && char <= 'f' || char >= 'A' && char <= 'F'
}

54
generate/lex_test.go Normal file
View File

@ -0,0 +1,54 @@
package generate
import "strings"
import "testing"
import "git.tebibyte.media/sashakoshka/goparse"
func TestLex(test *testing.T) {
lexer, err := Lex("test.pdl", strings.NewReader(`
M0001 User {
0000 Name String,
0001 Users []User,
0002 Followers U32,
}`))
if err != nil { test.Fatal(parse.Format(err)) }
correctTokens := []parse.Token {
tok(TokenMethod, "0001"),
tok(TokenIdent, "User"),
tok(TokenLBrace, "{"),
tok(TokenKey, "0000"),
tok(TokenIdent, "Name"),
tok(TokenIdent, "String"),
tok(TokenComma, ","),
tok(TokenKey, "0001"),
tok(TokenIdent, "Users"),
tok(TokenLBracket, "["),
tok(TokenRBracket, "]"),
tok(TokenIdent, "User"),
tok(TokenComma, ","),
tok(TokenKey, "0002"),
tok(TokenIdent, "Followers"),
tok(TokenIdent, "U32"),
tok(TokenComma, ","),
tok(TokenRBrace, "}"),
tok(parse.EOF, ""),
}
for index, correct := range correctTokens {
got, err := lexer.Next()
if err != nil { test.Fatal(parse.Format(err)) }
if got.Kind != correct.Kind || got.Value != correct.Value {
test.Logf("token %d mismatch", index)
test.Log("GOT:", tokenNames[got.Kind], got.Value)
test.Fatal("CORRECT:", tokenNames[correct.Kind], correct.Value)
}
}
}
func tok(kind parse.TokenKind, value string) parse.Token {
return parse.Token {
Kind: kind,
Value: value,
}
}