diff --git a/README.md b/README.md index 6ca2326..21b85c4 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,8 @@ These are some design goals that I have followed/am following: - Language syntax must have zero ambiguity - The compiler should not generate new functions or complex logic that the user has not written +- One line at a time - the language's syntax should encourage writing code that + flows vertically and not horizontally, with minimal nesting ## Planned features diff --git a/file/error.go b/file/error.go index 7cb437b..3f40a72 100644 --- a/file/error.go +++ b/file/error.go @@ -52,20 +52,30 @@ func (err Error) Error () (formattedMessage string) { if err.width > 0 { // print erroneous line + line := err.Location.file.lines[err.Location.row] formattedMessage += err.Location.file.lines[err.Location.row] + "\n" + // position error marker + var index int + for index = 0; index < err.Location.column; index ++ { + if line[index] == '\t' { + formattedMessage += "\t" + } else { + formattedMessage += " " + } + } + // print an arrow with a tail spanning the width of the mistake - columnCountdown := err.Location.column - for columnCountdown > 1 { - // TODO: for tabs, print out a teb instead. - formattedMessage += " " - columnCountdown -- - } for err.width > 1 { - // TODO: for tabs, print out 8 of these instead. - formattedMessage += "-" + if line[index] == '\t' { + formattedMessage += "--------" + } else { + formattedMessage += "-" + } + index ++ } + formattedMessage += "^\n" } formattedMessage += err.message + "\n" diff --git a/lexer/lexer.go b/lexer/lexer.go index cbdc5fb..1c2b761 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -176,7 +176,17 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { err = lexer.nextRune() case '.': token := lexer.newToken() + err = lexer.nextRune() + if err != nil { return } token.kind = TokenKindDot + if lexer.char == '.' 
{ + token.kind = TokenKindElipsis + err = lexer.nextRune() + } + lexer.addToken(token) + case ',': + token := lexer.newToken() + token.kind = TokenKindComma lexer.addToken(token) err = lexer.nextRune() case '[': @@ -200,15 +210,15 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { lexer.addToken(token) err = lexer.nextRune() case '+': + token := lexer.newToken() err = lexer.nextRune() if err != nil { return } - token := lexer.newToken() token.kind = TokenKindPlus if lexer.char == '+' { token.kind = TokenKindIncrement + err = lexer.nextRune() } lexer.addToken(token) - err = lexer.nextRune() case '-': err = lexer.tokenizeDashBeginning() case '*': @@ -242,45 +252,45 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) { lexer.addToken(token) err = lexer.nextRune() case '<': + token := lexer.newToken() err = lexer.nextRune() if err != nil { return } - token := lexer.newToken() token.kind = TokenKindLessThan if lexer.char == '<' { token.kind = TokenKindLShift + err = lexer.nextRune() } lexer.addToken(token) - err = lexer.nextRune() case '>': + token := lexer.newToken() err = lexer.nextRune() if err != nil { return } - token := lexer.newToken() token.kind = TokenKindGreaterThan if lexer.char == '>' { token.kind = TokenKindRShift + err = lexer.nextRune() } lexer.addToken(token) - err = lexer.nextRune() case '|': + token := lexer.newToken() err = lexer.nextRune() if err != nil { return } - token := lexer.newToken() token.kind = TokenKindBinaryOr if lexer.char == '|' { token.kind = TokenKindLogicalOr + err = lexer.nextRune() } lexer.addToken(token) - err = lexer.nextRune() case '&': + token := lexer.newToken() err = lexer.nextRune() if err != nil { return } - token := lexer.newToken() token.kind = TokenKindBinaryAnd if lexer.char == '&' { token.kind = TokenKindLogicalAnd + err = lexer.nextRune() } lexer.addToken(token) - err = lexer.nextRune() default: err = file.NewError ( lexer.file.Location(1), diff --git a/lexer/lexer_test.go 
b/lexer/lexer_test.go index 1d1b175..27216d0 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -62,6 +62,8 @@ func TestTokenizeAll (test *testing.T) { Token { kind: TokenKindName, value: "helloWorld" }, Token { kind: TokenKindColon }, Token { kind: TokenKindDot }, + Token { kind: TokenKindComma }, + Token { kind: TokenKindElipsis }, Token { kind: TokenKindLBracket }, Token { kind: TokenKindRBracket }, Token { kind: TokenKindLBrace }, @@ -91,6 +93,10 @@ func TestTokenizeAll (test *testing.T) { func TestTokenizeNumbers (test *testing.T) { checkTokenSlice("../tests/lexer/numbers.arf", []Token { + Token { kind: TokenKindUInt, value: uint64(0) }, + Token { kind: TokenKindNewline }, + Token { kind: TokenKindUInt, value: uint64(8) }, + Token { kind: TokenKindNewline }, Token { kind: TokenKindUInt, value: uint64(83628266) }, Token { kind: TokenKindNewline }, Token { kind: TokenKindUInt, value: uint64(83628266) }, diff --git a/lexer/numbers.go b/lexer/numbers.go index 4dce92b..7093750 100644 --- a/lexer/numbers.go +++ b/lexer/numbers.go @@ -1,7 +1,5 @@ package lexer -import "git.tebibyte.media/sashakoshka/arf/file" - // tokenizeSymbolBeginning lexes a token that starts with a number. 
func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) { var number uint64 @@ -23,11 +21,6 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error number, fragment, isFloat, err = lexer.tokenizeNumber(10) } else if lexer.char >= '0' && lexer.char <= '9' { number, fragment, isFloat, err = lexer.tokenizeNumber(8) - } else { - return file.NewError ( - lexer.file.Location(1), - "unexpected character in number literal", - file.ErrorKindError) } } else { number, fragment, isFloat, err = lexer.tokenizeNumber(10) diff --git a/lexer/token.go b/lexer/token.go index 0b2683f..43cd675 100644 --- a/lexer/token.go +++ b/lexer/token.go @@ -24,6 +24,8 @@ const ( TokenKindColon TokenKindDot + TokenKindElipsis + TokenKindComma TokenKindLBracket TokenKindRBracket @@ -133,6 +135,10 @@ func (tokenKind TokenKind) Describe () (description string) { description = "Colon" case TokenKindDot: description = "Dot" + case TokenKindElipsis: + description = "Elipsis" + case TokenKindComma: + description = "Comma" case TokenKindLBracket: description = "LBracket" case TokenKindRBracket: diff --git a/parser/argument.go b/parser/argument.go new file mode 100644 index 0000000..c7f42b0 --- /dev/null +++ b/parser/argument.go @@ -0,0 +1,91 @@ +package parser + +import "git.tebibyte.media/sashakoshka/arf/file" +import "git.tebibyte.media/sashakoshka/arf/lexer" + +var validArgumentStartTokens = []lexer.TokenKind { + lexer.TokenKindName, + + lexer.TokenKindInt, + lexer.TokenKindUInt, + lexer.TokenKindFloat, + lexer.TokenKindString, + lexer.TokenKindRune, + + lexer.TokenKindLBrace, + lexer.TokenKindLBracket, +} + +func (parser *ParsingOperation) parseArgument () (argument Argument, err error) { + argument.location = parser.token.Location() + + err = parser.expect(validArgumentStartTokens...) 
+ if err != nil { return } + + switch parser.token.Kind() { + case lexer.TokenKindName: + var identifier Identifier + identifier, err = parser.parseIdentifier() + if err != nil { return } + + if parser.token.Is(lexer.TokenKindColon) { + var what Type + what, err = parser.parseType() + if err != nil { return } + + if len(identifier.trail) != 1 { + err = parser.token.NewError ( + "cannot use member selection in " + + "a variable definition", + file.ErrorKindError) + return + } + + argument.kind = ArgumentKindDeclaration + argument.value = Declaration { + location: argument.location, + name: identifier.trail[0], + what: what, + } + } else { + argument.kind = ArgumentKindIdentifier + argument.value = identifier + } + + case lexer.TokenKindInt: + argument.kind = ArgumentKindInt + argument.value = parser.token.Value().(int64) + err = parser.nextToken() + + case lexer.TokenKindUInt: + argument.kind = ArgumentKindUInt + argument.value = parser.token.Value().(uint64) + err = parser.nextToken() + + case lexer.TokenKindFloat: + argument.kind = ArgumentKindFloat + argument.value = parser.token.Value().(float64) + err = parser.nextToken() + + case lexer.TokenKindString: + argument.kind = ArgumentKindString + argument.value = parser.token.Value().(string) + parser.nextToken() + + case lexer.TokenKindRune: + argument.kind = ArgumentKindRune + argument.value = parser.token.Value().(rune) + parser.nextToken() + + // case lexer.TokenKindLBrace: + + // case lexer.TokenKindLBracket: + + default: + panic ( + "unimplemented argument kind " + + parser.token.Kind().Describe()) + } + + return +} diff --git a/parser/body.go b/parser/body.go index cd05305..459d184 100644 --- a/parser/body.go +++ b/parser/body.go @@ -1,18 +1,34 @@ package parser +import "git.tebibyte.media/sashakoshka/arf/file" import "git.tebibyte.media/sashakoshka/arf/lexer" // parse body parses the body of an arf file, after the metadata header. 
func (parser *ParsingOperation) parseBody () (err error) { - err = parser.nextToken(lexer.TokenKindName) - if err != nil { return } + for { + err = parser.expect(lexer.TokenKindName) + if err != nil { return } - switch parser.token.Value().(string) { - case "data": - case "type": - case "func": - case "face": + sectionType := parser.token.Value().(string) + switch sectionType { + case "data": + var section *DataSection + section, err = parser.parseDataSection() + if parser.tree.dataSections == nil { + parser.tree.dataSections = + make(map[string] *DataSection) + } + parser.tree.dataSections[section.name] = section + if err != nil { return } + case "type": + case "face": + case "enum": + case "func": + default: + err = parser.token.NewError ( + "unknown section type \"" + sectionType + "\"", + file.ErrorKindError) + return + } } - - return } diff --git a/parser/data.go b/parser/data.go index 9f72e54..cf32548 100644 --- a/parser/data.go +++ b/parser/data.go @@ -1,6 +1,312 @@ package parser -// parseData parses a data section -func (parser *ParsingOperation) parseData () (err error) { +import "git.tebibyte.media/sashakoshka/arf/file" +import "git.tebibyte.media/sashakoshka/arf/types" +import "git.tebibyte.media/sashakoshka/arf/lexer" + +// parseData parses a data section. 
+func (parser *ParsingOperation) parseDataSection () ( + section *DataSection, + err error, +) { + err = parser.expect(lexer.TokenKindName) + if err != nil { return } + section = &DataSection { location: parser.token.Location() } + + err = parser.nextToken(lexer.TokenKindPermission) + if err != nil { return } + section.permission = parser.token.Value().(types.Permission) + + err = parser.nextToken(lexer.TokenKindName) + if err != nil { return } + section.name = parser.token.Value().(string) + + err = parser.nextToken(lexer.TokenKindColon) + if err != nil { return } + err = parser.nextToken() + if err != nil { return } + section.what, err = parser.parseType() + if err != nil { return } + + if parser.token.Is(lexer.TokenKindNewline) { + err = parser.nextToken() + if err != nil { return } + + section.value, err = parser.parseInitializationValues(0) + if err != nil { return } + } else { + section.value, err = parser.parseArgument() + if err != nil { return } + + err = parser.expect(lexer.TokenKindNewline) + if err != nil { return } + err = parser.nextToken() + if err != nil { return } + } + return +} + +// parseInitializationValues starts on the line after a data section, or a set +// phrase. It checks for an indent greater than the indent of the aforementioned +// data section or set phrase (passed through baseIndent), and if there is, +// it parses initialization values. 
+func (parser *ParsingOperation) parseInitializationValues ( + baseIndent int, +) ( + initializationArgument Argument, + err error, +) { + // check if line is indented one more than baseIndent + if !parser.token.Is(lexer.TokenKindIndent) { return } + if parser.token.Value().(int) != baseIndent + 1 { return } + + initializationArgument.location = parser.token.Location() + + err = parser.nextToken() + if err != nil { return } + + if parser.token.Is(lexer.TokenKindDot) { + + // object initialization + parser.previousToken() + var initializationValues ObjectInitializationValues + initializationValues, err = parser.parseObjectInitializationValues() + initializationArgument.kind = ArgumentKindObjectInitializationValues + initializationArgument.value = &initializationValues + + } else { + + // array initialization + parser.previousToken() + var initializationValues ArrayInitializationValues + initializationValues, err = parser.parseArrayInitializationValues() + initializationArgument.kind = ArgumentKindArrayInitializationValues + initializationArgument.value = &initializationValues + } + + return +} + +// parseObjectInitializationValues parses a list of object initialization +// values until the indentation level drops. 
+func (parser *ParsingOperation) parseObjectInitializationValues () ( + initializationValues ObjectInitializationValues, + err error, +) { + println("BEGIN") + defer println("END") + + initializationValues.attributes = make(map[string] Argument) + + baseIndent := 0 + begin := true + + for { + // if there is no indent we can just stop parsing + if !parser.token.Is(lexer.TokenKindIndent) { break} + indent := parser.token.Value().(int) + + if begin == true { + initializationValues.location = parser.token.Location() + baseIndent = indent + begin = false + } + + // do not parse any further if the indent has changed + if indent != baseIndent { break } + + println("HIT") + + // move on to the beginning of the line, which must contain + // a member initialization value + err = parser.nextToken(lexer.TokenKindDot) + if err != nil { return } + err = parser.nextToken(lexer.TokenKindName) + if err != nil { return } + name := parser.token.Value().(string) + + // if the member has already been listed, throw an error + _, exists := initializationValues.attributes[name] + if exists { + err = parser.token.NewError ( + "duplicate member \"" + name + "\" in object " + + "member initialization", + file.ErrorKindError) + return + } + + // parse the argument determining the member initialization + // value + err = parser.nextToken() + if err != nil { return } + var value Argument + if parser.token.Is(lexer.TokenKindNewline) { + + // recurse + err = parser.nextToken(lexer.TokenKindIndent) + if err != nil { return } + + value, err = parser.parseInitializationValues(baseIndent) + initializationValues.attributes[name] = value + if err != nil { return } + + } else { + + // parse as normal argument + value, err = parser.parseArgument() + initializationValues.attributes[name] = value + if err != nil { return } + + err = parser.expect(lexer.TokenKindNewline) + if err != nil { return } + err = parser.nextToken() + if err != nil { return } + } + } + + return +} + +// 
parseArrayInitializationValues parses a list of array initialization values
+until the indentation level drops.
+func (parser *ParsingOperation) parseArrayInitializationValues () (
+	initializationValues ArrayInitializationValues,
+	err error,
+) {
+	baseIndent := 0
+	begin := true
+
+	for {
+		// if there is no indent we can just stop parsing
+		if !parser.token.Is(lexer.TokenKindIndent) { break}
+		indent := parser.token.Value().(int)
+
+		if begin == true {
+			initializationValues.location = parser.token.Location()
+			baseIndent = indent
+			begin = false
+		}
+
+		// do not parse any further if the indent has changed
+		if indent != baseIndent { break }
+
+		// move on to the beginning of the line, which must contain
+		// arguments
+		err = parser.nextToken(validArgumentStartTokens...)
+		if err != nil { return }
+
+		for {
+			// stop parsing this line and go on to the next if a
+			// newline token is encountered
+			if parser.token.Is(lexer.TokenKindNewline) {
+				err = parser.nextToken()
+				if err != nil { return }
+				break
+			}
+
+			// otherwise, parse the argument
+			var argument Argument
+			argument, err = parser.parseArgument()
+			if err != nil { return }
+			initializationValues.values = append (
+				initializationValues.values,
+				argument)
+		}
+	}
+
+	return
+}
+
+// parseType parses a type notation of the form Name, {Name}, etc.
+func (parser *ParsingOperation) parseType () (what Type, err error) { + err = parser.expect(lexer.TokenKindName, lexer.TokenKindLBrace) + if err != nil { return } + what.location = parser.token.Location() + + if parser.token.Is(lexer.TokenKindLBrace) { + what.kind = TypeKindPointer + + err = parser.nextToken() + if err != nil { return } + + var points Type + points, err = parser.parseType() + if err != nil { return } + what.points = &points + + err = parser.expect ( + lexer.TokenKindUInt, + lexer.TokenKindRBrace, + lexer.TokenKindElipsis) + if err != nil { return } + + if parser.token.Is(lexer.TokenKindUInt) { + what.kind = TypeKindArray + + what.length = parser.token.Value().(uint64) + + err = parser.nextToken(lexer.TokenKindRBrace) + if err != nil { return } + } else if parser.token.Is(lexer.TokenKindElipsis) { + what.kind = TypeKindArray + + err = parser.nextToken(lexer.TokenKindRBrace) + if err != nil { return } + } + + err = parser.nextToken() + if err != nil { return } + } else { + what.name, err = parser.parseIdentifier() + if err != nil { return } + } + + if parser.token.Is(lexer.TokenKindColon) { + err = parser.nextToken(lexer.TokenKindName) + if err != nil { return } + + qualifier := parser.token.Value().(string) + switch qualifier { + case "mut": + what.mutable = true + default: + err = parser.token.NewError ( + "unknown type qualifier \"" + qualifier + "\"", + file.ErrorKindError) + return + } + + err = parser.nextToken() + if err != nil { return } + } + + return +} + +// parseIdentifier parses an identifier made out of dot separated names. +func (parser *ParsingOperation) parseIdentifier () ( + identifier Identifier, + err error, +) { + err = parser.expect(lexer.TokenKindName) + if err != nil { return } + identifier.location = parser.token.Location() + + for { + // TODO: eat up newlines and tabs after the dot, but not before + // it. 
+ if !parser.token.Is(lexer.TokenKindName) { break } + + identifier.trail = append ( + identifier.trail, + parser.token.Value().(string)) + + err = parser.nextToken() + if err != nil { return } + + if !parser.token.Is(lexer.TokenKindDot) { break } + } + + return } diff --git a/parser/parser.go b/parser/parser.go index 3907a53..ac183b6 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -111,3 +111,12 @@ func (parser *ParsingOperation) nextToken (allowed ...lexer.TokenKind) (err erro err = parser.expect(allowed...) return } + +// previousToken goes back one token. If the parser is already at the beginning, +// this does nothing. +func (parser *ParsingOperation) previousToken () { + parser.tokenIndex -- + if parser.tokenIndex < 0 { parser.tokenIndex = 0 } + parser.token = parser.tokens[parser.tokenIndex] + return +} diff --git a/parser/parser_test.go b/parser/parser_test.go index f4a7181..18e2ee0 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1,33 +1,75 @@ package parser -import "reflect" +import "io" import "testing" +// import "git.tebibyte.media/sashakoshka/arf/types" -func checkTree (modulePath string, correct *SyntaxTree, test *testing.T) { +func checkTree (modulePath string, correct string, test *testing.T) { tree, err := Parse(modulePath) + treeString := tree.ToString(0) - if err != nil { + test.Log("CORRECT TREE:") + test.Log(correct) + test.Log("WHAT WAS PARSED:") + test.Log(treeString) + + if err != io.EOF && err != nil { test.Log("returned error:") test.Log(err.Error()) test.Fail() return } - if !reflect.DeepEqual(tree, correct) { - test.Log("trees not equal") + if treeString != correct { + test.Log("trees not equal!") test.Fail() return } } func TestMeta (test *testing.T) { - checkTree("../tests/parser/meta",&SyntaxTree { - license: "GPLv3", - author: "Sasha Koshka", - - requires: []string { - "someModule", - "otherModule", - }, - }, test) + checkTree ("../tests/parser/meta", +`:arf +author "Sasha Koshka" +license "GPLv3" +require 
"someModule" +require "otherModule" +--- +`, test) } + +func TestData (test *testing.T) { + checkTree ("../tests/parser/data", +`:arf +--- +data wr integer:Int 3202 +data wr integerArray16:{Int 16} +data wr integerArrayInitialized:{Int 16} + 3948 + 293 + 293049 + 948 + 912 + 340 + 0 + 2304 + 0 + 4785 + 92 +data wr integerArrayVariable:{Int ..} +data wr integerPointer:{Int} +data wr mutInteger:Int:mut 3202 +data wr mutIntegerPointer:{Int}:mut +data wr nestedObject:Obj + .that + .bird2 123.8439 + .bird3 9328.21348239 + .this + .bird0 324 + .bird1 "hello world" +data wr object:Obj + .that 2139 + .this 324 +`, test) +} + diff --git a/parser/tree-tostring.go b/parser/tree-tostring.go new file mode 100644 index 0000000..0c2ed33 --- /dev/null +++ b/parser/tree-tostring.go @@ -0,0 +1,248 @@ +package parser + +import "fmt" +import "sort" + +func doIndent (indent int, input ...string) (output string) { + for index := 0; index < indent; index ++ { + output += "\t" + } + for _, inputSection := range input { + output += inputSection + } + return +} + +func sortMapKeysAlphabetically[KEY_TYPE any] ( + unsortedMap map[string] KEY_TYPE, +) ( + sortedKeys []string, +) { + sortedKeys = make([]string, len(unsortedMap)) + index := 0 + for key, _ := range unsortedMap { + sortedKeys[index] = key + index ++ + } + sort.Strings(sortedKeys) + + return +} + +func (tree *SyntaxTree) ToString (indent int) (output string) { + output += doIndent(indent, ":arf\n") + + if tree.author != "" { + output += doIndent(indent, "author \"", tree.author, "\"\n") + } + + if tree.license != "" { + output += doIndent(indent, "license \"", tree.license, "\"\n") + } + + for _, require := range tree.requires { + output += doIndent(indent, "require \"", require, "\"\n") + } + + output += doIndent(indent, "---\n") + + dataSectionKeys := sortMapKeysAlphabetically(tree.dataSections) + for _, name := range dataSectionKeys { + output += tree.dataSections[name].ToString(indent) + } + return +} + +func (identifier 
*Identifier) ToString () (output string) { + for index, trailItem := range identifier.trail { + if index > 0 { + output += "." + } + + output += trailItem + } + return +} + +func (what *Type) ToString () (output string) { + if what.kind == TypeKindBasic { + output += what.name.ToString() + } else { + output += "{" + output += what.points.ToString() + + if what.kind == TypeKindArray { + output += " " + if what.length == 0 { + output += ".." + } else { + output += fmt.Sprint(what.length) + } + } + + output += "}" + } + + if what.mutable { + output += ":mut" + } + + return +} + +func (declaration *Declaration) ToString () (output string) { + output += declaration.name + ":" + output += declaration.what.ToString() + return +} + +func (attributes *ObjectInitializationValues) ToString ( + indent int, +) ( + output string, +) { + for _, name := range sortMapKeysAlphabetically(attributes.attributes) { + value := attributes.attributes[name] + + output += doIndent(indent, ".", name, " ") + if value.kind == ArgumentKindObjectInitializationValues { + output += "\n" + output += value.ToString(indent + 1, true) + } else { + output += value.ToString(0, false) + "\n" + } + } + + return +} + +func (values *ArrayInitializationValues) ToString ( + indent int, +) ( + output string, +) { + for _, value := range values.values { + output += value.ToString(indent, true) + } + + return +} + +func (phrase *Phrase) ToString (indent int, breakLine bool) (output string) { + if breakLine { + output += doIndent ( + indent, + "[", phrase.command.ToString(0, false)) + output += "\n" + for _, argument := range phrase.arguments { + output += doIndent ( + indent, + argument.ToString(indent + 1, true)) + } + } else { + output += "[" + phrase.command.ToString(0, false) + for _, argument := range phrase.arguments { + output += " " + argument.ToString(0, false) + } + } + + output += "]" + + if len(phrase.returnsTo) > 0 { + output += " ->" + for _, returnItem := range phrase.returnsTo { + output += " " + 
returnItem.ToString(0, false) + } + } + + if breakLine { + output += "\n" + } + return +} + +func (argument *Argument) ToString (indent int, breakLine bool) (output string) { + if !breakLine { indent = 0 } + if argument.kind == ArgumentKindNil { + output += "NIL-ARGUMENT" + if breakLine { output += "\n" } + return + } + + switch argument.kind { + case ArgumentKindPhrase: + output += argument.value.(*Phrase).ToString ( + indent, + breakLine) + + case ArgumentKindObjectInitializationValues: + // this should only appear in contexts where breakLine is true + output += argument.value.(*ObjectInitializationValues). + ToString(indent) + + case ArgumentKindArrayInitializationValues: + // this should only appear in contexts where breakLine is true + output += argument.value.(*ArrayInitializationValues). + ToString(indent) + + case ArgumentKindIdentifier: + output += doIndent ( + indent, + argument.value.(*Identifier).ToString()) + if breakLine { output += "\n" } + + case ArgumentKindDeclaration: + output += doIndent ( + indent, + argument.value.(*Declaration).ToString()) + if breakLine { output += "\n" } + + case ArgumentKindInt, ArgumentKindUInt, ArgumentKindFloat: + output += doIndent(indent, fmt.Sprint(argument.value)) + if breakLine { output += "\n" } + + case ArgumentKindString: + output += doIndent ( + indent, + "\"" + argument.value.(string) + "\"") + if breakLine { output += "\n" } + + case ArgumentKindRune: + output += doIndent ( + indent, + "'" + string(argument.value.(rune)) + "'") + if breakLine { output += "\n" } + + case ArgumentKindOperator: + // TODO + // also when parsing this argument kind, don't do it in the + // argument parsing function. do it specifically when parsing a + // phrase command. 
+ } + + return +} + +func (section *DataSection) ToString (indent int) (output string) { + output += doIndent ( + indent, + "data ", + section.permission.ToString(), " ", + section.name, ":", + section.what.ToString()) + + isComplexInitialization := + section.value.kind == ArgumentKindObjectInitializationValues || + section.value.kind == ArgumentKindArrayInitializationValues + + if section.value.value == nil { + output += "\n" + } else if isComplexInitialization { + output += "\n" + output += section.value.ToString(indent + 1, true) + } else { + output += " " + section.value.ToString(0, false) + output += "\n" + } + return +} diff --git a/parser/tree.go b/parser/tree.go index 4b95d9d..9ef87e7 100644 --- a/parser/tree.go +++ b/parser/tree.go @@ -1,5 +1,8 @@ package parser +import "git.tebibyte.media/sashakoshka/arf/file" +import "git.tebibyte.media/sashakoshka/arf/types" + // SyntaxTree represents an abstract syntax tree. It covers an entire module. It // can be expected to be syntactically correct, but it might not be semantically // correct (because it has not been analyzed yet.) @@ -7,5 +10,152 @@ type SyntaxTree struct { license string author string - requires []string + requires []string + dataSections map[string] *DataSection +} + +// Identifier represents a chain of arguments separated by a dot. +type Identifier struct { + location file.Location + trail []string +} + +// TypeKind represents what kind of type a type is +type TypeKind int + +const ( + // TypeKindBasic either means it's a primitive, or it inherits from + // something. + TypeKindBasic TypeKind = iota + + // TypeKindPointer means it's a pointer + TypeKindPointer + + // TypeKindArray means it's an array. + TypeKindArray +) + +// Type represents a type specifier +type Type struct { + location file.Location + + mutable bool + kind TypeKind + + // only applicable for arrays. a value of zero means it has an + // undefined/dynamic length. + length uint64 + + // only applicable for basic. 
+ name Identifier + + // not applicable for basic. + points *Type +} + +// Declaration represents a variable declaration. +type Declaration struct { + location file.Location + name string + what Type +} + +// ObjectInitializationValues represents a list of object member initialization +// attributes. +type ObjectInitializationValues struct { + location file.Location + attributes map[string] Argument +} + +// ArrayInitializationValues represents a list of attributes initializing an +// array. +type ArrayInitializationValues struct { + location file.Location + values []Argument +} + +// Phrase represents a function call or operator. In ARF they are the same +// syntactical concept. +type Phrase struct { + location file.Location + command Argument + arguments []Argument + returnsTo []Argument +} + +// ArgumentKind specifies the type of thing the value of an argument should be +// cast to. +type ArgumentKind int + +const ( + ArgumentKindNil ArgumentKind = iota + + // [name argument] + // [name argument argument] + // etc... + ArgumentKindPhrase = iota + + // {name} + ArgumentKindDereference + + // {name 23} + ArgumentKindSubscript + + // .name value + // but like, a lot of them + ArgumentKindObjectInitializationValues + + // value value... + ArgumentKindArrayInitializationValues + + // name.name + // name.name.name + // etc... + ArgumentKindIdentifier + + // name:Type + // name:{Type} + // name:{Type ..} + // name:{Type 23} + // etc... + ArgumentKindDeclaration + + // -1337 + ArgumentKindInt + + // 1337 + ArgumentKindUInt + + // 0.44 + ArgumentKindFloat + + // "hello world" + ArgumentKindString + + // 'S' + ArgumentKindRune + + // + - * / etc... + // this is only used as a phrase command + ArgumentKindOperator +) + +// Argument represents a value that can be placed anywhere a value goes. This +// allows things like phrases being arguments to other phrases. 
+type Argument struct { + location file.Location + kind ArgumentKind + value any + // TODO: if there is an argument expansion operator its existence should + // be stored here in a boolean. +} + +// DataSection represents a global variable. +type DataSection struct { + location file.Location + name string + + what Type + value Argument + permission types.Permission } diff --git a/tests/lexer/all.arf b/tests/lexer/all.arf index 6b9f9c6..29834fe 100644 --- a/tests/lexer/all.arf +++ b/tests/lexer/all.arf @@ -1,3 +1,3 @@ :arf ---- rw -> -349820394 932748397 239485.37520 "hello world!\n" 'E' helloWorld:.[]{} +--- rw -> -349820394 932748397 239485.37520 "hello world!\n" 'E' helloWorld:.,..[]{} + - ++ -- * / @ ! % ~ < << > >> | || & && diff --git a/tests/lexer/numbers.arf b/tests/lexer/numbers.arf index 9e512ca..2cb6de4 100644 --- a/tests/lexer/numbers.arf +++ b/tests/lexer/numbers.arf @@ -1,4 +1,6 @@ :arf +0 +8 83628266 0b100111111000001000011101010 0x4Fc10Ea diff --git a/tests/parser/data/main.arf b/tests/parser/data/main.arf index d3419cd..8efeb61 100644 --- a/tests/parser/data/main.arf +++ b/tests/parser/data/main.arf @@ -3,25 +3,50 @@ data wr integer:Int 3202 +data wr mutInteger:Int:mut 3202 + data wr integerPointer:{Int} -# TODO: data wr integerPointer:{Int} [& integer] + +data wr mutIntegerPointer:{Int}:mut data wr integerArray16:{Int 16} -data wr integerArrayVariable:{Int ...} +data wr integerArrayVariable:{Int ..} data wr integerArrayInitialized:{Int 16} 3948 293 293049 948 912 340 0 2304 0 4785 92 +# TODO: reinstate these two after phrase parsing is implemented +# data wr integerPointerInit:{Int} [& integer] + +# data wr mutIntegerPointerInit:{Int}:mut [& integer] + data wr object:Obj - : this 324 - : that 2139 + .this 324 + .that 2139 data wr nestedObject:Obj - : this - : bird0 324 - : bird1 "hello world" - : that - : bird2 123.8439 - : bird3 9328.21348239 + .this + .bird0 324 + .bird1 "hello world" + .that + .bird2 123.8439 + .bird3 9328.21348239 + + +# func 
rr main + # --- + # # TODO: set should be a special case, checking under itself for object + # member initialization args. it should also check for args in general + # under there which should be treated as array initialization args. + # basically, under a set phrase, it should do the same checks that it + # does under a data section. + # + # [set object:Obj] + # .this 324 + # .that 2139 + # + # set object:Obj + # .this 324 + # .that 2139 diff --git a/types/permission.go b/types/permission.go index 4421058..cd24150 100644 --- a/types/permission.go +++ b/types/permission.go @@ -30,3 +30,19 @@ func PermissionFrom (data string) (permission Permission) { permission.External = ModeFrom(rune(data[1])) return } + +func (mode Mode) ToString () (output string) { + switch mode { + case ModeNone: output = "n" + case ModeRead: output = "r" + case ModeWrite: output = "w" + } + + return +} + +func (permission Permission) ToString () (output string) { + output += permission.Internal.ToString() + output += permission.External.ToString() + return +}