data-section #3
@ -29,6 +29,8 @@ These are some design goals that I have followed/am following:
|
||||
- Language syntax must have zero ambiguity
|
||||
- The compiler should not generate new functions or complex logic that the user
|
||||
has not written
|
||||
- One line at a time - the language's syntax should encourage writing code that
|
||||
flows vertically and not horizontally, with minimal nesting
|
||||
|
||||
## Planned features
|
||||
|
||||
|
@ -52,20 +52,30 @@ func (err Error) Error () (formattedMessage string) {
|
||||
|
||||
if err.width > 0 {
|
||||
// print erroneous line
|
||||
line := err.Location.file.lines[err.Location.row]
|
||||
formattedMessage +=
|
||||
err.Location.file.lines[err.Location.row] + "\n"
|
||||
|
||||
// position error marker
|
||||
var index int
|
||||
for index = 0; index < err.Location.column; index ++ {
|
||||
if line[index] == '\t' {
|
||||
formattedMessage += "\t"
|
||||
} else {
|
||||
formattedMessage += " "
|
||||
}
|
||||
}
|
||||
|
||||
// print an arrow with a tail spanning the width of the mistake
|
||||
columnCountdown := err.Location.column
|
||||
for columnCountdown > 1 {
|
||||
// TODO: for tabs, print out a teb instead.
|
||||
formattedMessage += " "
|
||||
columnCountdown --
|
||||
}
|
||||
for err.width > 1 {
|
||||
// TODO: for tabs, print out 8 of these instead.
|
||||
formattedMessage += "-"
|
||||
if line[index] == '\t' {
|
||||
formattedMessage += "--------"
|
||||
} else {
|
||||
formattedMessage += "-"
|
||||
}
|
||||
index ++
|
||||
}
|
||||
|
||||
formattedMessage += "^\n"
|
||||
}
|
||||
formattedMessage += err.message + "\n"
|
||||
|
@ -176,7 +176,17 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
|
||||
err = lexer.nextRune()
|
||||
case '.':
|
||||
token := lexer.newToken()
|
||||
err = lexer.nextRune()
|
||||
if err != nil { return }
|
||||
token.kind = TokenKindDot
|
||||
if lexer.char == '.' {
|
||||
token.kind = TokenKindElipsis
|
||||
err = lexer.nextRune()
|
||||
}
|
||||
lexer.addToken(token)
|
||||
case ',':
|
||||
token := lexer.newToken()
|
||||
token.kind = TokenKindComma
|
||||
lexer.addToken(token)
|
||||
err = lexer.nextRune()
|
||||
case '[':
|
||||
@ -200,15 +210,15 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
|
||||
lexer.addToken(token)
|
||||
err = lexer.nextRune()
|
||||
case '+':
|
||||
token := lexer.newToken()
|
||||
err = lexer.nextRune()
|
||||
if err != nil { return }
|
||||
token := lexer.newToken()
|
||||
token.kind = TokenKindPlus
|
||||
if lexer.char == '+' {
|
||||
token.kind = TokenKindIncrement
|
||||
err = lexer.nextRune()
|
||||
}
|
||||
lexer.addToken(token)
|
||||
err = lexer.nextRune()
|
||||
case '-':
|
||||
err = lexer.tokenizeDashBeginning()
|
||||
case '*':
|
||||
@ -242,45 +252,45 @@ func (lexer *LexingOperation) tokenizeSymbolBeginning () (err error) {
|
||||
lexer.addToken(token)
|
||||
err = lexer.nextRune()
|
||||
case '<':
|
||||
token := lexer.newToken()
|
||||
err = lexer.nextRune()
|
||||
if err != nil { return }
|
||||
token := lexer.newToken()
|
||||
token.kind = TokenKindLessThan
|
||||
if lexer.char == '<' {
|
||||
token.kind = TokenKindLShift
|
||||
err = lexer.nextRune()
|
||||
}
|
||||
lexer.addToken(token)
|
||||
err = lexer.nextRune()
|
||||
case '>':
|
||||
token := lexer.newToken()
|
||||
err = lexer.nextRune()
|
||||
if err != nil { return }
|
||||
token := lexer.newToken()
|
||||
token.kind = TokenKindGreaterThan
|
||||
if lexer.char == '>' {
|
||||
token.kind = TokenKindRShift
|
||||
err = lexer.nextRune()
|
||||
}
|
||||
lexer.addToken(token)
|
||||
err = lexer.nextRune()
|
||||
case '|':
|
||||
token := lexer.newToken()
|
||||
err = lexer.nextRune()
|
||||
if err != nil { return }
|
||||
token := lexer.newToken()
|
||||
token.kind = TokenKindBinaryOr
|
||||
if lexer.char == '|' {
|
||||
token.kind = TokenKindLogicalOr
|
||||
err = lexer.nextRune()
|
||||
}
|
||||
lexer.addToken(token)
|
||||
err = lexer.nextRune()
|
||||
case '&':
|
||||
token := lexer.newToken()
|
||||
err = lexer.nextRune()
|
||||
if err != nil { return }
|
||||
token := lexer.newToken()
|
||||
token.kind = TokenKindBinaryAnd
|
||||
if lexer.char == '&' {
|
||||
token.kind = TokenKindLogicalAnd
|
||||
err = lexer.nextRune()
|
||||
}
|
||||
lexer.addToken(token)
|
||||
err = lexer.nextRune()
|
||||
default:
|
||||
err = file.NewError (
|
||||
lexer.file.Location(1),
|
||||
|
@ -62,6 +62,8 @@ func TestTokenizeAll (test *testing.T) {
|
||||
Token { kind: TokenKindName, value: "helloWorld" },
|
||||
Token { kind: TokenKindColon },
|
||||
Token { kind: TokenKindDot },
|
||||
Token { kind: TokenKindComma },
|
||||
Token { kind: TokenKindElipsis },
|
||||
Token { kind: TokenKindLBracket },
|
||||
Token { kind: TokenKindRBracket },
|
||||
Token { kind: TokenKindLBrace },
|
||||
@ -91,6 +93,10 @@ func TestTokenizeAll (test *testing.T) {
|
||||
|
||||
func TestTokenizeNumbers (test *testing.T) {
|
||||
checkTokenSlice("../tests/lexer/numbers.arf", []Token {
|
||||
Token { kind: TokenKindUInt, value: uint64(0) },
|
||||
Token { kind: TokenKindNewline },
|
||||
Token { kind: TokenKindUInt, value: uint64(8) },
|
||||
Token { kind: TokenKindNewline },
|
||||
Token { kind: TokenKindUInt, value: uint64(83628266) },
|
||||
Token { kind: TokenKindNewline },
|
||||
Token { kind: TokenKindUInt, value: uint64(83628266) },
|
||||
|
@ -1,7 +1,5 @@
|
||||
package lexer
|
||||
|
||||
import "git.tebibyte.media/sashakoshka/arf/file"
|
||||
|
||||
// tokenizeSymbolBeginning lexes a token that starts with a number.
|
||||
func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error) {
|
||||
var number uint64
|
||||
@ -23,11 +21,6 @@ func (lexer *LexingOperation) tokenizeNumberBeginning (negative bool) (err error
|
||||
number, fragment, isFloat, err = lexer.tokenizeNumber(10)
|
||||
} else if lexer.char >= '0' && lexer.char <= '9' {
|
||||
number, fragment, isFloat, err = lexer.tokenizeNumber(8)
|
||||
} else {
|
||||
return file.NewError (
|
||||
lexer.file.Location(1),
|
||||
"unexpected character in number literal",
|
||||
file.ErrorKindError)
|
||||
}
|
||||
} else {
|
||||
number, fragment, isFloat, err = lexer.tokenizeNumber(10)
|
||||
|
@ -24,6 +24,8 @@ const (
|
||||
|
||||
TokenKindColon
|
||||
TokenKindDot
|
||||
TokenKindElipsis
|
||||
TokenKindComma
|
||||
|
||||
TokenKindLBracket
|
||||
TokenKindRBracket
|
||||
@ -133,6 +135,10 @@ func (tokenKind TokenKind) Describe () (description string) {
|
||||
description = "Colon"
|
||||
case TokenKindDot:
|
||||
description = "Dot"
|
||||
case TokenKindElipsis:
|
||||
description = "Elipsis"
|
||||
case TokenKindComma:
|
||||
description = "Comma"
|
||||
case TokenKindLBracket:
|
||||
description = "LBracket"
|
||||
case TokenKindRBracket:
|
||||
|
91
parser/argument.go
Normal file
91
parser/argument.go
Normal file
@ -0,0 +1,91 @@
|
||||
package parser
|
||||
|
||||
import "git.tebibyte.media/sashakoshka/arf/file"
|
||||
import "git.tebibyte.media/sashakoshka/arf/lexer"
|
||||
|
||||
var validArgumentStartTokens = []lexer.TokenKind {
|
||||
lexer.TokenKindName,
|
||||
|
||||
lexer.TokenKindInt,
|
||||
lexer.TokenKindUInt,
|
||||
lexer.TokenKindFloat,
|
||||
lexer.TokenKindString,
|
||||
lexer.TokenKindRune,
|
||||
|
||||
lexer.TokenKindLBrace,
|
||||
lexer.TokenKindLBracket,
|
||||
}
|
||||
|
||||
func (parser *ParsingOperation) parseArgument () (argument Argument, err error) {
|
||||
argument.location = parser.token.Location()
|
||||
|
||||
err = parser.expect(validArgumentStartTokens...)
|
||||
if err != nil { return }
|
||||
|
||||
switch parser.token.Kind() {
|
||||
case lexer.TokenKindName:
|
||||
var identifier Identifier
|
||||
identifier, err = parser.parseIdentifier()
|
||||
if err != nil { return }
|
||||
|
||||
if parser.token.Is(lexer.TokenKindColon) {
|
||||
var what Type
|
||||
what, err = parser.parseType()
|
||||
if err != nil { return }
|
||||
|
||||
if len(identifier.trail) != 1 {
|
||||
err = parser.token.NewError (
|
||||
"cannot use member selection in " +
|
||||
"a variable definition",
|
||||
file.ErrorKindError)
|
||||
return
|
||||
}
|
||||
|
||||
argument.kind = ArgumentKindDeclaration
|
||||
argument.value = Declaration {
|
||||
location: argument.location,
|
||||
name: identifier.trail[0],
|
||||
what: what,
|
||||
}
|
||||
} else {
|
||||
argument.kind = ArgumentKindIdentifier
|
||||
argument.value = identifier
|
||||
}
|
||||
|
||||
case lexer.TokenKindInt:
|
||||
argument.kind = ArgumentKindInt
|
||||
argument.value = parser.token.Value().(int64)
|
||||
err = parser.nextToken()
|
||||
|
||||
case lexer.TokenKindUInt:
|
||||
argument.kind = ArgumentKindUInt
|
||||
argument.value = parser.token.Value().(uint64)
|
||||
err = parser.nextToken()
|
||||
|
||||
case lexer.TokenKindFloat:
|
||||
argument.kind = ArgumentKindFloat
|
||||
argument.value = parser.token.Value().(float64)
|
||||
err = parser.nextToken()
|
||||
|
||||
case lexer.TokenKindString:
|
||||
argument.kind = ArgumentKindString
|
||||
argument.value = parser.token.Value().(string)
|
||||
parser.nextToken()
|
||||
|
||||
case lexer.TokenKindRune:
|
||||
argument.kind = ArgumentKindRune
|
||||
argument.value = parser.token.Value().(rune)
|
||||
parser.nextToken()
|
||||
|
||||
// case lexer.TokenKindLBrace:
|
||||
|
||||
// case lexer.TokenKindLBracket:
|
||||
|
||||
default:
|
||||
panic (
|
||||
"unimplemented argument kind " +
|
||||
parser.token.Kind().Describe())
|
||||
}
|
||||
|
||||
return
|
||||
}
|
@ -1,18 +1,34 @@
|
||||
package parser
|
||||
|
||||
import "git.tebibyte.media/sashakoshka/arf/file"
|
||||
import "git.tebibyte.media/sashakoshka/arf/lexer"
|
||||
|
||||
// parse body parses the body of an arf file, after the metadata header.
|
||||
func (parser *ParsingOperation) parseBody () (err error) {
|
||||
err = parser.nextToken(lexer.TokenKindName)
|
||||
if err != nil { return }
|
||||
for {
|
||||
err = parser.expect(lexer.TokenKindName)
|
||||
if err != nil { return }
|
||||
|
||||
switch parser.token.Value().(string) {
|
||||
case "data":
|
||||
case "type":
|
||||
case "func":
|
||||
case "face":
|
||||
sectionType := parser.token.Value().(string)
|
||||
switch sectionType {
|
||||
case "data":
|
||||
var section *DataSection
|
||||
section, err = parser.parseDataSection()
|
||||
if parser.tree.dataSections == nil {
|
||||
parser.tree.dataSections =
|
||||
make(map[string] *DataSection)
|
||||
}
|
||||
parser.tree.dataSections[section.name] = section
|
||||
if err != nil { return }
|
||||
case "type":
|
||||
case "face":
|
||||
case "enum":
|
||||
case "func":
|
||||
default:
|
||||
err = parser.token.NewError (
|
||||
"unknown section type \"" + sectionType + "\"",
|
||||
file.ErrorKindError)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
310
parser/data.go
310
parser/data.go
@ -1,6 +1,312 @@
|
||||
package parser
|
||||
|
||||
// parseData parses a data section
|
||||
func (parser *ParsingOperation) parseData () (err error) {
|
||||
import "git.tebibyte.media/sashakoshka/arf/file"
|
||||
import "git.tebibyte.media/sashakoshka/arf/types"
|
||||
import "git.tebibyte.media/sashakoshka/arf/lexer"
|
||||
|
||||
// parseDataSection parses a data section of the form
//
//	data <permission> <name>:<Type> [inline value]
//
// optionally followed by indented initialization values on the lines below.
// The parser must be positioned on the "data" keyword, and is left at the
// start of the next line.
func (parser *ParsingOperation) parseDataSection () (
	section *DataSection,
	err error,
) {
	err = parser.expect(lexer.TokenKindName)
	if err != nil { return }

	section = &DataSection { location: parser.token.Location() }

	// permission (e.g. wr) follows the section keyword
	err = parser.nextToken(lexer.TokenKindPermission)
	if err != nil { return }
	section.permission = parser.token.Value().(types.Permission)

	// the variable's name
	err = parser.nextToken(lexer.TokenKindName)
	if err != nil { return }
	section.name = parser.token.Value().(string)

	// a colon, then the type specifier
	err = parser.nextToken(lexer.TokenKindColon)
	if err != nil { return }
	err = parser.nextToken()
	if err != nil { return }
	section.what, err = parser.parseType()
	if err != nil { return }

	if parser.token.Is(lexer.TokenKindNewline) {
		// no inline value; look for indented initialization values
		// on the following lines
		err = parser.nextToken()
		if err != nil { return }

		section.value, err = parser.parseInitializationValues(0)
		if err != nil { return }
	} else {
		// an inline value on the same line, terminated by a newline
		section.value, err = parser.parseArgument()
		if err != nil { return }

		err = parser.expect(lexer.TokenKindNewline)
		if err != nil { return }
		err = parser.nextToken()
		if err != nil { return }
	}
	return
}
|
||||
|
||||
// parseInitializationValues starts on the line after a data section, or a set
// phrase. It checks for an indent greater than the indent of the aforementioned
// data section or set phrase (passed through baseIndent), and if there is,
// it parses initialization values. If there is no further indentation, a
// zero-value (ArgumentKindNil) argument is returned.
func (parser *ParsingOperation) parseInitializationValues (
	baseIndent int,
) (
	initializationArgument Argument,
	err error,
) {
	// check if line is indented one more than baseIndent
	if !parser.token.Is(lexer.TokenKindIndent) { return }
	if parser.token.Value().(int) != baseIndent + 1 { return }

	initializationArgument.location = parser.token.Location()

	// peek one token past the indent to decide which kind of
	// initialization this is, then step back so the dedicated parser sees
	// the indent token again
	err = parser.nextToken()
	if err != nil { return }

	if parser.token.Is(lexer.TokenKindDot) {

		// a leading dot means object member initialization
		parser.previousToken()
		var initializationValues ObjectInitializationValues
		initializationValues, err = parser.parseObjectInitializationValues()
		initializationArgument.kind = ArgumentKindObjectInitializationValues
		initializationArgument.value = &initializationValues

	} else {

		// anything else means array initialization
		parser.previousToken()
		var initializationValues ArrayInitializationValues
		initializationValues, err = parser.parseArrayInitializationValues()
		initializationArgument.kind = ArgumentKindArrayInitializationValues
		initializationArgument.value = &initializationValues
	}

	return
}
|
||||
|
||||
// parseObjectInitializationValues parses a list of object initialization
|
||||
// values until the indentation level drops.
|
||||
func (parser *ParsingOperation) parseObjectInitializationValues () (
|
||||
initializationValues ObjectInitializationValues,
|
||||
err error,
|
||||
) {
|
||||
println("BEGIN")
|
||||
defer println("END")
|
||||
|
||||
initializationValues.attributes = make(map[string] Argument)
|
||||
|
||||
baseIndent := 0
|
||||
begin := true
|
||||
|
||||
for {
|
||||
// if there is no indent we can just stop parsing
|
||||
if !parser.token.Is(lexer.TokenKindIndent) { break}
|
||||
indent := parser.token.Value().(int)
|
||||
|
||||
if begin == true {
|
||||
initializationValues.location = parser.token.Location()
|
||||
baseIndent = indent
|
||||
begin = false
|
||||
}
|
||||
|
||||
// do not parse any further if the indent has changed
|
||||
if indent != baseIndent { break }
|
||||
|
||||
println("HIT")
|
||||
|
||||
// move on to the beginning of the line, which must contain
|
||||
// a member initialization value
|
||||
err = parser.nextToken(lexer.TokenKindDot)
|
||||
if err != nil { return }
|
||||
err = parser.nextToken(lexer.TokenKindName)
|
||||
if err != nil { return }
|
||||
name := parser.token.Value().(string)
|
||||
|
||||
// if the member has already been listed, throw an error
|
||||
_, exists := initializationValues.attributes[name]
|
||||
if exists {
|
||||
err = parser.token.NewError (
|
||||
"duplicate member \"" + name + "\" in object " +
|
||||
"member initialization",
|
||||
file.ErrorKindError)
|
||||
return
|
||||
}
|
||||
|
||||
// parse the argument determining the member initialization
|
||||
// value
|
||||
err = parser.nextToken()
|
||||
if err != nil { return }
|
||||
var value Argument
|
||||
if parser.token.Is(lexer.TokenKindNewline) {
|
||||
|
||||
// recurse
|
||||
err = parser.nextToken(lexer.TokenKindIndent)
|
||||
if err != nil { return }
|
||||
|
||||
value, err = parser.parseInitializationValues(baseIndent)
|
||||
initializationValues.attributes[name] = value
|
||||
if err != nil { return }
|
||||
|
||||
} else {
|
||||
|
||||
// parse as normal argument
|
||||
value, err = parser.parseArgument()
|
||||
initializationValues.attributes[name] = value
|
||||
if err != nil { return }
|
||||
|
||||
err = parser.expect(lexer.TokenKindNewline)
|
||||
if err != nil { return }
|
||||
err = parser.nextToken()
|
||||
if err != nil { return }
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// parseArrayInitializationValues parses a list of array initialization values
// until the indentation level drops. Each indented line may contain several
// whitespace separated values.
func (parser *ParsingOperation) parseArrayInitializationValues () (
	initializationValues ArrayInitializationValues,
	err error,
) {
	// the first indented line establishes the base indent for the whole
	// list
	baseIndent := 0
	begin := true

	for {
		// if there is no indent we can just stop parsing
		if !parser.token.Is(lexer.TokenKindIndent) { break}
		indent := parser.token.Value().(int)

		if begin == true {
			initializationValues.location = parser.token.Location()
			baseIndent = indent
			begin = false
		}

		// do not parse any further if the indent has changed
		if indent != baseIndent { break }

		// move on to the beginning of the line, which must contain
		// arguments
		err = parser.nextToken(validArgumentStartTokens...)
		if err != nil { return }

		for {
			// stop parsing this line and go on to the next if a
			// newline token is encountered
			if parser.token.Is(lexer.TokenKindNewline) {
				err = parser.nextToken()
				if err != nil { return }
				break
			}

			// otherwise, parse the argument
			var argument Argument
			argument, err = parser.parseArgument()
			if err != nil { return }
			initializationValues.values = append (
				initializationValues.values,
				argument)
		}
	}

	return
}
|
||||
|
||||
// parseType parses a type notation: a plain identifier for basic types, {T}
// for pointers, {T N} for fixed-length arrays, {T ..} for dynamic arrays,
// optionally followed by a :mut qualifier. The parser is left on the token
// directly after the type.
func (parser *ParsingOperation) parseType () (what Type, err error) {
	err = parser.expect(lexer.TokenKindName, lexer.TokenKindLBrace)
	if err != nil { return }
	what.location = parser.token.Location()

	if parser.token.Is(lexer.TokenKindLBrace) {
		// assume a pointer until a length or elipsis proves otherwise
		what.kind = TypeKindPointer

		err = parser.nextToken()
		if err != nil { return }

		// the pointed-to type may itself be a braced type, so recurse
		var points Type
		points, err = parser.parseType()
		if err != nil { return }
		what.points = &points

		err = parser.expect (
			lexer.TokenKindUInt,
			lexer.TokenKindRBrace,
			lexer.TokenKindElipsis)
		if err != nil { return }

		if parser.token.Is(lexer.TokenKindUInt) {
			// {T N}: fixed-length array
			what.kind = TypeKindArray

			what.length = parser.token.Value().(uint64)

			err = parser.nextToken(lexer.TokenKindRBrace)
			if err != nil { return }
		} else if parser.token.Is(lexer.TokenKindElipsis) {
			// {T ..}: dynamic-length array (length stays zero)
			what.kind = TypeKindArray

			err = parser.nextToken(lexer.TokenKindRBrace)
			if err != nil { return }
		}

		// consume the closing brace
		err = parser.nextToken()
		if err != nil { return }
	} else {
		what.name, err = parser.parseIdentifier()
		if err != nil { return }
	}

	// optional qualifier after a colon; "mut" is the only one so far
	if parser.token.Is(lexer.TokenKindColon) {
		err = parser.nextToken(lexer.TokenKindName)
		if err != nil { return }

		qualifier := parser.token.Value().(string)
		switch qualifier {
		case "mut":
			what.mutable = true
		default:
			err = parser.token.NewError (
				"unknown type qualifier \"" + qualifier + "\"",
				file.ErrorKindError)
			return
		}

		err = parser.nextToken()
		if err != nil { return }
	}

	return
}
|
||||
|
||||
// parseIdentifier parses an identifier made out of dot separated names.
|
||||
func (parser *ParsingOperation) parseIdentifier () (
|
||||
identifier Identifier,
|
||||
err error,
|
||||
) {
|
||||
err = parser.expect(lexer.TokenKindName)
|
||||
if err != nil { return }
|
||||
identifier.location = parser.token.Location()
|
||||
|
||||
for {
|
||||
// TODO: eat up newlines and tabs after the dot, but not before
|
||||
// it.
|
||||
if !parser.token.Is(lexer.TokenKindName) { break }
|
||||
|
||||
identifier.trail = append (
|
||||
identifier.trail,
|
||||
parser.token.Value().(string))
|
||||
|
||||
err = parser.nextToken()
|
||||
if err != nil { return }
|
||||
|
||||
if !parser.token.Is(lexer.TokenKindDot) { break }
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
@ -111,3 +111,12 @@ func (parser *ParsingOperation) nextToken (allowed ...lexer.TokenKind) (err erro
|
||||
err = parser.expect(allowed...)
|
||||
return
|
||||
}
|
||||
|
||||
// previousToken goes back one token. If the parser is already at the beginning,
|
||||
// this does nothing.
|
||||
func (parser *ParsingOperation) previousToken () {
|
||||
parser.tokenIndex --
|
||||
if parser.tokenIndex < 0 { parser.tokenIndex = 0 }
|
||||
parser.token = parser.tokens[parser.tokenIndex]
|
||||
return
|
||||
}
|
||||
|
@ -1,33 +1,75 @@
|
||||
package parser
|
||||
|
||||
import "reflect"
|
||||
import "io"
|
||||
import "testing"
|
||||
// import "git.tebibyte.media/sashakoshka/arf/types"
|
||||
|
||||
func checkTree (modulePath string, correct *SyntaxTree, test *testing.T) {
|
||||
func checkTree (modulePath string, correct string, test *testing.T) {
|
||||
tree, err := Parse(modulePath)
|
||||
treeString := tree.ToString(0)
|
||||
|
||||
if err != nil {
|
||||
test.Log("CORRECT TREE:")
|
||||
test.Log(correct)
|
||||
test.Log("WHAT WAS PARSED:")
|
||||
test.Log(treeString)
|
||||
|
||||
if err != io.EOF && err != nil {
|
||||
test.Log("returned error:")
|
||||
test.Log(err.Error())
|
||||
test.Fail()
|
||||
return
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(tree, correct) {
|
||||
test.Log("trees not equal")
|
||||
if treeString != correct {
|
||||
test.Log("trees not equal!")
|
||||
test.Fail()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func TestMeta (test *testing.T) {
|
||||
checkTree("../tests/parser/meta",&SyntaxTree {
|
||||
license: "GPLv3",
|
||||
author: "Sasha Koshka",
|
||||
|
||||
requires: []string {
|
||||
"someModule",
|
||||
"otherModule",
|
||||
},
|
||||
}, test)
|
||||
checkTree ("../tests/parser/meta",
|
||||
`:arf
|
||||
author "Sasha Koshka"
|
||||
license "GPLv3"
|
||||
require "someModule"
|
||||
require "otherModule"
|
||||
---
|
||||
`, test)
|
||||
}
|
||||
|
||||
// TestData checks that data sections of all supported shapes (inline values,
// array and object initialization values, pointer/array types, and the mut
// qualifier) are parsed and printed back in canonical form.
func TestData (test *testing.T) {
	checkTree ("../tests/parser/data",
`:arf
---
data wr integer:Int 3202
data wr integerArray16:{Int 16}
data wr integerArrayInitialized:{Int 16}
	3948
	293
	293049
	948
	912
	340
	0
	2304
	0
	4785
	92
data wr integerArrayVariable:{Int ..}
data wr integerPointer:{Int}
data wr mutInteger:Int:mut 3202
data wr mutIntegerPointer:{Int}:mut
data wr nestedObject:Obj
	.that
		.bird2 123.8439
		.bird3 9328.21348239
	.this
		.bird0 324
		.bird1 "hello world"
data wr object:Obj
	.that 2139
	.this 324
`, test)
}
||||
|
||||
|
248
parser/tree-tostring.go
Normal file
248
parser/tree-tostring.go
Normal file
@ -0,0 +1,248 @@
|
||||
package parser
|
||||
|
||||
import "fmt"
|
||||
import "sort"
|
||||
|
||||
// doIndent returns all input strings concatenated together, preceded by
// indent tab characters.
func doIndent (indent int, input ...string) (output string) {
	for tab := 0; tab < indent; tab ++ {
		output += "\t"
	}
	for _, piece := range input {
		output += piece
	}
	return
}
|
||||
|
||||
// sortMapKeysAlphabetically returns the keys of unsortedMap in ascending
// alphabetical order. The type parameter is the map's value type (it was
// previously misnamed KEY_TYPE).
func sortMapKeysAlphabetically[VALUE_TYPE any] (
	unsortedMap map[string] VALUE_TYPE,
) (
	sortedKeys []string,
) {
	sortedKeys = make([]string, 0, len(unsortedMap))
	for key := range unsortedMap {
		sortedKeys = append(sortedKeys, key)
	}
	sort.Strings(sortedKeys)

	return
}
|
||||
|
||||
// ToString returns the tree as a string in canonical arf notation, indented
// by indent tabs. Data sections are printed in alphabetical order so the
// output is deterministic.
func (tree *SyntaxTree) ToString (indent int) (output string) {
	output += doIndent(indent, ":arf\n")

	// metadata header fields are omitted when empty
	if tree.author != "" {
		output += doIndent(indent, "author \"", tree.author, "\"\n")
	}

	if tree.license != "" {
		output += doIndent(indent, "license \"", tree.license, "\"\n")
	}

	for _, require := range tree.requires {
		output += doIndent(indent, "require \"", require, "\"\n")
	}

	output += doIndent(indent, "---\n")

	dataSectionKeys := sortMapKeysAlphabetically(tree.dataSections)
	for _, name := range dataSectionKeys {
		output += tree.dataSections[name].ToString(indent)
	}
	return
}
|
||||
|
||||
func (identifier *Identifier) ToString () (output string) {
|
||||
for index, trailItem := range identifier.trail {
|
||||
if index > 0 {
|
||||
output += "."
|
||||
}
|
||||
|
||||
output += trailItem
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// ToString returns the type specifier in arf notation: a plain identifier
// for basic types, {T} for pointers, {T N} for fixed-length arrays, and
// {T ..} for dynamic arrays, with a trailing :mut when the type is mutable.
func (what *Type) ToString () (output string) {
	if what.kind == TypeKindBasic {
		output += what.name.ToString()
	} else {
		output += "{"
		output += what.points.ToString()

		if what.kind == TypeKindArray {
			output += " "
			// a length of zero means the array has a
			// dynamic length
			if what.length == 0 {
				output += ".."
			} else {
				output += fmt.Sprint(what.length)
			}
		}

		output += "}"
	}

	if what.mutable {
		output += ":mut"
	}

	return
}
|
||||
|
||||
func (declaration *Declaration) ToString () (output string) {
|
||||
output += declaration.name + ":"
|
||||
output += declaration.what.ToString()
|
||||
return
|
||||
}
|
||||
|
||||
// ToString returns the member initialization values in arf notation, one
// ".name value" line per member, indented by indent tabs. Members are printed
// in alphabetical order so the output is deterministic.
func (attributes *ObjectInitializationValues) ToString (
	indent int,
) (
	output string,
) {
	for _, name := range sortMapKeysAlphabetically(attributes.attributes) {
		value := attributes.attributes[name]

		output += doIndent(indent, ".", name, " ")
		// nested object values continue on the following lines; all
		// other values are printed inline after the member name
		if value.kind == ArgumentKindObjectInitializationValues {
			output += "\n"
			output += value.ToString(indent + 1, true)
		} else {
			output += value.ToString(0, false) + "\n"
		}
	}

	return
}
|
||||
|
||||
func (values *ArrayInitializationValues) ToString (
|
||||
indent int,
|
||||
) (
|
||||
output string,
|
||||
) {
|
||||
for _, value := range values.values {
|
||||
output += value.ToString(indent, true)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// ToString returns the phrase in arf notation: [command arguments...] with an
// optional -> return list. If breakLine is true, the command is printed on
// its own indented line with one argument per line below it.
func (phrase *Phrase) ToString (indent int, breakLine bool) (output string) {
	if breakLine {
		output += doIndent (
			indent,
			"[", phrase.command.ToString(0, false))
		output += "\n"
		for _, argument := range phrase.arguments {
			// NOTE(review): doIndent prepends indent tabs and the
			// argument also indents itself by indent + 1 — confirm
			// the doubled indentation here is intended
			output += doIndent (
				indent,
				argument.ToString(indent + 1, true))
		}
	} else {
		output += "[" + phrase.command.ToString(0, false)
		for _, argument := range phrase.arguments {
			output += " " + argument.ToString(0, false)
		}
	}

	output += "]"

	if len(phrase.returnsTo) > 0 {
		output += " ->"
		for _, returnItem := range phrase.returnsTo {
			output += " " + returnItem.ToString(0, false)
		}
	}

	if breakLine {
		output += "\n"
	}
	return
}
|
||||
|
||||
// ToString returns the argument in arf notation. If breakLine is true, the
// output is indented by indent tabs and terminated with a line break;
// otherwise indent is ignored. Note that identifier and declaration values
// are stored as pointers, so the type assertions below expect *Identifier
// and *Declaration.
func (argument *Argument) ToString (indent int, breakLine bool) (output string) {
	if !breakLine { indent = 0 }
	// an unset argument is printed as a visible placeholder rather than
	// silently producing nothing
	if argument.kind == ArgumentKindNil {
		output += "NIL-ARGUMENT"
		if breakLine { output += "\n" }
		return
	}

	switch argument.kind {
	case ArgumentKindPhrase:
		output += argument.value.(*Phrase).ToString (
			indent,
			breakLine)

	case ArgumentKindObjectInitializationValues:
		// this should only appear in contexts where breakLine is true
		output += argument.value.(*ObjectInitializationValues).
			ToString(indent)

	case ArgumentKindArrayInitializationValues:
		// this should only appear in contexts where breakLine is true
		output += argument.value.(*ArrayInitializationValues).
			ToString(indent)

	case ArgumentKindIdentifier:
		output += doIndent (
			indent,
			argument.value.(*Identifier).ToString())
		if breakLine { output += "\n" }

	case ArgumentKindDeclaration:
		output += doIndent (
			indent,
			argument.value.(*Declaration).ToString())
		if breakLine { output += "\n" }

	case ArgumentKindInt, ArgumentKindUInt, ArgumentKindFloat:
		output += doIndent(indent, fmt.Sprint(argument.value))
		if breakLine { output += "\n" }

	case ArgumentKindString:
		output += doIndent (
			indent,
			"\"" + argument.value.(string) + "\"")
		if breakLine { output += "\n" }

	case ArgumentKindRune:
		output += doIndent (
			indent,
			"'" + string(argument.value.(rune)) + "'")
		if breakLine { output += "\n" }

	case ArgumentKindOperator:
		// TODO
		// also when parsing this argument kind, don't do it in the
		// argument parsing function. do it specifically when parsing a
		// phrase command.
	}

	return
}
|
||||
|
||||
// ToString returns the data section in arf notation, indented by indent tabs.
// Simple values are printed inline on the same line; object and array
// initialization values are printed indented on the following lines.
func (section *DataSection) ToString (indent int) (output string) {
	output += doIndent (
		indent,
		"data ",
		section.permission.ToString(), " ",
		section.name, ":",
		section.what.ToString())

	isComplexInitialization :=
		section.value.kind == ArgumentKindObjectInitializationValues ||
		section.value.kind == ArgumentKindArrayInitializationValues

	if section.value.value == nil {
		// no initialization value at all
		output += "\n"
	} else if isComplexInitialization {
		output += "\n"
		output += section.value.ToString(indent + 1, true)
	} else {
		output += " " + section.value.ToString(0, false)
		output += "\n"
	}
	return
}
|
152
parser/tree.go
152
parser/tree.go
@ -1,5 +1,8 @@
|
||||
package parser
|
||||
|
||||
import "git.tebibyte.media/sashakoshka/arf/file"
|
||||
import "git.tebibyte.media/sashakoshka/arf/types"
|
||||
|
||||
// SyntaxTree represents an abstract syntax tree. It covers an entire module. It
|
||||
// can be expected to be syntactically correct, but it might not be semantically
|
||||
// correct (because it has not been analyzed yet.)
|
||||
@ -7,5 +10,152 @@ type SyntaxTree struct {
|
||||
license string
|
||||
author string
|
||||
|
||||
requires []string
|
||||
requires []string
|
||||
dataSections map[string] *DataSection
|
||||
}
|
||||
|
||||
// Identifier represents a chain of names separated by a dot, such as
// "name.member.member".
type Identifier struct {
	location file.Location
	trail []string
}

// TypeKind represents what kind of type a type is.
type TypeKind int

const (
	// TypeKindBasic either means it's a primitive, or it inherits from
	// something.
	TypeKindBasic TypeKind = iota

	// TypeKindPointer means it's a pointer.
	TypeKindPointer

	// TypeKindArray means it's an array.
	TypeKindArray
)

// Type represents a type specifier, such as Int, {Int}, or {Int 16}:mut.
type Type struct {
	location file.Location

	mutable bool
	kind TypeKind

	// only applicable for arrays. a value of zero means it has an
	// undefined/dynamic length.
	length uint64

	// only applicable for basic.
	name Identifier

	// not applicable for basic.
	points *Type
}

// Declaration represents a variable declaration of the form name:Type.
type Declaration struct {
	location file.Location
	name string
	what Type
}

// ObjectInitializationValues represents a list of object member initialization
// attributes, keyed by member name.
type ObjectInitializationValues struct {
	location file.Location
	attributes map[string] Argument
}

// ArrayInitializationValues represents a list of attributes initializing an
// array.
type ArrayInitializationValues struct {
	location file.Location
	values []Argument
}

// Phrase represents a function call or operator. In ARF they are the same
// syntactical concept.
type Phrase struct {
	location file.Location
	command Argument
	arguments []Argument
	returnsTo []Argument
}
|
||||
|
||||
// ArgumentKind specifies the type of thing the value of an argument should be
|
||||
// cast to.
|
||||
type ArgumentKind int
|
||||
|
||||
const (
|
||||
ArgumentKindNil ArgumentKind = iota
|
||||
|
||||
// [name argument]
|
||||
// [name argument argument]
|
||||
// etc...
|
||||
ArgumentKindPhrase = iota
|
||||
|
||||
// {name}
|
||||
ArgumentKindDereference
|
||||
|
||||
// {name 23}
|
||||
ArgumentKindSubscript
|
||||
|
||||
// .name value
|
||||
// but like, a lot of them
|
||||
ArgumentKindObjectInitializationValues
|
||||
|
||||
// value value...
|
||||
ArgumentKindArrayInitializationValues
|
||||
|
||||
// name.name
|
||||
// name.name.name
|
||||
// etc...
|
||||
ArgumentKindIdentifier
|
||||
|
||||
// name:Type
|
||||
// name:{Type}
|
||||
// name:{Type ..}
|
||||
// name:{Type 23}
|
||||
// etc...
|
||||
ArgumentKindDeclaration
|
||||
|
||||
// -1337
|
||||
ArgumentKindInt
|
||||
|
||||
// 1337
|
||||
ArgumentKindUInt
|
||||
|
||||
// 0.44
|
||||
ArgumentKindFloat
|
||||
|
||||
// "hello world"
|
||||
ArgumentKindString
|
||||
|
||||
// 'S'
|
||||
ArgumentKindRune
|
||||
|
||||
// + - * / etc...
|
||||
// this is only used as a phrase command
|
||||
ArgumentKindOperator
|
||||
)
|
||||
|
||||
// Argument represents a value that can be placed anywhere a value goes. This
|
||||
// allows things like phrases being arguments to other phrases.
|
||||
type Argument struct {
|
||||
location file.Location
|
||||
kind ArgumentKind
|
||||
value any
|
||||
// TODO: if there is an argument expansion operator its existence should
|
||||
// be stored here in a boolean.
|
||||
}
|
||||
|
||||
// DataSection represents a global variable.
|
||||
type DataSection struct {
|
||||
location file.Location
|
||||
name string
|
||||
|
||||
what Type
|
||||
value Argument
|
||||
permission types.Permission
|
||||
}
|
||||
|
@ -1,3 +1,3 @@
|
||||
:arf
|
||||
--- rw -> -349820394 932748397 239485.37520 "hello world!\n" 'E' helloWorld:.[]{}
|
||||
--- rw -> -349820394 932748397 239485.37520 "hello world!\n" 'E' helloWorld:.,..[]{}
|
||||
+ - ++ -- * / @ ! % ~ < << > >> | || & &&
|
||||
|
@ -1,4 +1,6 @@
|
||||
:arf
|
||||
0
|
||||
8
|
||||
83628266
|
||||
0b100111111000001000011101010
|
||||
0x4Fc10Ea
|
||||
|
@ -3,25 +3,50 @@
|
||||
|
||||
data wr integer:Int 3202
|
||||
|
||||
data wr mutInteger:Int:mut 3202
|
||||
|
||||
data wr integerPointer:{Int}
|
||||
# TODO: data wr integerPointer:{Int} [& integer]
|
||||
|
||||
data wr mutIntegerPointer:{Int}:mut
|
||||
|
||||
data wr integerArray16:{Int 16}
|
||||
|
||||
data wr integerArrayVariable:{Int ...}
|
||||
data wr integerArrayVariable:{Int ..}
|
||||
|
||||
data wr integerArrayInitialized:{Int 16}
|
||||
3948 293 293049 948 912
|
||||
340 0 2304 0 4785 92
|
||||
|
||||
# TODO: reinstate these two after phrase parsing is implemented
|
||||
# data wr integerPointerInit:{Int} [& integer]
|
||||
|
||||
# data wr mutIntegerPointerInit:{Int}:mut [& integer]
|
||||
|
||||
data wr object:Obj
|
||||
: this 324
|
||||
: that 2139
|
||||
.this 324
|
||||
.that 2139
|
||||
|
||||
data wr nestedObject:Obj
|
||||
: this
|
||||
: bird0 324
|
||||
: bird1 "hello world"
|
||||
: that
|
||||
: bird2 123.8439
|
||||
: bird3 9328.21348239
|
||||
.this
|
||||
.bird0 324
|
||||
.bird1 "hello world"
|
||||
.that
|
||||
.bird2 123.8439
|
||||
.bird3 9328.21348239
|
||||
|
||||
|
||||
# func rr main
|
||||
# ---
|
||||
# # TODO: set should be a special case, checking under itself for object
|
||||
# member initialization args. it should also check for args in general
|
||||
# under there which should be treated as array initialization args.
|
||||
# basically, under a set phrase, it should do the same checks that it
|
||||
# does under a data section.
|
||||
#
|
||||
# [set object:Obj]
|
||||
# .this 324
|
||||
# .that 2139
|
||||
#
|
||||
# set object:Obj
|
||||
# .this 324
|
||||
# .that 2139
|
||||
|
@ -30,3 +30,19 @@ func PermissionFrom (data string) (permission Permission) {
|
||||
permission.External = ModeFrom(rune(data[1]))
|
||||
return
|
||||
}
|
||||
|
||||
func (mode Mode) ToString () (output string) {
|
||||
switch mode {
|
||||
case ModeNone: output = "n"
|
||||
case ModeRead: output = "r"
|
||||
case ModeWrite: output = "w"
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (permission Permission) ToString () (output string) {
|
||||
output += permission.Internal.ToString()
|
||||
output += permission.External.ToString()
|
||||
return
|
||||
}
|
||||
|
Reference in New Issue
Block a user