Fix token slice size estimation to massively reduce waste

2024-09-19 09:36:01 -04:00 · 2024-09-19 09:36:01 -04:00 · 288a1fb9ef
commit 288a1fb9ef
parent a91816df6c
1 changed file with 15 additions and 14 deletions
--- a/parse.go
+++ b/parse.go
@@ -5,10 +5,21 @@ import "unicode"
 // TODO perhaps follow https://unicode.org/reports/tr14/

 func parseString (text string) ([]runeLayout, []token) {
-	// TODO find an optimal size for both of these to minimize allocs. will
-	// require some testing.
-	runes  := make([]runeLayout, 0, len(text) * 2 / 3)
-	tokens := make([]token,      0, len(text) / 4)
+	// alloc initial rune slice
+	runes := make([]runeLayout, 0, len(text) * 2 / 3)
+
+	// build the rune slice
+	// we need to do this before parsing into tokens, because otherwise
+	// a realloc will occur in the middle of it and the tokens at the start
+	// will be referencing old memory
+	for _, run := range text {
+		runes = append(runes, runeLayout {
+			run: run,
+		})
+	}
+
+	// alloc initial token slice
+	tokens := make([]token, 0, len(runes) / 3)

 	var index         int
 	var startingIndex int
@@ -31,16 +42,6 @@ func parseString (text string) ([]runeLayout, []token) {
 		}
 	}

-	// build the rune slice
-	// we need to do this before parsing into tokens, because otherwise
-	// a realloc will occur in the middle of it and the tokens at the start
-	// will be referencing old memory
-	for _, run := range text {
-		runes = append(runes, runeLayout {
-			run: run,
-		})
-	}
-
 	// parse tokens
 	for index, runl = range runes {
 		switch {