From 288a1fb9ef39737f5079b857be1edfe5c84b2997 Mon Sep 17 00:00:00 2001 From: Sasha Koshka Date: Thu, 19 Sep 2024 09:36:01 -0400 Subject: [PATCH] Fix token slice size estimation to massively reduce waste --- parse.go | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/parse.go b/parse.go index 52c2841..6fc2eba 100644 --- a/parse.go +++ b/parse.go @@ -5,10 +5,21 @@ import "unicode" // TODO perhaps follow https://unicode.org/reports/tr14/ func parseString (text string) ([]runeLayout, []token) { - // TODO find an optimal size for both of these to minimize allocs. will - // require some testing. - runes := make([]runeLayout, 0, len(text) * 2 / 3) - tokens := make([]token, 0, len(text) / 4) + // alloc initial rune slice + runes := make([]runeLayout, 0, len(text) * 2 / 3) + + // build the rune slice + // we need to do this before parsing into tokens, because otherwise + // a realloc will occur in the middle of it and the tokens at the start + // will be referencing old memory + for _, run := range text { + runes = append(runes, runeLayout { + run: run, + }) + } + + // alloc initial token slice + tokens := make([]token, 0, len(runes) / 3) var index int var startingIndex int @@ -31,16 +42,6 @@ func parseString (text string) ([]runeLayout, []token) { } } - // build the rune slice - // we need to do this before parsing into tokens, because otherwise - // a realloc will occur in the middle of it and the tokens at the start - // will be referencing old memory - for _, run := range text { - runes = append(runes, runeLayout { - run: run, - }) - } - // parse tokens for index, runl = range runes { switch {