Fix token slice size estimation to massively reduce waste

This commit is contained in:
Sasha Koshka 2024-09-19 09:36:01 -04:00
parent a91816df6c
commit 288a1fb9ef

View File

@@ -5,10 +5,21 @@ import "unicode"
// TODO perhaps follow https://unicode.org/reports/tr14/
func parseString (text string) ([]runeLayout, []token) {
// TODO find an optimal size for both of these to minimize allocs. will
// require some testing.
runes := make([]runeLayout, 0, len(text) * 2 / 3)
tokens := make([]token, 0, len(text) / 4)
// alloc initial rune slice
runes := make([]runeLayout, 0, len(text) * 2 / 3)
// build the rune slice
// we need to do this before parsing into tokens, because otherwise
// a realloc will occur in the middle of it and the tokens at the start
// will be referencing old memory
for _, run := range text {
runes = append(runes, runeLayout {
run: run,
})
}
// alloc initial token slice
tokens := make([]token, 0, len(runes) / 3)
var index int
var startingIndex int
@@ -31,16 +42,6 @@ func parseString (text string) ([]runeLayout, []token) {
}
}
// build the rune slice
// we need to do this before parsing into tokens, because otherwise
// a realloc will occur in the middle of it and the tokens at the start
// will be referencing old memory
for _, run := range text {
runes = append(runes, runeLayout {
run: run,
})
}
// parse tokens
for index, runl = range runes {
switch {