diff --git a/parse.go b/parse.go index e7d816e..52c2841 100644 --- a/parse.go +++ b/parse.go @@ -12,7 +12,7 @@ func parseString (text string) ([]runeLayout, []token) { var index int var startingIndex int - var run rune + var runl runeLayout var lastRune rune var tok token @@ -31,19 +31,26 @@ func parseString (text string) ([]runeLayout, []token) { } } - for index, run = range text { + // build the rune slice + // we need to do this before parsing into tokens, because otherwise + // a realloc will occur in the middle of it and the tokens at the start + // will be referencing old memory + for _, run := range text { runes = append(runes, runeLayout { run: run, }) + } + // parse tokens + for index, runl = range runes { switch { - case run == '\r': + case runl.run == '\r': tokenBoundary() // we don't know the token type yet. if next rune is a // \n then this is a CRLF line break. if not, this is // just a word. - case run == '\n': + case runl.run == '\n': if lastRune == '\r' { // continue the \r to make a CRLF line break tok.kind = tokenKindLineBreak @@ -52,16 +59,16 @@ func parseString (text string) ([]runeLayout, []token) { tok.kind = tokenKindLineBreak } - case run == '\t': + case runl.run == '\t': mustBeInToken(tokenKindTab) - case unicode.IsSpace(run): + case unicode.IsSpace(runl.run): mustBeInToken(tokenKindSpace) default: mustBeInToken(tokenKindWord) } - lastRune = run + lastRune = runl.run } index ++ // make index equal to len([]rune(text))