From 31b424d2050e38184db3740bc2550014d4a53b69 Mon Sep 17 00:00:00 2001 From: Emma Tebibyte Date: Mon, 27 Oct 2025 23:37:11 -0600 Subject: [PATCH 01/10] libdelimit: initial commit --- src/libdelimit.rs | 92 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 src/libdelimit.rs diff --git a/src/libdelimit.rs b/src/libdelimit.rs new file mode 100644 index 0000000..6e648cf --- /dev/null +++ b/src/libdelimit.rs @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2025 Emma Tebibyte + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU Affero General Public License as published by the + * Free Software Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License + * for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see https://www.gnu.org/licenses/. 
+ */ + +use std::{ + io::{ Read, Result, stdin }, + mem::self, + process::ExitCode, +}; + +const BUFFER_SIZE: usize = 4096; + +struct Delimited { + stream: Box, + delimiter: Vec, + buffer: Vec +} + +impl Delimited { + fn new(stream: Box, delimiter: &[u8]) -> Self { + Delimited { + stream, + delimiter: delimiter.to_vec(), + buffer: Vec::with_capacity(BUFFER_SIZE), + } + } +} + +impl Iterator for Delimited { + type Item = Result>; + + fn next(&mut self) -> Option { + let mut buf = [0; BUFFER_SIZE]; + + loop { + if let Some(p) = find_subslice(&self.buffer, &self.delimiter) { + let chunk = self.buffer.drain(..p).collect::>(); + + let _ = self.buffer.drain(..self.delimiter.len()); + + return Some(Ok(chunk)); + } + + match self.stream.read(&mut buf) { + Ok(0) => { + if self.buffer.is_empty() { + return None; + } + + return Some(Ok(mem::take(&mut self.buffer))); + }, + Ok(n) => { + let content = &buf[..n]; + self.buffer.extend_from_slice(&content); + }, + Err(e) => { + return Some(Err(e)); + }, + } + } + } +} + +fn find_subslice(stack: &[u8], key: &[u8]) -> Option { + if key.len() == 1 { + return stack.iter().position(|&b| b == key[0]); + } + if key.len() > stack.len() { + return None; + } + for i in 0..=stack.len() - key.len() { + if &stack[i..i + key.len()] == key { + return Some(i); + } + } + + None +} From 15039805f9dc6d9ad92102f8f14354d7f41d2fdf Mon Sep 17 00:00:00 2001 From: Emma Tebibyte Date: Tue, 28 Oct 2025 14:08:17 -0600 Subject: [PATCH 02/10] Makefile, libdelimit: update for including libdelimit in utilities --- Makefile | 11 ++++++++--- src/libdelimit.rs | 7 +++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index d104d7f..f853c85 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,8 @@ SYSEXITS != printf '\043include \n' | cpp -M - | tr ' ' '\n' \ CC ?= cc RUSTC ?= rustc -RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \ +RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \ + --extern getopt=build/o/libgetopt.rlib 
\ --extern strerror=build/o/libstrerror.rlib \ --extern sysexits=build/o/libsysexits.rlib CFLAGS += -I$(SYSEXITS) @@ -88,8 +89,12 @@ docs: docs/ build include $(OS_INCLUDE) .PHONY: rustlibs -rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \ - build/o/libsysexits.rlib $(OSLIB) +rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \ + build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB) + +build/o/libdelimit.rlib: build src/libdelimit.rs + $(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \ + -o $@ src/libdelimit.rs build/o/libgetopt.rlib: build src/libgetopt.rs $(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \ diff --git a/src/libdelimit.rs b/src/libdelimit.rs index 6e648cf..777fb96 100644 --- a/src/libdelimit.rs +++ b/src/libdelimit.rs @@ -17,21 +17,20 @@ */ use std::{ - io::{ Read, Result, stdin }, + io::{ Read, Result }, mem::self, - process::ExitCode, }; const BUFFER_SIZE: usize = 4096; -struct Delimited { +pub struct Delimited { stream: Box, delimiter: Vec, buffer: Vec } impl Delimited { - fn new(stream: Box, delimiter: &[u8]) -> Self { + pub fn new(stream: Box, delimiter: &[u8]) -> Self { Delimited { stream, delimiter: delimiter.to_vec(), From 4aab77bee47955aa296d5738f25e83a46276cb09 Mon Sep 17 00:00:00 2001 From: Emma Tebibyte Date: Tue, 28 Oct 2025 14:11:00 -0600 Subject: [PATCH 03/10] fop(1), fop.1: update to use libdelimit --- docs/fop.1 | 16 ++++-- src/fop.rs | 151 ++++++++++++++++++++++++++++++----------------------- 2 files changed, 98 insertions(+), 69 deletions(-) diff --git a/docs/fop.1 b/docs/fop.1 index b96033a..22c1cf9 100644 --- a/docs/fop.1 +++ b/docs/fop.1 @@ -11,10 +11,9 @@ fop \(en field operator .SH SYNOPSIS fop -.RB ( -d ) -.RB [ delimiter ] -.RB index -.RB program... +.RB [ -d\ delimiter ] +index program +.RB [ arguments... ] .\" .SH DESCRIPTION @@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input. 
Sets a delimiter by which the input data will be split into fields. The default is an ASCII record separator. .\" +.SH DIAGNOSTICS +In the event of an error, a debug message will be printed and the program will +exit with the appropriate sysexits.h(3) error code. +.\" .SH CAVEATS +If the specified index does not exist in the data, the program +will print all data to the standard output before exiting with an error. If +input data is not delimited by the specified delimiter, the program will fill +memory with the contents of the stream before it is output. + Field indices are zero-indexed, which may be unexpected behavior for some users. .\" .SH RATIONALE diff --git a/src/fop.rs b/src/fop.rs index 261aba5..9b0ea83 100644 --- a/src/fop.rs +++ b/src/fop.rs @@ -18,14 +18,16 @@ use std::{ env::args, - io::{ Error, Read, Write, stdin, stdout }, + io::{ Error, Write, stdin, stdout }, process::{ Command, ExitCode, Stdio, exit }, }; +extern crate delimit; extern crate getopt; extern crate strerror; extern crate sysexits; +use delimit::Delimited; use getopt::GetOpt; use strerror::StrError; use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE }; @@ -90,78 +92,97 @@ fn main() -> ExitCode { exit(usage(&argv[0]).into()); }); - /* read entire standard input into memory */ - let mut buf = String::new(); - if let Err(e) = stdin().read_to_string(&mut buf) { - err(&argv[0], e); - exit(EX_IOERR.into()); - }; + let stdin = Box::new(stdin().lock()); - /* split the buffer by the delimiter (by default, '\u{1E}') */ - let mut fields = buf.split(&d).collect::>(); + let mut input = Delimited::new(stdin, d.clone().as_bytes()); + let mut n = 0; - /* collect arguments for the operator command */ - let command_args = argv - .iter() - .clone() - .skip(command_arg + 1) /* skip the command name */ - .collect::>(); + let mut fopped = false; - /* spawn the command to operate on the field */ - let mut spawned = Command::new(operator) - .args(command_args) /* spawn with the specified
arguments */ - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) /* piped stdout to handle output ourselves */ - .spawn() - .unwrap_or_else( |e| { + while let Some(i) = input.next() { + let v = match i { + Ok(v) => v, + Err(e) => { + err(&argv[0], e); + exit(EX_IOERR.into()); + }, + }; + + let mut out = Vec::new(); + + if n == index { /* fop it */ + /* collect arguments for the operator command */ + let command_args = argv + .iter() + .clone() + .skip(command_arg + 1) /* skip the command name */ + .collect::>(); + + /* spawn the command to operate on the field */ + let mut spawned = Command::new(operator) + .args(command_args) /* spawn with the specified arguments */ + .stdin(Stdio::piped()) + /* piped stdout to handle output ourselves */ + .stdout(Stdio::piped()) + .spawn() + .unwrap_or_else( |e| { + err(&argv[0], e); + exit(EX_UNAVAILABLE.into()); + }); + + /* feed the spawned program’s stdin the field value */ + if let Some(mut child_stdin) = spawned.stdin.take() { + let _ = child_stdin.write_all(&v); + drop(child_stdin); /* stay safe! drop your children! 
*/ + + let output = spawned.wait_with_output().unwrap_or_else(|e| { + err(&argv[0], e); + exit(EX_IOERR.into()); + }); + + /* get the output with which the original field will + * be replaced */ + let mut replace = output.stdout.clone(); + + /* pop trailing newline out if the input did not contain it */ + if v.iter().last() != Some(&b'\n') + && replace.pop() != Some(b'\n') + { + out = output.stdout; + } else { + out = replace; + } + } + + fopped = true; + } else { + out = v; + } + + /* since we cannot know when we’re done, place a new delimiter before + * each index unless it is the 0th */ + if n != 0 { + stdout().write_all(d.as_bytes()).unwrap_or_else(|e| { + err(&argv[0], e); + exit(EX_IOERR.into()); + }); + } + + stdout().write_all(&out).unwrap_or_else(|e| { err(&argv[0], e); - exit(EX_UNAVAILABLE.into()); + exit(EX_IOERR.into()); }); - /* get field we want to pipe into spawned program */ - let field = fields.get(index).unwrap_or_else(|| { + + n += 1; + } + + if fopped { + return ExitCode::SUCCESS; + } else { eprintln!("{}: {}: no such index in input", argv[0], index); - exit(EX_DATAERR.into()); - }); - - /* get the stdin of the newly spawned program and feed it the field val */ - if let Some(mut child_stdin) = spawned.stdin.take() { - let _ = child_stdin.write_all(field.as_bytes()); - drop(child_stdin); /* stay safe! drop your children! 
*/ + return EX_DATAERR.into(); } - let output = spawned.wait_with_output().unwrap_or_else(|e| { - err(&argv[0], e); - exit(EX_IOERR.into()); - }); - /* get the output with which the original field will be replaced */ - let mut replace = output.stdout.clone(); - - /* pop trailing newline out if the input did not contain it */ - if fields[index].chars().last() != Some('\n') /* no newline */ - && replace.pop() != Some(b'\n') { /* pop last char of replacement */ - /* restore replacement to original command output if popped char was not - * a newline */ - replace = output.stdout; - } - - /* convert the output of the program to UTF-8 */ - let new_field = String::from_utf8(replace).unwrap_or_else(|e| { - eprintln!("{}: {}", argv[0], e); - exit(EX_IOERR.into()); - }); - - /* store the new field in the old fields vector */ - fields[index] = &new_field; - - /* fop it */ - stdout().write_all( - fields.join(&d.to_string()).as_bytes() - ).unwrap_or_else(|e| { - err(&argv[0], e); - exit(EX_IOERR.into()); - }); - - ExitCode::SUCCESS } From 5cc1e2067b2a90b033599aaec5be1ae1b8a61c31 Mon Sep 17 00:00:00 2001 From: Emma Tebibyte Date: Thu, 30 Oct 2025 19:32:03 -0600 Subject: [PATCH 04/10] libdelimit, Makefile: adds test for libdelimit, changes API; fop(1): changes to new libdelimit API --- Makefile | 8 ++++++-- src/fop.rs | 2 +- src/libdelimit.rs | 32 ++++++++++++++++++++++++++------ 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index f853c85..0a2dfd2 100644 --- a/Makefile +++ b/Makefile @@ -71,12 +71,16 @@ TESTS != printf '%s\n' "$(TESTFILES)" | xargs -n1 basename \ include $(TESTFILES) .PHONY: test -test: all $(TESTS) /tmp/getopt +test: all $(TESTS) /tmp/delimit /tmp/getopt @echo $(TESTS) + /tmp/delimit /tmp/getopt +/tmp/delimit: src/libdelimit.rs + $(RUSTC) --test -o $@ src/libdelimit.rs + /tmp/getopt: src/libgetopt.rs - $(RUSTC) --test -o /tmp/getopt src/libgetopt.rs + $(RUSTC) --test -o $@ src/libgetopt.rs .PHONY: docs docs: docs/ build diff 
--git a/src/fop.rs b/src/fop.rs index 9b0ea83..948b5c2 100644 --- a/src/fop.rs +++ b/src/fop.rs @@ -92,7 +92,7 @@ fn main() -> ExitCode { exit(usage(&argv[0]).into()); }); - let stdin = Box::new(stdin().lock()); + let stdin = stdin().lock(); let mut input = Delimited::new(stdin, d.clone().as_bytes()); let mut n = 0; diff --git a/src/libdelimit.rs b/src/libdelimit.rs index 777fb96..d55066c 100644 --- a/src/libdelimit.rs +++ b/src/libdelimit.rs @@ -23,14 +23,14 @@ use std::{ const BUFFER_SIZE: usize = 4096; -pub struct Delimited { - stream: Box, +pub struct Delimited { delimiter: Vec, - buffer: Vec + buffer: Vec, + stream: T, } -impl Delimited { - pub fn new(stream: Box, delimiter: &[u8]) -> Self { +impl Delimited where T: Read { + pub fn new(stream: T, delimiter: &[u8]) -> Self { Delimited { stream, delimiter: delimiter.to_vec(), @@ -39,7 +39,7 @@ impl Delimited { } } -impl Iterator for Delimited { +impl Iterator for Delimited where T: Read { type Item = Result>; fn next(&mut self) -> Option { @@ -89,3 +89,23 @@ fn find_subslice(stack: &[u8], key: &[u8]) -> Option { None } + +#[cfg(test)] +mod tests { + use Delimited; + + #[test] + fn testing() { + let d = '\u{1E}'.to_string(); + let input = vec!["meow", "woof", "ribbit"]; + let r = input.join(&d); + + let mut output = Delimited::new(r.as_bytes(), d.as_bytes()); + + let mut i = 0; + while let Some(item) = output.next() { + assert_eq!(input[i].as_bytes(), item.unwrap()); + i += 1; + } + } +} From 28bc2cd0d0d73c65fe58a9de310774a1e32ab572 Mon Sep 17 00:00:00 2001 From: Emma Tebibyte Date: Thu, 30 Oct 2025 23:29:25 -0600 Subject: [PATCH 05/10] libdelimit: formatting & correctness --- src/libdelimit.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/libdelimit.rs b/src/libdelimit.rs index d55066c..9957eb6 100644 --- a/src/libdelimit.rs +++ b/src/libdelimit.rs @@ -56,15 +56,12 @@ impl Iterator for Delimited where T: Read { match self.stream.read(&mut buf) { Ok(0) => { - if 
self.buffer.is_empty() { - return None; - } + let _ = self.buffer.is_empty() && return None; return Some(Ok(mem::take(&mut self.buffer))); }, Ok(n) => { - let content = &buf[..n]; - self.buffer.extend_from_slice(&content); + self.buffer.extend_from_slice(&buf[..n]); }, Err(e) => { return Some(Err(e)); @@ -78,9 +75,11 @@ fn find_subslice(stack: &[u8], key: &[u8]) -> Option { if key.len() == 1 { return stack.iter().position(|&b| b == key[0]); } + if key.len() > stack.len() { return None; } + for i in 0..=stack.len() - key.len() { if &stack[i..i + key.len()] == key { return Some(i); From c1df6e632222eae6992ff4c1d42b0cf2c580a8b1 Mon Sep 17 00:00:00 2001 From: silt Date: Fri, 31 Oct 2025 06:28:11 +0000 Subject: [PATCH 06/10] libdelimit: refactored find_subslice() --- src/libdelimit.rs | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/libdelimit.rs b/src/libdelimit.rs index 9957eb6..0717c49 100644 --- a/src/libdelimit.rs +++ b/src/libdelimit.rs @@ -1,5 +1,6 @@ /* * Copyright (c) 2025 Emma Tebibyte + * Copyright (c) 2025 silty silt * SPDX-License-Identifier: AGPL-3.0-or-later * * This program is free software: you can redistribute it and/or modify it @@ -18,7 +19,7 @@ use std::{ io::{ Read, Result }, - mem::self, + mem, }; const BUFFER_SIZE: usize = 4096; @@ -72,21 +73,11 @@ impl Iterator for Delimited where T: Read { } fn find_subslice(stack: &[u8], key: &[u8]) -> Option { - if key.len() == 1 { - return stack.iter().position(|&b| b == key[0]); + match key.len() { + /* TODO: is this optimization necessary? 
*/ + 1 => stack.iter().position(|&b| b == key[0]), + _ => stack.windows(key.len()).position(|w| w == key), } - - if key.len() > stack.len() { - return None; - } - - for i in 0..=stack.len() - key.len() { - if &stack[i..i + key.len()] == key { - return Some(i); - } - } - - None } #[cfg(test)] From d497d5b4aade7c713fded1fafff0b3e3a2a3dbe8 Mon Sep 17 00:00:00 2001 From: Emma Tebibyte Date: Fri, 31 Oct 2025 00:58:12 -0600 Subject: [PATCH 07/10] libdelimit: fixes tabs --- src/libdelimit.rs | 80 +++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/src/libdelimit.rs b/src/libdelimit.rs index 0717c49..b7de4a9 100644 --- a/src/libdelimit.rs +++ b/src/libdelimit.rs @@ -18,66 +18,66 @@ */ use std::{ - io::{ Read, Result }, - mem, + io::{ Read, Result }, + mem, }; const BUFFER_SIZE: usize = 4096; pub struct Delimited { - delimiter: Vec, - buffer: Vec, - stream: T, + delimiter: Vec, + buffer: Vec, + stream: T, } impl Delimited where T: Read { pub fn new(stream: T, delimiter: &[u8]) -> Self { - Delimited { - stream, - delimiter: delimiter.to_vec(), - buffer: Vec::with_capacity(BUFFER_SIZE), - } - } + Delimited { + stream, + delimiter: delimiter.to_vec(), + buffer: Vec::with_capacity(BUFFER_SIZE), + } + } } impl Iterator for Delimited where T: Read { - type Item = Result>; + type Item = Result>; - fn next(&mut self) -> Option { - let mut buf = [0; BUFFER_SIZE]; + fn next(&mut self) -> Option { + let mut buf = [0; BUFFER_SIZE]; - loop { - if let Some(p) = find_subslice(&self.buffer, &self.delimiter) { - let chunk = self.buffer.drain(..p).collect::>(); - - let _ = self.buffer.drain(..self.delimiter.len()); + loop { + if let Some(p) = find_subslice(&self.buffer, &self.delimiter) { + let chunk = self.buffer.drain(..p).collect::>(); + + let _ = self.buffer.drain(..self.delimiter.len()); - return Some(Ok(chunk)); - } + return Some(Ok(chunk)); + } - match self.stream.read(&mut buf) { - Ok(0) => { - let _ = self.buffer.is_empty() 
&& return None; + match self.stream.read(&mut buf) { + Ok(0) => { + let _ = self.buffer.is_empty() && return None; - return Some(Ok(mem::take(&mut self.buffer))); - }, - Ok(n) => { - self.buffer.extend_from_slice(&buf[..n]); - }, - Err(e) => { - return Some(Err(e)); - }, - } - } - } + return Some(Ok(mem::take(&mut self.buffer))); + }, + Ok(n) => { + self.buffer.extend_from_slice(&buf[..n]); + }, + Err(e) => { + return Some(Err(e)); + }, + } + } + } } fn find_subslice(stack: &[u8], key: &[u8]) -> Option { - match key.len() { - /* TODO: is this optimization necessary? */ - 1 => stack.iter().position(|&b| b == key[0]), - _ => stack.windows(key.len()).position(|w| w == key), - } + match key.len() { + /* TODO: is this optimization necessary? */ + 1 => stack.iter().position(|&b| b == key[0]), + _ => stack.windows(key.len()).position(|w| w == key), + } } #[cfg(test)] From afd58ae4e3f428b94df7ee36ccf71fc093d3f300 Mon Sep 17 00:00:00 2001 From: Emma Tebibyte Date: Thu, 6 Nov 2025 15:44:15 -0700 Subject: [PATCH 08/10] fop(1): updated copyright information --- src/fop.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fop.rs b/src/fop.rs index 948b5c2..4237fe9 100644 --- a/src/fop.rs +++ b/src/fop.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023–2024 Emma Tebibyte + * Copyright (c) 2023–2025 Emma Tebibyte * SPDX-License-Identifier: AGPL-3.0-or-later * * This program is free software: you can redistribute it and/or modify it under From 2846ee3c01159f48cb6a58b83c222f636b024dcc Mon Sep 17 00:00:00 2001 From: Emma Tebibyte Date: Fri, 7 Nov 2025 15:11:49 -0700 Subject: [PATCH 09/10] fop(1): formatting --- src/fop.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/fop.rs b/src/fop.rs index 4237fe9..2b91963 100644 --- a/src/fop.rs +++ b/src/fop.rs @@ -138,7 +138,7 @@ fn main() -> ExitCode { let output = spawned.wait_with_output().unwrap_or_else(|e| { err(&argv[0], e); exit(EX_IOERR.into()); - }); + }); /* get the output with which the 
original field will * be replaced */ @@ -173,7 +173,6 @@ fn main() -> ExitCode { exit(EX_IOERR.into()); }); - n += 1; } @@ -183,6 +182,4 @@ fn main() -> ExitCode { eprintln!("{}: {}: no such index in input", argv[0], index); return EX_DATAERR.into(); } - - } From 2f2b8cf0678ea8641b332773cd9b2e68742cd61b Mon Sep 17 00:00:00 2001 From: Emma Tebibyte Date: Wed, 12 Nov 2025 16:18:46 -0700 Subject: [PATCH 10/10] libdelimit: updates to make API easier to use; fop(1): updated to new libdelimit API --- src/fop.rs | 4 ++-- src/libdelimit.rs | 30 +++++++++++++++++------------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/fop.rs b/src/fop.rs index 2b91963..53c9917 100644 --- a/src/fop.rs +++ b/src/fop.rs @@ -94,7 +94,7 @@ fn main() -> ExitCode { let stdin = stdin().lock(); - let mut input = Delimited::new(stdin, d.clone().as_bytes()); + let mut input = Delimited::new(stdin, d.clone()); let mut n = 0; let mut fopped = false; @@ -104,7 +104,7 @@ fn main() -> ExitCode { Ok(v) => v, Err(e) => { err(&argv[0], e); - exit(EX_IOERR.into()); + return EX_IOERR.into(); }, }; diff --git a/src/libdelimit.rs b/src/libdelimit.rs index b7de4a9..c228e7c 100644 --- a/src/libdelimit.rs +++ b/src/libdelimit.rs @@ -31,13 +31,25 @@ pub struct Delimited { } impl Delimited where T: Read { - pub fn new(stream: T, delimiter: &[u8]) -> Self { + pub fn new(stream: T, delimiter: R) -> Self where R: AsRef<[u8]> { Delimited { stream, - delimiter: delimiter.to_vec(), + delimiter: delimiter.as_ref().to_vec(), buffer: Vec::with_capacity(BUFFER_SIZE), } } + + fn find_subslice(&self) -> Option { + match self.delimiter.len() { + /* TODO: is this optimization necessary? 
*/ + 1 => self.buffer.iter().position(|&b| b == self.delimiter[0]), + _ => { + self.buffer + .windows(self.delimiter.len()) + .position(|w| w == self.delimiter) + }, + } + } } impl Iterator for Delimited where T: Read { @@ -47,7 +59,7 @@ impl Iterator for Delimited where T: Read { let mut buf = [0; BUFFER_SIZE]; loop { - if let Some(p) = find_subslice(&self.buffer, &self.delimiter) { + if let Some(p) = self.find_subslice() { let chunk = self.buffer.drain(..p).collect::>(); let _ = self.buffer.drain(..self.delimiter.len()); @@ -56,7 +68,7 @@ impl Iterator for Delimited where T: Read { } match self.stream.read(&mut buf) { - Ok(0) => { + Ok(0) => { /* no bytes read, we’re probably done */ let _ = self.buffer.is_empty() && return None; return Some(Ok(mem::take(&mut self.buffer))); @@ -72,14 +84,6 @@ impl Iterator for Delimited where T: Read { } } -fn find_subslice(stack: &[u8], key: &[u8]) -> Option { - match key.len() { - /* TODO: is this optimization necessary? */ - 1 => stack.iter().position(|&b| b == key[0]), - _ => stack.windows(key.len()).position(|w| w == key), - } -} - #[cfg(test)] mod tests { use Delimited; @@ -90,7 +94,7 @@ mod tests { let input = vec!["meow", "woof", "ribbit"]; let r = input.join(&d); - let mut output = Delimited::new(r.as_bytes(), d.as_bytes()); + let mut output = Delimited::new(r.as_bytes(), d); let mut i = 0; while let Some(item) = output.next() {