diff --git a/Makefile b/Makefile index 34bdcb0..da03b76 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,8 @@ SYSEXITS != printf '\043include \n' | cpp -M - | tr ' ' '\n' \ CC ?= cc RUSTC ?= rustc -RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \ +RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \ + --extern getopt=build/o/libgetopt.rlib \ --extern strerror=build/o/libstrerror.rlib \ --extern sysexits=build/o/libsysexits.rlib CFLAGS += -I$(SYSEXITS) @@ -70,12 +71,16 @@ TESTS != printf '%s\n' "$(TESTFILES)" | xargs -n1 basename \ include $(TESTFILES) .PHONY: test -test: all $(TESTS) /tmp/getopt +test: all $(TESTS) /tmp/delimit /tmp/getopt @echo $(TESTS) + /tmp/delimit /tmp/getopt +/tmp/delimit: src/libdelimit.rs + $(RUSTC) --test -o $@ src/libdelimit.rs + /tmp/getopt: src/libgetopt.rs - $(RUSTC) --test -o /tmp/getopt src/libgetopt.rs + $(RUSTC) --test -o $@ src/libgetopt.rs .PHONY: docs docs: docs/ build @@ -88,8 +93,12 @@ docs: docs/ build include $(OS_INCLUDE) .PHONY: rustlibs -rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \ - build/o/libsysexits.rlib $(OSLIB) +rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \ + build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB) + +build/o/libdelimit.rlib: build src/libdelimit.rs + $(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \ + -o $@ src/libdelimit.rs build/o/libgetopt.rlib: build src/libgetopt.rs $(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \ diff --git a/docs/fop.1 b/docs/fop.1 index 9afe364..b86e8f2 100644 --- a/docs/fop.1 +++ b/docs/fop.1 @@ -1,5 +1,5 @@ .\" Copyright (c) 2024 DTB -.\" Copyright (c) 2024 Emma Tebibyte +.\" Copyright (c) 2024–2025 Emma Tebibyte .\" .\" This work is licensed under CC BY-SA 4.0. To see a copy of this license, .\" visit . @@ -11,10 +11,9 @@ fop \(en field operator .SH SYNOPSIS fop -.RB ( -d ) -.RB [ delimiter ] -.RB index -.RB program... +.RB [ -d\ delimiter ] +index program +.RB [ arguments... 
] .\" .SH DESCRIPTION @@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input. Sets a delimiter by which the input data will be split into fields. The default is an ASCII record separator. .\" +.SH DIAGNOSTICS +In the event of an error, a debug message will be printed and the program will +exit with the appropriate sysexits.h(3) error code. +.\" .SH CAVEATS +If the specified index does not exist in the data, the program +will print all data to the standard output before exiting with an error. If +input data is not delimited by the specified delimiter, the program will fill +memory with the contents of the stream before it is output. + Field indices are zero-indexed, which may be unexpected behavior for some users. .\" diff --git a/src/fop.rs b/src/fop.rs index e8f2d06..2c7f5cb 100644 --- a/src/fop.rs +++ b/src/fop.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023–2024 Emma Tebibyte + * Copyright (c) 2023–2025 Emma Tebibyte * SPDX-License-Identifier: AGPL-3.0-or-later * * This program is free software: you can redistribute it and/or modify it @@ -18,14 +18,16 @@ use std::{ env::args, - io::{ Error, Read, Write, stdin, stdout }, + io::{ Error, Write, stdin, stdout }, process::{ Command, ExitCode, Stdio, exit }, }; +extern crate delimit; extern crate getopt; extern crate strerror; extern crate sysexits; +use delimit::Delimited; use getopt::GetOpt; use strerror::StrError; use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE }; @@ -90,78 +92,94 @@ fn main() -> ExitCode { exit(usage(&argv[0]).into()); }); - /* read entire standard input into memory */ - let mut buf = String::new(); - if let Err(e) = stdin().read_to_string(&mut buf) { - err(&argv[0], e); - exit(EX_IOERR.into()); - }; + let stdin = stdin().lock(); - /* split the buffer by the delimiter (by default, '\u{1E}') */ - let mut fields = buf.split(&d).collect::>(); + let mut input = Delimited::new(stdin, d.clone()); + let mut n = 0; - /* collect arguments for the 
operator command */ - let command_args = argv - .iter() - .clone() - .skip(command_arg + 1) /* skip the command name */ - .collect::>(); + let mut fopped = false; - /* spawn the command to operate on the field */ - let mut spawned = Command::new(operator) - .args(command_args) /* spawn with the specified arguments */ - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) /* piped stdout to handle output ourselves */ - .spawn() - .unwrap_or_else( |e| { + while let Some(i) = input.next() { + let v = match i { + Ok(v) => v, + Err(e) => { + err(&argv[0], e); + return EX_IOERR.into(); + }, + }; + + let mut out = Vec::new(); + + if n == index { /* fop it */ + /* collect arguments for the operator command */ + let command_args = argv + .iter() + .clone() + .skip(command_arg + 1) /* skip the command name */ + .collect::>(); + + /* spawn the command to operate on the field */ + let mut spawned = Command::new(operator) + .args(command_args) /* spawn with the specified arguments */ + .stdin(Stdio::piped()) + /* piped stdout to handle output ourselves */ + .stdout(Stdio::piped()) + .spawn() + .unwrap_or_else( |e| { + err(&argv[0], e); + exit(EX_UNAVAILABLE.into()); + }); + + /* feed the spawned program’s stdin the field value */ + if let Some(mut child_stdin) = spawned.stdin.take() { + let _ = child_stdin.write_all(&v); + drop(child_stdin); /* stay safe! drop your children! 
*/ + + let output = spawned.wait_with_output().unwrap_or_else(|e| { + err(&argv[0], e); + exit(EX_IOERR.into()); + }); + + /* get the output with which the original field will + * be replaced */ + let mut replace = output.stdout.clone(); + + /* pop trailing newline out if the input did not contain it */ + if v.iter().last() != Some(&b'\n') + && replace.pop() != Some(b'\n') + { + out = output.stdout; + } else { + out = replace; + } + } + + fopped = true; + } else { + out = v; + } + + /* since we cannot know when we’re done, place a new delimiter before + * each index unless it is the 0th */ + if n != 0 { + stdout().write_all(d.as_bytes()).unwrap_or_else(|e| { + err(&argv[0], e); + exit(EX_IOERR.into()); + }); + } + + stdout().write_all(&out).unwrap_or_else(|e| { err(&argv[0], e); - exit(EX_UNAVAILABLE.into()); + exit(EX_IOERR.into()); }); - /* get field we want to pipe into spawned program */ - let field = fields.get(index).unwrap_or_else(|| { + n += 1; + } + + if fopped { + return ExitCode::SUCCESS; + } else { eprintln!("{}: {}: no such index in input", argv[0], index); - exit(EX_DATAERR.into()); - }); - - /* get the stdin of the newly spawned program and feed it the field val */ - if let Some(mut child_stdin) = spawned.stdin.take() { - let _ = child_stdin.write_all(field.as_bytes()); - drop(child_stdin); /* stay safe! drop your children! 
*/ + return EX_DATAERR.into(); } - - let output = spawned.wait_with_output().unwrap_or_else(|e| { - err(&argv[0], e); - exit(EX_IOERR.into()); - }); - - /* get the output with which the original field will be replaced */ - let mut replace = output.stdout.clone(); - - /* pop trailing newline out if the input did not contain it */ - if fields[index].chars().last() != Some('\n') /* no newline */ - && replace.pop() != Some(b'\n') { /* pop last char of replacement */ - /* restore replacement to original command output if popped char was - * not a newline */ - replace = output.stdout; - } - - /* convert the output of the program to UTF-8 */ - let new_field = String::from_utf8(replace).unwrap_or_else(|e| { - eprintln!("{}: {}", argv[0], e); - exit(EX_IOERR.into()); - }); - - /* store the new field in the old fields vector */ - fields[index] = &new_field; - - /* fop it */ - stdout().write_all( - fields.join(&d.to_string()).as_bytes() - ).unwrap_or_else(|e| { - err(&argv[0], e); - exit(EX_IOERR.into()); - }); - - ExitCode::SUCCESS } diff --git a/src/libdelimit.rs b/src/libdelimit.rs new file mode 100644 index 0000000..c228e7c --- /dev/null +++ b/src/libdelimit.rs @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2025 Emma Tebibyte + * Copyright (c) 2025 silty silt + * SPDX-License-Identifier: AGPL-3.0-or-later + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU Affero General Public License as published by the + * Free Software Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License + * for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see https://www.gnu.org/licenses/. 
// */
/* The line above closes the license header comment that is opened on the
 * previous line of this file. */

use std::{
    io::{ ErrorKind, Read, Result },
    mem,
};

/* number of bytes requested from the stream by each read */
const BUFFER_SIZE: usize = 4096;

/// An iterator over the chunks of a byte stream that are separated by a
/// delimiter.
///
/// Every item is the run of bytes found between two delimiters; the delimiter
/// itself is never part of an item. Bytes following the last delimiter are
/// yielded as a final chunk once the stream ends, unless there are none: a
/// trailing delimiter does not produce an empty final chunk, and an empty
/// stream yields no chunks at all.
///
/// An empty delimiter never matches, so the whole stream is yielded as a
/// single chunk.
///
/// Read errors are yielded as `Err` items, except for
/// `ErrorKind::Interrupted`, which is retried.
pub struct Delimited<T> {
    delimiter: Vec<u8>,
    buffer: Vec<u8>,
    stream: T,
    /* offset into `buffer` before which no delimiter can start; each search
     * resumes here so that data already examined is not scanned again after
     * every read */
    searched: usize,
}

impl<T> Delimited<T> where T: Read {
    /// Creates an iterator that splits the bytes read from `stream` on every
    /// occurrence of `delimiter`.
    pub fn new<R>(stream: T, delimiter: R) -> Self where R: AsRef<[u8]> {
        Delimited {
            stream,
            delimiter: delimiter.as_ref().to_vec(),
            buffer: Vec::with_capacity(BUFFER_SIZE),
            searched: 0,
        }
    }

    /* Returns the offset of the first delimiter in the buffer, if any. When
     * none is found, records how much of the buffer has been ruled out. */
    fn find_subslice(&mut self) -> Option<usize> {
        let len = self.delimiter.len();

        /* slice::windows panics on a zero size; an empty delimiter simply
         * never matches */
        if len == 0 {
            return None;
        }

        let start = self.searched.min(self.buffer.len());
        let unsearched = &self.buffer[start..];

        let found = match self.delimiter.as_slice() {
            [byte] => unsearched.iter().position(|b| b == byte),
            delimiter => unsearched.windows(len).position(|w| w == delimiter),
        };

        match found {
            Some(p) => Some(start + p),
            None => {
                /* the last len - 1 bytes may be the beginning of a delimiter
                 * that is completed by the next read, so keep them in play */
                self.searched = (self.buffer.len() + 1).saturating_sub(len);
                None
            },
        }
    }
}

impl<T> Iterator for Delimited<T> where T: Read {
    type Item = Result<Vec<u8>>;

    fn next(&mut self) -> Option<Self::Item> {
        let mut buf = [0; BUFFER_SIZE];

        loop {
            if let Some(p) = self.find_subslice() {
                /* remove the chunk and its delimiter from the buffer in one
                 * pass, then cut the delimiter off of the chunk */
                let end = p + self.delimiter.len();
                let mut chunk = self.buffer.drain(..end).collect::<Vec<u8>>();
                chunk.truncate(p);

                /* what remains in the buffer has not been examined yet */
                self.searched = 0;

                return Some(Ok(chunk));
            }

            match self.stream.read(&mut buf) {
                Ok(0) => { /* no bytes read, we’re probably done */
                    self.searched = 0;

                    if self.buffer.is_empty() {
                        return None;
                    }

                    return Some(Ok(mem::take(&mut self.buffer)));
                },
                Ok(n) => {
                    self.buffer.extend_from_slice(&buf[..n]);
                },
                /* an interrupted read is not a failure; try again */
                Err(e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => {
                    return Some(Err(e));
                },
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::Delimited;

    #[test]
    fn testing() {
        let d = '\u{1E}'.to_string();
        let input = vec!["meow", "woof", "ribbit"];
        let r = input.join(&d);

        let output = Delimited::new(r.as_bytes(), d)
            .map(|item| item.unwrap())
            .collect::<Vec<_>>();

        /* compare whole vectors so that missing or extra items fail too */
        let expected = input
            .iter()
            .map(|s| s.as_bytes().to_vec())
            .collect::<Vec<_>>();

        assert_eq!(expected, output);
    }

    #[test]
    fn empty_delimiter() {
        let output = Delimited::new(&b"meow"[..], "")
            .map(|item| item.unwrap())
            .collect::<Vec<_>>();

        assert_eq!(vec![b"meow".to_vec()], output);
    }
}