Compare commits

..

7 Commits

4 changed files with 213 additions and 74 deletions

View File

@ -30,7 +30,8 @@ SYSEXITS != printf '\043include <sysexits.h>\n' | cpp -M - | tr ' ' '\n' \
CC ?= cc
RUSTC ?= rustc
RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \
RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \
--extern getopt=build/o/libgetopt.rlib \
--extern strerror=build/o/libstrerror.rlib \
--extern sysexits=build/o/libsysexits.rlib
CFLAGS += -I$(SYSEXITS)
@ -70,12 +71,16 @@ TESTS != printf '%s\n' "$(TESTFILES)" | xargs -n1 basename \
include $(TESTFILES)
.PHONY: test
test: all $(TESTS) /tmp/getopt
test: all $(TESTS) /tmp/delimit /tmp/getopt
@echo $(TESTS)
/tmp/delimit
/tmp/getopt
/tmp/delimit: src/libdelimit.rs
$(RUSTC) --test -o $@ src/libdelimit.rs
/tmp/getopt: src/libgetopt.rs
$(RUSTC) --test -o /tmp/getopt src/libgetopt.rs
$(RUSTC) --test -o $@ src/libgetopt.rs
.PHONY: docs
docs: docs/ build
@ -88,8 +93,12 @@ docs: docs/ build
include $(OS_INCLUDE)
.PHONY: rustlibs
rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \
build/o/libsysexits.rlib $(OSLIB)
rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \
build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB)
build/o/libdelimit.rlib: build src/libdelimit.rs
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \
-o $@ src/libdelimit.rs
build/o/libgetopt.rlib: build src/libgetopt.rs
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \

View File

@ -11,10 +11,9 @@ fop \(en field operator
.SH SYNOPSIS
fop
.RB ( -d )
.RB [ delimiter ]
.RB index
.RB program...
.RB [ -d\ delimiter ]
index program
.RB [ arguments... ]
.\"
.SH DESCRIPTION
@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input.
Sets a delimiter by which the input data will be split into fields. The default
is an ASCII record separator.
.\"
.SH DIAGNOSTICS
In the event of an error, a debug message will be printed and the program will
exit with the appropriate sysexits.h(3) error code.
.\"
.SH CAVEATS
If the specified index does not exist in the data, the program
will print all data to the standard output before exiting with an error. If
input data is not delimited by the specified delimiter, the program will fill
memory with the contents of the stream before it is output.
Field indices are zero-indexed, which may be unexpected behavior for some users.
.\"
.SH RATIONALE

View File

@ -18,14 +18,16 @@
use std::{
env::args,
io::{ Error, Read, Write, stdin, stdout },
io::{ Error, Write, stdin, stdout },
process::{ Command, ExitCode, Stdio, exit },
};
extern crate delimit;
extern crate getopt;
extern crate strerror;
extern crate sysexits;
use delimit::Delimited;
use getopt::GetOpt;
use strerror::StrError;
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
@ -90,78 +92,97 @@ fn main() -> ExitCode {
exit(usage(&argv[0]).into());
});
/* read entire standard input into memory */
let mut buf = String::new();
if let Err(e) = stdin().read_to_string(&mut buf) {
err(&argv[0], e);
exit(EX_IOERR.into());
};
let stdin = stdin().lock();
/* split the buffer by the delimiter (by default, '\u{1E}') */
let mut fields = buf.split(&d).collect::<Vec<&str>>();
let mut input = Delimited::new(stdin, d.clone().as_bytes());
let mut n = 0;
/* collect arguments for the operator command */
let command_args = argv
.iter()
.clone()
.skip(command_arg + 1) /* skip the command name */
.collect::<Vec<&String>>();
let mut fopped = false;
/* spawn the command to operate on the field */
let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped())
.stdout(Stdio::piped()) /* piped stdout to handle output ourselves */
.spawn()
.unwrap_or_else( |e| {
while let Some(i) = input.next() {
let v = match i {
Ok(v) => v,
Err(e) => {
err(&argv[0], e);
exit(EX_IOERR.into());
},
};
let mut out = Vec::new();
if n == index { /* fop it */
/* collect arguments for the operator command */
let command_args = argv
.iter()
.clone()
.skip(command_arg + 1) /* skip the command name */
.collect::<Vec<&String>>();
/* spawn the command to operate on the field */
let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped())
/* piped stdout to handle output ourselves */
.stdout(Stdio::piped())
.spawn()
.unwrap_or_else( |e| {
err(&argv[0], e);
exit(EX_UNAVAILABLE.into());
});
/* feed the spawned program's stdin the field value */
if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(&v);
drop(child_stdin); /* stay safe! drop your children! */
let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
/* get the output with which the original field will
* be replaced */
let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */
if v.iter().last() != Some(&b'\n')
&& replace.pop() != Some(b'\n')
{
out = output.stdout;
} else {
out = replace;
}
}
fopped = true;
} else {
out = v;
}
/* since we cannot know when we're done, place a new delimiter before
* each index unless it is the 0th */
if n != 0 {
stdout().write_all(d.as_bytes()).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
}
stdout().write_all(&out).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_UNAVAILABLE.into());
exit(EX_IOERR.into());
});
/* get field we want to pipe into spawned program */
let field = fields.get(index).unwrap_or_else(|| {
n += 1;
}
if fopped {
return ExitCode::SUCCESS;
} else {
eprintln!("{}: {}: no such index in input", argv[0], index);
exit(EX_DATAERR.into());
});
/* get the stdin of the newly spawned program and feed it the field val */
if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(field.as_bytes());
drop(child_stdin); /* stay safe! drop your children! */
return EX_DATAERR.into();
}
let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
/* get the output with which the original field will be replaced */
let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */
if fields[index].chars().last() != Some('\n') /* no newline */
&& replace.pop() != Some(b'\n') { /* pop last char of replacement */
/* restore replacement to original command output if popped char was not
* a newline */
replace = output.stdout;
}
/* convert the output of the program to UTF-8 */
let new_field = String::from_utf8(replace).unwrap_or_else(|e| {
eprintln!("{}: {}", argv[0], e);
exit(EX_IOERR.into());
});
/* store the new field in the old fields vector */
fields[index] = &new_field;
/* fop it */
stdout().write_all(
fields.join(&d.to_string()).as_bytes()
).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
ExitCode::SUCCESS
}

101
src/libdelimit.rs Normal file
View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2025 Emma Tebibyte <emma@tebibyte.media>
* Copyright (c) 2025 silty silt <silt@tebibyte.media>
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Affero General Public License as published by the
* Free Software Foundation, either version 3 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
* for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
use std::{
io::{ Read, Result },
mem,
};
/// Number of bytes reserved up front for the internal buffer of a
/// [`Delimited`], and the size of each read performed on its stream.
const BUFFER_SIZE: usize = 4096;

/// Splits a byte stream into fields separated by a delimiter.
///
/// The fields are obtained by iterating over the value; see the `Iterator`
/// implementation for this type.
pub struct Delimited<T: Read> {
    /* byte sequence that separates two fields */
    delimiter: Vec<u8>,
    /* bytes already read from the stream but not yet yielded */
    buffer: Vec<u8>,
    /* source of the data being split */
    stream: T,
}

impl<T: Read> Delimited<T> {
    /// Creates a splitter over `stream` that separates fields on `delimiter`.
    ///
    /// The delimiter bytes are copied, so the slice does not need to outlive
    /// the returned value. Nothing is read from `stream` until iteration
    /// starts.
    pub fn new(stream: T, delimiter: &[u8]) -> Self {
        let buffer = Vec::with_capacity(BUFFER_SIZE);
        let delimiter = Vec::from(delimiter);

        Self { delimiter, buffer, stream }
    }
}
impl<T> Iterator for Delimited<T> where T: Read {
type Item = Result<Vec<u8>>;
fn next(&mut self) -> Option<Self::Item> {
let mut buf = [0; BUFFER_SIZE];
loop {
if let Some(p) = find_subslice(&self.buffer, &self.delimiter) {
let chunk = self.buffer.drain(..p).collect::<Vec<_>>();
let _ = self.buffer.drain(..self.delimiter.len());
return Some(Ok(chunk));
}
match self.stream.read(&mut buf) {
Ok(0) => {
let _ = self.buffer.is_empty() && return None;
return Some(Ok(mem::take(&mut self.buffer)));
},
Ok(n) => {
self.buffer.extend_from_slice(&buf[..n]);
},
Err(e) => {
return Some(Err(e));
},
}
}
}
}
/// Returns the index of the first occurrence of `key` in `stack`, or `None`
/// when `key` does not occur.
///
/// An empty `key` never matches: `slice::windows` panics on a window size of
/// zero, and treating the empty key as "found at 0" would make callers that
/// split on it produce empty fields forever.
fn find_subslice(stack: &[u8], key: &[u8]) -> Option<usize> {
    match key.len() {
        0 => None,
        /* single byte keys do not need the windowed comparison */
        1 => stack.iter().position(|&b| b == key[0]),
        n => stack.windows(n).position(|w| w == key),
    }
}
#[cfg(test)]
mod tests {
    use super::Delimited;

    /* joins a few fields with the record separator and checks that splitting
     * yields exactly those fields again, no more and no fewer */
    #[test]
    fn testing() {
        let d = '\u{1E}'.to_string();
        let input = vec!["meow", "woof", "ribbit"];
        let r = input.join(&d);

        let expected = input
            .iter()
            .map(|s| s.as_bytes().to_vec())
            .collect::<Vec<_>>();

        let fields = Delimited::new(r.as_bytes(), d.as_bytes())
            .map(|f| f.expect("reading from a byte slice cannot fail"))
            .collect::<Vec<_>>();

        /* comparing whole vectors also catches missing or extra fields,
         * which a per-item comparison inside a loop would not */
        assert_eq!(expected, fields);
    }
}