Compare commits

..

7 Commits

4 changed files with 213 additions and 74 deletions

View File

@ -30,7 +30,8 @@ SYSEXITS != printf '\043include <sysexits.h>\n' | cpp -M - | tr ' ' '\n' \
CC ?= cc CC ?= cc
RUSTC ?= rustc RUSTC ?= rustc
RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \ RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \
--extern getopt=build/o/libgetopt.rlib \
--extern strerror=build/o/libstrerror.rlib \ --extern strerror=build/o/libstrerror.rlib \
--extern sysexits=build/o/libsysexits.rlib --extern sysexits=build/o/libsysexits.rlib
CFLAGS += -I$(SYSEXITS) CFLAGS += -I$(SYSEXITS)
@ -70,12 +71,16 @@ TESTS != printf '%s\n' "$(TESTFILES)" | xargs -n1 basename \
include $(TESTFILES) include $(TESTFILES)
.PHONY: test .PHONY: test
test: all $(TESTS) /tmp/getopt test: all $(TESTS) /tmp/delimit /tmp/getopt
@echo $(TESTS) @echo $(TESTS)
/tmp/delimit
/tmp/getopt /tmp/getopt
/tmp/delimit: src/libdelimit.rs
$(RUSTC) --test -o $@ src/libdelimit.rs
/tmp/getopt: src/libgetopt.rs /tmp/getopt: src/libgetopt.rs
$(RUSTC) --test -o /tmp/getopt src/libgetopt.rs $(RUSTC) --test -o $@ src/libgetopt.rs
.PHONY: docs .PHONY: docs
docs: docs/ build docs: docs/ build
@ -88,8 +93,12 @@ docs: docs/ build
include $(OS_INCLUDE) include $(OS_INCLUDE)
.PHONY: rustlibs .PHONY: rustlibs
rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \ rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \
build/o/libsysexits.rlib $(OSLIB) build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB)
build/o/libdelimit.rlib: build src/libdelimit.rs
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \
-o $@ src/libdelimit.rs
build/o/libgetopt.rlib: build src/libgetopt.rs build/o/libgetopt.rlib: build src/libgetopt.rs
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \ $(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \

View File

@ -11,10 +11,9 @@ fop \(en field operator
.SH SYNOPSIS .SH SYNOPSIS
fop fop
.RB ( -d ) .RB [ -d\ delimiter ]
.RB [ delimiter ] index program
.RB index .RB [ arguments... ]
.RB program...
.\" .\"
.SH DESCRIPTION .SH DESCRIPTION
@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input.
Sets a delimiter by which the input data will be split into fields. The default Sets a delimiter by which the input data will be split into fields. The default
is an ASCII record separator. is an ASCII record separator.
.\" .\"
.SH DIAGNOSTICS
In the event of an error, a debug message will be printed and the program will
exit with the appropriate sysexits.h(3) error code.
.\"
.SH CAVEATS .SH CAVEATS
If the specified index does not exist in the data, the program
will print all data to the standard output before exiting with an error. If
input data is not delimited by the specified delimiter, the program will fill
memory with the contents of the stream before it is output.
Field indices are zero-indexed, which may be unexpected behavior for some users. Field indices are zero-indexed, which may be unexpected behavior for some users.
.\" .\"
.SH RATIONALE .SH RATIONALE

View File

@ -18,14 +18,16 @@
use std::{ use std::{
env::args, env::args,
io::{ Error, Read, Write, stdin, stdout }, io::{ Error, Write, stdin, stdout },
process::{ Command, ExitCode, Stdio, exit }, process::{ Command, ExitCode, Stdio, exit },
}; };
extern crate delimit;
extern crate getopt; extern crate getopt;
extern crate strerror; extern crate strerror;
extern crate sysexits; extern crate sysexits;
use delimit::Delimited;
use getopt::GetOpt; use getopt::GetOpt;
use strerror::StrError; use strerror::StrError;
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE }; use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
@ -90,16 +92,25 @@ fn main() -> ExitCode {
exit(usage(&argv[0]).into()); exit(usage(&argv[0]).into());
}); });
/* read entire standard input into memory */ let stdin = stdin().lock();
let mut buf = String::new();
if let Err(e) = stdin().read_to_string(&mut buf) { let mut input = Delimited::new(stdin, d.clone().as_bytes());
let mut n = 0;
let mut fopped = false;
while let Some(i) = input.next() {
let v = match i {
Ok(v) => v,
Err(e) => {
err(&argv[0], e); err(&argv[0], e);
exit(EX_IOERR.into()); exit(EX_IOERR.into());
},
}; };
/* split the buffer by the delimiter (by default, '\u{1E}') */ let mut out = Vec::new();
let mut fields = buf.split(&d).collect::<Vec<&str>>();
if n == index { /* fop it */
/* collect arguments for the operator command */ /* collect arguments for the operator command */
let command_args = argv let command_args = argv
.iter() .iter()
@ -111,57 +122,67 @@ fn main() -> ExitCode {
let mut spawned = Command::new(operator) let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */ .args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped()) .stdin(Stdio::piped())
.stdout(Stdio::piped()) /* piped stdout to handle output ourselves */ /* piped stdout to handle output ourselves */
.stdout(Stdio::piped())
.spawn() .spawn()
.unwrap_or_else( |e| { .unwrap_or_else( |e| {
err(&argv[0], e); err(&argv[0], e);
exit(EX_UNAVAILABLE.into()); exit(EX_UNAVAILABLE.into());
}); });
/* get field we want to pipe into spawned program */ /* feed the spawned program's stdin the field value */
let field = fields.get(index).unwrap_or_else(|| {
eprintln!("{}: {}: no such index in input", argv[0], index);
exit(EX_DATAERR.into());
});
/* get the stdin of the newly spawned program and feed it the field val */
if let Some(mut child_stdin) = spawned.stdin.take() { if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(field.as_bytes()); let _ = child_stdin.write_all(&v);
drop(child_stdin); /* stay safe! drop your children! */ drop(child_stdin); /* stay safe! drop your children! */
}
let output = spawned.wait_with_output().unwrap_or_else(|e| { let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e); err(&argv[0], e);
exit(EX_IOERR.into()); exit(EX_IOERR.into());
}); });
/* get the output with which the original field will be replaced */ /* get the output with which the original field will
* be replaced */
let mut replace = output.stdout.clone(); let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */ /* pop trailing newline out if the input did not contain it */
if fields[index].chars().last() != Some('\n') /* no newline */ if v.iter().last() != Some(&b'\n')
&& replace.pop() != Some(b'\n') { /* pop last char of replacement */ && replace.pop() != Some(b'\n')
/* restore replacement to original command output if popped char was not {
* a newline */ out = output.stdout;
replace = output.stdout; } else {
out = replace;
}
} }
/* convert the output of the program to UTF-8 */ fopped = true;
let new_field = String::from_utf8(replace).unwrap_or_else(|e| { } else {
eprintln!("{}: {}", argv[0], e); out = v;
}
/* since we cannot know when we're done, place a new delimiter before
* each index unless it is the 0th */
if n != 0 {
stdout().write_all(d.as_bytes()).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into()); exit(EX_IOERR.into());
}); });
}
/* store the new field in the old fields vector */ stdout().write_all(&out).unwrap_or_else(|e| {
fields[index] = &new_field;
/* fop it */
stdout().write_all(
fields.join(&d.to_string()).as_bytes()
).unwrap_or_else(|e| {
err(&argv[0], e); err(&argv[0], e);
exit(EX_IOERR.into()); exit(EX_IOERR.into());
}); });
ExitCode::SUCCESS
n += 1;
}
if fopped {
return ExitCode::SUCCESS;
} else {
eprintln!("{}: {}: no such index in input", argv[0], index);
return EX_DATAERR.into();
}
} }

101
src/libdelimit.rs Normal file
View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2025 Emma Tebibyte <emma@tebibyte.media>
* Copyright (c) 2025 silty silt <silt@tebibyte.media>
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Affero General Public License as published by the
* Free Software Foundation, either version 3 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
* for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
use std::{
io::{ Read, Result },
mem,
};
/* number of bytes requested from the stream per read; also used as the
 * initial capacity of the internal buffer */
const BUFFER_SIZE: usize = 4096;
/// Splits a byte stream into fields separated by a delimiter.
///
/// The `Read` requirement on `T` lives on the impl blocks rather than on the
/// type itself, so the struct can be named without repeating the bound.
#[derive(Debug)]
pub struct Delimited<T> {
    /* byte sequence that separates one field from the next */
    delimiter: Vec<u8>,
    /* bytes read from the stream that have not been yielded yet */
    buffer: Vec<u8>,
    /* source the fields are read from */
    stream: T,
}
impl<T> Delimited<T> where T: Read {
    /// Creates an iterator over the fields of `stream`, where fields are
    /// separated by `delimiter`.
    ///
    /// The delimiter bytes are copied, and the internal buffer starts out
    /// empty with room for `BUFFER_SIZE` bytes.
    pub fn new(stream: T, delimiter: &[u8]) -> Self {
        let buffer = Vec::with_capacity(BUFFER_SIZE);
        let delimiter = Vec::from(delimiter);

        Self { delimiter, buffer, stream }
    }
}
impl<T> Iterator for Delimited<T> where T: Read {
type Item = Result<Vec<u8>>;
fn next(&mut self) -> Option<Self::Item> {
let mut buf = [0; BUFFER_SIZE];
loop {
if let Some(p) = find_subslice(&self.buffer, &self.delimiter) {
let chunk = self.buffer.drain(..p).collect::<Vec<_>>();
let _ = self.buffer.drain(..self.delimiter.len());
return Some(Ok(chunk));
}
match self.stream.read(&mut buf) {
Ok(0) => {
let _ = self.buffer.is_empty() && return None;
return Some(Ok(mem::take(&mut self.buffer)));
},
Ok(n) => {
self.buffer.extend_from_slice(&buf[..n]);
},
Err(e) => {
return Some(Err(e));
},
}
}
}
}
/// Returns the index at which `key` first occurs in `stack`, or `None` if it
/// does not occur.
///
/// An empty `key` is treated as never matching. This avoids the panic that
/// `slice::windows` raises for a window size of zero, and means a caller
/// splitting on the key cannot be handed an endless run of empty matches.
fn find_subslice(stack: &[u8], key: &[u8]) -> Option<usize> {
    match key {
        [] => None,
        /* single-byte keys skip the window comparison */
        [byte] => stack.iter().position(|b| b == byte),
        _ => stack.windows(key.len()).position(|window| window == key),
    }
}
#[cfg(test)]
mod tests {
    use super::Delimited;

    /* collects every field of `input` split on `delimiter`, panicking on
     * any read error */
    fn split(input: &[u8], delimiter: &[u8]) -> Vec<Vec<u8>> {
        Delimited::new(input, delimiter)
            .map(|field| field.unwrap())
            .collect()
    }

    #[test]
    fn testing() {
        let d = '\u{1E}'.to_string();
        let input = vec!["meow", "woof", "ribbit"];
        let r = input.join(&d);

        let expected = input
            .iter()
            .map(|s| s.as_bytes().to_vec())
            .collect::<Vec<_>>();

        /* comparing whole vectors also checks the number of fields, so an
         * iterator that yields nothing cannot pass */
        assert_eq!(expected, split(r.as_bytes(), d.as_bytes()));
    }

    #[test]
    fn multibyte_delimiter() {
        let expected = vec![b"meow".to_vec(), b"woof".to_vec()];
        assert_eq!(expected, split(b"meow<>woof", b"<>"));
    }

    #[test]
    fn trailing_delimiter_adds_no_field() {
        let expected = vec![b"meow".to_vec()];
        assert_eq!(expected, split(b"meow,", b","));
    }
}