fop(1), fop.1: update to use libdelimit

This commit is contained in:
Emma Tebibyte 2025-10-28 14:11:00 -06:00
parent 15039805f9
commit 4aab77bee4
Signed by: emma
GPG Key ID: 427287A2F16F44FA
2 changed files with 98 additions and 69 deletions

View File

@ -11,10 +11,9 @@ fop \(en field operator
.SH SYNOPSIS
fop
.RB ( -d )
.RB [ delimiter ]
.RB index
.RB program...
.RB [ -d\ delimiter ]
index program
.RB [ arguments... ]
.\"
.SH DESCRIPTION
@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input.
Sets a delimiter by which the input data will be split into fields. The default
is an ASCII record separator.
.\"
.SH DIAGNOSTICS
In the event of an error, a debug message will be printed and the program will
exit with the appropriate sysexits.h(3) error code.
.\"
.SH CAVEATS
If the specified index does not exist in the data, the program
will print all data to the standard output before exiting with an error. If
input data is not delimited by the specified delimiter, the program will fill
memory with the contents of the stream before it is output.
Field indices are zero-indexed, which may be unexpected behavior for some users.
.\"
.SH RATIONALE

View File

@ -18,14 +18,16 @@
use std::{
env::args,
io::{ Error, Read, Write, stdin, stdout },
io::{ Error, Write, stdin, stdout },
process::{ Command, ExitCode, Stdio, exit },
};
extern crate delimit;
extern crate getopt;
extern crate strerror;
extern crate sysexits;
use delimit::Delimited;
use getopt::GetOpt;
use strerror::StrError;
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
@ -90,78 +92,97 @@ fn main() -> ExitCode {
exit(usage(&argv[0]).into());
});
/* read entire standard input into memory */
let mut buf = String::new();
if let Err(e) = stdin().read_to_string(&mut buf) {
err(&argv[0], e);
exit(EX_IOERR.into());
};
let stdin = Box::new(stdin().lock());
/* split the buffer by the delimiter (by default, '\u{1E}') */
let mut fields = buf.split(&d).collect::<Vec<&str>>();
let mut input = Delimited::new(stdin, d.clone().as_bytes());
let mut n = 0;
/* collect arguments for the operator command */
let command_args = argv
.iter()
.clone()
.skip(command_arg + 1) /* skip the command name */
.collect::<Vec<&String>>();
let mut fopped = false;
/* spawn the command to operate on the field */
let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped())
.stdout(Stdio::piped()) /* piped stdout to handle output ourselves */
.spawn()
.unwrap_or_else( |e| {
while let Some(i) = input.next() {
let v = match i {
Ok(v) => v,
Err(e) => {
err(&argv[0], e);
exit(EX_IOERR.into());
},
};
let mut out = Vec::new();
if n == index { /* fop it */
/* collect arguments for the operator command */
let command_args = argv
.iter()
.clone()
.skip(command_arg + 1) /* skip the command name */
.collect::<Vec<&String>>();
/* spawn the command to operate on the field */
let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped())
/* piped stdout to handle output ourselves */
.stdout(Stdio::piped())
.spawn()
.unwrap_or_else( |e| {
err(&argv[0], e);
exit(EX_UNAVAILABLE.into());
});
/* feed the spawned program's stdin the field value */
if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(&v);
drop(child_stdin); /* stay safe! drop your children! */
let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
/* get the output with which the original field will
* be replaced */
let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */
if v.iter().last() != Some(&b'\n')
&& replace.pop() != Some(b'\n')
{
out = output.stdout;
} else {
out = replace;
}
}
fopped = true;
} else {
out = v;
}
/* since we cannot know when we're done, place a new delimiter before
* each index unless it is the 0th */
if n != 0 {
stdout().write_all(d.as_bytes()).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
}
stdout().write_all(&out).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_UNAVAILABLE.into());
exit(EX_IOERR.into());
});
/* get field we want to pipe into spawned program */
let field = fields.get(index).unwrap_or_else(|| {
n += 1;
}
if fopped {
return ExitCode::SUCCESS;
} else {
eprintln!("{}: {}: no such index in input", argv[0], index);
exit(EX_DATAERR.into());
});
/* get the stdin of the newly spawned program and feed it the field val */
if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(field.as_bytes());
drop(child_stdin); /* stay safe! drop your children! */
return EX_DATAERR.into();
}
let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
/* get the output with which the original field will be replaced */
let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */
if fields[index].chars().last() != Some('\n') /* no newline */
&& replace.pop() != Some(b'\n') { /* pop last char of replacement */
/* restore replacement to original command output if popped char was not
* a newline */
replace = output.stdout;
}
/* convert the output of the program to UTF-8 */
let new_field = String::from_utf8(replace).unwrap_or_else(|e| {
eprintln!("{}: {}", argv[0], e);
exit(EX_IOERR.into());
});
/* store the new field in the old fields vector */
fields[index] = &new_field;
/* fop it */
stdout().write_all(
fields.join(&d.to_string()).as_bytes()
).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
ExitCode::SUCCESS
}