4 Commits

12 changed files with 206 additions and 81 deletions

View File

@@ -30,7 +30,8 @@ SYSEXITS != printf '\043include <sysexits.h>\n' | cpp -M - | tr ' ' '\n' \
CC ?= cc
RUSTC ?= rustc
RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \
RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \
--extern getopt=build/o/libgetopt.rlib \
--extern strerror=build/o/libstrerror.rlib \
--extern sysexits=build/o/libsysexits.rlib
CFLAGS += -I$(SYSEXITS)
@@ -88,8 +89,12 @@ docs: docs/ build
include $(OS_INCLUDE)
.PHONY: rustlibs
rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \
build/o/libsysexits.rlib $(OSLIB)
rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \
build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB)
build/o/libdelimit.rlib: build src/libdelimit.rs
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \
-o $@ src/libdelimit.rs
build/o/libgetopt.rlib: build src/libgetopt.rs
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \

View File

@@ -207,7 +207,7 @@ which are not reported.
This program was based on the
.BR dd (1p)
utility as specified in POSIX. While character conversion may have been the
utility as specified in \*(Px. While character conversion may have been the
original intent of
.BR dd (1p),
it is irrelevant to its modern use. Because of this, this program eschews

View File

@@ -15,7 +15,7 @@ always be returned.
.\"
.SH RATIONALE
In POSIX.1-2017,
In \*(Px.1-2024,
.BR false (1p)
exists for the construction of control flow and loops based on a failure. This
implementation functions as described in that standard.

View File

@@ -11,10 +11,9 @@ fop \(en field operator
.SH SYNOPSIS
fop
.RB ( -d )
.RB [ delimiter ]
.RB index
.RB program...
.RB [ -d\ delimiter ]
index program
.RB [ arguments... ]
.\"
.SH DESCRIPTION
@@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input.
Sets a delimiter by which the input data will be split into fields. The default
is an ASCII record separator.
.\"
.SH DIAGNOSTICS
in the event of an error, a debug message will be printed and the program will
exit with the appropriate sysexits.h(3) error code.
.\"
.SH CAVEATS
If the specified index does not exist in the data, the program
will print all data to the standard output before exiting with an error. If
input data is not delimited by the specified delimiter, the program will fill
memory with the contents of the stream before it is output.
Field indices are zero-indexed, which may be unexpected behavior for some users.
.\"
.SH RATIONALE

View File

@@ -75,7 +75,7 @@ this is elegant but unintuitive.
.\"
.SH RATIONALE
The traditional tool for integer comparisons in POSIX and other Unix shells has
The traditional tool for integer comparisons in \*(Px and other Unix shells has
been
.BR test (1).
This tool also handles string comparisons and file scrutiny. These parts of its

View File

@@ -57,7 +57,7 @@ The
.BR cat (1p)
and
.BR tee (1p)
programs specified in POSIX together provide similar functionality. The
programs specified in \*(Px together provide similar functionality. The
separation of the two sets of functionality into separate APIs seemed
unncessary.
.\"

View File

@@ -45,7 +45,7 @@ The program operates in single-byte chunks regardless of intended encoding.
.\"
.SH RATIONALE
POSIX currently lacks a way to display non-printing characters in the terminal
\*(Px currently lacks a way to display non-printing characters in the terminal
using a standard tool. A popular extension to
.BR cat (1p),
the

View File

@@ -58,7 +58,7 @@ hardware of any given machine.
An infix notation calculation utility,
.BR bc (1p),
is included in the POSIX standard, but does not accept expressions as arguments;
is included in the \*(Px standard, but does not accept expressions as arguments;
in scripts, any predefined, non-interactive input must be piped into the
program. A
.BR dc (1)

View File

@@ -47,13 +47,13 @@ visual similarity and not byte similarity.
.\"
.SH RATIONALE
The traditional tool for string comparisons in POSIX and other Unix shells has
The traditional tool for string comparisons in \*(Px and other Unix shells has
been
.BR test (1).
This tool also handles integer comparisons and file scrutiny. These parts of its
functionality have been broken out into multiple utilities.
This program\(cqs functionality may be performed on a POSIX-compliant system
This program\(cqs functionality may be performed on a \*(Px-compliant system
with
.BR test (1p).
.\"

View File

@@ -15,7 +15,7 @@ always be returned.
.\"
.SH RATIONALE
In \fIPOSIX.1-2017\fP,
In \fI\*(Px.1-2024\fP,
.BR true (1p)
exists for the construction of control flow and loops based on a success. This
implementation functions as described in that standard.

View File

@@ -18,14 +18,16 @@
use std::{
env::args,
io::{ Error, Read, Write, stdin, stdout },
io::{ Error, Write, stdin, stdout },
process::{ Command, ExitCode, Stdio, exit },
};
extern crate delimit;
extern crate getopt;
extern crate strerror;
extern crate sysexits;
use delimit::Delimited;
use getopt::GetOpt;
use strerror::StrError;
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
@@ -90,78 +92,97 @@ fn main() -> ExitCode {
exit(usage(&argv[0]).into());
});
/* read entire standard input into memory */
let mut buf = String::new();
if let Err(e) = stdin().read_to_string(&mut buf) {
err(&argv[0], e);
exit(EX_IOERR.into());
};
let stdin = Box::new(stdin().lock());
/* split the buffer by the delimiter (by default, '\u{1E}') */
let mut fields = buf.split(&d).collect::<Vec<&str>>();
let mut input = Delimited::new(stdin, d.clone().as_bytes());
let mut n = 0;
/* collect arguments for the operator command */
let command_args = argv
.iter()
.clone()
.skip(command_arg + 1) /* skip the command name */
.collect::<Vec<&String>>();
let mut fopped = false;
/* spawn the command to operate on the field */
let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped())
.stdout(Stdio::piped()) /* piped stdout to handle output ourselves */
.spawn()
.unwrap_or_else( |e| {
while let Some(i) = input.next() {
let v = match i {
Ok(v) => v,
Err(e) => {
err(&argv[0], e);
exit(EX_IOERR.into());
},
};
let mut out = Vec::new();
if n == index { /* fop it */
/* collect arguments for the operator command */
let command_args = argv
.iter()
.clone()
.skip(command_arg + 1) /* skip the command name */
.collect::<Vec<&String>>();
/* spawn the command to operate on the field */
let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped())
/* piped stdout to handle output ourselves */
.stdout(Stdio::piped())
.spawn()
.unwrap_or_else( |e| {
err(&argv[0], e);
exit(EX_UNAVAILABLE.into());
});
/* feed the spawned programs stdin the field value */
if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(&v);
drop(child_stdin); /* stay safe! drop your children! */
let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
/* get the output with which the original field will
* be replaced */
let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */
if v.iter().last() != Some(&b'\n')
&& replace.pop() != Some(b'\n')
{
out = output.stdout;
} else {
out = replace;
}
}
fopped = true;
} else {
out = v;
}
/* since we cannot know when were done, place a new delimiter before
* each index unless it is the 0th */
if n != 0 {
stdout().write_all(d.as_bytes()).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
}
stdout().write_all(&out).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_UNAVAILABLE.into());
exit(EX_IOERR.into());
});
/* get field we want to pipe into spawned program */
let field = fields.get(index).unwrap_or_else(|| {
n += 1;
}
if fopped {
return ExitCode::SUCCESS;
} else {
eprintln!("{}: {}: no such index in input", argv[0], index);
exit(EX_DATAERR.into());
});
/* get the stdin of the newly spawned program and feed it the field val */
if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(field.as_bytes());
drop(child_stdin); /* stay safe! drop your children! */
return EX_DATAERR.into();
}
let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
/* get the output with which the original field will be replaced */
let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */
if fields[index].chars().last() != Some('\n') /* no newline */
&& replace.pop() != Some(b'\n') { /* pop last char of replacement */
/* restore replacement to original command output if popped char was not
* a newline */
replace = output.stdout;
}
/* convert the output of the program to UTF-8 */
let new_field = String::from_utf8(replace).unwrap_or_else(|e| {
eprintln!("{}: {}", argv[0], e);
exit(EX_IOERR.into());
});
/* store the new field in the old fields vector */
fields[index] = &new_field;
/* fop it */
stdout().write_all(
fields.join(&d.to_string()).as_bytes()
).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
ExitCode::SUCCESS
}

91
src/libdelimit.rs Normal file
View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2025 Emma Tebibyte <emma@tebibyte.media>
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Affero General Public License as published by the
* Free Software Foundation, either version 3 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
* for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
use std::{
io::{ Read, Result },
mem::self,
};
const BUFFER_SIZE: usize = 4096;
pub struct Delimited {
stream: Box<dyn Read>,
delimiter: Vec<u8>,
buffer: Vec<u8>
}
impl Delimited {
pub fn new(stream: Box<dyn Read>, delimiter: &[u8]) -> Self {
Delimited {
stream,
delimiter: delimiter.to_vec(),
buffer: Vec::with_capacity(BUFFER_SIZE),
}
}
}
impl Iterator for Delimited {
type Item = Result<Vec<u8>>;
fn next(&mut self) -> Option<Self::Item> {
let mut buf = [0; BUFFER_SIZE];
loop {
if let Some(p) = find_subslice(&self.buffer, &self.delimiter) {
let chunk = self.buffer.drain(..p).collect::<Vec<_>>();
let _ = self.buffer.drain(..self.delimiter.len());
return Some(Ok(chunk));
}
match self.stream.read(&mut buf) {
Ok(0) => {
if self.buffer.is_empty() {
return None;
}
return Some(Ok(mem::take(&mut self.buffer)));
},
Ok(n) => {
let content = &buf[..n];
self.buffer.extend_from_slice(&content);
},
Err(e) => {
return Some(Err(e));
},
}
}
}
}
fn find_subslice(stack: &[u8], key: &[u8]) -> Option<usize> {
if key.len() == 1 {
return stack.iter().position(|&b| b == key[0]);
}
if key.len() > stack.len() {
return None;
}
for i in 0..=stack.len() - key.len() {
if &stack[i..i + key.len()] == key {
return Some(i);
}
}
None
}