Compare commits
4 Commits
0.14.6
...
4aab77bee4
| Author | SHA1 | Date | |
|---|---|---|---|
|
4aab77bee4
|
|||
|
15039805f9
|
|||
|
31b424d205
|
|||
|
be6bd5386d
|
11
Makefile
11
Makefile
@@ -30,7 +30,8 @@ SYSEXITS != printf '\043include <sysexits.h>\n' | cpp -M - | tr ' ' '\n' \
|
||||
|
||||
CC ?= cc
|
||||
RUSTC ?= rustc
|
||||
RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \
|
||||
RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \
|
||||
--extern getopt=build/o/libgetopt.rlib \
|
||||
--extern strerror=build/o/libstrerror.rlib \
|
||||
--extern sysexits=build/o/libsysexits.rlib
|
||||
CFLAGS += -I$(SYSEXITS)
|
||||
@@ -88,8 +89,12 @@ docs: docs/ build
|
||||
include $(OS_INCLUDE)
|
||||
|
||||
.PHONY: rustlibs
|
||||
rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \
|
||||
build/o/libsysexits.rlib $(OSLIB)
|
||||
rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \
|
||||
build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB)
|
||||
|
||||
build/o/libdelimit.rlib: build src/libdelimit.rs
|
||||
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \
|
||||
-o $@ src/libdelimit.rs
|
||||
|
||||
build/o/libgetopt.rlib: build src/libgetopt.rs
|
||||
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \
|
||||
|
||||
@@ -207,7 +207,7 @@ which are not reported.
|
||||
|
||||
This program was based on the
|
||||
.BR dd (1p)
|
||||
utility as specified in POSIX. While character conversion may have been the
|
||||
utility as specified in \*(Px. While character conversion may have been the
|
||||
original intent of
|
||||
.BR dd (1p),
|
||||
it is irrelevant to its modern use. Because of this, this program eschews
|
||||
|
||||
@@ -15,7 +15,7 @@ always be returned.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
In POSIX.1-2017,
|
||||
In \*(Px.1-2024,
|
||||
.BR false (1p)
|
||||
exists for the construction of control flow and loops based on a failure. This
|
||||
implementation functions as described in that standard.
|
||||
|
||||
16
docs/fop.1
16
docs/fop.1
@@ -11,10 +11,9 @@ fop \(en field operator
|
||||
.SH SYNOPSIS
|
||||
|
||||
fop
|
||||
.RB ( -d )
|
||||
.RB [ delimiter ]
|
||||
.RB index
|
||||
.RB program...
|
||||
.RB [ -d\ delimiter ]
|
||||
index program
|
||||
.RB [ arguments... ]
|
||||
.\"
|
||||
.SH DESCRIPTION
|
||||
|
||||
@@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input.
|
||||
Sets a delimiter by which the input data will be split into fields. The default
|
||||
is an ASCII record separator.
|
||||
.\"
|
||||
.SH DIAGNOSTICS
|
||||
In the event of an error, a debug message will be printed and the program will
|
||||
exit with the appropriate sysexits.h(3) error code.
|
||||
.\"
|
||||
.SH CAVEATS
|
||||
|
||||
If the specified index does not exist in the data, the program
|
||||
will print all data to the standard output before exiting with an error. If
|
||||
input data is not delimited by the specified delimiter, the program will fill
|
||||
memory with the contents of the stream before it is output.
|
||||
|
||||
Field indices are zero-indexed, which may be unexpected behavior for some users.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
@@ -75,7 +75,7 @@ this is elegant but unintuitive.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
The traditional tool for integer comparisons in POSIX and other Unix shells has
|
||||
The traditional tool for integer comparisons in \*(Px and other Unix shells has
|
||||
been
|
||||
.BR test (1).
|
||||
This tool also handles string comparisons and file scrutiny. These parts of its
|
||||
|
||||
@@ -57,7 +57,7 @@ The
|
||||
.BR cat (1p)
|
||||
and
|
||||
.BR tee (1p)
|
||||
programs specified in POSIX together provide similar functionality. The
|
||||
programs specified in \*(Px together provide similar functionality. The
|
||||
separation of the two sets of functionality into separate APIs seemed
|
||||
unnecessary.
|
||||
.\"
|
||||
|
||||
@@ -45,7 +45,7 @@ The program operates in single-byte chunks regardless of intended encoding.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
POSIX currently lacks a way to display non-printing characters in the terminal
|
||||
\*(Px currently lacks a way to display non-printing characters in the terminal
|
||||
using a standard tool. A popular extension to
|
||||
.BR cat (1p),
|
||||
the
|
||||
|
||||
@@ -58,7 +58,7 @@ hardware of any given machine.
|
||||
|
||||
An infix notation calculation utility,
|
||||
.BR bc (1p),
|
||||
is included in the POSIX standard, but does not accept expressions as arguments;
|
||||
is included in the \*(Px standard, but does not accept expressions as arguments;
|
||||
in scripts, any predefined, non-interactive input must be piped into the
|
||||
program. A
|
||||
.BR dc (1)
|
||||
|
||||
@@ -47,13 +47,13 @@ visual similarity and not byte similarity.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
The traditional tool for string comparisons in POSIX and other Unix shells has
|
||||
The traditional tool for string comparisons in \*(Px and other Unix shells has
|
||||
been
|
||||
.BR test (1).
|
||||
This tool also handles integer comparisons and file scrutiny. These parts of its
|
||||
functionality have been broken out into multiple utilities.
|
||||
|
||||
This program\(cqs functionality may be performed on a POSIX-compliant system
|
||||
This program\(cqs functionality may be performed on a \*(Px-compliant system
|
||||
with
|
||||
.BR test (1p).
|
||||
.\"
|
||||
|
||||
@@ -15,7 +15,7 @@ always be returned.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
In \fIPOSIX.1-2017\fP,
|
||||
In \fI\*(Px.1-2024\fP,
|
||||
.BR true (1p)
|
||||
exists for the construction of control flow and loops based on a success. This
|
||||
implementation functions as described in that standard.
|
||||
|
||||
151
src/fop.rs
151
src/fop.rs
@@ -18,14 +18,16 @@
|
||||
|
||||
use std::{
|
||||
env::args,
|
||||
io::{ Error, Read, Write, stdin, stdout },
|
||||
io::{ Error, Write, stdin, stdout },
|
||||
process::{ Command, ExitCode, Stdio, exit },
|
||||
};
|
||||
|
||||
extern crate delimit;
|
||||
extern crate getopt;
|
||||
extern crate strerror;
|
||||
extern crate sysexits;
|
||||
|
||||
use delimit::Delimited;
|
||||
use getopt::GetOpt;
|
||||
use strerror::StrError;
|
||||
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
|
||||
@@ -90,78 +92,97 @@ fn main() -> ExitCode {
|
||||
exit(usage(&argv[0]).into());
|
||||
});
|
||||
|
||||
/* read entire standard input into memory */
|
||||
let mut buf = String::new();
|
||||
if let Err(e) = stdin().read_to_string(&mut buf) {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
};
|
||||
let stdin = Box::new(stdin().lock());
|
||||
|
||||
/* split the buffer by the delimiter (by default, '\u{1E}') */
|
||||
let mut fields = buf.split(&d).collect::<Vec<&str>>();
|
||||
let mut input = Delimited::new(stdin, d.clone().as_bytes());
|
||||
let mut n = 0;
|
||||
|
||||
/* collect arguments for the operator command */
|
||||
let command_args = argv
|
||||
.iter()
|
||||
.clone()
|
||||
.skip(command_arg + 1) /* skip the command name */
|
||||
.collect::<Vec<&String>>();
|
||||
let mut fopped = false;
|
||||
|
||||
/* spawn the command to operate on the field */
|
||||
let mut spawned = Command::new(operator)
|
||||
.args(command_args) /* spawn with the specified arguments */
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped()) /* piped stdout to handle output ourselves */
|
||||
.spawn()
|
||||
.unwrap_or_else( |e| {
|
||||
while let Some(i) = input.next() {
|
||||
let v = match i {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
},
|
||||
};
|
||||
|
||||
let mut out = Vec::new();
|
||||
|
||||
if n == index { /* fop it */
|
||||
/* collect arguments for the operator command */
|
||||
let command_args = argv
|
||||
.iter()
|
||||
.clone()
|
||||
.skip(command_arg + 1) /* skip the command name */
|
||||
.collect::<Vec<&String>>();
|
||||
|
||||
/* spawn the command to operate on the field */
|
||||
let mut spawned = Command::new(operator)
|
||||
.args(command_args) /* spawn with the specified arguments */
|
||||
.stdin(Stdio::piped())
|
||||
/* piped stdout to handle output ourselves */
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()
|
||||
.unwrap_or_else( |e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_UNAVAILABLE.into());
|
||||
});
|
||||
|
||||
/* feed the spawned program’s stdin the field value */
|
||||
if let Some(mut child_stdin) = spawned.stdin.take() {
|
||||
let _ = child_stdin.write_all(&v);
|
||||
drop(child_stdin); /* stay safe! drop your children! */
|
||||
|
||||
let output = spawned.wait_with_output().unwrap_or_else(|e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
|
||||
/* get the output with which the original field will
|
||||
* be replaced */
|
||||
let mut replace = output.stdout.clone();
|
||||
|
||||
/* pop trailing newline out if the input did not contain it */
|
||||
if v.iter().last() != Some(&b'\n')
|
||||
&& replace.pop() != Some(b'\n')
|
||||
{
|
||||
out = output.stdout;
|
||||
} else {
|
||||
out = replace;
|
||||
}
|
||||
}
|
||||
|
||||
fopped = true;
|
||||
} else {
|
||||
out = v;
|
||||
}
|
||||
|
||||
/* since we cannot know when we’re done, place a new delimiter before
|
||||
* each index unless it is the 0th */
|
||||
if n != 0 {
|
||||
stdout().write_all(d.as_bytes()).unwrap_or_else(|e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
}
|
||||
|
||||
stdout().write_all(&out).unwrap_or_else(|e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_UNAVAILABLE.into());
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
|
||||
/* get field we want to pipe into spawned program */
|
||||
let field = fields.get(index).unwrap_or_else(|| {
|
||||
|
||||
n += 1;
|
||||
}
|
||||
|
||||
if fopped {
|
||||
return ExitCode::SUCCESS;
|
||||
} else {
|
||||
eprintln!("{}: {}: no such index in input", argv[0], index);
|
||||
exit(EX_DATAERR.into());
|
||||
});
|
||||
|
||||
/* get the stdin of the newly spawned program and feed it the field val */
|
||||
if let Some(mut child_stdin) = spawned.stdin.take() {
|
||||
let _ = child_stdin.write_all(field.as_bytes());
|
||||
drop(child_stdin); /* stay safe! drop your children! */
|
||||
return EX_DATAERR.into();
|
||||
}
|
||||
|
||||
let output = spawned.wait_with_output().unwrap_or_else(|e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
|
||||
/* get the output with which the original field will be replaced */
|
||||
let mut replace = output.stdout.clone();
|
||||
|
||||
/* pop trailing newline out if the input did not contain it */
|
||||
if fields[index].chars().last() != Some('\n') /* no newline */
|
||||
&& replace.pop() != Some(b'\n') { /* pop last char of replacement */
|
||||
/* restore replacement to original command output if popped char was not
|
||||
* a newline */
|
||||
replace = output.stdout;
|
||||
}
|
||||
|
||||
/* convert the output of the program to UTF-8 */
|
||||
let new_field = String::from_utf8(replace).unwrap_or_else(|e| {
|
||||
eprintln!("{}: {}", argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
|
||||
/* store the new field in the old fields vector */
|
||||
fields[index] = &new_field;
|
||||
|
||||
/* fop it */
|
||||
stdout().write_all(
|
||||
fields.join(&d.to_string()).as_bytes()
|
||||
).unwrap_or_else(|e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
|
||||
ExitCode::SUCCESS
|
||||
}
|
||||
|
||||
91
src/libdelimit.rs
Normal file
91
src/libdelimit.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Emma Tebibyte <emma@tebibyte.media>
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Affero General Public License as published by the
|
||||
* Free Software Foundation, either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see https://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
use std::{
|
||||
io::{ Read, Result },
|
||||
mem::self,
|
||||
};
|
||||
|
||||
/* size in bytes of the scratch array used for each read from the stream, and
 * the capacity initially reserved for the pending-data buffer */
const BUFFER_SIZE: usize = 4096;

/*
 * A reader that splits a byte stream into fields separated by a delimiter.
 *
 * stream:    the source the bytes are read from
 * delimiter: the byte sequence that separates fields
 * buffer:    bytes already read from the stream but not yet handed out
 */
pub struct Delimited {
    stream: Box<dyn Read>,
    delimiter: Vec<u8>,
    buffer: Vec<u8>,
}

impl Delimited {
    /*
     * Creates a Delimited over stream that splits on delimiter. The delimiter
     * bytes are copied, so the caller keeps ownership of its slice. Nothing is
     * read from the stream here.
     */
    pub fn new(stream: Box<dyn Read>, delimiter: &[u8]) -> Self {
        let delimiter = delimiter.to_owned();
        let buffer = Vec::with_capacity(BUFFER_SIZE);

        Self { stream, delimiter, buffer }
    }
}
|
||||
|
||||
impl Iterator for Delimited {
|
||||
type Item = Result<Vec<u8>>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let mut buf = [0; BUFFER_SIZE];
|
||||
|
||||
loop {
|
||||
if let Some(p) = find_subslice(&self.buffer, &self.delimiter) {
|
||||
let chunk = self.buffer.drain(..p).collect::<Vec<_>>();
|
||||
|
||||
let _ = self.buffer.drain(..self.delimiter.len());
|
||||
|
||||
return Some(Ok(chunk));
|
||||
}
|
||||
|
||||
match self.stream.read(&mut buf) {
|
||||
Ok(0) => {
|
||||
if self.buffer.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
return Some(Ok(mem::take(&mut self.buffer)));
|
||||
},
|
||||
Ok(n) => {
|
||||
let content = &buf[..n];
|
||||
self.buffer.extend_from_slice(&content);
|
||||
},
|
||||
Err(e) => {
|
||||
return Some(Err(e));
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Returns the index at which key first occurs in stack, or None if it does
 * not occur.
 *
 * An empty key never matches. Reporting a zero-length match at index 0 would
 * let a caller that splits on the result consume nothing and loop forever.
 */
fn find_subslice(stack: &[u8], key: &[u8]) -> Option<usize> {
    match key {
        [] => None,
        /* a single-byte key only needs a plain byte scan */
        [byte] => stack.iter().position(|b| b == byte),
        /* windows() yields nothing when key is longer than stack, so that
         * case needs no separate check */
        _ => stack.windows(key.len()).position(|window| window == key),
    }
}
|
||||
Reference in New Issue
Block a user