Compare commits
No commits in common. "libdelimiter" and "trunk" have entirely different histories.
libdelimit
...
trunk
19
Makefile
19
Makefile
@ -30,8 +30,7 @@ SYSEXITS != printf '\043include <sysexits.h>\n' | cpp -M - | tr ' ' '\n' \
|
|||||||
|
|
||||||
CC ?= cc
|
CC ?= cc
|
||||||
RUSTC ?= rustc
|
RUSTC ?= rustc
|
||||||
RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \
|
RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \
|
||||||
--extern getopt=build/o/libgetopt.rlib \
|
|
||||||
--extern strerror=build/o/libstrerror.rlib \
|
--extern strerror=build/o/libstrerror.rlib \
|
||||||
--extern sysexits=build/o/libsysexits.rlib
|
--extern sysexits=build/o/libsysexits.rlib
|
||||||
CFLAGS += -I$(SYSEXITS)
|
CFLAGS += -I$(SYSEXITS)
|
||||||
@ -71,16 +70,12 @@ TESTS != printf '%s\n' "$(TESTFILES)" | xargs -n1 basename \
|
|||||||
include $(TESTFILES)
|
include $(TESTFILES)
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test: all $(TESTS) /tmp/delimit /tmp/getopt
|
test: all $(TESTS) /tmp/getopt
|
||||||
@echo $(TESTS)
|
@echo $(TESTS)
|
||||||
/tmp/delimit
|
|
||||||
/tmp/getopt
|
/tmp/getopt
|
||||||
|
|
||||||
/tmp/delimit: src/libdelimit.rs
|
|
||||||
$(RUSTC) --test -o $@ src/libdelimit.rs
|
|
||||||
|
|
||||||
/tmp/getopt: src/libgetopt.rs
|
/tmp/getopt: src/libgetopt.rs
|
||||||
$(RUSTC) --test -o $@ src/libgetopt.rs
|
$(RUSTC) --test -o /tmp/getopt src/libgetopt.rs
|
||||||
|
|
||||||
.PHONY: docs
|
.PHONY: docs
|
||||||
docs: docs/ build
|
docs: docs/ build
|
||||||
@ -93,12 +88,8 @@ docs: docs/ build
|
|||||||
include $(OS_INCLUDE)
|
include $(OS_INCLUDE)
|
||||||
|
|
||||||
.PHONY: rustlibs
|
.PHONY: rustlibs
|
||||||
rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \
|
rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \
|
||||||
build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB)
|
build/o/libsysexits.rlib $(OSLIB)
|
||||||
|
|
||||||
build/o/libdelimit.rlib: build src/libdelimit.rs
|
|
||||||
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \
|
|
||||||
-o $@ src/libdelimit.rs
|
|
||||||
|
|
||||||
build/o/libgetopt.rlib: build src/libgetopt.rs
|
build/o/libgetopt.rlib: build src/libgetopt.rs
|
||||||
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \
|
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \
|
||||||
|
|||||||
16
docs/fop.1
16
docs/fop.1
@ -11,9 +11,10 @@ fop \(en field operator
|
|||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
|
||||||
fop
|
fop
|
||||||
.RB [ -d\ delimiter ]
|
.RB ( -d )
|
||||||
index program
|
.RB [ delimiter ]
|
||||||
.RB [ arguments... ]
|
.RB index
|
||||||
|
.RB program...
|
||||||
.\"
|
.\"
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
|
|
||||||
@ -25,17 +26,8 @@ Performs operations on specified fields in data read from the standard input.
|
|||||||
Sets a delimiter by which the input data will be split into fields. The default
|
Sets a delimiter by which the input data will be split into fields. The default
|
||||||
is an ASCII record separator.
|
is an ASCII record separator.
|
||||||
.\"
|
.\"
|
||||||
.SH DIAGNOSTICS
|
|
||||||
In the event of an error, a debug message will be printed and the program will
|
|
||||||
exit with the appropriate sysexits.h(3) error code.
|
|
||||||
.\"
|
|
||||||
.SH CAVEATS
|
.SH CAVEATS
|
||||||
|
|
||||||
If the specified index does not exist in the data, the program
|
|
||||||
will print all data to the standard output before exiting with an error. If
|
|
||||||
input data is not delimited by the specified delimiter, the program will fill
|
|
||||||
memory with the contents of the stream before it is output.
|
|
||||||
|
|
||||||
Field indices are zero-indexed, which may be unexpected behavior for some users.
|
Field indices are zero-indexed, which may be unexpected behavior for some users.
|
||||||
.\"
|
.\"
|
||||||
.SH RATIONALE
|
.SH RATIONALE
|
||||||
|
|||||||
87
src/fop.rs
87
src/fop.rs
@ -18,16 +18,14 @@
|
|||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
env::args,
|
env::args,
|
||||||
io::{ Error, Write, stdin, stdout },
|
io::{ Error, Read, Write, stdin, stdout },
|
||||||
process::{ Command, ExitCode, Stdio, exit },
|
process::{ Command, ExitCode, Stdio, exit },
|
||||||
};
|
};
|
||||||
|
|
||||||
extern crate delimit;
|
|
||||||
extern crate getopt;
|
extern crate getopt;
|
||||||
extern crate strerror;
|
extern crate strerror;
|
||||||
extern crate sysexits;
|
extern crate sysexits;
|
||||||
|
|
||||||
use delimit::Delimited;
|
|
||||||
use getopt::GetOpt;
|
use getopt::GetOpt;
|
||||||
use strerror::StrError;
|
use strerror::StrError;
|
||||||
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
|
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
|
||||||
@ -92,25 +90,16 @@ fn main() -> ExitCode {
|
|||||||
exit(usage(&argv[0]).into());
|
exit(usage(&argv[0]).into());
|
||||||
});
|
});
|
||||||
|
|
||||||
let stdin = stdin().lock();
|
/* read entire standard input into memory */
|
||||||
|
let mut buf = String::new();
|
||||||
let mut input = Delimited::new(stdin, d.clone().as_bytes());
|
if let Err(e) = stdin().read_to_string(&mut buf) {
|
||||||
let mut n = 0;
|
|
||||||
|
|
||||||
let mut fopped = false;
|
|
||||||
|
|
||||||
while let Some(i) = input.next() {
|
|
||||||
let v = match i {
|
|
||||||
Ok(v) => v,
|
|
||||||
Err(e) => {
|
|
||||||
err(&argv[0], e);
|
err(&argv[0], e);
|
||||||
exit(EX_IOERR.into());
|
exit(EX_IOERR.into());
|
||||||
},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut out = Vec::new();
|
/* split the buffer by the delimiter (by default, '\u{1E}') */
|
||||||
|
let mut fields = buf.split(&d).collect::<Vec<&str>>();
|
||||||
|
|
||||||
if n == index { /* fop it */
|
|
||||||
/* collect arguments for the operator command */
|
/* collect arguments for the operator command */
|
||||||
let command_args = argv
|
let command_args = argv
|
||||||
.iter()
|
.iter()
|
||||||
@ -122,67 +111,57 @@ fn main() -> ExitCode {
|
|||||||
let mut spawned = Command::new(operator)
|
let mut spawned = Command::new(operator)
|
||||||
.args(command_args) /* spawn with the specified arguments */
|
.args(command_args) /* spawn with the specified arguments */
|
||||||
.stdin(Stdio::piped())
|
.stdin(Stdio::piped())
|
||||||
/* piped stdout to handle output ourselves */
|
.stdout(Stdio::piped()) /* piped stdout to handle output ourselves */
|
||||||
.stdout(Stdio::piped())
|
|
||||||
.spawn()
|
.spawn()
|
||||||
.unwrap_or_else( |e| {
|
.unwrap_or_else( |e| {
|
||||||
err(&argv[0], e);
|
err(&argv[0], e);
|
||||||
exit(EX_UNAVAILABLE.into());
|
exit(EX_UNAVAILABLE.into());
|
||||||
});
|
});
|
||||||
|
|
||||||
/* feed the spawned program’s stdin the field value */
|
/* get field we want to pipe into spawned program */
|
||||||
|
let field = fields.get(index).unwrap_or_else(|| {
|
||||||
|
eprintln!("{}: {}: no such index in input", argv[0], index);
|
||||||
|
exit(EX_DATAERR.into());
|
||||||
|
});
|
||||||
|
|
||||||
|
/* get the stdin of the newly spawned program and feed it the field val */
|
||||||
if let Some(mut child_stdin) = spawned.stdin.take() {
|
if let Some(mut child_stdin) = spawned.stdin.take() {
|
||||||
let _ = child_stdin.write_all(&v);
|
let _ = child_stdin.write_all(field.as_bytes());
|
||||||
drop(child_stdin); /* stay safe! drop your children! */
|
drop(child_stdin); /* stay safe! drop your children! */
|
||||||
|
}
|
||||||
|
|
||||||
let output = spawned.wait_with_output().unwrap_or_else(|e| {
|
let output = spawned.wait_with_output().unwrap_or_else(|e| {
|
||||||
err(&argv[0], e);
|
err(&argv[0], e);
|
||||||
exit(EX_IOERR.into());
|
exit(EX_IOERR.into());
|
||||||
});
|
});
|
||||||
|
|
||||||
/* get the output with which the original field will
|
/* get the output with which the original field will be replaced */
|
||||||
* be replaced */
|
|
||||||
let mut replace = output.stdout.clone();
|
let mut replace = output.stdout.clone();
|
||||||
|
|
||||||
/* pop trailing newline out if the input did not contain it */
|
/* pop trailing newline out if the input did not contain it */
|
||||||
if v.iter().last() != Some(&b'\n')
|
if fields[index].chars().last() != Some('\n') /* no newline */
|
||||||
&& replace.pop() != Some(b'\n')
|
&& replace.pop() != Some(b'\n') { /* pop last char of replacement */
|
||||||
{
|
/* restore replacement to original command output if popped char was not
|
||||||
out = output.stdout;
|
* a newline */
|
||||||
} else {
|
replace = output.stdout;
|
||||||
out = replace;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fopped = true;
|
/* convert the output of the program to UTF-8 */
|
||||||
} else {
|
let new_field = String::from_utf8(replace).unwrap_or_else(|e| {
|
||||||
out = v;
|
eprintln!("{}: {}", argv[0], e);
|
||||||
}
|
|
||||||
|
|
||||||
/* since we cannot know when we’re done, place a new delimiter before
|
|
||||||
* each index unless it is the 0th */
|
|
||||||
if n != 0 {
|
|
||||||
stdout().write_all(d.as_bytes()).unwrap_or_else(|e| {
|
|
||||||
err(&argv[0], e);
|
|
||||||
exit(EX_IOERR.into());
|
exit(EX_IOERR.into());
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
|
||||||
stdout().write_all(&out).unwrap_or_else(|e| {
|
/* store the new field in the old fields vector */
|
||||||
|
fields[index] = &new_field;
|
||||||
|
|
||||||
|
/* fop it */
|
||||||
|
stdout().write_all(
|
||||||
|
fields.join(&d.to_string()).as_bytes()
|
||||||
|
).unwrap_or_else(|e| {
|
||||||
err(&argv[0], e);
|
err(&argv[0], e);
|
||||||
exit(EX_IOERR.into());
|
exit(EX_IOERR.into());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
ExitCode::SUCCESS
|
||||||
n += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if fopped {
|
|
||||||
return ExitCode::SUCCESS;
|
|
||||||
} else {
|
|
||||||
eprintln!("{}: {}: no such index in input", argv[0], index);
|
|
||||||
return EX_DATAERR.into();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,101 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2025 Emma Tebibyte <emma@tebibyte.media>
|
|
||||||
* Copyright (c) 2025 silty silt <silt@tebibyte.media>
|
|
||||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify it
|
|
||||||
* under the terms of the GNU Affero General Public License as published by the
|
|
||||||
* Free Software Foundation, either version 3 of the License, or (at your
|
|
||||||
* option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
|
|
||||||
* for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Affero General Public License
|
|
||||||
* along with this program. If not, see https://www.gnu.org/licenses/.
|
|
||||||
*/
|
|
||||||
|
|
||||||
use std::{
|
|
||||||
io::{ Read, Result },
|
|
||||||
mem,
|
|
||||||
};
|
|
||||||
|
|
||||||
/* size in bytes of the scratch array filled by each read of the underlying
 * stream, and the initial capacity of the internal accumulation buffer */
const BUFFER_SIZE: usize = 4096;
|
|
||||||
|
|
||||||
/// Splits a byte stream into fields separated by a delimiter byte sequence.
///
/// Fields are produced through the `Iterator` implementation.
pub struct Delimited<T>
where
    T: Read,
{
    /* source the bytes are read from */
    stream: T,
    /* byte sequence that separates two fields */
    delimiter: Vec<u8>,
    /* bytes read from the stream that have not been yielded yet */
    buffer: Vec<u8>,
}
|
|
||||||
|
|
||||||
impl<T> Delimited<T> where T: Read {
|
|
||||||
pub fn new(stream: T, delimiter: &[u8]) -> Self {
|
|
||||||
Delimited {
|
|
||||||
stream,
|
|
||||||
delimiter: delimiter.to_vec(),
|
|
||||||
buffer: Vec::with_capacity(BUFFER_SIZE),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T> Iterator for Delimited<T> where T: Read {
|
|
||||||
type Item = Result<Vec<u8>>;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
let mut buf = [0; BUFFER_SIZE];
|
|
||||||
|
|
||||||
loop {
|
|
||||||
if let Some(p) = find_subslice(&self.buffer, &self.delimiter) {
|
|
||||||
let chunk = self.buffer.drain(..p).collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let _ = self.buffer.drain(..self.delimiter.len());
|
|
||||||
|
|
||||||
return Some(Ok(chunk));
|
|
||||||
}
|
|
||||||
|
|
||||||
match self.stream.read(&mut buf) {
|
|
||||||
Ok(0) => {
|
|
||||||
let _ = self.buffer.is_empty() && return None;
|
|
||||||
|
|
||||||
return Some(Ok(mem::take(&mut self.buffer)));
|
|
||||||
},
|
|
||||||
Ok(n) => {
|
|
||||||
self.buffer.extend_from_slice(&buf[..n]);
|
|
||||||
},
|
|
||||||
Err(e) => {
|
|
||||||
return Some(Err(e));
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the index of the first occurrence of `key` within `stack`.
///
/// Returns `None` when `key` does not occur in `stack`, including when `key`
/// is longer than `stack`. An empty `key` never matches and also yields
/// `None`; without that case the search would call `windows(0)`, which
/// panics.
fn find_subslice(stack: &[u8], key: &[u8]) -> Option<usize> {
    match key {
        /* slice::windows panics on a window size of zero */
        [] => None,
        /* TODO: is this optimization necessary? */
        [byte] => stack.iter().position(|b| b == byte),
        _ => stack.windows(key.len()).position(|w| w == key),
    }
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::Delimited;

    /* joins three fields with a record separator and checks that iterating
     * yields exactly those fields, in order, and nothing else */
    #[test]
    fn testing() {
        let d = '\u{1E}'.to_string();
        let input = vec!["meow", "woof", "ribbit"];
        let r = input.join(&d);

        let output = Delimited::new(r.as_bytes(), d.as_bytes())
            .collect::<std::io::Result<Vec<_>>>()
            .unwrap();

        let expected = input
            .iter()
            .map(|s| s.as_bytes().to_vec())
            .collect::<Vec<_>>();

        /* comparing whole vectors also catches missing or extra fields,
         * which a per-item loop would silently accept */
        assert_eq!(expected, output);
    }
}
|
|
||||||
Loading…
x
Reference in New Issue
Block a user