Compare commits
	
		
			3 Commits
		
	
	
		
			be6bd5386d
			...
			4aab77bee4
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 4aab77bee4 | |||
| 15039805f9 | |||
| 31b424d205 | 
							
								
								
									
										11
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								Makefile
									
									
									
									
									
								
							@ -30,7 +30,8 @@ SYSEXITS != printf '\043include <sysexits.h>\n' | cpp -M - | tr ' ' '\n' \
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
CC ?= cc
 | 
					CC ?= cc
 | 
				
			||||||
RUSTC ?= rustc
 | 
					RUSTC ?= rustc
 | 
				
			||||||
RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \
 | 
					RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \
 | 
				
			||||||
 | 
						--extern getopt=build/o/libgetopt.rlib \
 | 
				
			||||||
	--extern strerror=build/o/libstrerror.rlib \
 | 
						--extern strerror=build/o/libstrerror.rlib \
 | 
				
			||||||
	--extern sysexits=build/o/libsysexits.rlib
 | 
						--extern sysexits=build/o/libsysexits.rlib
 | 
				
			||||||
CFLAGS += -I$(SYSEXITS)
 | 
					CFLAGS += -I$(SYSEXITS)
 | 
				
			||||||
@ -88,8 +89,12 @@ docs: docs/ build
 | 
				
			|||||||
include $(OS_INCLUDE)
 | 
					include $(OS_INCLUDE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.PHONY: rustlibs
 | 
					.PHONY: rustlibs
 | 
				
			||||||
rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \
 | 
					rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \
 | 
				
			||||||
	 build/o/libsysexits.rlib $(OSLIB)
 | 
						 build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					build/o/libdelimit.rlib: build src/libdelimit.rs
 | 
				
			||||||
 | 
						$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \
 | 
				
			||||||
 | 
							-o $@ src/libdelimit.rs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
build/o/libgetopt.rlib: build src/libgetopt.rs
 | 
					build/o/libgetopt.rlib: build src/libgetopt.rs
 | 
				
			||||||
	$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \
 | 
						$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										16
									
								
								docs/fop.1
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								docs/fop.1
									
									
									
									
									
								
							@ -11,10 +11,9 @@ fop \(en field operator
 | 
				
			|||||||
.SH SYNOPSIS
 | 
					.SH SYNOPSIS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fop
 | 
					fop
 | 
				
			||||||
.RB ( -d )
 | 
					.RB [ -d\ delimiter ]
 | 
				
			||||||
.RB [ delimiter ]
 | 
					index program
 | 
				
			||||||
.RB index 
 | 
					.RB [ arguments... ]
 | 
				
			||||||
.RB program...
 | 
					 | 
				
			||||||
.\"
 | 
					.\"
 | 
				
			||||||
.SH DESCRIPTION
 | 
					.SH DESCRIPTION
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input.
 | 
				
			|||||||
Sets a delimiter by which the input data will be split into fields. The default
 | 
					Sets a delimiter by which the input data will be split into fields. The default
 | 
				
			||||||
is an ASCII record separator.
 | 
					is an ASCII record separator.
 | 
				
			||||||
.\"
 | 
					.\"
 | 
				
			||||||
 | 
					.SH DIAGNOSTICS
 | 
				
			||||||
 | 
					in the event of an error, a debug message will be printed and the program will
 | 
				
			||||||
 | 
					exit with the appropriate sysexits.h(3) error code.
 | 
				
			||||||
 | 
					.\"
 | 
				
			||||||
.SH CAVEATS
 | 
					.SH CAVEATS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If the specified index does not exist in the data, the program
 | 
				
			||||||
 | 
					will print all data to the standard output before exiting with an error. If
 | 
				
			||||||
 | 
					input data is not delimited by the specified delimiter, the program will fill
 | 
				
			||||||
 | 
					memory with the contents of the stream before it is output.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Field indices are zero-indexed, which may be unexpected behavior for some users.
 | 
					Field indices are zero-indexed, which may be unexpected behavior for some users.
 | 
				
			||||||
.\"
 | 
					.\"
 | 
				
			||||||
.SH RATIONALE
 | 
					.SH RATIONALE
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										151
									
								
								src/fop.rs
									
									
									
									
									
								
							
							
						
						
									
										151
									
								
								src/fop.rs
									
									
									
									
									
								
							@ -18,14 +18,16 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
use std::{
 | 
					use std::{
 | 
				
			||||||
	env::args,
 | 
						env::args,
 | 
				
			||||||
	io::{ Error, Read, Write, stdin, stdout },
 | 
						io::{ Error, Write, stdin, stdout },
 | 
				
			||||||
	process::{ Command, ExitCode, Stdio, exit },
 | 
						process::{ Command, ExitCode, Stdio, exit },
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern crate delimit;
 | 
				
			||||||
extern crate getopt;
 | 
					extern crate getopt;
 | 
				
			||||||
extern crate strerror;
 | 
					extern crate strerror;
 | 
				
			||||||
extern crate sysexits;
 | 
					extern crate sysexits;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use delimit::Delimited;
 | 
				
			||||||
use getopt::GetOpt;
 | 
					use getopt::GetOpt;
 | 
				
			||||||
use strerror::StrError;
 | 
					use strerror::StrError;
 | 
				
			||||||
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
 | 
					use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
 | 
				
			||||||
@ -90,78 +92,97 @@ fn main() -> ExitCode {
 | 
				
			|||||||
		exit(usage(&argv[0]).into());
 | 
							exit(usage(&argv[0]).into());
 | 
				
			||||||
	});
 | 
						});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* read entire standard input into memory */
 | 
						let stdin = Box::new(stdin().lock());
 | 
				
			||||||
	let mut buf = String::new();
 | 
					 | 
				
			||||||
	if let Err(e) = stdin().read_to_string(&mut buf) {
 | 
					 | 
				
			||||||
		err(&argv[0], e);
 | 
					 | 
				
			||||||
		exit(EX_IOERR.into());
 | 
					 | 
				
			||||||
	};
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* split the buffer by the delimiter (by default, '\u{1E}') */
 | 
						let mut input = Delimited::new(stdin, d.clone().as_bytes());
 | 
				
			||||||
	let mut fields = buf.split(&d).collect::<Vec<&str>>();
 | 
						let mut n = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* collect arguments for the operator command */
 | 
						let mut fopped = false;
 | 
				
			||||||
	let command_args = argv
 | 
					 | 
				
			||||||
		.iter()
 | 
					 | 
				
			||||||
		.clone()
 | 
					 | 
				
			||||||
		.skip(command_arg + 1) /* skip the command name */
 | 
					 | 
				
			||||||
		.collect::<Vec<&String>>();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* spawn the command to operate on the field */
 | 
						while let Some(i) = input.next() {
 | 
				
			||||||
	let mut spawned = Command::new(operator)
 | 
							let v = match i {
 | 
				
			||||||
		.args(command_args) /* spawn with the specified arguments */
 | 
								Ok(v) => v,
 | 
				
			||||||
		.stdin(Stdio::piped())
 | 
								Err(e) => {
 | 
				
			||||||
		.stdout(Stdio::piped()) /* piped stdout to handle output ourselves */
 | 
									err(&argv[0], e);
 | 
				
			||||||
		.spawn()
 | 
									exit(EX_IOERR.into());
 | 
				
			||||||
		.unwrap_or_else( |e| {
 | 
								},
 | 
				
			||||||
 | 
							};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							let mut out = Vec::new();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if n == index { /* fop it */
 | 
				
			||||||
 | 
								/* collect arguments for the operator command */
 | 
				
			||||||
 | 
								let command_args = argv
 | 
				
			||||||
 | 
									.iter()
 | 
				
			||||||
 | 
									.clone()
 | 
				
			||||||
 | 
									.skip(command_arg + 1) /* skip the command name */
 | 
				
			||||||
 | 
									.collect::<Vec<&String>>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								/* spawn the command to operate on the field */
 | 
				
			||||||
 | 
								let mut spawned = Command::new(operator)
 | 
				
			||||||
 | 
									.args(command_args) /* spawn with the specified arguments */
 | 
				
			||||||
 | 
									.stdin(Stdio::piped())
 | 
				
			||||||
 | 
									/* piped stdout to handle output ourselves */
 | 
				
			||||||
 | 
									.stdout(Stdio::piped()) 
 | 
				
			||||||
 | 
									.spawn()
 | 
				
			||||||
 | 
									.unwrap_or_else( |e| {
 | 
				
			||||||
 | 
										err(&argv[0], e);
 | 
				
			||||||
 | 
										exit(EX_UNAVAILABLE.into());
 | 
				
			||||||
 | 
									});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								/* feed the spawned program’s stdin the field value */
 | 
				
			||||||
 | 
								if let Some(mut child_stdin) = spawned.stdin.take() {
 | 
				
			||||||
 | 
									let _ = child_stdin.write_all(&v);
 | 
				
			||||||
 | 
									drop(child_stdin); /* stay safe! drop your children! */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									let output = spawned.wait_with_output().unwrap_or_else(|e| {
 | 
				
			||||||
 | 
										err(&argv[0], e);
 | 
				
			||||||
 | 
										exit(EX_IOERR.into());
 | 
				
			||||||
 | 
								});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									/* get the output with which the original field will
 | 
				
			||||||
 | 
									 * be replaced */
 | 
				
			||||||
 | 
									let mut replace = output.stdout.clone();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									/* pop trailing newline out if the input did not contain it */
 | 
				
			||||||
 | 
									if v.iter().last() != Some(&b'\n')
 | 
				
			||||||
 | 
										&& replace.pop() != Some(b'\n')
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										out = output.stdout;
 | 
				
			||||||
 | 
									} else {
 | 
				
			||||||
 | 
										out = replace;
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								fopped = true;
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								out = v;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* since we cannot know when we’re done, place a new delimiter before
 | 
				
			||||||
 | 
							 * each index unless it is the 0th */
 | 
				
			||||||
 | 
							if n != 0 {
 | 
				
			||||||
 | 
								stdout().write_all(d.as_bytes()).unwrap_or_else(|e| {
 | 
				
			||||||
 | 
									err(&argv[0], e);
 | 
				
			||||||
 | 
									exit(EX_IOERR.into());
 | 
				
			||||||
 | 
								});
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							stdout().write_all(&out).unwrap_or_else(|e| {
 | 
				
			||||||
			err(&argv[0], e);
 | 
								err(&argv[0], e);
 | 
				
			||||||
			exit(EX_UNAVAILABLE.into());
 | 
								exit(EX_IOERR.into());
 | 
				
			||||||
		});
 | 
							});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* get field we want to pipe into spawned program */
 | 
					
 | 
				
			||||||
	let field = fields.get(index).unwrap_or_else(|| {
 | 
							n += 1;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if fopped {
 | 
				
			||||||
 | 
							return ExitCode::SUCCESS;
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
		eprintln!("{}: {}: no such index in input", argv[0], index);
 | 
							eprintln!("{}: {}: no such index in input", argv[0], index);
 | 
				
			||||||
		exit(EX_DATAERR.into());
 | 
							return EX_DATAERR.into();
 | 
				
			||||||
	});
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* get the stdin of the newly spawned program and feed it the field val */
 | 
					 | 
				
			||||||
	if let Some(mut child_stdin) = spawned.stdin.take() {
 | 
					 | 
				
			||||||
		let _ = child_stdin.write_all(field.as_bytes());
 | 
					 | 
				
			||||||
		drop(child_stdin); /* stay safe! drop your children! */
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	let output = spawned.wait_with_output().unwrap_or_else(|e| {
 | 
					 | 
				
			||||||
		err(&argv[0], e);
 | 
					 | 
				
			||||||
		exit(EX_IOERR.into());
 | 
					 | 
				
			||||||
	});
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* get the output with which the original field will be replaced */
 | 
					 | 
				
			||||||
	let mut replace = output.stdout.clone();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* pop trailing newline out if the input did not contain it */
 | 
					 | 
				
			||||||
	if fields[index].chars().last() != Some('\n') /* no newline */
 | 
					 | 
				
			||||||
		&& replace.pop() != Some(b'\n') { /* pop last char of replacement */
 | 
					 | 
				
			||||||
		/* restore replacement to original command output if popped char was not
 | 
					 | 
				
			||||||
		 * a newline */
 | 
					 | 
				
			||||||
		replace = output.stdout;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* convert the output of the program to UTF-8 */
 | 
					 | 
				
			||||||
	let new_field = String::from_utf8(replace).unwrap_or_else(|e| {
 | 
					 | 
				
			||||||
		eprintln!("{}: {}", argv[0], e);
 | 
					 | 
				
			||||||
		exit(EX_IOERR.into());
 | 
					 | 
				
			||||||
	});
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* store the new field in the old fields vector */
 | 
					 | 
				
			||||||
	fields[index] = &new_field;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* fop it */ 
 | 
					 | 
				
			||||||
	stdout().write_all(
 | 
					 | 
				
			||||||
		fields.join(&d.to_string()).as_bytes()
 | 
					 | 
				
			||||||
	).unwrap_or_else(|e| {
 | 
					 | 
				
			||||||
		err(&argv[0], e);
 | 
					 | 
				
			||||||
		exit(EX_IOERR.into());
 | 
					 | 
				
			||||||
	});
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	ExitCode::SUCCESS
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										91
									
								
								src/libdelimit.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								src/libdelimit.rs
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,91 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright (c) 2025 Emma Tebibyte <emma@tebibyte.media>
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * This program is free software: you can redistribute it and/or modify it
 | 
				
			||||||
 | 
					 * under the terms of the GNU Affero General Public License as published by the
 | 
				
			||||||
 | 
					 * Free Software Foundation, either version 3 of the License, or (at your
 | 
				
			||||||
 | 
					 * option) any later version.
 | 
				
			||||||
 | 
					 * 
 | 
				
			||||||
 | 
					 * This program is distributed in the hope that it will be useful, but WITHOUT
 | 
				
			||||||
 | 
					 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 | 
				
			||||||
 | 
					 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
 | 
				
			||||||
 | 
					 * for more details.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * You should have received a copy of the GNU Affero General Public License
 | 
				
			||||||
 | 
					 * along with this program. If not, see https://www.gnu.org/licenses/.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use std::{
 | 
				
			||||||
 | 
					    io::{ Read, Result },
 | 
				
			||||||
 | 
					    mem::self,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const BUFFER_SIZE: usize = 4096;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub struct Delimited {
 | 
				
			||||||
 | 
					    stream: Box<dyn Read>,
 | 
				
			||||||
 | 
					    delimiter: Vec<u8>,
 | 
				
			||||||
 | 
					    buffer: Vec<u8>
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Delimited {
 | 
				
			||||||
 | 
						pub fn new(stream: Box<dyn Read>, delimiter: &[u8]) -> Self {
 | 
				
			||||||
 | 
					        Delimited {
 | 
				
			||||||
 | 
					            stream,
 | 
				
			||||||
 | 
					            delimiter: delimiter.to_vec(),
 | 
				
			||||||
 | 
					            buffer: Vec::with_capacity(BUFFER_SIZE),
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Iterator for Delimited {
 | 
				
			||||||
 | 
					    type Item = Result<Vec<u8>>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn next(&mut self) -> Option<Self::Item> {
 | 
				
			||||||
 | 
					        let mut buf = [0; BUFFER_SIZE];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        loop {
 | 
				
			||||||
 | 
					            if let Some(p) = find_subslice(&self.buffer, &self.delimiter) {
 | 
				
			||||||
 | 
					                let chunk = self.buffer.drain(..p).collect::<Vec<_>>();
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                let _ = self.buffer.drain(..self.delimiter.len());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                return Some(Ok(chunk));
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            match self.stream.read(&mut buf) {
 | 
				
			||||||
 | 
					                Ok(0) => {
 | 
				
			||||||
 | 
					                    if self.buffer.is_empty() {
 | 
				
			||||||
 | 
					                        return None;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    return Some(Ok(mem::take(&mut self.buffer)));
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					                Ok(n) => {
 | 
				
			||||||
 | 
					                    let content = &buf[..n];
 | 
				
			||||||
 | 
					                    self.buffer.extend_from_slice(&content);
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					                Err(e) => {
 | 
				
			||||||
 | 
					                    return Some(Err(e));
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn find_subslice(stack: &[u8], key: &[u8]) -> Option<usize> {
 | 
				
			||||||
 | 
					    if key.len() == 1 {
 | 
				
			||||||
 | 
					        return stack.iter().position(|&b| b == key[0]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if key.len() > stack.len() {
 | 
				
			||||||
 | 
					        return None;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    for i in 0..=stack.len() - key.len() {
 | 
				
			||||||
 | 
					        if &stack[i..i + key.len()] == key {
 | 
				
			||||||
 | 
					            return Some(i);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    None
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user