17 Commits

Author SHA1 Message Date
d3c59ede0d Merge branch 'libdelimiter' into trunk 2026-01-07 19:31:22 -07:00
2f2b8cf067 libdelimit: updates to make API easier to use; fop(1): updated to new libdelimit API 2025-11-12 16:18:46 -07:00
2846ee3c01 fop(1): formatting 2025-11-07 15:11:49 -07:00
afd58ae4e3 fop(1): updated copyright information 2025-11-06 15:44:15 -07:00
90de81b5bd Merge branch 'quick-fixes' into trunk 2025-11-05 13:35:15 -07:00
1bd190ae99 mm.1: fixed attributions 2025-11-02 12:20:17 -07:00
d497d5b4aa libdelimit: fixes tabs 2025-10-31 00:58:12 -06:00
c1df6e6322 libdelimit: refactored find_subslice() 2025-10-31 06:32:00 +00:00
28bc2cd0d0 libdelimit: formatting & correctness 2025-10-30 23:29:25 -06:00
5cc1e2067b libdelimit, Makefile: adds test for libdelimit, changes API; fop(1): changes to new libdelimit API 2025-10-30 19:32:03 -06:00
4aab77bee4 fop(1), fop.1: update to use libdelimit 2025-10-28 14:11:00 -06:00
15039805f9 Makefile, libdelimit: update for including libdelimit in utilities 2025-10-28 14:08:17 -06:00
31b424d205 libdelimit: initial commit 2025-10-27 23:37:11 -06:00
be6bd5386d docs: POSIX → \*(Px 2025-10-26 20:10:24 -06:00
3b44ddeedd scrut(1): remove erroneous utility from tree 2025-10-26 17:07:52 -06:00
ac0f9e4019 README: add CONTRIBUTING mention 2025-04-03 16:46:50 -06:00
247e469f82 CONTRIBUTING, STYLE: merge into CONTRIBUTING 2025-04-03 16:45:22 -06:00
14 changed files with 484 additions and 468 deletions

View File

@@ -99,17 +99,247 @@ notice:
Style
=====
Make sure lines never exceed 80 columns in width when using four-character
indentation steps. This helps contributors with smaller screens, those using
side-by-side editor windows or panes, and those who have no text wrapping in
their editor or terminal.
“Everyone knows that debugging is twice as hard as writing a program in the
first place. So if youre as clever as you can be when you write it, how
will you ever debug it?”
Brian Kernighan, The Elements of Programming Style
For usage text and help messages, do not implement a -h option. Instead, print
usage information when any erroneous option is specified. Follow the NetBSD
style guide for the usage texts output format [0].
The following guidelines are conducive to clear and readable code that is
consistent with the style of the rest of the Bonsai Computer System.
Use:
0. A single line for control flow statements short enough to be easily
understood at a glance:
if !(argc < 0) { usage(program_name); }
This applies to C switch statements and cases and Rust match statements, as
well:
switch (value) { /* aligning stuff to make it easier to read is fine */
case possibility: variable = foo; break;
default: variable = NULL; break;
}
1. Switch cases in C and match arms in Rust should start another level of
indentation:
switch (value) {
case possibility:
statement;
break;
default:
statement;
break;
}
match result {
Ok(n) => variable = n,
Err(e) => error = e,
}
2. Braces in control flow where their inclusion is left optional in C:
if (condition) { statement; }
3. Empty lines between different kinds of statements:
int t;
assert(io->bufuse > 0);
assert(io->bufuse <= io->bs);
if ((t = write(io->fd, io->buf, io->bufuse)) < 0) {
io->error = errno;
t = 0;
} else if (t > 0) {
memmove(io->buf, &(io->buf)[t], (io->bufuse -= t));
}
io->bytes += t;
io->prec += (t > 0 && io->bufuse > 0);
io->rec += (t > 0 && io->bufuse == 0);
return io;
4. Compiler options that yield the most useful warnings, such as -Wpedantic in
a lot of C compilers. Fix the warnings, too [0].
5. One more level of indentation and one argument per line when a function
call or statement header is too long to fit on one line:
let usage = format!(
"Usage: {} [-d delimiter] index command [args...]",
argv[0],
);
6. One more level of indentation than the keyword that initiated a multi-line
block.
if (condition) {
statement;
statement;
}
7. The return value of all non-void functions, or explicitly ignore them (like
casting to void in C) [0]:
if ((a = malloc(sizeof char)) == NULL) { /* handle this error */
(void)fprintf(stderr, "oh noes!"); /* explicitly ignore this one */
return EX_OSERR; /* ...because the program is exiting anyway */
}
8. The smallest possible scope for data [0].
9. Comments noting all the symbols and macros used from a C header file, next
to its include macro:
#include <unistd.h> /* close(2), getopt(3), lseek(2), read(2), write(2),
(space-aligned) * optarg, optind, STDIN_FILENO, STDOUT_FILENO */
10. Spaces in control flow statements, after the keyword and before the
opening brace:
for (i = 2; i < argc; ++i) {
11. In Rust, a trailing comma on all arguments or fields that are on their own
lines:
return Err(EvaluationError {
message: format!("{}: Invalid token", i),
code: EX_DATAERR,
})
12. In Rust, place extern statements after use statements that include standard
library crates. Group like statements:
use std::fs::Path;
extern crate strerror;
extern crate sysexits;
use strerror::StrError;
use sysexits::{ EX_OSERR, EX_USAGE };
13. If text is on the same line as a brace, spaces after an opening brace and
before a closing one:
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
14. Alphabetic sorting, where applicable:
use std::io::{ BufWriter, Read, Write, stderr, stdin, stdout }
15. In Rust, use the to_owned() method on string types (str, OsStr, CStr, etc.)
and the to_string() method on other types.
Avoid:
16. Unbounded loops [0].
17. Function pointers [0].
18. Heap memory allocation [0].
19. Using too much nested logic (within reason).
20. Too many levels of dereferences [0]:
/* do not do this */
for (size_t i = 0; i < sizeof a / sizeof *a; ++i) {
if (a[i].id == MATCH) { a[i].val = 0; }
}
/* do this */
for (struct MadeUp *s = &a[0]; *s != NULL; s = &s[1]) {
if (s->id == MATCH) { s->val = 0; }
}
21. Using C preprocessor macros; the fewer, the better [0].
22. The exit(3p) and std::process::exit() functions; returning from the main
function skips a system call.
Do not use:
23. More than the length of one printed page for a function [0].
24. Recursion, as its complex and can unexpectedly overflow the stack [0].
25. Any functionality not in the POSIX C specification and language features not
in C99.
26. Do-while loops, as theyre unique to C and confusing for casual programmers.
27. Labels and goto statements; use sensible flow control [0].
28. Pointer arithmetic, as it tends to be confusing and unnecessary; use
index-reference patterns like &p[1] instead of p + 1. &p[n] is the address at
p + sizeof p * n, not p + n, like pointer arithmetic suggests.
29. C struct bitfields in unions, to access certain bits of bigger data types,
as its poorly defined in the C standards; use bit arithmetic.
30. C trigraphs.
31. Inclusions in C header files, to prevent multiple file inclusions.
32. C preprocessor variables to prevent multiple inclusions of the same file,
such as:
#ifdef _FILE
#define _FILE
/* file body */
#endif /* ifdef _FILE */
Instead, take the time to ensure other files arent including any files twice.
33. The gets(3p) function from <stdio.h>, as its impossible to prevent buffer
overflows when it's used; use fgets(3p) from <stdio.h>.
34. The scanf(3p) function from <stdio.h> [1].
35. Any functionality not described in the latest POSIX make(1) specification.
36. Macros which panic on failure in Rust (such as the print!() and println!()
macros). Use a function and handle any errors. However, do use the eprintln!()
macro for error messages. Handling an error for writing an error message is
redundant.
37. A -h option for help text. Instead, print usage information when any
erroneous option is specified. See the Usage Text section below.
38. Lines which exceed 80 columns in width when using four-column indentation
steps. This helps contributors with smaller screens, those using side-by-side
editor windows or panes, and those who have no text wrapping in their editor or
terminal.
Usage Text
==========
This section is adapted from the NetBSD style guide [2].
When programs are invoked incorrectly and in the synopsis of manual pages, uasge
text should be provided to the user. The following is the format used by this
project for this purpose:
All optional arguments are to be placed in square brackets (U+005B, U+005D).
Mutually exclusive arguments can be separated by a vertical line (U+007C).
Groups of arguments should be specified in alphabetical order in most cases. The
order of arguments and an example of these rules follows:
0. Options with no option arguments.
1. Options with option arguments. Arguments should be specified inside the same
square brackets as the options.
3. Non-option arguments.
"usage: f [-aDde] [-b b_arg] [-m m_arg] req1 req2 [opt1 [opt2]]\n"
"usage: f [-a | -b] [-c [-de] [-n number]]\n"
If committing a new utility, please include tests and documentation (see
tests/ and docs/) for the new tool.
Committing
==========
@@ -117,6 +347,10 @@ Committing
When contributing to Bonsai, please sign your commit with a PGP key and create
the commit with an identity which can be easily contacted.
If committing a new utility, please include tests and documentation (see
tests/ and docs/) for the new tool.
Format commit messages following these guidelines:
$ git commit -m 'tool(1): add feature x'
@@ -147,9 +381,16 @@ Commit messages should be written in the present tense.
References
==========
[0] <http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/share/misc/style>
[0] <https://web.eecs.umich.edu/~imarkov/10rules.pdf>
[1] <http://sekrit.de/webdocs/c/beginners-guide-away-from-scanf.html>
[2] <http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/share/misc/style>
--
This work © 20232024 by Emma Tebibyte is licensed under CC BY-SA 4.0. To view a
copy of this license, visit <http://creativecommons.org/licenses/by-sa/4.0/>
Copyright © 20232024 Emma Tebibyte <emma@tebibyte.media>
Copyright © 2024 DTB <trinity@trinity.moe>
Copyright © Wikipedia contributors
This work is licensed under CC BY-SA 4.0. To view a copy of this license, visit
<http://creativecommons.org/licenses/by-sa/4.0/>.

View File

@@ -30,7 +30,8 @@ SYSEXITS != printf '\043include <sysexits.h>\n' | cpp -M - | tr ' ' '\n' \
CC ?= cc
RUSTC ?= rustc
RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \
RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \
--extern getopt=build/o/libgetopt.rlib \
--extern strerror=build/o/libstrerror.rlib \
--extern sysexits=build/o/libsysexits.rlib
CFLAGS += -I$(SYSEXITS)
@@ -70,12 +71,16 @@ TESTS != printf '%s\n' "$(TESTFILES)" | xargs -n1 basename \
include $(TESTFILES)
.PHONY: test
test: all $(TESTS) /tmp/getopt
test: all $(TESTS) /tmp/delimit /tmp/getopt
@echo $(TESTS)
/tmp/delimit
/tmp/getopt
/tmp/delimit: src/libdelimit.rs
$(RUSTC) --test -o $@ src/libdelimit.rs
/tmp/getopt: src/libgetopt.rs
$(RUSTC) --test -o /tmp/getopt src/libgetopt.rs
$(RUSTC) --test -o $@ src/libgetopt.rs
.PHONY: docs
docs: docs/ build
@@ -88,8 +93,12 @@ docs: docs/ build
include $(OS_INCLUDE)
.PHONY: rustlibs
rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \
build/o/libsysexits.rlib $(OSLIB)
rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \
build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB)
build/o/libdelimit.rlib: build src/libdelimit.rs
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \
-o $@ src/libdelimit.rs
build/o/libgetopt.rlib: build src/libgetopt.rs
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \

3
README
View File

@@ -18,7 +18,8 @@ anywhere. Other utility sets aim to provide a number of fully-featured programs
to be used individually, Harakit utilities are meant to be easily composable
and work together in pipelines.
See docs/ for more on the specific utilities currently implemented.
See docs/ for more on the specific utilities currently implemented and see
CONTRIBUTING for guidelines for contributions.
Building

254
STYLE
View File

@@ -1,254 +0,0 @@
“Everyone knows that debugging is twice as hard as writing a program in the
first place. So if youre as clever as you can be when you write it, how
will you ever debug it?”
Brian Kernighan, The Elements of Programming Style
The following guidelines are conducive to clear and readable code that is
consistent with the style of the rest of the Bonsai Computer System.
Use
===
0. A single line for control flow statements short enough to be easily
understood at a glance:
if !(argc < 0) { usage(program_name); }
This applies to C switch statements and cases and Rust match statements, as
well:
switch (value) { /* aligning stuff to make it easier to read is fine */
case possibility: variable = foo; break;
default: variable = NULL; break;
}
1. Switch cases in C and match arms in Rust should start another level of
indentation:
switch (value) {
case possibility:
statement;
break;
default:
statement;
break;
}
match result {
Ok(n) => variable = n,
Err(e) => error = e,
}
2. Braces in control flow where their inclusion is left optional in C:
if (condition) { statement; }
3. Empty lines between different kinds of statements:
int t;
assert(io->bufuse > 0);
assert(io->bufuse <= io->bs);
if ((t = write(io->fd, io->buf, io->bufuse)) < 0) {
io->error = errno;
t = 0;
} else if (t > 0) {
memmove(io->buf, &(io->buf)[t], (io->bufuse -= t));
}
io->bytes += t;
io->prec += (t > 0 && io->bufuse > 0);
io->rec += (t > 0 && io->bufuse == 0);
return io;
4. Compiler options that yield the most useful warnings, such as -Wpedantic in
a lot of C compilers. Fix the warnings, too [0].
5. One more level of indentation and one argument per line when a function
call or statement header is too long to fit on one line:
let usage = format!(
"Usage: {} [-d delimiter] index command [args...]",
argv[0],
);
6. One more level of indentation than the keyword that initiated a multi-line
block.
if (condition) {
statement;
statement;
}
7. The return value of all non-void functions, or explicitly ignore them (like
casting to void in C) [0]:
if ((a = malloc(sizeof char)) == NULL) { /* handle this error */
(void)fprintf(stderr, "oh noes!"); /* explicitly ignore this one */
return EX_OSERR; /* ...because the program is exiting anyway */
}
8. The smallest possible scope for data [0].
9. Comments noting all the symbols and macros used from a C header file, next
to its include macro:
#include <unistd.h> /* close(2), getopt(3), lseek(2), read(2), write(2),
(space-aligned) * optarg, optind, STDIN_FILENO, STDOUT_FILENO */
10. Spaces in control flow statements, after the keyword and before the
opening brace:
for (i = 2; i < argc; ++i) {
11. In Rust, a trailing comma on all arguments or fields that are on their own
lines:
return Err(EvaluationError {
message: format!("{}: Invalid token", i),
code: EX_DATAERR,
})
12. In Rust, place extern statements after use statements that include standard
library crates. Group like statements:
use std::fs::Path;
extern crate strerror;
extern crate sysexits;
use strerror::StrError;
use sysexits::{ EX_OSERR, EX_USAGE };
13. If text is on the same line as a brace, spaces after an opening brace and
before a closing one:
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
14. Alphabetic sorting, where applicable:
use std::io::{ BufWriter, Read, Write, stderr, stdin, stdout }
15. In Rust, use the to_owned() method on string types (str, OsStr, CStr, etc.)
and the to_string() method on other types.
Avoid
=====
16. Unbounded loops [0].
17. Function pointers [0].
18. Heap memory allocation [0].
19. Using too much nested logic (within reason).
20. Too many levels of dereferences [0]:
/* do not do this */
for (size_t i = 0; i < sizeof a / sizeof *a; ++i) {
if (a[i].id == MATCH) { a[i].val = 0; }
}
/* do this */
for (struct MadeUp *s = &a[0]; *s != NULL; s = &s[1]) {
if (s->id == MATCH) { s->val = 0; }
}
21. Using C preprocessor macros; the fewer, the better [0].
22. The exit(3p) and std::process::exit() functions; returning from the main
function skips a system call.
Do Not Use
==========
23. More than the length of one printed page for a function [0].
24. Recursion, as its complex and can unexpectedly overflow the stack [0].
25. Any functionality not in the POSIX C specification and language features not
in C99.
26. Do-while loops, as theyre unique to C and confusing for casual programmers.
27. Labels and goto statements; use sensible flow control [0].
28. Pointer arithmetic, as it tends to be confusing and unnecessary; use
index-reference patterns like &p[1] instead of p + 1. &p[n] is the address at
p + sizeof p * n, not p + n, like pointer arithmetic suggests.
29. C struct bitfields in unions, to access certain bits of bigger data types,
as its poorly defined in the C standards; use bit arithmetic.
30. C trigraphs.
31. Inclusions in C header files, to prevent multiple file inclusions.
32. C preprocessor variables to prevent multiple inclusions of the same file,
such as:
#ifdef _FILE
#define _FILE
/* file body */
#endif /* ifdef _FILE */
Instead, take the time to ensure other files arent including any files twice.
33. The gets(3p) function from <stdio.h>, as its impossible to prevent buffer
overflows when it's used; use fgets(3p) from <stdio.h>.
34. The scanf(3p) function from <stdio.h> [1].
35. Any functionality not described in the latest POSIX make(1) specification.
36. Macros which panic on failure in Rust (such as the print!() and println!()
macros). Use a function and handle any errors. However, do use the eprintln!()
macro for error messages. Handling an error for writing an error message is
redundant.
Usage Text
==========
This section is adapted from the NetBSD style guide [2].
When programs are invoked incorrectly and in the synopsis of manual pages, uasge
text should be provided to the user. The following is the format used by this
project for this purpose:
All optional arguments are to be placed in square brackets (U+005B, U+005D).
Mutually exclusive arguments can be separated by a vertical line (U+007C).
Groups of arguments should be specified in alphabetical order in most cases. The
order of arguments and an example of these rules follows:
0. Options with no option arguments.
1. Options with option arguments. Arguments should be specified inside the same
square brackets as the options.
3. Non-option arguments.
"usage: f [-aDde] [-b b_arg] [-m m_arg] req1 req2 [opt1 [opt2]]\n"
"usage: f [-a | -b] [-c [-de] [-n number]]\n"
References
==========
[0] <https://web.eecs.umich.edu/~imarkov/10rules.pdf>
[1] <http://sekrit.de/webdocs/c/beginners-guide-away-from-scanf.html>
[2] <http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/share/misc/style>
--
Copyright © 2024 Emma Tebibyte <emma@tebibyte.media>
Copyright © 2024 DTB <trinity@trinity.moe>
Copyright © Wikipedia contributors
This work is licensed under CC BY-SA 4.0. To view a copy of this license, visit
<http://creativecommons.org/licenses/by-sa/4.0/>.

View File

@@ -15,7 +15,7 @@ always be returned.
.\"
.SH RATIONALE
In POSIX.1-2017,
In \*(Px.1-2024,
.BR false (1p)
exists for the construction of control flow and loops based on a failure. This
implementation functions as described in that standard.

View File

@@ -1,5 +1,5 @@
.\" Copyright (c) 2024 DTB <trinity@trinity.moe>
.\" Copyright (c) 2024 Emma Tebibyte <emma@tebibyte.media>
.\" Copyright (c) 20242025 Emma Tebibyte <emma@tebibyte.media>
.\"
.\" This work is licensed under CC BY-SA 4.0. To see a copy of this license,
.\" visit <http://creativecommons.org/licenses/by-sa/4.0/>.
@@ -11,10 +11,9 @@ fop \(en field operator
.SH SYNOPSIS
fop
.RB ( -d )
.RB [ delimiter ]
.RB index
.RB program...
.RB [ -d\ delimiter ]
index program
.RB [ arguments... ]
.\"
.SH DESCRIPTION
@@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input.
Sets a delimiter by which the input data will be split into fields. The default
is an ASCII record separator.
.\"
.SH DIAGNOSTICS
in the event of an error, a debug message will be printed and the program will
exit with the appropriate sysexits.h(3) error code.
.\"
.SH CAVEATS
If the specified index does not exist in the data, the program
will print all data to the standard output before exiting with an error. If
input data is not delimited by the specified delimiter, the program will fill
memory with the contents of the stream before it is output.
Field indices are zero-indexed, which may be unexpected behavior for some
users.
.\"

View File

@@ -76,7 +76,7 @@ this is elegant but unintuitive.
.\"
.SH RATIONALE
The traditional tool for integer comparisons in POSIX and other Unix shells has
The traditional tool for integer comparisons in \*(Px and other Unix shells has
been
.BR test (1).
This tool also handles string comparisons and file scrutiny. These parts of its

View File

@@ -75,11 +75,15 @@ utility\(cqs niche.
Written by DTB
.MT trinity@trinity.moe
.ME , \
ported to Rust by Emma Tebibyte
.MT emma@tebibyte.media
.ME .
.\"
.SH COPYRIGHT
Copyright © 2023 DTB. License AGPLv3+: GNU AGPL version 3 or later
Copyright © 2024 DTB, 2024\(en2025 Emma Tebibyte. \
License AGPLv3+: GNU AGPL version 3 or later
<https://gnu.org/licenses/agpl.html>.
.\"
.SH SEE ALSO

View File

@@ -45,7 +45,7 @@ The program operates in single-byte chunks regardless of intended encoding.
.\"
.SH RATIONALE
POSIX currently lacks a way to display non-printing characters in the terminal
\*(Px currently lacks a way to display non-printing characters in the terminal
using a standard tool. A popular extension to
.BR cat (1p),
the

View File

@@ -47,13 +47,13 @@ visual similarity and not byte similarity.
.\"
.SH RATIONALE
The traditional tool for string comparisons in POSIX and other Unix shells has
The traditional tool for string comparisons in \*(Px and other Unix shells has
been
.BR test (1).
This tool also handles integer comparisons and file scrutiny. These parts of
its functionality have been broken out into multiple utilities.
This program\(cqs functionality may be performed on a POSIX-compliant system
This program\(cqs functionality may be performed on a \*(Px-compliant system
with
.BR test (1p).
.\"

View File

@@ -15,7 +15,7 @@ always be returned.
.\"
.SH RATIONALE
In \fIPOSIX.1-2017\fP,
In \fI\*(Px.1-2024\fP,
.BR true (1p)
exists for the construction of control flow and loops based on a success. This
implementation functions as described in that standard.

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 20232024 Emma Tebibyte <emma@tebibyte.media>
* Copyright (c) 20232025 Emma Tebibyte <emma@tebibyte.media>
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* This program is free software: you can redistribute it and/or modify it
@@ -18,14 +18,16 @@
use std::{
env::args,
io::{ Error, Read, Write, stdin, stdout },
io::{ Error, Write, stdin, stdout },
process::{ Command, ExitCode, Stdio, exit },
};
extern crate delimit;
extern crate getopt;
extern crate strerror;
extern crate sysexits;
use delimit::Delimited;
use getopt::GetOpt;
use strerror::StrError;
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
@@ -90,78 +92,94 @@ fn main() -> ExitCode {
exit(usage(&argv[0]).into());
});
/* read entire standard input into memory */
let mut buf = String::new();
if let Err(e) = stdin().read_to_string(&mut buf) {
err(&argv[0], e);
exit(EX_IOERR.into());
};
let stdin = stdin().lock();
/* split the buffer by the delimiter (by default, '\u{1E}') */
let mut fields = buf.split(&d).collect::<Vec<&str>>();
let mut input = Delimited::new(stdin, d.clone());
let mut n = 0;
/* collect arguments for the operator command */
let command_args = argv
.iter()
.clone()
.skip(command_arg + 1) /* skip the command name */
.collect::<Vec<&String>>();
let mut fopped = false;
/* spawn the command to operate on the field */
let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped())
.stdout(Stdio::piped()) /* piped stdout to handle output ourselves */
.spawn()
.unwrap_or_else( |e| {
while let Some(i) = input.next() {
let v = match i {
Ok(v) => v,
Err(e) => {
err(&argv[0], e);
return EX_IOERR.into();
},
};
let mut out = Vec::new();
if n == index { /* fop it */
/* collect arguments for the operator command */
let command_args = argv
.iter()
.clone()
.skip(command_arg + 1) /* skip the command name */
.collect::<Vec<&String>>();
/* spawn the command to operate on the field */
let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped())
/* piped stdout to handle output ourselves */
.stdout(Stdio::piped())
.spawn()
.unwrap_or_else( |e| {
err(&argv[0], e);
exit(EX_UNAVAILABLE.into());
});
/* feed the spawned programs stdin the field value */
if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(&v);
drop(child_stdin); /* stay safe! drop your children! */
let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
/* get the output with which the original field will
* be replaced */
let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */
if v.iter().last() != Some(&b'\n')
&& replace.pop() != Some(b'\n')
{
out = output.stdout;
} else {
out = replace;
}
}
fopped = true;
} else {
out = v;
}
/* since we cannot know when were done, place a new delimiter before
* each index unless it is the 0th */
if n != 0 {
stdout().write_all(d.as_bytes()).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
}
stdout().write_all(&out).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_UNAVAILABLE.into());
exit(EX_IOERR.into());
});
/* get field we want to pipe into spawned program */
let field = fields.get(index).unwrap_or_else(|| {
n += 1;
}
if fopped {
return ExitCode::SUCCESS;
} else {
eprintln!("{}: {}: no such index in input", argv[0], index);
exit(EX_DATAERR.into());
});
/* get the stdin of the newly spawned program and feed it the field val */
if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(field.as_bytes());
drop(child_stdin); /* stay safe! drop your children! */
return EX_DATAERR.into();
}
let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
/* get the output with which the original field will be replaced */
let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */
if fields[index].chars().last() != Some('\n') /* no newline */
&& replace.pop() != Some(b'\n') { /* pop last char of replacement */
/* restore replacement to original command output if popped char was
* not a newline */
replace = output.stdout;
}
/* convert the output of the program to UTF-8 */
let new_field = String::from_utf8(replace).unwrap_or_else(|e| {
eprintln!("{}: {}", argv[0], e);
exit(EX_IOERR.into());
});
/* store the new field in the old fields vector */
fields[index] = &new_field;
/* fop it */
stdout().write_all(
fields.join(&d.to_string()).as_bytes()
).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
ExitCode::SUCCESS
}

105
src/libdelimit.rs Normal file
View File

@@ -0,0 +1,105 @@
/*
* Copyright (c) 2025 Emma Tebibyte <emma@tebibyte.media>
* Copyright (c) 2025 silty silt <silt@tebibyte.media>
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Affero General Public License as published by the
* Free Software Foundation, either version 3 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
* for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
use std::{
io::{ Read, Result },
mem,
};
const BUFFER_SIZE: usize = 4096;
pub struct Delimited<T: Read> {
delimiter: Vec<u8>,
buffer: Vec<u8>,
stream: T,
}
impl<T> Delimited<T> where T: Read {
pub fn new<R>(stream: T, delimiter: R) -> Self where R: AsRef<[u8]> {
Delimited {
stream,
delimiter: delimiter.as_ref().to_vec(),
buffer: Vec::with_capacity(BUFFER_SIZE),
}
}
fn find_subslice(&self) -> Option<usize> {
match self.delimiter.len() {
/* TODO: is this optimization necessary? */
1 => self.buffer.iter().position(|&b| b == self.delimiter[0]),
_ => {
self.buffer
.windows(self.delimiter.len())
.position(|w| w == self.delimiter)
},
}
}
}
impl<T> Iterator for Delimited<T> where T: Read {
type Item = Result<Vec<u8>>;
fn next(&mut self) -> Option<Self::Item> {
let mut buf = [0; BUFFER_SIZE];
loop {
if let Some(p) = self.find_subslice() {
let chunk = self.buffer.drain(..p).collect::<Vec<_>>();
let _ = self.buffer.drain(..self.delimiter.len());
return Some(Ok(chunk));
}
match self.stream.read(&mut buf) {
Ok(0) => { /* no bytes read, were probably done */
let _ = self.buffer.is_empty() && return None;
return Some(Ok(mem::take(&mut self.buffer)));
},
Ok(n) => {
self.buffer.extend_from_slice(&buf[..n]);
},
Err(e) => {
return Some(Err(e));
},
}
}
}
}
#[cfg(test)]
mod tests {
use Delimited;
#[test]
fn testing() {
let d = '\u{1E}'.to_string();
let input = vec!["meow", "woof", "ribbit"];
let r = input.join(&d);
let mut output = Delimited::new(r.as_bytes(), d);
let mut i = 0;
while let Some(item) = output.next() {
assert_eq!(input[i].as_bytes(), item.unwrap());
i += 1;
}
}
}

View File

@@ -1,116 +0,0 @@
/*
* Copyright (c) 20232024 DTB <trinity@trinity.moe>
* Copyright (c) 2024 Emma Tebibyte <emma@tebibyte.media>
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Affero General Public License as published by the
* Free Software Foundation, either version 3 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
* for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
#include <assert.h> /* assert(3) */
#include <stdio.h> /* fprintf(3), stderr, NULL */
#include <stdlib.h> /* EXIT_FAILURE, EXIT_SUCCESS */
#include <string.h> /* memset(3), strchr(3) */
#include <sysexits.h> /* EX_OSERR, EX_USAGE */
#include <unistd.h> /* access(3), getopt(3), pledge(2), unveil(2), F_OK, R_OK,
* W_OK, X_OK */
#include <sys/stat.h> /* lstat(3), stat struct, S_ISBLK, S_ISCHR, S_ISDIR,
* S_ISFIFO, S_ISGID, S_ISREG, S_ISLNK, S_ISSOCK,
* S_ISUID, S_ISVTX */
char *program_name = "scrut";
#define OPTS "bcdefgkprsuwxLS"
/* this is an array so main:sel's size can be known at compile time */
static char opts[] = OPTS;
static int
usage(char *argv0) {
(void)fprintf(stderr, "Usage: %s [-" OPTS "] file...\n", argv0);
return EX_USAGE;
}
int main(int argc, char *argv[]) {
char sel[(sizeof opts) / (sizeof *opts)];
program_name = argv[0] == NULL ? program_name : argv[0];
#ifdef __OpenBSD__
if (pledge("rpath stdio unveil", "") == -1) {
perror(program_name);
return EX_OSERR;
}
#endif
if (argc < 2) { return usage(program_name); }
{ /* option parsing */
char *p;
memset(sel, '\0', sizeof sel);
for (int c; (c = getopt(argc, argv, opts)) != -1;) {
if ((p = strchr(opts, c)) == NULL) { return usage(argv[0]); }
else {
assert(p - opts < sizeof sel / sizeof *sel); /* bounds check */
sel[p - opts] = c;
}
}
/* straighten out selections; permute out nulls */
p = sel;
for (size_t i = 0; i < (sizeof sel) / (sizeof *sel); ++i) {
if (sel[i] != '\0') {
*p = sel[i];
if (&sel[i] != p++) { sel[i] = '\0'; }
}
}
}
if (optind == argc) { return usage(argv[0]); }
for (argv += optind ; *argv != NULL; argv = &argv[1]) {
struct stat buf;
#ifdef __OpenBSD__
if (unveil(*argv, "rw") == -1) {
perror(program_name);
return EX_OSERR;
}
#endif
if(access(*argv, F_OK) != 0 || lstat(*argv, &buf) == -1) {
return EXIT_FAILURE; /* doesn't exist or isn't stattable */
}
for (size_t i = 0; sel[i] != '\0'; ++i) {
if (
(sel[i] == 'b' && !S_ISBLK(buf.st_mode))
|| (sel[i] == 'c' && !S_ISCHR(buf.st_mode))
|| (sel[i] == 'd' && !S_ISDIR(buf.st_mode))
|| (sel[i] == 'e' && 0)
|| (sel[i] == 'f' && !S_ISREG(buf.st_mode))
|| (sel[i] == 'g' && !(buf.st_mode & S_ISGID))
|| (sel[i] == 'k' && !(buf.st_mode & S_ISVTX))
|| (sel[i] == 'p' && !S_ISFIFO(buf.st_mode))
|| (sel[i] == 'r' && access(*argv, R_OK) != 0)
|| (sel[i] == 'u' && !(buf.st_mode & S_ISUID))
|| (sel[i] == 'w' && access(*argv, W_OK) != 0)
|| (sel[i] == 'x' && access(*argv, X_OK) != 0)
|| (sel[i] == 'L' && !S_ISLNK(buf.st_mode))
|| (sel[i] == 'S' && !S_ISSOCK(buf.st_mode))
) { return EXIT_FAILURE; }
}
}
return EXIT_SUCCESS;
}