8 Commits

16 changed files with 462 additions and 465 deletions

View File

@@ -99,17 +99,247 @@ notice:
Style
=====
Make sure lines never exceed 80 columns in width when using four-character
indentation steps. This helps contributors with smaller screens, those using
side-by-side editor windows or panes, and those who have no text wrapping in
their editor or terminal.
“Everyone knows that debugging is twice as hard as writing a program in the
first place. So if you’re as clever as you can be when you write it, how
will you ever debug it?”
Brian Kernighan, The Elements of Programming Style
For usage text and help messages, do not implement a -h option. Instead, print
usage information when any erroneous option is specified. Follow the NetBSD
style guide for the usage text’s output format [0].
The following guidelines are conducive to clear and readable code that is
consistent with the style of the rest of the Bonsai Computer System.
Use:
0. A single line for control flow statements short enough to be easily
understood at a glance:
if !(argc < 0) { usage(program_name); }
This applies to C switch statements and cases and Rust match statements, as
well:
switch (value) { /* aligning stuff to make it easier to read is fine */
case possibility: variable = foo; break;
default: variable = NULL; break;
}
1. Switch cases in C and match arms in Rust should start another level of
indentation:
switch (value) {
case possibility:
statement;
break;
default:
statement;
break;
}
match result {
Ok(n) => variable = n,
Err(e) => error = e,
}
2. Braces in control flow where their inclusion is left optional in C:
if (condition) { statement; }
3. Empty lines between different kinds of statements:
int t;
assert(io->bufuse > 0);
assert(io->bufuse <= io->bs);
if ((t = write(io->fd, io->buf, io->bufuse)) < 0) {
io->error = errno;
t = 0;
} else if (t > 0) {
memmove(io->buf, &(io->buf)[t], (io->bufuse -= t));
}
io->bytes += t;
io->prec += (t > 0 && io->bufuse > 0);
io->rec += (t > 0 && io->bufuse == 0);
return io;
4. Compiler options that yield the most useful warnings, such as -Wpedantic in
a lot of C compilers. Fix the warnings, too [0].
5. One more level of indentation and one argument per line when a function
call or statement header is too long to fit on one line:
let usage = format!(
"Usage: {} [-d delimiter] index command [args...]",
argv[0],
);
6. One more level of indentation than the keyword that initiated a multi-line
block.
if (condition) {
statement;
statement;
}
7. The return value of all non-void functions, or explicitly ignore them (like
casting to void in C) [0]:
if ((a = malloc(sizeof *a)) == NULL) { /* handle this error */
(void)fprintf(stderr, "oh noes!"); /* explicitly ignore this one */
return EX_OSERR; /* ...because the program is exiting anyway */
}
8. The smallest possible scope for data [0].
9. Comments noting all the symbols and macros used from a C header file, next
to its include macro:
#include <unistd.h> /* close(2), getopt(3), lseek(2), read(2), write(2),
(space-aligned) * optarg, optind, STDIN_FILENO, STDOUT_FILENO */
10. Spaces in control flow statements, after the keyword and before the
opening brace:
for (i = 2; i < argc; ++i) {
11. In Rust, a trailing comma on all arguments or fields that are on their own
lines:
return Err(EvaluationError {
message: format!("{}: Invalid token", i),
code: EX_DATAERR,
})
12. In Rust, place extern statements after use statements that include standard
library crates. Group like statements:
use std::fs::Path;
extern crate strerror;
extern crate sysexits;
use strerror::StrError;
use sysexits::{ EX_OSERR, EX_USAGE };
13. If text is on the same line as a brace, spaces after an opening brace and
before a closing one:
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
14. Alphabetic sorting, where applicable:
use std::io::{ BufWriter, Read, Write, stderr, stdin, stdout }
15. In Rust, use the to_owned() method on string types (str, OsStr, CStr, etc.)
and the to_string() method on other types.
Avoid:
16. Unbounded loops [0].
17. Function pointers [0].
18. Heap memory allocation [0].
19. Using too much nested logic (within reason).
20. Too many levels of dereferences [0]:
/* do not do this */
for (size_t i = 0; i < sizeof a / sizeof *a; ++i) {
if (a[i].id == MATCH) { a[i].val = 0; }
}
/* do this */
for (struct MadeUp *s = &a[0]; *s != NULL; s = &s[1]) {
if (s->id == MATCH) { s->val = 0; }
}
21. Using C preprocessor macros; the fewer, the better [0].
22. The exit(3p) and std::process::exit() functions; returning from the main
function skips a system call.
Do not use:
23. More than the length of one printed page for a function [0].
24. Recursion, as it’s complex and can unexpectedly overflow the stack [0].
25. Any functionality not in the POSIX C specification and language features not
in C99.
26. Do-while loops, as they’re unique to C and confusing for casual programmers.
27. Labels and goto statements; use sensible flow control [0].
28. Pointer arithmetic, as it tends to be confusing and unnecessary; use
index-reference patterns like &p[1] instead of p + 1. &p[n] is the address at
p + sizeof *p * n, not p + n, like pointer arithmetic suggests.
29. C struct bitfields in unions, to access certain bits of bigger data types,
as it’s poorly defined in the C standards; use bit arithmetic.
30. C trigraphs.
31. Inclusions in C header files, to prevent multiple file inclusions.
32. C preprocessor variables to prevent multiple inclusions of the same file,
such as:
#ifndef _FILE
#define _FILE
/* file body */
#endif /* ifndef _FILE */
Instead, take the time to ensure other files aren’t including any files twice.
33. The gets(3p) function from <stdio.h>, as it’s impossible to prevent buffer
overflows when it's used; use fgets(3p) from <stdio.h>.
34. The scanf(3p) function from <stdio.h> [1].
35. Any functionality not described in the latest POSIX make(1) specification.
36. Macros which panic on failure in Rust (such as the print!() and println!()
macros). Use a function and handle any errors. However, do use the eprintln!()
macro for error messages. Handling an error for writing an error message is
redundant.
37. A -h option for help text. Instead, print usage information when any
erroneous option is specified. See the Usage Text section below.
38. Lines which exceed 80 columns in width when using four-column indentation
steps. This helps contributors with smaller screens, those using side-by-side
editor windows or panes, and those who have no text wrapping in their editor or
terminal.
Usage Text
==========
This section is adapted from the NetBSD style guide [2].
When programs are invoked incorrectly and in the synopsis of manual pages, usage
text should be provided to the user. The following is the format used by this
project for this purpose:
All optional arguments are to be placed in square brackets (U+005B, U+005D).
Mutually exclusive arguments can be separated by a vertical line (U+007C).
Groups of arguments should be specified in alphabetical order in most cases. The
order of arguments and an example of these rules follows:
0. Options with no option arguments.
1. Options with option arguments. Arguments should be specified inside the same
square brackets as the options.
2. Non-option arguments.
"usage: f [-aDde] [-b b_arg] [-m m_arg] req1 req2 [opt1 [opt2]]\n"
"usage: f [-a | -b] [-c [-de] [-n number]]\n"
If committing a new utility, please include tests and documentation (see
tests/ and docs/) for the new tool.
Committing
==========
@@ -117,6 +347,10 @@ Committing
When contributing to Bonsai, please sign your commit with a PGP key and create
the commit with an identity which can be easily contacted.
If committing a new utility, please include tests and documentation (see
tests/ and docs/) for the new tool.
Format commit messages following these guidelines:
$ git commit -m 'tool(1): add feature x'
@@ -147,9 +381,16 @@ Commit messages should be written in the present tense.
References
==========
[0] <http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/share/misc/style>
[0] <https://web.eecs.umich.edu/~imarkov/10rules.pdf>
[1] <http://sekrit.de/webdocs/c/beginners-guide-away-from-scanf.html>
[2] <http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/share/misc/style>
--
This work © 2023–2024 by Emma Tebibyte is licensed under CC BY-SA 4.0. To view a
copy of this license, visit <http://creativecommons.org/licenses/by-sa/4.0/>
Copyright © 2023–2024 Emma Tebibyte <emma@tebibyte.media>
Copyright © 2024 DTB <trinity@trinity.moe>
Copyright © Wikipedia contributors
This work is licensed under CC BY-SA 4.0. To view a copy of this license, visit
<http://creativecommons.org/licenses/by-sa/4.0/>.

View File

@@ -30,7 +30,8 @@ SYSEXITS != printf '\043include <sysexits.h>\n' | cpp -M - | tr ' ' '\n' \
CC ?= cc
RUSTC ?= rustc
RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \
RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \
--extern getopt=build/o/libgetopt.rlib \
--extern strerror=build/o/libstrerror.rlib \
--extern sysexits=build/o/libsysexits.rlib
CFLAGS += -I$(SYSEXITS)
@@ -88,8 +89,12 @@ docs: docs/ build
include $(OS_INCLUDE)
.PHONY: rustlibs
rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \
build/o/libsysexits.rlib $(OSLIB)
rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \
build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB)
build/o/libdelimit.rlib: build src/libdelimit.rs
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \
-o $@ src/libdelimit.rs
build/o/libgetopt.rlib: build src/libgetopt.rs
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \
@@ -120,7 +125,7 @@ build/bin/false: src/false.c build
.PHONY: fileis
fileis: build/bin/fileis
build/bin/fileis: src/fileis.rs build rustlibs
$(RUSTC) $(RUSTFLAGS) $(RUSTLIBS) -o $@ src/fileis.rs
$(RUSTC) $(RUSTFLAGS) -o $@ src/fileis.rs
.PHONY: fop
fop: build/bin/fop

3
README
View File

@@ -18,7 +18,8 @@ anywhere. Other utility sets aim to provide a number of fully-featured
programs to be used individually, Harakit utilities are meant to be easily
composable and work together in pipelines.
See docs/ for more on the specific utilities currently implemented.
See docs/ for more on the specific utilities currently implemented and see
CONTRIBUTING for guidelines for contributions.
Building

254
STYLE
View File

@@ -1,254 +0,0 @@
“Everyone knows that debugging is twice as hard as writing a program in the
first place. So if you’re as clever as you can be when you write it, how
will you ever debug it?”
Brian Kernighan, The Elements of Programming Style
The following guidelines are conducive to clear and readable code that is
consistent with the style of the rest of the Bonsai Computer System.
Use
===
0. A single line for control flow statements short enough to be easily
understood at a glance:
if !(argc < 0) { usage(program_name); }
This applies to C switch statements and cases and Rust match statements, as
well:
switch (value) { /* aligning stuff to make it easier to read is fine */
case possibility: variable = foo; break;
default: variable = NULL; break;
}
1. Switch cases in C and match arms in Rust should start another level of
indentation:
switch (value) {
case possibility:
statement;
break;
default:
statement;
break;
}
match result {
Ok(n) => variable = n,
Err(e) => error = e,
}
2. Braces in control flow where their inclusion is left optional in C:
if (condition) { statement; }
3. Empty lines between different kinds of statements:
int t;
assert(io->bufuse > 0);
assert(io->bufuse <= io->bs);
if ((t = write(io->fd, io->buf, io->bufuse)) < 0) {
io->error = errno;
t = 0;
} else if (t > 0) {
memmove(io->buf, &(io->buf)[t], (io->bufuse -= t));
}
io->bytes += t;
io->prec += (t > 0 && io->bufuse > 0);
io->rec += (t > 0 && io->bufuse == 0);
return io;
4. Compiler options that yield the most useful warnings, such as -Wpedantic in
a lot of C compilers. Fix the warnings, too [0].
5. One more level of indentation and one argument per line when a function
call or statement header is too long to fit on one line:
let usage = format!(
"Usage: {} [-d delimiter] index command [args...]",
argv[0],
);
6. One more level of indentation than the keyword that initiated a multi-line
block.
if (condition) {
statement;
statement;
}
7. The return value of all non-void functions, or explicitly ignore them (like
casting to void in C) [0]:
if ((a = malloc(sizeof *a)) == NULL) { /* handle this error */
(void)fprintf(stderr, "oh noes!"); /* explicitly ignore this one */
return EX_OSERR; /* ...because the program is exiting anyway */
}
8. The smallest possible scope for data [0].
9. Comments noting all the symbols and macros used from a C header file, next
to its include macro:
#include <unistd.h> /* close(2), getopt(3), lseek(2), read(2), write(2),
(space-aligned) * optarg, optind, STDIN_FILENO, STDOUT_FILENO */
10. Spaces in control flow statements, after the keyword and before the
opening brace:
for (i = 2; i < argc; ++i) {
11. In Rust, a trailing comma on all arguments or fields that are on their own
lines:
return Err(EvaluationError {
message: format!("{}: Invalid token", i),
code: EX_DATAERR,
})
12. In Rust, place extern statements after use statements that include standard
library crates. Group like statements:
use std::fs::Path;
extern crate strerror;
extern crate sysexits;
use strerror::StrError;
use sysexits::{ EX_OSERR, EX_USAGE };
13. If text is on the same line as a brace, spaces after an opening brace and
before a closing one:
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
14. Alphabetic sorting, where applicable:
use std::io::{ BufWriter, Read, Write, stderr, stdin, stdout }
15. In Rust, use the to_owned() method on string types (str, OsStr, CStr, etc.)
and the to_string() method on other types.
Avoid
=====
16. Unbounded loops [0].
17. Function pointers [0].
18. Heap memory allocation [0].
19. Using too much nested logic (within reason).
20. Too many levels of dereferences [0]:
/* do not do this */
for (size_t i = 0; i < sizeof a / sizeof *a; ++i) {
if (a[i].id == MATCH) { a[i].val = 0; }
}
/* do this */
for (struct MadeUp *s = &a[0]; *s != NULL; s = &s[1]) {
if (s->id == MATCH) { s->val = 0; }
}
21. Using C preprocessor macros; the fewer, the better [0].
22. The exit(3p) and std::process::exit() functions; returning from the main
function skips a system call.
Do Not Use
==========
23. More than the length of one printed page for a function [0].
24. Recursion, as it’s complex and can unexpectedly overflow the stack [0].
25. Any functionality not in the POSIX C specification and language features not
in C99.
26. Do-while loops, as they’re unique to C and confusing for casual programmers.
27. Labels and goto statements; use sensible flow control [0].
28. Pointer arithmetic, as it tends to be confusing and unnecessary; use
index-reference patterns like &p[1] instead of p + 1. &p[n] is the address at
p + sizeof *p * n, not p + n, like pointer arithmetic suggests.
29. C struct bitfields in unions, to access certain bits of bigger data types,
as it’s poorly defined in the C standards; use bit arithmetic.
30. C trigraphs.
31. Inclusions in C header files, to prevent multiple file inclusions.
32. C preprocessor variables to prevent multiple inclusions of the same file,
such as:
#ifndef _FILE
#define _FILE
/* file body */
#endif /* ifndef _FILE */
Instead, take the time to ensure other files aren’t including any files twice.
33. The gets(3p) function from <stdio.h>, as it’s impossible to prevent buffer
overflows when it's used; use fgets(3p) from <stdio.h>.
34. The scanf(3p) function from <stdio.h> [1].
35. Any functionality not described in the latest POSIX make(1) specification.
36. Macros which panic on failure in Rust (such as the print!() and println!()
macros). Use a function and handle any errors. However, do use the eprintln!()
macro for error messages. Handling an error for writing an error message is
redundant.
Usage Text
==========
This section is adapted from the NetBSD style guide [2].
When programs are invoked incorrectly and in the synopsis of manual pages, usage
text should be provided to the user. The following is the format used by this
project for this purpose:
All optional arguments are to be placed in square brackets (U+005B, U+005D).
Mutually exclusive arguments can be separated by a vertical line (U+007C).
Groups of arguments should be specified in alphabetical order in most cases. The
order of arguments and an example of these rules follows:
0. Options with no option arguments.
1. Options with option arguments. Arguments should be specified inside the same
square brackets as the options.
2. Non-option arguments.
"usage: f [-aDde] [-b b_arg] [-m m_arg] req1 req2 [opt1 [opt2]]\n"
"usage: f [-a | -b] [-c [-de] [-n number]]\n"
References
==========
[0] <https://web.eecs.umich.edu/~imarkov/10rules.pdf>
[1] <http://sekrit.de/webdocs/c/beginners-guide-away-from-scanf.html>
[2] <http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/share/misc/style>
--
Copyright © 2024 Emma Tebibyte <emma@tebibyte.media>
Copyright © 2024 DTB <trinity@trinity.moe>
Copyright © Wikipedia contributors
This work is licensed under CC BY-SA 4.0. To view a copy of this license, visit
<http://creativecommons.org/licenses/by-sa/4.0/>.

View File

@@ -207,7 +207,7 @@ which are not reported.
This program was based on the
.BR dd (1p)
utility as specified in POSIX. While character conversion may have been the
utility as specified in \*(Px. While character conversion may have been the
original intent of
.BR dd (1p),
it is irrelevant to its modern use. Because of this, this program eschews

View File

@@ -15,7 +15,7 @@ always be returned.
.\"
.SH RATIONALE
In POSIX.1-2017,
In \*(Px.1-2024,
.BR false (1p)
exists for the construction of control flow and loops based on a failure. This
implementation functions as described in that standard.

View File

@@ -11,10 +11,9 @@ fop \(en field operator
.SH SYNOPSIS
fop
.RB ( -d )
.RB [ delimiter ]
.RB index
.RB program...
.RB [ -d\ delimiter ]
index program
.RB [ arguments... ]
.\"
.SH DESCRIPTION
@@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input.
Sets a delimiter by which the input data will be split into fields. The default
is an ASCII record separator.
.\"
.SH DIAGNOSTICS
In the event of an error, a debug message will be printed and the program will
exit with the appropriate sysexits.h(3) error code.
.\"
.SH CAVEATS
If the specified index does not exist in the data, the program
will print all data to the standard output before exiting with an error. If
input data is not delimited by the specified delimiter, the program will fill
memory with the contents of the stream before it is output.
Field indices are zero-indexed, which may be unexpected behavior for some users.
.\"
.SH RATIONALE

View File

@@ -75,7 +75,7 @@ this is elegant but unintuitive.
.\"
.SH RATIONALE
The traditional tool for integer comparisons in POSIX and other Unix shells has
The traditional tool for integer comparisons in \*(Px and other Unix shells has
been
.BR test (1).
This tool also handles string comparisons and file scrutiny. These parts of its

View File

@@ -57,7 +57,7 @@ The
.BR cat (1p)
and
.BR tee (1p)
programs specified in POSIX together provide similar functionality. The
programs specified in \*(Px together provide similar functionality. The
separation of the two sets of functionality into separate APIs seemed
unnecessary.
.\"

View File

@@ -45,7 +45,7 @@ The program operates in single-byte chunks regardless of intended encoding.
.\"
.SH RATIONALE
POSIX currently lacks a way to display non-printing characters in the terminal
\*(Px currently lacks a way to display non-printing characters in the terminal
using a standard tool. A popular extension to
.BR cat (1p),
the

View File

@@ -58,7 +58,7 @@ hardware of any given machine.
An infix notation calculation utility,
.BR bc (1p),
is included in the POSIX standard, but does not accept expressions as arguments;
is included in the \*(Px standard, but does not accept expressions as arguments;
in scripts, any predefined, non-interactive input must be piped into the
program. A
.BR dc (1)

View File

@@ -47,13 +47,13 @@ visual similarity and not byte similarity.
.\"
.SH RATIONALE
The traditional tool for string comparisons in POSIX and other Unix shells has
The traditional tool for string comparisons in \*(Px and other Unix shells has
been
.BR test (1).
This tool also handles integer comparisons and file scrutiny. These parts of its
functionality have been broken out into multiple utilities.
This program\(cqs functionality may be performed on a POSIX-compliant system
This program\(cqs functionality may be performed on a \*(Px-compliant system
with
.BR test (1p).
.\"

View File

@@ -15,7 +15,7 @@ always be returned.
.\"
.SH RATIONALE
In \fIPOSIX.1-2017\fP,
In \fI\*(Px.1-2024\fP,
.BR true (1p)
exists for the construction of control flow and loops based on a success. This
implementation functions as described in that standard.

View File

@@ -18,14 +18,16 @@
use std::{
env::args,
io::{ Error, Read, Write, stdin, stdout },
io::{ Error, Write, stdin, stdout },
process::{ Command, ExitCode, Stdio, exit },
};
extern crate delimit;
extern crate getopt;
extern crate strerror;
extern crate sysexits;
use delimit::Delimited;
use getopt::GetOpt;
use strerror::StrError;
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
@@ -90,78 +92,97 @@ fn main() -> ExitCode {
exit(usage(&argv[0]).into());
});
/* read entire standard input into memory */
let mut buf = String::new();
if let Err(e) = stdin().read_to_string(&mut buf) {
err(&argv[0], e);
exit(EX_IOERR.into());
};
let stdin = Box::new(stdin().lock());
/* split the buffer by the delimiter (by default, '\u{1E}') */
let mut fields = buf.split(&d).collect::<Vec<&str>>();
let mut input = Delimited::new(stdin, d.clone().as_bytes());
let mut n = 0;
/* collect arguments for the operator command */
let command_args = argv
.iter()
.clone()
.skip(command_arg + 1) /* skip the command name */
.collect::<Vec<&String>>();
let mut fopped = false;
/* spawn the command to operate on the field */
let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped())
.stdout(Stdio::piped()) /* piped stdout to handle output ourselves */
.spawn()
.unwrap_or_else( |e| {
while let Some(i) = input.next() {
let v = match i {
Ok(v) => v,
Err(e) => {
err(&argv[0], e);
exit(EX_IOERR.into());
},
};
let mut out = Vec::new();
if n == index { /* fop it */
/* collect arguments for the operator command */
let command_args = argv
.iter()
.clone()
.skip(command_arg + 1) /* skip the command name */
.collect::<Vec<&String>>();
/* spawn the command to operate on the field */
let mut spawned = Command::new(operator)
.args(command_args) /* spawn with the specified arguments */
.stdin(Stdio::piped())
/* piped stdout to handle output ourselves */
.stdout(Stdio::piped())
.spawn()
.unwrap_or_else( |e| {
err(&argv[0], e);
exit(EX_UNAVAILABLE.into());
});
/* feed the spawned programs stdin the field value */
if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(&v);
drop(child_stdin); /* stay safe! drop your children! */
let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
/* get the output with which the original field will
* be replaced */
let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */
if v.iter().last() != Some(&b'\n')
&& replace.pop() != Some(b'\n')
{
out = output.stdout;
} else {
out = replace;
}
}
fopped = true;
} else {
out = v;
}
/* since we cannot know when were done, place a new delimiter before
* each index unless it is the 0th */
if n != 0 {
stdout().write_all(d.as_bytes()).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
}
stdout().write_all(&out).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_UNAVAILABLE.into());
exit(EX_IOERR.into());
});
/* get field we want to pipe into spawned program */
let field = fields.get(index).unwrap_or_else(|| {
n += 1;
}
if fopped {
return ExitCode::SUCCESS;
} else {
eprintln!("{}: {}: no such index in input", argv[0], index);
exit(EX_DATAERR.into());
});
/* get the stdin of the newly spawned program and feed it the field val */
if let Some(mut child_stdin) = spawned.stdin.take() {
let _ = child_stdin.write_all(field.as_bytes());
drop(child_stdin); /* stay safe! drop your children! */
return EX_DATAERR.into();
}
let output = spawned.wait_with_output().unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
/* get the output with which the original field will be replaced */
let mut replace = output.stdout.clone();
/* pop trailing newline out if the input did not contain it */
if fields[index].chars().last() != Some('\n') /* no newline */
&& replace.pop() != Some(b'\n') { /* pop last char of replacement */
/* restore replacement to original command output if popped char was not
* a newline */
replace = output.stdout;
}
/* convert the output of the program to UTF-8 */
let new_field = String::from_utf8(replace).unwrap_or_else(|e| {
eprintln!("{}: {}", argv[0], e);
exit(EX_IOERR.into());
});
/* store the new field in the old fields vector */
fields[index] = &new_field;
/* fop it */
stdout().write_all(
fields.join(&d.to_string()).as_bytes()
).unwrap_or_else(|e| {
err(&argv[0], e);
exit(EX_IOERR.into());
});
ExitCode::SUCCESS
}

91
src/libdelimit.rs Normal file
View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2025 Emma Tebibyte <emma@tebibyte.media>
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Affero General Public License as published by the
* Free Software Foundation, either version 3 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
* for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
use std::{
io::{ Read, Result },
mem::self,
};
const BUFFER_SIZE: usize = 4096;
/// An iterator adaptor that splits a byte stream into chunks separated by a
/// delimiter. Each call to `next` yields the bytes found before the next
/// occurrence of the delimiter; the delimiter itself is not part of any chunk.
pub struct Delimited {
    /// Source the bytes are read from.
    stream: Box<dyn Read>,
    /// Byte sequence that separates one chunk from the next.
    delimiter: Vec<u8>,
    /// Bytes already read from `stream` that have not been yielded yet.
    buffer: Vec<u8>,
}
impl Delimited {
    /// Builds a splitter that reads from `stream` and breaks its contents
    /// wherever `delimiter` occurs.
    ///
    /// The delimiter bytes are copied, so the caller keeps the slice it passed
    /// in. The internal buffer starts out empty with room for `BUFFER_SIZE`
    /// bytes.
    pub fn new(stream: Box<dyn Read>, delimiter: &[u8]) -> Self {
        let delimiter = Vec::from(delimiter);
        let buffer = Vec::with_capacity(BUFFER_SIZE);

        Self { stream, delimiter, buffer }
    }
}
impl Iterator for Delimited {
    type Item = Result<Vec<u8>>;

    /// Yields the next chunk of the stream.
    ///
    /// Returns `Some(Ok(chunk))` holding the bytes that precede the next
    /// delimiter (the delimiter is consumed but not included). When the stream
    /// ends, whatever is left in the buffer is yielded as the last chunk; if
    /// nothing is left, `None` is returned, so a stream that ends with a
    /// delimiter does not produce a trailing empty chunk. A read failure is
    /// yielded as `Some(Err(e))`, except for interrupted reads, which are
    /// retried.
    fn next(&mut self) -> Option<Self::Item> {
        let mut buf = [0; BUFFER_SIZE];

        loop {
            /* a complete chunk may already be buffered from an earlier read */
            if let Some(p) = find_subslice(&self.buffer, &self.delimiter) {
                let chunk = self.buffer.drain(..p).collect::<Vec<_>>();
                /* discard the delimiter that terminated the chunk */
                let _ = self.buffer.drain(..self.delimiter.len());

                return Some(Ok(chunk));
            }

            match self.stream.read(&mut buf) {
                Ok(0) => { /* end of stream: flush the remainder, if any */
                    if self.buffer.is_empty() {
                        return None;
                    }

                    return Some(Ok(mem::take(&mut self.buffer)));
                },
                Ok(n) => self.buffer.extend_from_slice(&buf[..n]),
                /* an interrupted read is not a failure; try again */
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {},
                Err(e) => return Some(Err(e)),
            }
        }
    }
}
/// Returns the index of the first occurrence of `key` within `stack`, or
/// `None` if `key` does not occur.
///
/// An empty `key` never matches. Reporting a match at index 0 for it would
/// make a caller that splits on `key` produce zero-length pieces without ever
/// consuming any of `stack`.
fn find_subslice(stack: &[u8], key: &[u8]) -> Option<usize> {
    match key {
        [] => None,
        /* single-byte keys need no window comparison */
        [byte] => stack.iter().position(|b| b == byte),
        /* windows() yields nothing when key is longer than stack */
        _ => stack.windows(key.len()).position(|window| window == key),
    }
}

View File

@@ -1,116 +0,0 @@
/*
* Copyright (c) 2023–2024 DTB <trinity@trinity.moe>
* Copyright (c) 2024 Emma Tebibyte <emma@tebibyte.media>
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
#include <assert.h> /* assert(3) */
#include <stdio.h> /* fprintf(3), stderr, NULL */
#include <stdlib.h> /* EXIT_FAILURE, EXIT_SUCCESS */
#include <string.h> /* memset(3), strchr(3) */
#include <sysexits.h> /* EX_OSERR, EX_USAGE */
#include <unistd.h> /* access(3), getopt(3), pledge(2), unveil(2), F_OK, R_OK,
* W_OK, X_OK */
#include <sys/stat.h> /* lstat(3), stat struct, S_ISBLK, S_ISCHR, S_ISDIR,
* S_ISFIFO, S_ISGID, S_ISREG, S_ISLNK, S_ISSOCK,
* S_ISUID, S_ISVTX */
char *program_name = "scrut";
#define OPTS "bcdefgkprsuwxLS"
/* this is an array so main:sel's size can be known at compile time */
static char opts[] = OPTS;
/* Writes the one-line synopsis to standard error, naming the program as argv0,
 * and hands back the usage-error exit status so that callers can simply
 * `return usage(...)`. */
static int
usage(char *argv0) {
    int const status = EX_USAGE;

    /* the result of the write is deliberately ignored */
    (void)fprintf(stderr, "Usage: %s [-" OPTS "] file...\n", argv0);

    return status;
}
/* Checks every file operand against every selected option.
 *
 * Returns EXIT_SUCCESS when all operands satisfy all selected checks,
 * EXIT_FAILURE as soon as one operand does not exist, cannot be examined, or
 * fails a check, the value of usage() when the invocation is malformed, and
 * EX_OSERR when pledge(2) or unveil(2) fails on OpenBSD. */
int main(int argc, char *argv[]) {
    /* one slot per option letter plus the terminating NUL of opts */
    char sel[(sizeof opts) / (sizeof *opts)];

    if (argv[0] != NULL) { program_name = argv[0]; }

#ifdef __OpenBSD__
    if (pledge("rpath stdio unveil", "") == -1) {
        perror(program_name);
        return EX_OSERR;
    }
#endif

    if (argc < 2) { return usage(program_name); }

    { /* option parsing */
        int c;
        size_t used = 0; /* number of slots filled at the front of sel */

        memset(sel, '\0', sizeof sel);

        /* record each option in the slot matching its position in opts, so
         * that a repeated option is only stored once */
        while ((c = getopt(argc, argv, opts)) != -1) {
            char *hit = strchr(opts, c);

            if (hit == NULL) { return usage(argv[0]); }

            assert(hit - opts < sizeof sel / sizeof *sel); /* bounds check */
            sel[hit - opts] = c;
        }

        /* pack the recorded options to the front of sel, leaving the rest
         * zeroed, so sel can be walked as a NUL-terminated string */
        for (size_t i = 0; i < (sizeof sel) / (sizeof *sel); ++i) {
            char chosen = sel[i];

            if (chosen == '\0') { continue; }
            if (i != used) { sel[i] = '\0'; }

            sel[used++] = chosen;
        }
    }

    if (optind == argc) { return usage(argv[0]); }

    for (argv += optind; *argv != NULL; argv = &argv[1]) {
        struct stat buf;

#ifdef __OpenBSD__
        if (unveil(*argv, "rw") == -1) {
            perror(program_name);
            return EX_OSERR;
        }
#endif

        /* doesn't exist or isn't stattable */
        if (access(*argv, F_OK) != 0) { return EXIT_FAILURE; }
        if (lstat(*argv, &buf) == -1) { return EXIT_FAILURE; }

        for (size_t i = 0; sel[i] != '\0'; ++i) {
            int passes;

            switch (sel[i]) {
            case 'b': passes = S_ISBLK(buf.st_mode) != 0; break;
            case 'c': passes = S_ISCHR(buf.st_mode) != 0; break;
            case 'd': passes = S_ISDIR(buf.st_mode) != 0; break;
            case 'e': passes = 1; break; /* existence was established above */
            case 'f': passes = S_ISREG(buf.st_mode) != 0; break;
            case 'g': passes = (buf.st_mode & S_ISGID) != 0; break;
            case 'k': passes = (buf.st_mode & S_ISVTX) != 0; break;
            case 'p': passes = S_ISFIFO(buf.st_mode) != 0; break;
            case 'r': passes = access(*argv, R_OK) == 0; break;
            case 'u': passes = (buf.st_mode & S_ISUID) != 0; break;
            case 'w': passes = access(*argv, W_OK) == 0; break;
            case 'x': passes = access(*argv, X_OK) == 0; break;
            case 'L': passes = S_ISLNK(buf.st_mode) != 0; break;
            case 'S': passes = S_ISSOCK(buf.st_mode) != 0; break;
            /* NOTE(review): OPTS accepts 's' but nothing tests for it, so it
             * lands here and always passes -- confirm this is intended */
            default: passes = 1; break;
            }

            if (!passes) { return EXIT_FAILURE; }
        }
    }

    return EXIT_SUCCESS;
}