Compare commits
17 Commits
791f44aa1e
...
trunk
| Author | SHA1 | Date | |
|---|---|---|---|
| d3c59ede0d | |||
|
2f2b8cf067
|
|||
|
2846ee3c01
|
|||
|
afd58ae4e3
|
|||
|
90de81b5bd
|
|||
|
1bd190ae99
|
|||
|
d497d5b4aa
|
|||
|
c1df6e6322
|
|||
|
28bc2cd0d0
|
|||
|
5cc1e2067b
|
|||
|
4aab77bee4
|
|||
|
15039805f9
|
|||
|
31b424d205
|
|||
|
be6bd5386d
|
|||
|
3b44ddeedd
|
|||
|
ac0f9e4019
|
|||
|
247e469f82
|
265
CONTRIBUTING
265
CONTRIBUTING
@@ -99,17 +99,247 @@ notice:
|
||||
Style
|
||||
=====
|
||||
|
||||
Make sure lines never exceed 80 columns in width when using four-character
|
||||
indentation steps. This helps contributors with smaller screens, those using
|
||||
side-by-side editor windows or panes, and those who have no text wrapping in
|
||||
their editor or terminal.
|
||||
“Everyone knows that debugging is twice as hard as writing a program in the
|
||||
first place. So if you’re as clever as you can be when you write it, how
|
||||
will you ever debug it?”
|
||||
– Brian Kernighan, The Elements of Programming Style
|
||||
|
||||
For usage text and help messages, do not implement a -h option. Instead, print
|
||||
usage information when any erroneous option is specified. Follow the NetBSD
|
||||
style guide for the usage text’s output format [0].
|
||||
The following guidelines are conducive to clear and readable code that is
|
||||
consistent with the style of the rest of the Bonsai Computer System.
|
||||
|
||||
Use:
|
||||
|
||||
0. A single line for control flow statements short enough to be easily
|
||||
understood at a glance:
|
||||
|
||||
if !(argc < 0) { usage(program_name); }
|
||||
|
||||
This applies to C switch statements and cases and Rust match statements, as
|
||||
well:
|
||||
|
||||
switch (value) { /* aligning stuff to make it easier to read is fine */
|
||||
case possibility: variable = foo; break;
|
||||
default: variable = NULL; break;
|
||||
}
|
||||
|
||||
1. Switch cases in C and match arms in Rust should start another level of
|
||||
indentation:
|
||||
|
||||
switch (value) {
|
||||
case possibility:
|
||||
statement;
|
||||
break;
|
||||
default:
|
||||
statement;
|
||||
break;
|
||||
}
|
||||
|
||||
match result {
|
||||
Ok(n) => variable = n,
|
||||
Err(e) => error = e,
|
||||
}
|
||||
|
||||
2. Braces in control flow where their inclusion is left optional in C:
|
||||
|
||||
if (condition) { statement; }
|
||||
|
||||
3. Empty lines between different kinds of statements:
|
||||
|
||||
int t;
|
||||
|
||||
assert(io->bufuse > 0);
|
||||
assert(io->bufuse <= io->bs);
|
||||
|
||||
if ((t = write(io->fd, io->buf, io->bufuse)) < 0) {
|
||||
io->error = errno;
|
||||
t = 0;
|
||||
} else if (t > 0) {
|
||||
memmove(io->buf, &(io->buf)[t], (io->bufuse -= t));
|
||||
}
|
||||
|
||||
io->bytes += t;
|
||||
io->prec += (t > 0 && io->bufuse > 0);
|
||||
io->rec += (t > 0 && io->bufuse == 0);
|
||||
|
||||
return io;
|
||||
|
||||
4. Compiler options that yield the most useful warnings, such as -Wpedantic in
|
||||
a lot of C compilers. Fix the warnings, too [0].
|
||||
|
||||
5. One more level of indentation and one argument per line when a function
|
||||
call or statement header is too long to fit on one line:
|
||||
let usage = format!(
|
||||
"Usage: {} [-d delimiter] index command [args...]",
|
||||
argv[0],
|
||||
);
|
||||
|
||||
6. One more level of indentation than the keyword that initiated a multi-line
|
||||
block.
|
||||
|
||||
if (condition) {
|
||||
statement;
|
||||
statement;
|
||||
}
|
||||
|
||||
7. The return value of all non-void functions, or explicitly ignore them (like
|
||||
casting to void in C) [0]:
|
||||
|
||||
if ((a = malloc(sizeof *a)) == NULL) { /* handle this error */
|
||||
(void)fprintf(stderr, "oh noes!"); /* explicitly ignore this one */
|
||||
return EX_OSERR; /* ...because the program is exiting anyway */
|
||||
}
|
||||
|
||||
8. The smallest possible scope for data [0].
|
||||
|
||||
9. Comments noting all the symbols and macros used from a C header file, next
|
||||
to its include macro:
|
||||
|
||||
#include <unistd.h> /* close(2), getopt(3), lseek(2), read(2), write(2),
|
||||
(space-aligned) * optarg, optind, STDIN_FILENO, STDOUT_FILENO */
|
||||
|
||||
10. Spaces in control flow statements, after the keyword and before the
|
||||
opening brace:
|
||||
|
||||
for (i = 2; i < argc; ++i) {
|
||||
|
||||
|
||||
11. In Rust, a trailing comma on all arguments or fields that are on their own
|
||||
lines:
|
||||
|
||||
return Err(EvaluationError {
|
||||
message: format!("{}: Invalid token", i),
|
||||
code: EX_DATAERR,
|
||||
})
|
||||
|
||||
12. In Rust, place extern statements after use statements that include standard
|
||||
library crates. Group like statements:
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
extern crate strerror;
|
||||
extern crate sysexits;
|
||||
|
||||
use strerror::StrError;
|
||||
use sysexits::{ EX_OSERR, EX_USAGE };
|
||||
|
||||
13. If text is on the same line as a brace, spaces after an opening brace and
|
||||
before a closing one:
|
||||
|
||||
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
|
||||
|
||||
14. Alphabetic sorting, where applicable:
|
||||
|
||||
use std::io::{ BufWriter, Read, Write, stderr, stdin, stdout };
|
||||
|
||||
15. In Rust, use the to_owned() method on string types (str, OsStr, CStr, etc.)
|
||||
and the to_string() method on other types.
|
||||
|
||||
|
||||
Avoid:
|
||||
|
||||
16. Unbounded loops [0].
|
||||
|
||||
17. Function pointers [0].
|
||||
|
||||
18. Heap memory allocation [0].
|
||||
|
||||
19. Using too much nested logic (within reason).
|
||||
|
||||
20. Too many levels of dereferences [0]:
|
||||
|
||||
/* do not do this */
|
||||
for (size_t i = 0; i < sizeof a / sizeof *a; ++i) {
|
||||
if (a[i].id == MATCH) { a[i].val = 0; }
|
||||
}
|
||||
|
||||
/* do this */
|
||||
for (struct MadeUp *s = &a[0]; *s != NULL; s = &s[1]) {
|
||||
if (s->id == MATCH) { s->val = 0; }
|
||||
}
|
||||
|
||||
21. Using C preprocessor macros; the fewer, the better [0].
|
||||
|
||||
22. The exit(3p) and std::process::exit() functions; returning from the main
|
||||
function skips a system call.
|
||||
|
||||
|
||||
Do not use:
|
||||
|
||||
23. More than the length of one printed page for a function [0].
|
||||
|
||||
24. Recursion, as it’s complex and can unexpectedly overflow the stack [0].
|
||||
|
||||
25. Any functionality not in the POSIX C specification and language features not
|
||||
in C99.
|
||||
|
||||
26. Do-while loops, as they’re unique to C and confusing for casual programmers.
|
||||
|
||||
27. Labels and goto statements; use sensible flow control [0].
|
||||
|
||||
28. Pointer arithmetic, as it tends to be confusing and unnecessary; use
|
||||
index-reference patterns like &p[1] instead of p + 1. &p[n] is the address at
|
||||
p + sizeof *p * n, not p + n, like pointer arithmetic suggests.
|
||||
|
||||
29. C struct bitfields in unions, to access certain bits of bigger data types,
|
||||
as it’s poorly defined in the C standards; use bit arithmetic.
|
||||
|
||||
30. C trigraphs.
|
||||
|
||||
31. Inclusions in C header files, to prevent multiple file inclusions.
|
||||
|
||||
32. C preprocessor variables to prevent multiple inclusions of the same file,
|
||||
such as:
|
||||
|
||||
#ifndef _FILE
|
||||
#define _FILE
|
||||
/* file body */
|
||||
#endif /* ifndef _FILE */
|
||||
|
||||
Instead, take the time to ensure other files aren’t including any files twice.
|
||||
|
||||
33. The gets(3p) function from <stdio.h>, as it’s impossible to prevent buffer
|
||||
overflows when it's used; use fgets(3p) from <stdio.h>.
|
||||
|
||||
34. The scanf(3p) function from <stdio.h> [1].
|
||||
|
||||
35. Any functionality not described in the latest POSIX make(1) specification.
|
||||
|
||||
36. Macros which panic on failure in Rust (such as the print!() and println!()
|
||||
macros). Use a function and handle any errors. However, do use the eprintln!()
|
||||
macro for error messages. Handling an error for writing an error message is
|
||||
redundant.
|
||||
|
||||
37. A -h option for help text. Instead, print usage information when any
|
||||
erroneous option is specified. See the Usage Text section below.
|
||||
|
||||
38. Lines which exceed 80 columns in width when using four-column indentation
|
||||
steps. This helps contributors with smaller screens, those using side-by-side
|
||||
editor windows or panes, and those who have no text wrapping in their editor or
|
||||
terminal.
|
||||
|
||||
|
||||
Usage Text
|
||||
==========
|
||||
|
||||
This section is adapted from the NetBSD style guide [2].
|
||||
|
||||
When programs are invoked incorrectly and in the synopsis of manual pages, usage
|
||||
text should be provided to the user. The following is the format used by this
|
||||
project for this purpose:
|
||||
|
||||
All optional arguments are to be placed in square brackets (U+005B, U+005D).
|
||||
Mutually exclusive arguments can be separated by a vertical line (U+007C).
|
||||
Groups of arguments should be specified in alphabetical order in most cases. The
|
||||
order of arguments and an example of these rules follows:
|
||||
|
||||
0. Options with no option arguments.
|
||||
1. Options with option arguments. Arguments should be specified inside the same
|
||||
square brackets as the options.
|
||||
2. Non-option arguments.
|
||||
|
||||
"usage: f [-aDde] [-b b_arg] [-m m_arg] req1 req2 [opt1 [opt2]]\n"
|
||||
"usage: f [-a | -b] [-c [-de] [-n number]]\n"
|
||||
|
||||
If committing a new utility, please include tests and documentation (see
|
||||
tests/ and docs/) for the new tool.
|
||||
|
||||
Committing
|
||||
==========
|
||||
@@ -117,6 +347,10 @@ Committing
|
||||
When contributing to Bonsai, please sign your commit with a PGP key and create
|
||||
the commit with an identity which can be easily contacted.
|
||||
|
||||
If committing a new utility, please include tests and documentation (see
|
||||
tests/ and docs/) for the new tool.
|
||||
|
||||
|
||||
Format commit messages following these guidelines:
|
||||
|
||||
$ git commit -m 'tool(1): add feature x'
|
||||
@@ -147,9 +381,16 @@ Commit messages should be written in the present tense.
|
||||
References
|
||||
==========
|
||||
|
||||
[0] <http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/share/misc/style>
|
||||
[0] <https://web.eecs.umich.edu/~imarkov/10rules.pdf>
|
||||
[1] <http://sekrit.de/webdocs/c/beginners-guide-away-from-scanf.html>
|
||||
[2] <http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/share/misc/style>
|
||||
|
||||
|
||||
--
|
||||
This work © 2023–2024 by Emma Tebibyte is licensed under CC BY-SA 4.0. To view a
|
||||
copy of this license, visit <http://creativecommons.org/licenses/by-sa/4.0/>
|
||||
|
||||
Copyright © 2023–2024 Emma Tebibyte <emma@tebibyte.media>
|
||||
Copyright © 2024 DTB <trinity@trinity.moe>
|
||||
Copyright © Wikipedia contributors
|
||||
|
||||
This work is licensed under CC BY-SA 4.0. To view a copy of this license, visit
|
||||
<http://creativecommons.org/licenses/by-sa/4.0/>.
|
||||
|
||||
19
Makefile
19
Makefile
@@ -30,7 +30,8 @@ SYSEXITS != printf '\043include <sysexits.h>\n' | cpp -M - | tr ' ' '\n' \
|
||||
|
||||
CC ?= cc
|
||||
RUSTC ?= rustc
|
||||
RUSTFLAGS += --extern getopt=build/o/libgetopt.rlib \
|
||||
RUSTFLAGS += --extern delimit=build/o/libdelimit.rlib \
|
||||
--extern getopt=build/o/libgetopt.rlib \
|
||||
--extern strerror=build/o/libstrerror.rlib \
|
||||
--extern sysexits=build/o/libsysexits.rlib
|
||||
CFLAGS += -I$(SYSEXITS)
|
||||
@@ -70,12 +71,16 @@ TESTS != printf '%s\n' "$(TESTFILES)" | xargs -n1 basename \
|
||||
include $(TESTFILES)
|
||||
|
||||
.PHONY: test
|
||||
test: all $(TESTS) /tmp/getopt
|
||||
test: all $(TESTS) /tmp/delimit /tmp/getopt
|
||||
@echo $(TESTS)
|
||||
/tmp/delimit
|
||||
/tmp/getopt
|
||||
|
||||
/tmp/delimit: src/libdelimit.rs
|
||||
$(RUSTC) --test -o $@ src/libdelimit.rs
|
||||
|
||||
/tmp/getopt: src/libgetopt.rs
|
||||
$(RUSTC) --test -o /tmp/getopt src/libgetopt.rs
|
||||
$(RUSTC) --test -o $@ src/libgetopt.rs
|
||||
|
||||
.PHONY: docs
|
||||
docs: docs/ build
|
||||
@@ -88,8 +93,12 @@ docs: docs/ build
|
||||
include $(OS_INCLUDE)
|
||||
|
||||
.PHONY: rustlibs
|
||||
rustlibs: build/o/libgetopt.rlib build/o/libstrerror.rlib \
|
||||
build/o/libsysexits.rlib $(OSLIB)
|
||||
rustlibs: build/o/libdelimit.rlib build/o/libgetopt.rlib \
|
||||
build/o/libstrerror.rlib build/o/libsysexits.rlib $(OSLIB)
|
||||
|
||||
build/o/libdelimit.rlib: build src/libdelimit.rs
|
||||
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=delimit \
|
||||
-o $@ src/libdelimit.rs
|
||||
|
||||
build/o/libgetopt.rlib: build src/libgetopt.rs
|
||||
$(RUSTC) $(RUSTFLAGS) --crate-type=lib --crate-name=getopt \
|
||||
|
||||
3
README
3
README
@@ -18,7 +18,8 @@ anywhere. Other utility sets aim to provide a number of fully-featured programs
|
||||
to be used individually, Harakit utilities are meant to be easily composable
|
||||
and work together in pipelines.
|
||||
|
||||
See docs/ for more on the specific utilities currently implemented.
|
||||
See docs/ for more on the specific utilities currently implemented and see
|
||||
CONTRIBUTING for guidelines for contributions.
|
||||
|
||||
|
||||
Building
|
||||
|
||||
254
STYLE
254
STYLE
@@ -1,254 +0,0 @@
|
||||
“Everyone knows that debugging is twice as hard as writing a program in the
|
||||
first place. So if you’re as clever as you can be when you write it, how
|
||||
will you ever debug it?”
|
||||
– Brian Kernighan, The Elements of Programming Style
|
||||
|
||||
|
||||
The following guidelines are conducive to clear and readable code that is
|
||||
consistent with the style of the rest of the Bonsai Computer System.
|
||||
|
||||
|
||||
Use
|
||||
===
|
||||
|
||||
0. A single line for control flow statements short enough to be easily
|
||||
understood at a glance:
|
||||
|
||||
if !(argc < 0) { usage(program_name); }
|
||||
|
||||
This applies to C switch statements and cases and Rust match statements, as
|
||||
well:
|
||||
|
||||
switch (value) { /* aligning stuff to make it easier to read is fine */
|
||||
case possibility: variable = foo; break;
|
||||
default: variable = NULL; break;
|
||||
}
|
||||
|
||||
1. Switch cases in C and match arms in Rust should start another level of
|
||||
indentation:
|
||||
|
||||
switch (value) {
|
||||
case possibility:
|
||||
statement;
|
||||
break;
|
||||
default:
|
||||
statement;
|
||||
break;
|
||||
}
|
||||
|
||||
match result {
|
||||
Ok(n) => variable = n,
|
||||
Err(e) => error = e,
|
||||
}
|
||||
|
||||
2. Braces in control flow where their inclusion is left optional in C:
|
||||
|
||||
if (condition) { statement; }
|
||||
|
||||
3. Empty lines between different kinds of statements:
|
||||
|
||||
int t;
|
||||
|
||||
assert(io->bufuse > 0);
|
||||
assert(io->bufuse <= io->bs);
|
||||
|
||||
if ((t = write(io->fd, io->buf, io->bufuse)) < 0) {
|
||||
io->error = errno;
|
||||
t = 0;
|
||||
} else if (t > 0) {
|
||||
memmove(io->buf, &(io->buf)[t], (io->bufuse -= t));
|
||||
}
|
||||
|
||||
io->bytes += t;
|
||||
io->prec += (t > 0 && io->bufuse > 0);
|
||||
io->rec += (t > 0 && io->bufuse == 0);
|
||||
|
||||
return io;
|
||||
|
||||
4. Compiler options that yield the most useful warnings, such as -Wpedantic in
|
||||
a lot of C compilers. Fix the warnings, too [0].
|
||||
|
||||
5. One more level of indentation and one argument per line when a function
|
||||
call or statement header is too long to fit on one line:
|
||||
|
||||
let usage = format!(
|
||||
"Usage: {} [-d delimiter] index command [args...]",
|
||||
argv[0],
|
||||
);
|
||||
|
||||
6. One more level of indentation than the keyword that initiated a multi-line
|
||||
block.
|
||||
|
||||
if (condition) {
|
||||
statement;
|
||||
statement;
|
||||
}
|
||||
|
||||
7. The return value of all non-void functions, or explicitly ignore them (like
|
||||
casting to void in C) [0]:
|
||||
|
||||
if ((a = malloc(sizeof *a)) == NULL) { /* handle this error */
|
||||
(void)fprintf(stderr, "oh noes!"); /* explicitly ignore this one */
|
||||
return EX_OSERR; /* ...because the program is exiting anyway */
|
||||
}
|
||||
|
||||
8. The smallest possible scope for data [0].
|
||||
|
||||
9. Comments noting all the symbols and macros used from a C header file, next
|
||||
to its include macro:
|
||||
|
||||
#include <unistd.h> /* close(2), getopt(3), lseek(2), read(2), write(2),
|
||||
(space-aligned) * optarg, optind, STDIN_FILENO, STDOUT_FILENO */
|
||||
|
||||
10. Spaces in control flow statements, after the keyword and before the
|
||||
opening brace:
|
||||
|
||||
for (i = 2; i < argc; ++i) {
|
||||
|
||||
|
||||
11. In Rust, a trailing comma on all arguments or fields that are on their own
|
||||
lines:
|
||||
|
||||
return Err(EvaluationError {
|
||||
message: format!("{}: Invalid token", i),
|
||||
code: EX_DATAERR,
|
||||
})
|
||||
|
||||
12. In Rust, place extern statements after use statements that include standard
|
||||
library crates. Group like statements:
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
extern crate strerror;
|
||||
extern crate sysexits;
|
||||
|
||||
use strerror::StrError;
|
||||
use sysexits::{ EX_OSERR, EX_USAGE };
|
||||
|
||||
13. If text is on the same line as a brace, spaces after an opening brace and
|
||||
before a closing one:
|
||||
|
||||
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
|
||||
|
||||
14. Alphabetic sorting, where applicable:
|
||||
|
||||
use std::io::{ BufWriter, Read, Write, stderr, stdin, stdout };
|
||||
|
||||
15. In Rust, use the to_owned() method on string types (str, OsStr, CStr, etc.)
|
||||
and the to_string() method on other types.
|
||||
|
||||
|
||||
Avoid
|
||||
=====
|
||||
|
||||
16. Unbounded loops [0].
|
||||
|
||||
17. Function pointers [0].
|
||||
|
||||
18. Heap memory allocation [0].
|
||||
|
||||
19. Using too much nested logic (within reason).
|
||||
|
||||
20. Too many levels of dereferences [0]:
|
||||
|
||||
/* do not do this */
|
||||
for (size_t i = 0; i < sizeof a / sizeof *a; ++i) {
|
||||
if (a[i].id == MATCH) { a[i].val = 0; }
|
||||
}
|
||||
|
||||
/* do this */
|
||||
for (struct MadeUp *s = &a[0]; *s != NULL; s = &s[1]) {
|
||||
if (s->id == MATCH) { s->val = 0; }
|
||||
}
|
||||
|
||||
21. Using C preprocessor macros; the fewer, the better [0].
|
||||
|
||||
22. The exit(3p) and std::process::exit() functions; returning from the main
|
||||
function skips a system call.
|
||||
|
||||
|
||||
Do Not Use
|
||||
==========
|
||||
|
||||
23. More than the length of one printed page for a function [0].
|
||||
|
||||
24. Recursion, as it’s complex and can unexpectedly overflow the stack [0].
|
||||
|
||||
25. Any functionality not in the POSIX C specification and language features not
|
||||
in C99.
|
||||
|
||||
26. Do-while loops, as they’re unique to C and confusing for casual programmers.
|
||||
|
||||
27. Labels and goto statements; use sensible flow control [0].
|
||||
|
||||
28. Pointer arithmetic, as it tends to be confusing and unnecessary; use
|
||||
index-reference patterns like &p[1] instead of p + 1. &p[n] is the address at
|
||||
p + sizeof *p * n, not p + n, like pointer arithmetic suggests.
|
||||
|
||||
29. C struct bitfields in unions, to access certain bits of bigger data types,
|
||||
as it’s poorly defined in the C standards; use bit arithmetic.
|
||||
|
||||
30. C trigraphs.
|
||||
|
||||
31. Inclusions in C header files, to prevent multiple file inclusions.
|
||||
|
||||
32. C preprocessor variables to prevent multiple inclusions of the same file,
|
||||
such as:
|
||||
|
||||
#ifndef _FILE
|
||||
#define _FILE
|
||||
/* file body */
|
||||
#endif /* ifndef _FILE */
|
||||
|
||||
Instead, take the time to ensure other files aren’t including any files twice.
|
||||
|
||||
33. The gets(3p) function from <stdio.h>, as it’s impossible to prevent buffer
|
||||
overflows when it's used; use fgets(3p) from <stdio.h>.
|
||||
|
||||
34. The scanf(3p) function from <stdio.h> [1].
|
||||
|
||||
35. Any functionality not described in the latest POSIX make(1) specification.
|
||||
|
||||
36. Macros which panic on failure in Rust (such as the print!() and println!()
|
||||
macros). Use a function and handle any errors. However, do use the eprintln!()
|
||||
macro for error messages. Handling an error for writing an error message is
|
||||
redundant.
|
||||
|
||||
|
||||
Usage Text
|
||||
==========
|
||||
|
||||
This section is adapted from the NetBSD style guide [2].
|
||||
|
||||
When programs are invoked incorrectly and in the synopsis of manual pages, usage
|
||||
text should be provided to the user. The following is the format used by this
|
||||
project for this purpose:
|
||||
|
||||
All optional arguments are to be placed in square brackets (U+005B, U+005D).
|
||||
Mutually exclusive arguments can be separated by a vertical line (U+007C).
|
||||
Groups of arguments should be specified in alphabetical order in most cases. The
|
||||
order of arguments and an example of these rules follows:
|
||||
|
||||
0. Options with no option arguments.
|
||||
1. Options with option arguments. Arguments should be specified inside the same
|
||||
square brackets as the options.
|
||||
2. Non-option arguments.
|
||||
|
||||
"usage: f [-aDde] [-b b_arg] [-m m_arg] req1 req2 [opt1 [opt2]]\n"
|
||||
"usage: f [-a | -b] [-c [-de] [-n number]]\n"
|
||||
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
[0] <https://web.eecs.umich.edu/~imarkov/10rules.pdf>
|
||||
[1] <http://sekrit.de/webdocs/c/beginners-guide-away-from-scanf.html>
|
||||
[2] <http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/share/misc/style>
|
||||
|
||||
--
|
||||
Copyright © 2024 Emma Tebibyte <emma@tebibyte.media>
|
||||
Copyright © 2024 DTB <trinity@trinity.moe>
|
||||
Copyright © Wikipedia contributors
|
||||
|
||||
This work is licensed under CC BY-SA 4.0. To view a copy of this license, visit
|
||||
<http://creativecommons.org/licenses/by-sa/4.0/>.
|
||||
@@ -15,7 +15,7 @@ always be returned.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
In POSIX.1-2017,
|
||||
In \*(Px.1-2024,
|
||||
.BR false (1p)
|
||||
exists for the construction of control flow and loops based on a failure. This
|
||||
implementation functions as described in that standard.
|
||||
|
||||
18
docs/fop.1
18
docs/fop.1
@@ -1,5 +1,5 @@
|
||||
.\" Copyright (c) 2024 DTB <trinity@trinity.moe>
|
||||
.\" Copyright (c) 2024 Emma Tebibyte <emma@tebibyte.media>
|
||||
.\" Copyright (c) 2024–2025 Emma Tebibyte <emma@tebibyte.media>
|
||||
.\"
|
||||
.\" This work is licensed under CC BY-SA 4.0. To see a copy of this license,
|
||||
.\" visit <http://creativecommons.org/licenses/by-sa/4.0/>.
|
||||
@@ -11,10 +11,9 @@ fop \(en field operator
|
||||
.SH SYNOPSIS
|
||||
|
||||
fop
|
||||
.RB ( -d )
|
||||
.RB [ delimiter ]
|
||||
.RB index
|
||||
.RB program...
|
||||
.RB [ -d\ delimiter ]
|
||||
index program
|
||||
.RB [ arguments... ]
|
||||
.\"
|
||||
.SH DESCRIPTION
|
||||
|
||||
@@ -26,8 +25,17 @@ Performs operations on specified fields in data read from the standard input.
|
||||
Sets a delimiter by which the input data will be split into fields. The default
|
||||
is an ASCII record separator.
|
||||
.\"
|
||||
.SH DIAGNOSTICS
|
||||
In the event of an error, a debug message will be printed and the program will
|
||||
exit with the appropriate sysexits.h(3) error code.
|
||||
.\"
|
||||
.SH CAVEATS
|
||||
|
||||
If the specified index does not exist in the data, the program
|
||||
will print all data to the standard output before exiting with an error. If
|
||||
input data is not delimited by the specified delimiter, the program will fill
|
||||
memory with the contents of the stream before it is output.
|
||||
|
||||
Field indices are zero-indexed, which may be unexpected behavior for some
|
||||
users.
|
||||
.\"
|
||||
|
||||
@@ -76,7 +76,7 @@ this is elegant but unintuitive.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
The traditional tool for integer comparisons in POSIX and other Unix shells has
|
||||
The traditional tool for integer comparisons in \*(Px and other Unix shells has
|
||||
been
|
||||
.BR test (1).
|
||||
This tool also handles string comparisons and file scrutiny. These parts of its
|
||||
|
||||
@@ -75,11 +75,15 @@ utility\(cqs niche.
|
||||
|
||||
Written by DTB
|
||||
.MT trinity@trinity.moe
|
||||
.ME , \
|
||||
ported to Rust by Emma Tebibyte
|
||||
.MT emma@tebibyte.media
|
||||
.ME .
|
||||
.\"
|
||||
.SH COPYRIGHT
|
||||
|
||||
Copyright © 2023 DTB. License AGPLv3+: GNU AGPL version 3 or later
|
||||
Copyright © 2024 DTB, 2024\(en2025 Emma Tebibyte. \
|
||||
License AGPLv3+: GNU AGPL version 3 or later
|
||||
<https://gnu.org/licenses/agpl.html>.
|
||||
.\"
|
||||
.SH SEE ALSO
|
||||
|
||||
@@ -45,7 +45,7 @@ The program operates in single-byte chunks regardless of intended encoding.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
POSIX currently lacks a way to display non-printing characters in the terminal
|
||||
\*(Px currently lacks a way to display non-printing characters in the terminal
|
||||
using a standard tool. A popular extension to
|
||||
.BR cat (1p),
|
||||
the
|
||||
|
||||
@@ -47,13 +47,13 @@ visual similarity and not byte similarity.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
The traditional tool for string comparisons in POSIX and other Unix shells has
|
||||
The traditional tool for string comparisons in \*(Px and other Unix shells has
|
||||
been
|
||||
.BR test (1).
|
||||
This tool also handles integer comparisons and file scrutiny. These parts of
|
||||
its functionality have been broken out into multiple utilities.
|
||||
|
||||
This program\(cqs functionality may be performed on a POSIX-compliant system
|
||||
This program\(cqs functionality may be performed on a \*(Px-compliant system
|
||||
with
|
||||
.BR test (1p).
|
||||
.\"
|
||||
|
||||
@@ -15,7 +15,7 @@ always be returned.
|
||||
.\"
|
||||
.SH RATIONALE
|
||||
|
||||
In \fIPOSIX.1-2017\fP,
|
||||
In \fI\*(Px.1-2024\fP,
|
||||
.BR true (1p)
|
||||
exists for the construction of control flow and loops based on a success. This
|
||||
implementation functions as described in that standard.
|
||||
|
||||
154
src/fop.rs
154
src/fop.rs
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2023–2024 Emma Tebibyte <emma@tebibyte.media>
|
||||
* Copyright (c) 2023–2025 Emma Tebibyte <emma@tebibyte.media>
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
@@ -18,14 +18,16 @@
|
||||
|
||||
use std::{
|
||||
env::args,
|
||||
io::{ Error, Read, Write, stdin, stdout },
|
||||
io::{ Error, Write, stdin, stdout },
|
||||
process::{ Command, ExitCode, Stdio, exit },
|
||||
};
|
||||
|
||||
extern crate delimit;
|
||||
extern crate getopt;
|
||||
extern crate strerror;
|
||||
extern crate sysexits;
|
||||
|
||||
use delimit::Delimited;
|
||||
use getopt::GetOpt;
|
||||
use strerror::StrError;
|
||||
use sysexits::{ EX_DATAERR, EX_IOERR, EX_UNAVAILABLE, EX_USAGE };
|
||||
@@ -90,78 +92,94 @@ fn main() -> ExitCode {
|
||||
exit(usage(&argv[0]).into());
|
||||
});
|
||||
|
||||
/* read entire standard input into memory */
|
||||
let mut buf = String::new();
|
||||
if let Err(e) = stdin().read_to_string(&mut buf) {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
};
|
||||
let stdin = stdin().lock();
|
||||
|
||||
/* split the buffer by the delimiter (by default, '\u{1E}') */
|
||||
let mut fields = buf.split(&d).collect::<Vec<&str>>();
|
||||
let mut input = Delimited::new(stdin, d.clone());
|
||||
let mut n = 0;
|
||||
|
||||
/* collect arguments for the operator command */
|
||||
let command_args = argv
|
||||
.iter()
|
||||
.clone()
|
||||
.skip(command_arg + 1) /* skip the command name */
|
||||
.collect::<Vec<&String>>();
|
||||
let mut fopped = false;
|
||||
|
||||
/* spawn the command to operate on the field */
|
||||
let mut spawned = Command::new(operator)
|
||||
.args(command_args) /* spawn with the specified arguments */
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped()) /* piped stdout to handle output ourselves */
|
||||
.spawn()
|
||||
.unwrap_or_else( |e| {
|
||||
while let Some(i) = input.next() {
|
||||
let v = match i {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
err(&argv[0], e);
|
||||
return EX_IOERR.into();
|
||||
},
|
||||
};
|
||||
|
||||
let mut out = Vec::new();
|
||||
|
||||
if n == index { /* fop it */
|
||||
/* collect arguments for the operator command */
|
||||
let command_args = argv
|
||||
.iter()
|
||||
.clone()
|
||||
.skip(command_arg + 1) /* skip the command name */
|
||||
.collect::<Vec<&String>>();
|
||||
|
||||
/* spawn the command to operate on the field */
|
||||
let mut spawned = Command::new(operator)
|
||||
.args(command_args) /* spawn with the specified arguments */
|
||||
.stdin(Stdio::piped())
|
||||
/* piped stdout to handle output ourselves */
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()
|
||||
.unwrap_or_else( |e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_UNAVAILABLE.into());
|
||||
});
|
||||
|
||||
/* feed the spawned program’s stdin the field value */
|
||||
if let Some(mut child_stdin) = spawned.stdin.take() {
|
||||
let _ = child_stdin.write_all(&v);
|
||||
drop(child_stdin); /* stay safe! drop your children! */
|
||||
|
||||
let output = spawned.wait_with_output().unwrap_or_else(|e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
|
||||
/* get the output with which the original field will
|
||||
* be replaced */
|
||||
let mut replace = output.stdout.clone();
|
||||
|
||||
/* pop trailing newline out if the input did not contain it */
|
||||
if v.iter().last() != Some(&b'\n')
|
||||
&& replace.pop() != Some(b'\n')
|
||||
{
|
||||
out = output.stdout;
|
||||
} else {
|
||||
out = replace;
|
||||
}
|
||||
}
|
||||
|
||||
fopped = true;
|
||||
} else {
|
||||
out = v;
|
||||
}
|
||||
|
||||
/* since we cannot know when we’re done, place a new delimiter before
|
||||
* each index unless it is the 0th */
|
||||
if n != 0 {
|
||||
stdout().write_all(d.as_bytes()).unwrap_or_else(|e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
}
|
||||
|
||||
stdout().write_all(&out).unwrap_or_else(|e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_UNAVAILABLE.into());
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
|
||||
/* get field we want to pipe into spawned program */
|
||||
let field = fields.get(index).unwrap_or_else(|| {
|
||||
n += 1;
|
||||
}
|
||||
|
||||
if fopped {
|
||||
return ExitCode::SUCCESS;
|
||||
} else {
|
||||
eprintln!("{}: {}: no such index in input", argv[0], index);
|
||||
exit(EX_DATAERR.into());
|
||||
});
|
||||
|
||||
/* get the stdin of the newly spawned program and feed it the field val */
|
||||
if let Some(mut child_stdin) = spawned.stdin.take() {
|
||||
let _ = child_stdin.write_all(field.as_bytes());
|
||||
drop(child_stdin); /* stay safe! drop your children! */
|
||||
return EX_DATAERR.into();
|
||||
}
|
||||
|
||||
let output = spawned.wait_with_output().unwrap_or_else(|e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
|
||||
/* get the output with which the original field will be replaced */
|
||||
let mut replace = output.stdout.clone();
|
||||
|
||||
/* pop trailing newline out if the input did not contain it */
|
||||
if fields[index].chars().last() != Some('\n') /* no newline */
|
||||
&& replace.pop() != Some(b'\n') { /* pop last char of replacement */
|
||||
/* restore replacement to original command output if popped char was
|
||||
* not a newline */
|
||||
replace = output.stdout;
|
||||
}
|
||||
|
||||
/* convert the output of the program to UTF-8 */
|
||||
let new_field = String::from_utf8(replace).unwrap_or_else(|e| {
|
||||
eprintln!("{}: {}", argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
|
||||
/* store the new field in the old fields vector */
|
||||
fields[index] = &new_field;
|
||||
|
||||
/* fop it */
|
||||
stdout().write_all(
|
||||
fields.join(&d.to_string()).as_bytes()
|
||||
).unwrap_or_else(|e| {
|
||||
err(&argv[0], e);
|
||||
exit(EX_IOERR.into());
|
||||
});
|
||||
|
||||
ExitCode::SUCCESS
|
||||
}
|
||||
|
||||
105
src/libdelimit.rs
Normal file
105
src/libdelimit.rs
Normal file
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Emma Tebibyte <emma@tebibyte.media>
|
||||
* Copyright (c) 2025 silty silt <silt@tebibyte.media>
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Affero General Public License as published by the
|
||||
* Free Software Foundation, either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see https://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
use std::{
|
||||
io::{ Read, Result },
|
||||
mem,
|
||||
};
|
||||
|
||||
/// Number of bytes requested from the stream per read, and the initial
/// capacity of the internal buffer.
const BUFFER_SIZE: usize = 4096;

/// Iterator over the chunks of a byte stream that are separated by a
/// delimiter.
///
/// Each item is one chunk with the delimiter stripped, or the error returned
/// by the underlying stream. Bytes left over once the stream is exhausted are
/// yielded as a final chunk; nothing further is yielded when the input ends in
/// a delimiter. An empty delimiter never matches, so the whole stream is
/// yielded as a single chunk.
pub struct Delimited<T: Read> {
	delimiter: Vec<u8>,
	buffer: Vec<u8>, /* bytes read from the stream but not yet yielded */
	stream: T,
}

impl<T> Delimited<T> where T: Read {
	/// Creates an iterator over the chunks of `stream` separated by
	/// `delimiter`. The delimiter bytes are copied.
	pub fn new<R>(stream: T, delimiter: R) -> Self where R: AsRef<[u8]> {
		Delimited {
			stream,
			delimiter: delimiter.as_ref().to_vec(),
			buffer: Vec::with_capacity(BUFFER_SIZE),
		}
	}

	/// Returns the index in the buffer of the first delimiter that starts at
	/// or after `from`, if there is one.
	fn find_subslice(&self, from: usize) -> Option<usize> {
		let haystack = self.buffer.get(from..)?;

		let found = match self.delimiter.as_slice() {
			/* windows() panics on a size of zero; an empty delimiter simply
			 * never matches */
			[] => None,
			[byte] => haystack.iter().position(|b| b == byte),
			delimiter => {
				haystack
					.windows(delimiter.len())
					.position(|window| window == delimiter)
			},
		};

		found.map(|p| p + from)
	}
}

impl<T> Iterator for Delimited<T> where T: Read {
	type Item = Result<Vec<u8>>;

	/// Reads from the stream until a full chunk is buffered and returns it.
	///
	/// Returns `None` once the stream reports end of input and the buffer is
	/// empty. Interrupted reads are retried; any other read error is returned
	/// as the item.
	fn next(&mut self) -> Option<Self::Item> {
		let mut buf = [0; BUFFER_SIZE];
		/* no delimiter starts before this offset in the buffer, so it need
		 * not be scanned again after the next read */
		let mut searched = 0;

		loop {
			if let Some(p) = self.find_subslice(searched) {
				let chunk = self.buffer.drain(..p).collect::<Vec<_>>();

				/* discard the delimiter itself */
				drop(self.buffer.drain(..self.delimiter.len()));

				return Some(Ok(chunk));
			}

			/* a delimiter may straddle two reads, so keep its last possible
			 * starting positions in the range that is scanned next time */
			searched = self.buffer.len()
				.saturating_sub(self.delimiter.len().saturating_sub(1));

			match self.stream.read(&mut buf) {
				Ok(0) => { /* end of input: flush what is left, if anything */
					if self.buffer.is_empty() { return None; }

					return Some(Ok(mem::take(&mut self.buffer)));
				},
				Ok(n) => self.buffer.extend_from_slice(&buf[..n]),
				Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {
					continue; /* non-fatal; retry the read */
				},
				Err(e) => return Some(Err(e)),
			}
		}
	}
}
|
||||
|
||||
#[cfg(test)]
mod tests {
	/* `super::` resolves in every edition and regardless of whether this file
	 * is the crate root */
	use super::Delimited;

	/* joining fields with a delimiter and splitting them again must yield
	 * exactly the original fields */
	#[test]
	fn testing() {
		let d = '\u{1E}'.to_string();
		let input = vec!["meow", "woof", "ribbit"];
		let r = input.join(&d);

		let output = Delimited::new(r.as_bytes(), &d)
			.collect::<std::io::Result<Vec<_>>>()
			.unwrap();

		let expected = input
			.iter()
			.map(|field| field.as_bytes().to_vec())
			.collect::<Vec<_>>();

		/* comparing whole vectors also catches missing or extra items */
		assert_eq!(expected, output);
	}
}
|
||||
116
src/scrut.c
116
src/scrut.c
@@ -1,116 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023–2024 DTB <trinity@trinity.moe>
|
||||
* Copyright (c) 2024 Emma Tebibyte <emma@tebibyte.media>
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Affero General Public License as published by the
|
||||
* Free Software Foundation, either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see https://www.gnu.org/licenses/.
|
||||
*/
|
||||
|
||||
#include <assert.h> /* assert(3) */
|
||||
#include <stdio.h> /* fprintf(3), stderr, NULL */
|
||||
#include <stdlib.h> /* EXIT_FAILURE, EXIT_SUCCESS */
|
||||
#include <string.h> /* memset(3), strchr(3) */
|
||||
#include <sysexits.h> /* EX_OSERR, EX_USAGE */
|
||||
#include <unistd.h> /* access(3), getopt(3), pledge(2), unveil(2), F_OK, R_OK,
|
||||
* W_OK, X_OK */
|
||||
#include <sys/stat.h> /* lstat(3), stat struct, S_ISBLK, S_ISCHR, S_ISDIR,
|
||||
* S_ISFIFO, S_ISGID, S_ISREG, S_ISLNK, S_ISSOCK,
|
||||
* S_ISUID, S_ISVTX */
|
||||
|
||||
char *program_name = "scrut";
|
||||
#define OPTS "bcdefgkprsuwxLS"
|
||||
/* this is an array so main:sel's size can be known at compile time */
|
||||
static char opts[] = OPTS;
|
||||
|
||||
static int
|
||||
usage(char *argv0) {
|
||||
(void)fprintf(stderr, "Usage: %s [-" OPTS "] file...\n", argv0);
|
||||
|
||||
return EX_USAGE;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
char sel[(sizeof opts) / (sizeof *opts)];
|
||||
|
||||
program_name = argv[0] == NULL ? program_name : argv[0];
|
||||
|
||||
#ifdef __OpenBSD__
|
||||
if (pledge("rpath stdio unveil", "") == -1) {
|
||||
perror(program_name);
|
||||
return EX_OSERR;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (argc < 2) { return usage(program_name); }
|
||||
|
||||
{ /* option parsing */
|
||||
char *p;
|
||||
|
||||
memset(sel, '\0', sizeof sel);
|
||||
for (int c; (c = getopt(argc, argv, opts)) != -1;) {
|
||||
if ((p = strchr(opts, c)) == NULL) { return usage(argv[0]); }
|
||||
else {
|
||||
assert(p - opts < sizeof sel / sizeof *sel); /* bounds check */
|
||||
sel[p - opts] = c;
|
||||
}
|
||||
}
|
||||
|
||||
/* straighten out selections; permute out nulls */
|
||||
p = sel;
|
||||
for (size_t i = 0; i < (sizeof sel) / (sizeof *sel); ++i) {
|
||||
if (sel[i] != '\0') {
|
||||
*p = sel[i];
|
||||
if (&sel[i] != p++) { sel[i] = '\0'; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (optind == argc) { return usage(argv[0]); }
|
||||
|
||||
for (argv += optind ; *argv != NULL; argv = &argv[1]) {
|
||||
struct stat buf;
|
||||
|
||||
#ifdef __OpenBSD__
|
||||
if (unveil(*argv, "rw") == -1) {
|
||||
perror(program_name);
|
||||
return EX_OSERR;
|
||||
}
|
||||
#endif
|
||||
|
||||
if(access(*argv, F_OK) != 0 || lstat(*argv, &buf) == -1) {
|
||||
return EXIT_FAILURE; /* doesn't exist or isn't stattable */
|
||||
}
|
||||
|
||||
for (size_t i = 0; sel[i] != '\0'; ++i) {
|
||||
if (
|
||||
(sel[i] == 'b' && !S_ISBLK(buf.st_mode))
|
||||
|| (sel[i] == 'c' && !S_ISCHR(buf.st_mode))
|
||||
|| (sel[i] == 'd' && !S_ISDIR(buf.st_mode))
|
||||
|| (sel[i] == 'e' && 0)
|
||||
|| (sel[i] == 'f' && !S_ISREG(buf.st_mode))
|
||||
|| (sel[i] == 'g' && !(buf.st_mode & S_ISGID))
|
||||
|| (sel[i] == 'k' && !(buf.st_mode & S_ISVTX))
|
||||
|| (sel[i] == 'p' && !S_ISFIFO(buf.st_mode))
|
||||
|| (sel[i] == 'r' && access(*argv, R_OK) != 0)
|
||||
|| (sel[i] == 'u' && !(buf.st_mode & S_ISUID))
|
||||
|| (sel[i] == 'w' && access(*argv, W_OK) != 0)
|
||||
|| (sel[i] == 'x' && access(*argv, X_OK) != 0)
|
||||
|| (sel[i] == 'L' && !S_ISLNK(buf.st_mode))
|
||||
|| (sel[i] == 'S' && !S_ISSOCK(buf.st_mode))
|
||||
) { return EXIT_FAILURE; }
|
||||
}
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
Reference in New Issue
Block a user