Replace str(1) with stris(1) #102

Open
trinity wants to merge 9 commits from stris into main
6 changed files with 224 additions and 169 deletions

View File

@ -42,7 +42,7 @@ BIN = build/bin
default: all test
.PHONY: all
all: dj false fop hru intcmp mm npc rpn scrut str strcmp swab true
all: dj false fop hru intcmp mm npc rpn scrut strcmp stris swab true
# keep build/include until bindgen(1) has stdin support
# https://github.com/rust-lang/rust-bindgen/issues/2703
@ -147,10 +147,10 @@ scrut: build/bin/scrut
build/bin/scrut: src/scrut.c build
$(CC) $(CFLAGS) -o $@ src/scrut.c
.PHONY: str
str: build/bin/str
build/bin/str: src/str.c build
$(CC) $(CFLAGS) -o $@ src/str.c
.PHONY: stris
stris: build/bin/stris
build/bin/stris: src/stris.rs build rustlibs
$(RUSTC) $(RUSTFLAGS) -o $@ src/stris.rs
.PHONY: strcmp
strcmp: build/bin/strcmp

View File

@ -1,59 +0,0 @@
.\" Copyright (c) 20232024 DTB <trinity@trinity.moe>
.\" Copyright (c) 20232024 Emma Tebibyte <emma@tebibyte.media>
.\"
.\" This work is licensed under CC BY-SA 4.0. To see a copy of this license,
.\" visit <http://creativecommons.org/licenses/by-sa/4.0/>.
.\"
.TH STR 1 2024-06-17 "Harakit X.X.X"
.SH NAME
str \(en test string arguments
.\"
.SH SYNOPSIS
str
.B type string...
.\"
.SH DESCRIPTION
Test the character types of string arguments.
The tests in this program are equivalent to the functions with the same names in
.BR ctype.h (0p)
and are the methods by which string arguments are tested.
.\"
.SH DIAGNOSTICS
If all tests pass, the program will exit successfully. If any of the tests fail,
the program will exit unsuccessfully with an error code of 1.
When invoked incorrectly, a debug message will be printed and the program will
exit with the appropriate
.BR sysexits.h (3)
error code.
.\"
.SH CAVEATS
None of an empty string\(cqs contents pass any of the tests, so the program will
exit unsuccessfully if one is specified.
There\(cqs no way of knowing which argument failed the test without re-testing
arguments individually.
If a character in a string isn\(cqt valid ASCII, the program will exit
unsuccessfully.
.\"
.SH AUTHOR
Written by DTB
.MT trinity@trinity.moe
.ME .
.\"
.SH COPYRIGHT
Copyright \(co 2023 DTB. License AGPLv3+: GNU AGPL version 3 or later
<https://gnu.org/licenses/gpl.html>.
.\"
.SH SEE ALSO
.BR ctype (3p),
.BR strcmp(1),
.BR ascii(7)

118
docs/stris.1 Normal file
View File

@ -0,0 +1,118 @@
.\" Copyright (c) 20232024 DTB <trinity@trinity.moe>
.\" Copyright (c) 2023 Emma Tebibyte <emma@tebibyte.media>
.\"
.\" This work is licensed under CC BY-SA 4.0. To see a copy of this license,
.\" visit <http://creativecommons.org/licenses/by-sa/4.0/>.
.TH STRIS 1
.SH NAME
stris \(en test the character types of string arguments
.SH SYNOPSIS
stris
.RB ( -7bcdlu )
.RB ( -i [ inclusions ])
.RB [ strings... ]
.SH DESCRIPTION
Stris tests each rune in an arbitrary quantity of string arguments, ensuring
each meets any of the parameters specified in the program options.
.SH OPTIONS
.B -7
.RS
Tests to see if runes are fit within seven bits; that is, that they are encoded
with ASCII.
.RE
.B -b
.RS
Tests to see if runes are blank or "whitespace"; characters that do not print
but fill a predictable amount of space.
.RE
.B -c
.RS
Tests to see if runes are control characters; characters that are not printing
or graphical.
.RE
.B -d
.RS
Tests to see if runes are numeric. This test does not only allow the ASCII
digits but any numeric symbol.
.RE
.B -i
.RS
Permits, in addition to the given specified parameters, all of the runes
supplied in its option argument.
.RE
.B -l
.RS
Tests to see if runes are in their lower case.
.RE
.B -u
.RS
Tests to see if runes are in their upper case, or capitalized.
.RE
.SH DIAGNOSTICS
Stris exits successfully if all runes in all given strings meet any of the
specified parameters, or if no parameters were specified but all given strings
were legibly encoded. It exits unsuccessfully if the previous is untrue, and if
invalid options were given, or if no strings were given, a usage synopsis will
be printed to the standard error.
.SH BUGS
There's no way of knowing which argument failed the test without re-testing
arguments individually.
Some runes that can losslessly be encoded into ASCII from UTF-8 but in an
"overlong encoding", where the rune was encoded with unnecessary leading
zeroes causing it to span multiple bytes, won't be detected as ASCII.
.SH EXAMPLES
This is an sh(1p) snippet that checks to see if an environment variable is an
ASCII digit.
.RS
.R stris -7 "$v" && stris -d "$v" && echo ASCII digit.
.RE
This is an sh(1p) snippet that checks to see if an environment variable is a
hexadecimal number.
.RS
.R stris -7 "$v" && stris -di ABCDEFabcdef "$v" && echo Hexadecimal number.
.RE
.SH AUTHOR
Written by DTB <trinity@trinity.moe>.
.SH HISTORY
Stris replaces the former str(1) which took the name of a function from
ctype(3) as its first argument and checked the following strings against it;
str(1) exited unsuccessfully when it encountered any non-ASCII runes and could
only have one parameter specified.
.SH COPYRIGHT
Copyright © 20232024 DTB. License AGPLv3+: GNU AGPL version 3 or later
<https://gnu.org/licenses/gpl.html>.
.SH SEE ALSO
ascii(7), ctype(3p), strcmp(1)

View File

@ -1,96 +0,0 @@
/*
* Copyright (c) 2023 DTB <trinity@trinity.moe>
* Copyright (c) 2023 Marceline Cramer <mars@tebibyte.media>
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
#include <ctype.h>
#include <stddef.h> /* NULL */
#include <stdio.h> /* fprintf(3), perror(3) */
#include <stdlib.h> /* size_t, EXIT_FAILURE */
#include <string.h> /* strcmp(3) */
#include <sysexits.h> /* EX_OSERR, EX_USAGE */
#ifdef __OpenBSD__
# include <unistd.h> /* pledge(2) */
#endif
char *program_name = "str";
static struct {
char *name;
int (*f)(int);
} ctypes[] = {
{ "isalnum", isalnum },
{ "isalpha", isalpha },
{ "isblank", isblank },
{ "iscntrl", iscntrl },
{ "isdigit", isdigit },
{ "isxdigit", isxdigit },
{ "isgraph", isgraph },
{ "islower", islower },
{ "isprint", isprint },
{ "ispunct", ispunct },
{ "isspace", isspace },
{ "isupper", isupper },
{ NULL, NULL } /* marks end */
};
static int
usage(char *argv0) {
(void)fprintf(stderr, "Usage: %s type string...\n", argv0);
return EX_USAGE;
}
int main(int argc, char *argv[]) {
size_t ctype; // selected from ctypes.h; index of ctype
int retval; // initially fail but becomes success on the first valid char
program_name = argv[0] == NULL ? program_name : argv[0];
#ifdef __OpenBSD__
if (pledge("stdio", NULL) == -1) {
perror(program_name);
return EX_OSERR;
}
#endif
if (argc < 3) { return usage(program_name); }
for ( /* iterate ctypes */
ctype = 0;
ctypes[ctype].f != NULL /* break at the end of ctypes */
&& strcmp(argv[1], ctypes[ctype].name) != 0; /* break at match */
++ctype
);
if (ctypes[ctype].f == NULL) { return usage(argv[0]); }
/* iterate args */
for (argv += 2, retval = EXIT_FAILURE; *argv != NULL; ++argv) {
for (size_t i = 0; argv[0][i] != '\0'; ++i) { /* iterate arg bytes */
/* First checks if argv[0][i] is valid ASCII; ctypes(3) don't
* handle non-ASCII. This is bad. */
if(
(unsigned char)argv[0][i] < 0x80 // argv[0][i] is ASCII,
&& !ctypes[ctype].f(argv[0][i]) // so use ctypes(3)
) { return EXIT_FAILURE; }
else { retval = EXIT_SUCCESS; }
}
}
return retval;
}

87
src/stris.rs Normal file
View File

@ -0,0 +1,87 @@
/*
* Copyright (c) 20232024 DTB <trinity@trinity.moe>
* Copyright (c) 2023 Marceline Cramer <mars@tebibyte.media>
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU Affero General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see https://www.gnu.org/licenses/.
*/
use std::{
env::args,
process::ExitCode,
};
extern crate getopt;
extern crate sysexits;
use getopt::GetOpt;
use sysexits::EX_USAGE;
struct Reqs {
ascii: bool, blank: bool, cntrl: bool, digit: bool, lower: bool,
upper: bool, inuse: bool, extra: String
}
fn usage(s: &str) -> ExitCode {
eprintln!("Usage: {} [-7bcdlu] [-i inclusions] [strings...]", s);
ExitCode::from(EX_USAGE as u8)
}
fn main() -> ExitCode {
let argv = args().collect::<Vec<String>>();
let mut optind = 1;
let mut reqs = Reqs {
ascii: false, blank: false, cntrl: false, digit: false, lower: false,
upper: false, inuse: false, extra: String::default()
};
while let Some(opt) = argv.getopt("7bcdi:lu") {
match opt.opt() {
Ok("7") => reqs.ascii = true,
Ok("b") => reqs.blank = true,
Ok("c") => reqs.cntrl = true,
Ok("d") => reqs.digit = true,
Ok("i") => reqs.extra = opt.arg().unwrap(),
Ok("l") => reqs.lower = true,
Ok("u") => reqs.upper = true,
_ => { return usage(&argv[0]); }
}
optind = opt.ind();
reqs.inuse = true;
}
if argv.len() == optind { return usage(&argv[0]); }
drop(argv);
if reqs.inuse {
for arg in args().skip(optind) {
for c in arg.chars() {
if (reqs.ascii && c.is_ascii())
|| (reqs.blank && c.is_whitespace())
|| (reqs.cntrl && c.is_control())
|| (reqs.digit && c.is_numeric())
|| (reqs.lower && c.is_lowercase())
|| (reqs.upper && c.is_uppercase())
|| reqs.extra.contains(c) {
continue;
} else {
return ExitCode::FAILURE;
}
}
}
}
ExitCode::SUCCESS
}

View File

@ -7,14 +7,19 @@
.PRAGMA: command_comment
.PHONY: str_tests
str_tests: str_help str_isalpha
.PHONY: stris_tests
stris_tests: stris_help stris_7 stris_b
.PHONY: str_help
str_help: $(BIN)/str
! $(BIN)/str -h
.PHONY: stris_help
stris_help: $(BIN)/stris
! $(BIN)/stris -h
.PHONY: str_isalpha
str_isalpha: $(BIN)/str
$(BIN)/str isalpha c
! $(BIN)/str isalpha 3
.PHONY: stris_7
stris_7: $(BIN)/stris
$(BIN)/stris -7 !1Aa' '
! $(BIN)/stris -7 今日は
.PHONY: stris_b
stris_b: $(BIN)/stris
$(BIN)/stris -b "$(printf ' \t\v\r\n')"
! $(BIN)/stris -b !1Aa