From 88a66bcc0189f3683d3774a39ca5718671ee7e5b Mon Sep 17 00:00:00 2001 From: DTB Date: Tue, 9 Jan 2024 23:43:45 -0700 Subject: [PATCH 1/2] dj(1): import from git.sr.ht/~trinity/src --- GNUmakefile | 3 + docs/dj.1 | 155 +++++++++++++++++++ src/dj.c | 429 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 587 insertions(+) create mode 100644 docs/dj.1 create mode 100644 src/dj.c diff --git a/GNUmakefile b/GNUmakefile index 7c03eea..46f3aac 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -57,6 +57,9 @@ test: build tests/cc-compat.sh tests/posix-compat.sh +dj: src/dj.c build_dir + $(CC) $(CFLAGS) -o build/bin/dj src/dj.c + false: src/false.rs build_dir $(RUSTC) $(RUSTCFLAGS) -o build/bin/false src/false.rs diff --git a/docs/dj.1 b/docs/dj.1 new file mode 100644 index 0000000..7963adb --- /dev/null +++ b/docs/dj.1 @@ -0,0 +1,155 @@ +.TH dj 1 + +.SH NAME + +dj \(en disk jockey + +.SH SYNOPSIS + +dj +.RB ( -AdHnq ) +.RB ( -a +.RB [ byte ]) +.RB ( -c +.RB [ count ]) + +.RB ( -i +.R [ +.B input file +.R ]) +.RB ( -b +.R [ +.B input block size +.R ]) +.RB ( -s +.R [ +.B input offset +.R ]) + +.RB ( -o +.R [ +.B output file +.R ]) +.RB ( -B +.R [ +.B output block size +.R ]) +.RB ( -S +.R [ +.B output offset +.R ]) + +.SH USAGE + +The +.B -i +option takes a path as an argument to open and use in place of standard input. +The +.B -o +option does the same in place of standard output. Dj does not truncate output +files and instead writes over the bytes in the existing file. +.PP +The +.B -b +option takes a numeric argument as the size in bytes of the input buffer and +the +.B -B +option does the same for the output buffer, the default for both being 1024 +bytes, or one kibibyte (KiB). +.PP +The +.B -s +option takes a numeric argument as the number of bytes to skip into the input +before starting to read, and the +.B -S +option skips a number of bytes through the output before starting to write from +the input. 
If the input is a stream the bytes are read and discarded. If the
+output is a stream, nul characters are printed.
+.PP
+The
+.B -a
+option takes one argument of one byte in length and pads the input buffer with
+that byte in the event that a read doesn't fill the input buffer, and the
+.B -A
+option takes no arguments and pads with nuls.
+The
+.B -c
+option specifies the number of reads to make, and if 0 (the default) dj will
+continue reading until a partial or empty read.
+.PP
+On a partial or empty read, dj prints a diagnostic message (unless the
+.B -q
+option is specified) and exits (unless the
+.B -n
+option is specified, in which case only two consecutive empty reads will cause
+dj to exit).
+At exit, usage statistics are printed unless the option
+.B -q
+is specified a second time. The
+.B -H
+option will make these diagnostics human-readable.
+
+.SH DIAGNOSTICS
+
+The
+.B -d
+option prints all information, user-specified or otherwise, before program
+execution.
+.PP
+When dj exits, by default statistics are printed for input and output to
+standard error in the following format:
+.PP
+.R {records read} {ASCII unit separator} {partial records read}
+.R {ASCII record separator} {records written} {ASCII unit separator}
+.R {partial records written} {ASCII group separator} {bytes read}
+.R {ASCII record separator} {bytes written} {ASCII file separator}
+.PP
+If the
+.B -H
+option is specified dj instead uses the following format:
+.PP
+.R {records read} '+' {partial records read} '>' {records written}
+.R '+' {partial records written} ';' {bytes read} '>' {bytes written}
+.R {ASCII line feed}
+.PP
+The
+.B -q
+option suppresses error messages which print when a read or write is partial or
+empty and when used twice suppresses diagnostic output entirely.
+.PP
+In non-recoverable errors that don't pertain to dj's read-write cycle, a
+diagnostic message is printed and dj exits with the appropriate sysexits(3)
+status.
+
+.SH BUGS
+
+If
+.B -n
+is specified along with a specified count, actual byte output may be lower than
+expected (the product of the count multiplied by the input block size). If the
+.B -a
+or
+.B -A
+options are used this could make data written nonsensical.
+.PP
+Many lowercase options have capitalized variants and vice-versa which can be
+confusing. Capitalized options tend to affect output or are more intense
+versions of lowercase options.
+
+.SH RATIONALE
+
+Dj was modeled after the dd utility specified in POSIX but adds additional
+features: typical option formatting, allowing seeks to be specified in bytes
+rather than in blocks, allowing arbitrary bytes as padding, and printing in a
+format that's easy to parse for machines. It also neglects character
+conversion, which may be dd's original intent but is irrelevant to its modern
+use.
+
+.SH COPYRIGHT
+
+Copyright (C) 2023 DTB. License AGPLv3+: GNU AGPL version 3 or later
+<https://gnu.org/licenses/agpl.html>.
+
+.SH SEE ALSO
+
+dd(1)
diff --git a/src/dj.c b/src/dj.c
new file mode 100644
index 0000000..e407500
--- /dev/null
+++ b/src/dj.c
@@ -0,0 +1,429 @@
+#include <ctype.h> /* isupper(3), tolower(3) */
+#include <errno.h> /* errno */
+#include <fcntl.h> /* open(2) */
+#include <stdio.h> /* fprintf(3), stderr */
+#include <stdlib.h> /* free(3), malloc(3), strtol(3), size_t */
+#include <string.h> /* memcpy(3), memmove(3), memset(3) */
+#include <sysexits.h> /* EX_OK, EX_USAGE */
+#include <unistd.h> /* close(2), getopt(3), lseek(2), read(2), write(2),
+                     * optarg, optind, STDIN_FILENO, STDOUT_FILENO */
+extern int errno;
+
+/* dj uses two structures that respectively correspond to the reading and
+ * writing ends of its jockeyed "pipe". User-configurable members are noted
+ * with their relevant options.
*/
+struct Io{
+	int bs; /* buffer size in bytes (-bB) */
+	size_t bufuse; /* number of bytes currently held in buf */
+	char *buf; /* buffer of bs bytes */
+	int bytes; /* total bytes processed */
+	int fd; /* file descriptor */
+	int fl; /* file opening flags (read_flags or write_flags) */
+	char *fn; /* file name (may be stdin_name or stdout_name) (-io) */
+	int prec; /* partial records processed */
+	int rec; /* records processed */
+	long seek; /* bytes to seek/skip before transferring (-sS) */
+} ep[2]; /* ep[0] reads, ep[1] writes; "engineered pipe" or "extended play" */
+
+/* Additionally, the following global variables are used to store user options.
+ */
+
+/* (-a) */ static int align; /* Only the lower 8b are used but align is
+                              * negative if no alignment is being done. */
+
+/* (-c) */ static int count; /* 0 if dj(1) runs until no more reads are
+                              * possible. */
+
+/* ASCII field separator delimited statistics */
+           static char *fmt_asv = "%d\037%d\036%d\037%d\035%d\036%d\034";
+/* human-readable statistics */
+           static char *fmt_human = "%d+%d > %d+%d; %d > %d\n";
+/* pointer to chosen formatting */
+/* (-H) */ static char *fmt_output; /* fmt_asv (default) or fmt_human (-H) */
+
+/* (-dq) */ static char debug; /*
+ * -d increments  dj -qq | 0 - no diagnostic output whatsoever
+ * -q decrements  dj -q  | 1 - typical output without
+ *                       |     notifications on partial reads or
+ *                       |     writes
+ *                dj     | 2 - typical output (default)
+ *                dj -d  | 3 - verbose status messages */
+
+/* (-n) */ static char noerror; /* 0 - exits on partial reads or writes
+                                 *     (default)
+                                 * 1 - carries on after partial reads/writes
+                                 *     (-n) */
+
+/* Non-configurable defaults. */
+#define bs_default 1024 /* GNU dd(1) default; twice POSIX but a neat 2^10 */
+static char *program_name = "<no argv[0]>"; /* shown until argv[0] is known */
+static char *stdin_name = "<stdin>"; /* name reported for standard input */
+static char *stdout_name = "<stdout>"; /* name reported for standard output */
+static int read_flags = O_RDONLY; /* These flags are consistent with Busybox */
+static int write_flags = O_WRONLY | O_CREAT; /* dd(1). */
+
+/* Macro to set defaults for user-configurable options.
*/
+#define setdefaults do{ \
+	ep[0].fl = read_flags; \
+	ep[1].fl = write_flags; \
+	Io_setdefaults(&ep[0]); \
+	Io_setdefaults(&ep[1]); \
+	align = -1; \
+	count = 0; \
+	debug = 2; \
+	fmt_output = fmt_asv; \
+	noerror = 0; }while(0)
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+/* Macro that is true when fd is one of the three standard descriptors. */
+#define fdisstd(fd) \
+	((fd) == STDERR_FILENO \
+		|| (fd) == STDOUT_FILENO \
+		|| (fd) == STDIN_FILENO)
+
+/* Macro that releases both ends of an array of two struct Io such as ep: each
+ * buffer is freed and each descriptor closed. The results of those calls are
+ * ignored because this only runs as the program is about to terminate. */
+#define terminate(io) do{ \
+	Io_buffree(&(io)[0]); \
+	Io_fdclose(&(io)[0]); \
+	Io_buffree(&(io)[1]); \
+	Io_fdclose(&(io)[1]); }while(0)
+
+/* Allocates io->bs bytes as *io's buffer. Returns it, or NULL on failure. */
+static void *
+Io_bufalloc(struct Io *io){
+	io->buf = malloc((sizeof *io->buf) * io->bs);
+	return io->buf;
+}
+
+/* Releases *io's buffer; io->buf itself is left as it was. Returns io. */
+static struct Io *
+Io_buffree(struct Io *io){
+	char *buf = io->buf;
+	free(buf);
+
+	return io;
+}
+
+/* Pads the unused tail of io's buffer with the byte padding so that the buffer
+ * is full (io->bufuse becomes io->bs). Returns io. */
+static struct Io *
+Io_bufrpad(struct Io *io, int padding){
+	char *tail = io->buf + io->bufuse; /* first unused byte */
+	memset(tail, padding, io->bs - io->bufuse);
+	io->bufuse = io->bs;
+
+	return io;
+}
+
+/* Moves as many bytes as fit from the start of src's buffer onto the end of
+ * the data already held in dest's buffer, then shifts what remains in src to
+ * the front of its buffer. Both src->bufuse and dest->bufuse are updated
+ * to match. Returns dest.
*/
+static struct Io*
+Io_bufxapp(struct Io *dest, struct Io *src){
+	int n; /* bytes moved: what src holds, capped by dest's free space */
+
+	n = MIN(src->bufuse, dest->bs - dest->bufuse);
+	memcpy(dest->buf + dest->bufuse, src->buf, n);
+	dest->bufuse += n;
+	src->bufuse -= n;
+	memmove(src->buf, src->buf + n, src->bufuse); /* only the bytes in use */
+
+	return dest;
+}
+
+/* Moves the first n bytes of src's buffer to the start of dest's buffer,
+ * replacing whatever dest held (dest->bufuse becomes n), and shifts the rest
+ * of src's data to the front of its buffer (src->bufuse shrinks by n). n is
+ * expected not to exceed src->bufuse or dest->bs. Returns dest. */
+static struct Io*
+Io_bufxfer(struct Io *dest, struct Io *src, int n){
+
+	memcpy(dest->buf, src->buf, (dest->bufuse = n));
+	memmove(src->buf, src->buf + n, (src->bufuse -= n));
+
+	return dest;
+}
+
+/* Closes io->fd unless it is a standard stream; returns 0, or -1 on error. */
+static int
+Io_fdclose(struct Io *io){
+
+	return fdisstd(io->fd)
+		? 0
+		: close(io->fd);
+}
+
+/* Opens fn with io->fl and, if the old descriptor closes cleanly, stores the
+ * new descriptor and fn in *io. Returns open(2)'s result (-1 if it failed). */
+static int
+Io_fdopen(struct Io *io, char *fn){
+	int fd;
+
+	if((fd = open(fn, io->fl,
+				/* permission bits 0666, as used by touch(1p) */
+				S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH))
+			!= -1
+			&& Io_fdclose(io) == 0){
+		io->fd = fd;
+		io->fn = fn;
+	}
+
+	return fd;
+}
+
+/* Skips io->seek bytes through *io's file descriptor: with lseek(2) when that
+ * works, otherwise by writing nul bytes (output end) or by reading and
+ * discarding bytes (input end), reducing io->seek as it goes and leaving
+ * io->bufuse at zero. Returns -1, which (counter-intuitively) means success.
*/
+static int
+Io_fdseek(struct Io *io){
+	ssize_t n; /* result of the latest read(2) or write(2) */
+	int empty = 0; /* consecutive transfers that moved no bytes */
+
+	if(!fdisstd(io->fd) && lseek(io->fd, io->seek, SEEK_SET) != -1)
+		return -1;
+
+	/* The descriptor is a standard stream or lseek(2) failed, so the offset
+	 * is covered by hand: the output end writes nul bytes from a zeroed
+	 * buffer and the input end reads bytes that are then discarded. */
+	if(io->fl == write_flags)
+		memset(io->buf, '\0', io->bs);
+
+	/* read(2) and write(2) are called directly rather than through a cast
+	 * function pointer: they return ssize_t and differ in the constness of
+	 * the buffer parameter, so no single pointer type is compatible with
+	 * both. An empty transfer gets a second chance; two in a row, a failed
+	 * transfer (negative n), or a fully covered offset end the loop. A
+	 * negative n is never subtracted from io->seek, as that would grow the
+	 * offset and keep the loop from ending on a persistent error. */
+	do{	n = (io->fl == write_flags)
+			? write(io->fd, io->buf, MIN(io->bs, io->seek))
+			: read(io->fd, io->buf, MIN(io->bs, io->seek));
+		if(n > 0)
+			io->seek -= n;
+		empty = (n == 0) ? empty + 1 : 0;
+	}while(n >= 0 && empty < 2 && io->seek > 0);
+
+	io->bufuse = 0;
+
+	return -1;
+}
+
+/* Reads up to io->bs bytes from *io's file descriptor into io->buf. io->bufuse
+ * receives the number of bytes read, or 0 at end of input or when read(2)
+ * fails (errno is then set); io->bytes grows by the same amount. Returns io. */
+static struct Io *
+Io_read(struct Io *io){
+	ssize_t n = read(io->fd, io->buf, io->bs); /* -1 on error */
+	io->bytes += (io->bufuse = (n > 0) ? n : 0);
+
+	return io;
+}
+
+/* Sets the variables in a struct *io to the defaults. Identifies the read/
+ * write ends of the "pipe" by checking io->fl. Returns io. */
+static struct Io *
+Io_setdefaults(struct Io *io){
+	io->bufuse = 0; /* no data is buffered yet */
+	io->bs = bs_default;
+	io->buf = NULL;
+	io->bytes = 0;
+	io->fd = (io->fl == read_flags) ? STDIN_FILENO : STDOUT_FILENO;
+	io->fn = (io->fl == read_flags) ? stdin_name : stdout_name;
+	io->prec = 0;
+	io->rec = 0;
+	io->seek = 0;
+
+	return io;
+}
+
+/* Writes io->bufuse units from io->buf to io->fd, permuting any unwritten
+ * bytes to the start of io->buf and updating io->bufuse. If io->bufuse doesn't
+ * change, errno will probably be set. Returns io. */
+static struct Io *
+Io_write(struct Io *io){
+	int t;
+
+	if((t = write(io->fd, io->buf, io->bufuse)) > 0)
+		memmove(io->buf, io->buf + t, (io->bufuse -= t));
+	io->bytes += (t > 0) ? t : 0; /* a failed write (-1) adds nothing */
+
+	return io;
+}
+
+/* Prints an error message suitable for the event of an operating system error,
+ * with the error itself to be described in the string s. Returns EX_OSERR. */
+static int
+oserr(char *s){
+
+	fprintf(stderr, "%s: %s: %s\n", program_name, s, strerror(errno));
+
+	return EX_OSERR;
+}
+
+/* Prints statistics regarding the use of dj, particularly partially and
+ * completely read and written records, accessing debug, ep, and fmt_output. */
+static void
+output(void){
+
+	if(debug >= 1)
+		fprintf(stderr, fmt_output,
+			ep[0].rec, ep[0].prec, ep[1].rec, ep[1].prec,
+			ep[0].bytes, ep[1].bytes);
+
+	return;
+}
+
+/* Parses the whole string s as an integer, returning the integer or, for an
+ * empty, malformed or out-of-range string, -1. This is used for argument
+ * parsing (e.g. -B [int]) in dj, where no negative integer is valid anyway. */
+static long
+parse(char *s){
+	long r;
+	char *end; /* first character strtol(3) did not consume */
+	errno = 0;
+	r = strtol(s, &end, 0);
+	return (end != s && *end == '\0' && errno == 0) /* all of s was parsed */
+		? r
+		: -1;
+}
+
+static int
+usage(void){
+	/* Prints the synopsis to standard error. Returns EX_USAGE. */
+	fprintf(stderr, "Usage: %s (-AdHnq) (-a [byte]) (-c [count])\n"
+		"\t(-i [input file]) (-b [input block size]) (-s [input offset])\n"
+		"\t(-o [output file]) (-B [output block size]) (-S [output offset])\n",
+		program_name);
+
+	return EX_USAGE;
+}
+/* Parses the options, skips to the offsets, then copies input to output. */
+int main(int argc, char *argv[]){
+	int c; /* current option; later the output buffer use before a write */
+	int i; /* index into ep: 0 for the input end, 1 for the output end */
+
+	setdefaults;
+
+	if(argc > 0){
+		program_name = argv[0];
+		while((c = getopt(argc, argv, "a:Ab:B:c:di:fhHnqs:S:o:")) != -1)
+			switch(c){
+			case 'i': case 'o':
+				if(Io_fdopen(&ep[c == 'o'], optarg) != -1)
+					break;
+				terminate(ep);
+				return oserr(optarg);
+			case 'A': align = '\0'; break;
+			case 'd': ++debug; break;
+			case 'n': noerror = 1; break;
+			case 'H': fmt_output = fmt_human; break;
+			case 'q': --debug; break;
+			case 'a':
+				if(optarg[0] != '\0' && optarg[1] == '\0'){
+					align = (unsigned char)optarg[0]; /* never negative */
+					break;
+				}
+				/* FALLTHROUGH */
+			case 'c': case 'b': case 's': case 'B': case 'S':
+				if(c == 'c' && (count = parse(optarg)) >= 0)
+					break;
+				i = (isupper(c) != 0); /* isupper(3) returns any non-zero */
+				c = tolower(c);
+				if((c == 'b' && (ep[i].bs = parse(optarg)) > 0)
+						|| (c == 's' && (ep[i].seek = parse(optarg)) >= 0))
+					break;
+				/* FALLTHROUGH */
+			default:
+				terminate(ep);
+				return usage();
+			}
+	}
+
+	if(debug >= 3)
+		fprintf(stderr,
+			"argv0=%s\n"
+			"in=%s\tibs=%d\tskip=%ld\talign=%hhx\tcount=%d\n"
+			"out=%s\tobs=%d\tseek=%ld\tdebug=%2d\tnoerror=%d\n",
+			program_name,
+			ep[0].fn, ep[0].bs, ep[0].seek, align, count,
+			ep[1].fn, ep[1].bs, ep[1].seek, debug, noerror);
+
+	if(argc > optind){
+		terminate(ep);
+		return usage();
+	}
+
+	for(i = 0; i <= 1; ++i){
+		if(Io_bufalloc(&ep[i]) == NULL){
+			fprintf(stderr, "%s: Failed to allocate %d bytes\n",
+				program_name, ep[i].bs);
+			terminate(ep);
+			return EX_OSERR;
+		}else if(ep[i].seek > 0)
+			switch(Io_fdseek(&ep[i])){
+			case EX_OK:
+				output();
+				terminate(ep);
+				return EX_OK;
+			}
+	}
+
+	do{	/* read */
+		Io_read(&ep[0]);
+		if(!noerror && ep[0].bufuse == 0)
+			Io_read(&ep[0]); /* second chance */
+		if(ep[0].bufuse == 0) /* that's all she wrote */
+			break;
+		else if(ep[0].bufuse < ep[0].bs){
+			++ep[0].prec;
+			if(debug >= 2){
+				fprintf(stderr, "%s: Partial read:\n\t", program_name);
+				output();
+			}
+			if(!noerror)
+				count = 1;
+			if(align >= 0)
+				Io_bufrpad(&ep[0], align);
+		}else
+			++ep[0].rec;
+
+		/* write */
+		do{	if(ep[1].bs > ep[0].bs){ /* io[1].bs > io[0].bs */
+				Io_bufxapp(&ep[1], &ep[0]);
+				if(ep[0].bs + ep[1].bufuse <= ep[1].bs && count != 1)
+					continue; /* we could write more */
+			}else
+				Io_bufxfer(&ep[1], &ep[0], MIN(ep[0].bufuse, ep[1].bs));
+
+			c = ep[1].bufuse;
+			Io_write(&ep[1]);
+			if(!noerror && ep[1].bufuse == c)
+				Io_write(&ep[1]); /* second chance */
+			if(c == ep[1].bufuse){ /* no more love */
+				count = 1;
+				break;
+			}else if(c > ep[1].bufuse && ep[1].bufuse > 0){
+				ep[1].prec += 1;
+				if(debug >= 2){
+					fprintf(stderr, "%s: Partial write:\n\t", program_name);
+					output();
+				}
+				if(!noerror)
+					count = 1;
+			}else if(ep[1].bufuse == 0 && c < ep[1].bs)
+				++ep[1].prec;
+			else
+				++ep[1].rec;
+		}while(ep[0].bufuse > 0);
+	}while(count == 0 || --count > 0);
+	/* Flush output still buffered when input ended (possible if obs > ibs). */
+	if((c = ep[1].bufuse) > 0 && Io_write(&ep[1])->bufuse < c) ++ep[1].prec;
+	output();
+	terminate(ep);
+	return EX_OK;
+}
-- 
2.46.1

From 612067890f8df9b582d883b14ba6fdc2697a225e Mon Sep 17 00:00:00 2001
From: DTB
Date: Wed, 10 Jan 2024 15:03:04 -0700
Subject: [PATCH 2/2] dj(1): interpret a '-' file name as standard input/output

---
 src/dj.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/dj.c b/src/dj.c
index e407500..db847bd 100644
--- a/src/dj.c
+++ b/src/dj.c
@@ -314,7 +314,12 @@ int main(int argc, char *argv[]){
 		while((c = getopt(argc, argv, "a:Ab:B:c:di:fhHnqs:S:o:")) != -1)
 			switch(c){
 			case 'i': case 'o':
-				if(Io_fdopen(&ep[c == 'o'], optarg) != -1)
+				i = (c == 'o');
+				if(optarg[0] == '-' && optarg[1] == '\0'){ /* optarg == "-" */
+					ep[i].fd = (i == 0) ? STDIN_FILENO : STDOUT_FILENO;
+					ep[i].fn = (i == 0) ? stdin_name : stdout_name;
+					break;
+				}else if(Io_fdopen(&ep[i], optarg) != -1)
 					break;
 				terminate(ep);
 				return oserr(optarg);
-- 
2.46.1