dj(1) - disk jockey #28

Merged
trinity merged 2 commits from dj into main 2024-01-10 16:07:06 -07:00
3 changed files with 587 additions and 0 deletions
Showing only changes of commit 88a66bcc01 - Show all commits

View File

@ -57,6 +57,9 @@ test: build
	tests/cc-compat.sh
	tests/posix-compat.sh
dj: src/dj.c build_dir
	$(CC) $(CFLAGS) -o build/bin/dj src/dj.c
false: src/false.rs build_dir
	$(RUSTC) $(RUSTCFLAGS) -o build/bin/false src/false.rs

155
docs/dj.1 Normal file
View File

@ -0,0 +1,155 @@
.TH dj 1
.SH NAME
dj \(en disk jockey
.SH SYNOPSIS
dj
.RB ( -AdHnq )
.RB ( -a
.RB [ byte ])
.RB ( -c
.RB [ count ])
.RB ( -i
.R [
.B input file
.R ])
.RB ( -b
.R [
.B input block size
.R ])
.RB ( -s
.R [
.B input offset
.R ])
.RB ( -o
.R [
.B output file
.R ])
.RB ( -B
.R [
.B output block size
.R ])
.RB ( -S
.R [
.B output offset
.R ])
.SH USAGE
The
.B -i
option takes a path as an argument to open and use in place of standard input.
The
.B -o
option does the same in place of standard output. Dj does not truncate output
files and instead writes over the bytes in the existing file.
.PP
The
.B -b
option takes a numeric argument as the size in bytes of the input buffer and
the
.B -B
option does the same for the output buffer, the default for both being 1024
bytes, or one kibibyte (KiB).
.PP
The
.B -s
option takes a numeric argument as the number of bytes to skip into the input
before starting to read, and the
.B -S
option skips a number of bytes through the output before starting to write from
the input. If the input is a stream the bytes are read and discarded. If the
output is a stream, nul characters are printed.
.PP
The
.B -a
option takes one argument of one byte in length and pads the input buffer with
that byte in the event that a read doesn't fill the input buffer, and the
.B -A
option takes no arguments and pads with nuls.
The
.B -c
option specifies the number of reads to make; if it is 0 (the default) dj will
continue reading until a partial or empty read.
.PP
On a partial or empty read, dj prints a diagnostic message (unless the
.B -q
option is specified) and exits (unless the
.B -n
option is specified, in which case only two consecutive empty reads will cause
dj to exit).
At exit, usage statistics are printed unless the option
.B -q
is specified a second time. The
.B -H
option will make these diagnostics human-readable.
.SH DIAGNOSTICS
The
.B -d
option prints all information, user-specified or otherwise, before program
execution.
.PP
When dj exits, by default statistics are printed for input and output to
standard error in the following format:
.PP
.R {records read} {ASCII unit separator} {partial records read}
.R {ASCII record separator} {records written} {ASCII unit separator}
.R {partial records written} {ASCII group separator} {bytes read}
.R {ASCII record separator} {bytes written} {ASCII file separator}
.PP
If the
.B -H
option is specified, dj instead uses the following format:
.PP
.R {records read} '+' {partial records read} '>' {records written}
.R '+' {partial records written} ';' {bytes read} '>' {bytes written}
.R {ASCII line feed}
.PP
The
.B -q
option suppresses error messages which print when a read or write is partial or
empty and when used twice suppresses diagnostic output entirely.
.PP
In non-recoverable errors that don't pertain to dj's read-write cycle, a
diagnostic message is printed and dj exits with the appropriate sysexits(3)
status.
.SH BUGS
If
.B -n
is specified along with a specified count, actual byte output may be lower than
expected (the product of the count multiplied by the input block size). If the
.B -a
or
.B -A
options are used this could make data written nonsensical.
.PP
Many lowercase options have capitalized variants and vice-versa which can be
confusing. Capitalized options tend to affect output or are more intense
versions of lowercase options.
.SH RATIONALE
Dj was modeled after the dd utility specified in POSIX but adds additional
features: typical option formatting, allowing seeks to be specified in bytes
rather than in blocks, allowing arbitrary bytes as padding, and printing in a
format that's easy to parse for machines. It also neglects character
conversion, which may be dd's original intent but is irrelevant to its modern
use.
.SH COPYRIGHT
Copyright (C) 2023 DTB. License AGPLv3+: GNU AGPL version 3 or later
<https://gnu.org/licenses/agpl.html>.
.SH SEE ALSO
dd(1)

429
src/dj.c Normal file
View File

@ -0,0 +1,429 @@
#include <ctype.h> /* isupper(3), tolower(3) */
#include <errno.h> /* errno */
#include <fcntl.h> /* open(2) */
#include <stdio.h> /* fprintf(3), stderr */
#include <stdlib.h> /* free(3), malloc(3), strtol(3), size_t */
#include <string.h> /* memcpy(3), memmove(3), memset(3) */
#include <sysexits.h> /* EX_OK, EX_USAGE */
#include <unistd.h> /* close(2), getopt(3), lseek(2), read(2), write(2),
optarg, optind, STDIN_FILENO, STDOUT_FILENO */
/* errno is provided by <errno.h>, where it may be a macro rather than a plain
 * object, so it must not be redeclared here. */
/* Both ends of dj's jockeyed "pipe" are described by the same structure, one
 * instance for the reading end and one for the writing end. Members the user
 * can configure name the relevant options. */
struct Io{
	int bs;        /* size of buf in bytes (-b for input, -B for output) */
	size_t bufuse; /* how many bytes of buf currently hold data */
	char *buf;     /* the buffer itself */
	int bytes;     /* running total of bytes processed */
	int fd;        /* file descriptor */
	int fl;        /* flags the file is opened with */
	char *fn;      /* file name, or stdin_name/stdout_name (-i, -o) */
	int prec;      /* partial records processed */
	int rec;       /* whole records processed */
	long seek;     /* bytes left to seek/skip (-s, -S) */
};
/* The "engineered pipe" (also "extended play", for the deejay): ep[0] is the
 * reading end and ep[1] is the writing end. */
struct Io ep[2];
/* Additionally, the following global variables are used to store user options.
 */
/* (-a, -A) */ static int align; /* Byte used to pad a partially filled input
                                  * buffer. Only the lower 8b are used but
                                  * align is negative if no alignment is
                                  * being done. */
/* (-c) */ static int count; /* Number of reads to make; 0 if dj(1) runs until
                              * no more reads are possible. */
/* ASCII field separator delimited statistics: values are delimited by the
 * unit (037), record (036), and group (035) separators and the whole report
 * ends with a file separator (034). */
static char *fmt_asv = "%d\037%d\036%d\037%d\035%d\036%d\034";
/* human-readable statistics */
static char *fmt_human = "%d+%d > %d+%d; %d > %d\n";
/* pointer to chosen formatting */
/* (-H) */ static char *fmt_output; /* fmt_asv (default) or fmt_human (-H) */
/* (-dq) */ static char debug; /* Verbosity level; -d increments it and -q
                                * decrements it.
                                *   dj -qq | 0 - no diagnostic output
                                *          |     whatsoever
                                *   dj -q  | 1 - typical output without
                                *          |     notifications on partial
                                *          |     reads or writes
                                *   dj     | 2 - typical output (default)
                                *   dj -d  | 3 - verbose status messages */
/* (-n) */ static char noerror; /* 0 - stops after a partial read or write
                                 *     (default)
                                 * 1 - keeps going after partial reads or
                                 *     writes (-n) */
/* Non-configurable defaults. */
/* Buffer size given to both ends of the pipe by Io_setdefaults(). */
#define bs_default 1024 /* GNU dd(1) default; twice POSIX but a neat 2^10 */
/* Name used in diagnostics; main() replaces it with argv[0] when there is
 * one. */
static char *program_name = "<no argv[0]>";
/* Placeholder file names for the ends of the pipe that were not given a path
 * with -i or -o. */
static char *stdin_name = "<stdin>";
static char *stdout_name = "<stdout>";
/* Flags passed to open(2) for each end. They are also compared against
 * io->fl to tell the reading end from the writing end. */
static int read_flags = O_RDONLY; /* These flags are consistent with Busybox */
static int write_flags = O_WRONLY | O_CREAT; /* dd(1). */
/* Resets every user-configurable option to its default. Each end of the pipe
 * has its opening flags assigned before Io_setdefaults() runs on it because
 * that function inspects the flags to tell the reading end from the writing
 * end. */
#define setdefaults do{ \
	ep[0].fl = read_flags; \
	Io_setdefaults(&ep[0]); \
	ep[1].fl = write_flags; \
	Io_setdefaults(&ep[1]); \
	align = -1; \
	count = 0; \
	debug = 2; \
	fmt_output = fmt_asv; \
	noerror = 0; \
}while(0)
/* Evaluates to the lesser of a and b (b when they are equal). Each argument
 * may be evaluated twice, so avoid arguments with side effects. */
#define MIN(a, b) (((b) > (a)) ? (a) : (b))
/* Nonzero when fd is one of the three standard descriptors (standard input,
 * standard output, or standard error), otherwise zero. */
#define fdisstd(fd) ( \
	   (fd) == STDIN_FILENO  \
	|| (fd) == STDOUT_FILENO \
	|| (fd) == STDERR_FILENO )
/* Releases everything held by the pair of struct Io that io points to: both
 * buffers are freed first and then both file descriptors are closed. Failures
 * are deliberately not checked because this only runs when the program is
 * about to terminate (hence the name). */
#define terminate(io) do{ \
	Io_buffree(&((io)[0])); \
	Io_buffree(&((io)[1])); \
	Io_fdclose(&((io)[0])); \
	Io_fdclose(&((io)[1])); \
}while(0)
/* Gives *io a freshly allocated buffer of io->bs bytes and stores it in
 * io->buf. Returns the new buffer, which is NULL if the allocation failed. */
static void *
Io_bufalloc(struct Io *io){
	io->buf = malloc((sizeof *io->buf) * io->bs);

	return io->buf;
}
/* Releases the buffer owned by *io. io->buf itself is left untouched, so it
 * must not be used afterwards. Returns io. */
static struct Io *
Io_buffree(struct Io *io){
	char *held;

	held = io->buf;
	free(held);

	return io;
}
/* Pads io's buffer on the right: every byte past the io->bufuse bytes already
 * in use is set to padding, after which the buffer counts as full
 * (io->bufuse == io->bs). Returns io. */
static struct Io *
Io_bufrpad(struct Io *io, int padding){
	char *tail;
	size_t room;

	tail = io->buf + io->bufuse;
	room = io->bs - io->bufuse;
	memset(tail, padding, room);
	io->bufuse = io->bs;

	return io;
}
/* Appends to the free space of dest's buffer as many bytes as fit from the
 * start of src's buffer. The copied bytes are removed from src and the bytes
 * src still holds are shifted to the start of its buffer. Both dest->bufuse
 * and src->bufuse are updated. Returns dest. */
static struct Io*
Io_bufxapp(struct Io *dest, struct Io *src){
	size_t n; /* size_t, like the quantities it is computed from */

	n = MIN(src->bufuse, dest->bs - dest->bufuse);
	memcpy(dest->buf + dest->bufuse, src->buf, n);
	dest->bufuse += n;
	/* Only the bytes src still holds need to move; anything past
	 * src->bufuse is not data. */
	src->bufuse -= n;
	memmove(src->buf, src->buf + n, src->bufuse);

	return dest;
}
/* Replaces the contents of dest's buffer with the first n bytes of src's
 * buffer, so dest->bufuse becomes n. Those n bytes are removed from src and
 * the bytes src still holds are shifted to the start of its buffer, with
 * src->bufuse reduced by n. The caller chooses n so that it fits both
 * buffers. Returns dest. */
static struct Io*
Io_bufxfer(struct Io *dest, struct Io *src, int n){
	dest->bufuse = n;
	memcpy(dest->buf, src->buf, dest->bufuse);

	src->bufuse -= n;
	memmove(src->buf, src->buf + n, src->bufuse);

	return dest;
}
/* Closes io->fd unless it is one of the standard descriptors, which are left
 * open. Returns 0 when there was nothing to close, otherwise the result of
 * close(2): 0 on success and -1 on error. */
static int
Io_fdclose(struct Io *io){
	if(fdisstd(io->fd))
		return 0;

	return close(io->fd);
}
/* Opens fn with the flags in io->fl and, once the descriptor io previously
 * held has been closed, stores the new descriptor in io->fd and fn in io->fn.
 * Returns the new descriptor, or -1 with errno set if either the open or the
 * close of the previous descriptor failed; in that case *io is unchanged and
 * no descriptor is leaked. */
static int
Io_fdopen(struct Io *io, char *fn){
	int fd;
	int saved;

	fd = open(fn, io->fl,
		/* these are the flags used by touch(1p) */
		S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
	if(fd == -1)
		return -1;

	if(Io_fdclose(io) != 0){
		/* Don't report success while io still refers to the old
		 * descriptor. Drop the new one, keeping the errno that
		 * describes the failed close. */
		saved = errno;
		close(fd);
		errno = saved;
		return -1;
	}

	io->fd = fd;
	io->fn = fn;

	return fd;
}
/* Skips io->seek bytes through *io's file descriptor and (counter-intuitively)
 * returns -1 once it has done what it can.
 *
 * If the descriptor is not a standard one and lseek(2) succeeds, the skip is
 * a plain seek and io->seek is set to 0. Otherwise the skip is emulated in
 * chunks of at most io->bs bytes: the reading end reads and discards bytes,
 * and the writing end writes nul bytes. Every chunk gets one retry if it
 * transfers nothing. The emulation stops early, leaving the number of bytes
 * not skipped in io->seek, when two consecutive attempts transfer nothing or
 * when read(2)/write(2) fails; errno is then left as the failing call set it.
 *
 * io->buf is used as scratch space, so io->bufuse is 0 on return. */
static int
Io_fdseek(struct Io *io){
	int writing;
	size_t len;
	ssize_t n;

	if(!fdisstd(io->fd) && lseek(io->fd, io->seek, SEEK_SET) != -1){
		io->seek = 0;
		return -1;
	}

	writing = (io->fl == write_flags);
	if(writing)
		memset(io->buf, '\0', io->bs);

	while(io->seek > 0){
		len = MIN(io->bs, io->seek);
		n = writing
			? write(io->fd, io->buf, len)
			: read(io->fd, io->buf, len);
		if(n == 0) /* second chance */
			n = writing
				? write(io->fd, io->buf, len)
				: read(io->fd, io->buf, len);
		/* A negative result is an error, not a byte count. Folding it
		 * into io->seek would make the remaining distance grow and
		 * the loop would never end. */
		if(n <= 0)
			break;
		io->seek -= n;
	}

	io->bufuse = 0;

	return -1;
}
/* Reads up to io->bs bytes from *io's file descriptor into io->buf, storing
 * the number of bytes read in io->bufuse and adding it to io->bytes. A failed
 * read(2) is reported the same way as an empty one: io->bufuse is 0,
 * io->bytes is unchanged, and errno holds the cause. Returns io. */
static struct Io *
Io_read(struct Io *io){
	ssize_t n;

	n = read(io->fd, io->buf, io->bs);
	/* -1 must not reach the unsigned io->bufuse, where it would look like
	 * an enormous amount of data, nor be subtracted from io->bytes. */
	if(n < 0)
		n = 0;
	io->bufuse = n;
	io->bytes += n;

	return io;
}
/* Sets the members of *io to their defaults. io->fl must already be set: it
 * is compared against read_flags to decide whether *io is the reading end
 * (standard input) or the writing end (standard output) of the "pipe".
 * Returns io. */
static struct Io *
Io_setdefaults(struct Io *io){
	int reading;

	reading = (io->fl == read_flags);

	io->bs = bs_default;
	io->bufuse = 0; /* don't rely on *io having been zero-initialized */
	io->buf = NULL;
	io->bytes = 0;
	io->fd = reading ? STDIN_FILENO : STDOUT_FILENO;
	io->fn = reading ? stdin_name : stdout_name;
	io->prec = 0;
	io->rec = 0;
	io->seek = 0;

	return io;
}
/* Writes the io->bufuse bytes held in io->buf to io->fd. Bytes that were
 * written are removed from the buffer, any unwritten bytes are shifted to the
 * start of io->buf, and io->bufuse and io->bytes are updated to match. If
 * nothing was written or write(2) failed, *io is left as it was, so an
 * unchanged io->bufuse means errno has probably been set. Returns io. */
static struct Io *
Io_write(struct Io *io){
	ssize_t t;

	t = write(io->fd, io->buf, io->bufuse);
	/* Only a positive result is a byte count; a failed write must not
	 * take a byte off the running total. */
	if(t > 0){
		io->bufuse -= t;
		memmove(io->buf, io->buf + t, io->bufuse);
		io->bytes += t;
	}

	return io;
}
/* Reports an operating system error on standard error as
 * "program: s: description", where the description is that of the current
 * errno and s names what the error concerns. Returns EX_OSERR so that callers
 * can return the result directly. */
static int
oserr(char *s){
	char *why;

	why = strerror(errno);
	fprintf(stderr, "%s: %s: %s\n", program_name, s, why);

	return EX_OSERR;
}
/* Prints dj's statistics to standard error in the format chosen by fmt_output:
 * whole and partial records read, whole and partial records written, then
 * bytes read and bytes written, all taken from ep. Nothing is printed when
 * debug is below 1. */
static void
output(void){
	if(debug < 1)
		return;

	fprintf(stderr, fmt_output,
		ep[0].rec, ep[0].prec,
		ep[1].rec, ep[1].prec,
		ep[0].bytes, ep[1].bytes);
}
/* Parses the whole of the string s as an integer, using the base implied by
 * its prefix (decimal, 0 for octal, 0x for hexadecimal). Returns the integer,
 * or -1 if s is empty, contains no digits, has characters left over after the
 * number, or is out of range for a long. This is used for argument parsing
 * (e.g. -B [int]) in dj, where no negative integer would be valid anyway, so
 * callers treat any negative result as an error. */
static long
parse(char *s){
	char *end;
	long r;

	errno = 0;
	r = strtol(s, &end, 0);
	if(end == s         /* nothing was converted, e.g. "" */
			|| *end != '\0' /* chars left unparsed */
			|| errno != 0)  /* out of range */
		return -1;

	return r;
}
/* Prints a synopsis of dj's options to standard error and returns EX_USAGE so
 * that callers can return the result directly. The flags listed are the ones
 * main() acts on: -A, -d, -H, -n, and -q. */
static int
usage(void){
	fprintf(stderr, "Usage: %s (-AdHnq) (-a [byte]) (-c [count])\n"
		"\t(-i [input file]) (-b [input block size]) (-s [input offset])\n"
		"\t(-o [output file]) (-B [output block size]) (-S [output offset])\n",
		program_name);

	return EX_USAGE;
}
/* Entry point. Parses the options into the globals and ep, allocates both
 * buffers, performs any requested skip on each end, then alternates reads and
 * writes until the input is exhausted or the read count runs out, and finally
 * prints the statistics.
 *
 * Returns EX_OK on completion, EX_USAGE for bad options or stray operands,
 * and EX_OSERR if a file cannot be opened or a buffer cannot be allocated. */
int main(int argc, char *argv[]){
	int c;
	int i;

	setdefaults;

	if(argc > 0){
		program_name = argv[0];
		/* 'n' has to be listed here for the -n case below to be
		 * reachable. 'h' is accepted only so that it lands on the
		 * usage message. */
		while((c = getopt(argc, argv, "a:Ab:B:c:di:hHnqs:S:o:")) != -1)
			switch(c){
			case 'i': case 'o':
				if(Io_fdopen(&ep[c == 'o'], optarg) != -1)
					break;
				terminate(ep);
				return oserr(optarg);
			case 'A': align = '\0'; break;
			case 'd': ++debug; break;
			case 'n': noerror = 1; break;
			case 'H': fmt_output = fmt_human; break;
			case 'q': --debug; break;
			case 'a':
				if(optarg[0] != '\0' && optarg[1] == '\0'){
					/* A negative align means "no padding", so
					 * bytes >= 0x80 must not turn negative
					 * where char is signed. */
					align = (unsigned char)optarg[0];
					break;
				}
				/* FALLTHROUGH */
			case 'c': case 'b': case 's': case 'B': case 'S':
				if(c == 'c' && (count = parse(optarg)) >= 0)
					break;
				/* Capitalized options address the writing end.
				 * isupper(3) only promises a nonzero result, so
				 * reduce it to 0 or 1 before indexing ep. */
				i = isupper(c) != 0;
				c = tolower(c);
				if((c == 'b' && (ep[i].bs = parse(optarg)) > 0)
						|| (c == 's' && (ep[i].seek = parse(optarg)) >= 0))
					break;
				/* FALLTHROUGH */
			default:
				terminate(ep);
				return usage();
			}
	}

	if(debug >= 3)
		fprintf(stderr,
			"argv0=%s\n"
			"in=%s\tibs=%d\tskip=%ld\talign=%hhx\tcount=%d\n"
			"out=%s\tobs=%d\tseek=%ld\tdebug=%2d\tnoerror=%d\n",
			program_name,
			ep[0].fn, ep[0].bs, ep[0].seek, align, count,
			ep[1].fn, ep[1].bs, ep[1].seek, debug, noerror);

	/* dj takes no operands. */
	if(argc > optind){
		terminate(ep);
		return usage();
	}

	for(i = 0; i <= 1; ++i){
		if(Io_bufalloc(&ep[i]) == NULL){
			fprintf(stderr, "%s: Failed to allocate %d bytes\n",
				program_name, ep[i].bs);
			terminate(ep);
			return EX_OSERR;
		}else if(ep[i].seek > 0)
			switch(Io_fdseek(&ep[i])){
			case EX_OK:
				output();
				terminate(ep);
				return EX_OK;
			}
	}

	do{	/* read */
		Io_read(&ep[0]);
		if(!noerror && ep[0].bufuse == 0)
			Io_read(&ep[0]); /* second chance */
		if(ep[0].bufuse == 0) /* that's all she wrote */
			break;
		else if(ep[0].bufuse < ep[0].bs){
			++ep[0].prec;
			if(debug >= 2){
				fprintf(stderr, "%s: Partial read:\n\t", program_name);
				output();
			}
			if(!noerror)
				count = 1; /* makes this the last pass */
			if(align >= 0)
				Io_bufrpad(&ep[0], align);
		}else
			++ep[0].rec;

		/* write */
		do{	if(ep[1].bs > ep[0].bs){ /* io[1].bs > io[0].bs */
				Io_bufxapp(&ep[1], &ep[0]);
				if(ep[0].bs + ep[1].bufuse <= ep[1].bs && count != 1)
					continue; /* we could write more */
			}else
				Io_bufxfer(&ep[1], &ep[0], MIN(ep[0].bufuse, ep[1].bs));

			c = ep[1].bufuse; /* remembered to see how much gets written */
			Io_write(&ep[1]);
			if(!noerror && ep[1].bufuse == c)
				Io_write(&ep[1]); /* second chance */
			if(c == ep[1].bufuse){ /* no more love */
				count = 1;
				break;
			}else if(c > ep[1].bufuse && ep[1].bufuse > 0){
				ep[1].prec += 1;
				if(debug >= 2){
					fprintf(stderr, "%s: Partial write:\n\t", program_name);
					output();
				}
				if(!noerror)
					count = 1;
			}else if(ep[1].bufuse == 0 && c < ep[1].bs)
				++ep[1].prec;
			else
				++ep[1].rec;
		}while(ep[0].bufuse > 0);
	}while(count == 0 || --count > 0);

	output();
	terminate(ep);

	return EX_OK;
}