WIP article
This commit is contained in:
parent
651320aeba
commit
9335f815d1
219
homepage/knowledge/cat.html
Normal file
219
homepage/knowledge/cat.html
Normal file
@ -0,0 +1,219 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<HTML LANG="en">
|
||||||
|
<HEAD>
|
||||||
|
<LINK HREF="http://www.trinity.moe/FILL_IN" REL="canonical" />
|
||||||
|
<LINK HREF="https://raw.githubusercontent.com/devenblake/homepage/main/favicon.ico" REL="shortcut icon" TYPE="image/x-icon" />
|
||||||
|
<LINK HREF="/css/blank.css" ID="styling" REL="stylesheet" />
|
||||||
|
<META CHARSET="UTF-8" />
|
||||||
|
<META CONTENT="interest-cohort=()" HTTP-EQUIV="Permissions-Policy" /> <!-- FUCK GOOGLE -->
|
||||||
|
<META NAME="viewport" CONTENT="width=device-width, initial-scale=1" />
|
||||||
|
<TITLE>cat(1)</TITLE>
|
||||||
|
</HEAD>
|
||||||
|
<BODY>
|
||||||
|
<P><A HREF="/">~ Return to the rest of the site</A></P>
|
||||||
|
<SCRIPT SRC="/cookies.js" TYPE="application/javascript"></SCRIPT>
|
||||||
|
<SCRIPT SRC="/sheets.js" TYPE="application/javascript"></SCRIPT>
|
||||||
|
<SCRIPT TYPE="application/javascript">window.onload = window.initializesheets;</SCRIPT>
|
||||||
|
<H1>POSIX cat(1)</H1>
|
||||||
|
<H3>updated 2021-06-19</H3>
|
||||||
|
<HR ALIGN="left" SIZE="1" WIDTH="25%" />
|
||||||
|
<P>
|
||||||
|
cat on a POSIX or otherwise UNIX-like system is a program that exists to concatenate files; to “join” one file at its end to another at its start, and output that resulting file to standard output.
|
||||||
|
In practice this is usually accomplished by printing the contents of each sequential file argument to standard output.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
cat was introduced in UNIX v1 to supersede the program pr which printed the contents of a single file to the screen (McIlroy); its first-edition manual page described cat as “about the easiest way to print a file” (“cat(1)”).
|
||||||
|
cat’s modern, typical use is more or less the same; it’s often introduced to UNIX beginners as a method to print the contents of a file to the screen, which is why many implementations of cat include options that are technically redundant - see the often-included cat -e, -t, and -v that replace the ends of lines, tabs, and invisible characters respectively with printing portrayals (“cat(1p)”).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The POSIX standard as of 2003 requires only the option -u to be implemented, which prevents cat from buffering its output - on some systems, cat buffers its output in 512-byte blocks (McIlroy), similarly to dd’s default as defined by POSIX (“dd(1p)”), though most currently popular cat implementations already write their output without buffering by default and ignore the -u flag altogether (busybox, GNU coreutils).
|
||||||
|
POSIX doesn’t mandate buffering by default.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
This is a POSIX-compliant implementation of UNIX cat with no additional features nor buffered output (it ignores <CODE>cat -u</CODE>) in C:
|
||||||
|
</P>
|
||||||
|
<PRE><CODE>
|
||||||
|
#include &lt;stdio.h&gt;
|
||||||
|
#include &lt;stdlib.h&gt;
|
||||||
|
#include &lt;string.h&gt;
|
||||||
|
|
||||||
|
#define STDIN_NAME "&lt;stdin&gt;"
|
||||||
|
#define STDOUT_NAME "&lt;stdout&gt;"
|
||||||
|
|
||||||
|
/* these two errors will exit out of the program with an unsuccessful status,
|
||||||
|
* and print a diagnostic message to standard error */
|
||||||
|
/* report that file_name could not be opened and terminate the program
 *
 * argv0 is the name the program was invoked as and file_name is the name of
 * the file that failed to open; both are only read, never modified.
 * the diagnostic is written to standard error and the function does not
 * return - it exits with an unsuccessful status */
void
file_access_error(const char *argv0, const char *file_name){
	fprintf(stderr, "%s: %s: cannot open file\n", argv0, file_name);
	exit(EXIT_FAILURE);
}
|
||||||
|
|
||||||
|
/* report that writing to file_name failed and terminate the program
 *
 * argv0 is the name the program was invoked as and file_name is the name of
 * the file that could not be written to; both are only read, never modified
 * (file_name may be a string literal).
 * the diagnostic is written to standard error and the function does not
 * return - it exits with an unsuccessful status */
void
file_write_error(const char *argv0, const char *file_name){
	fprintf(stderr, "%s: %s: error writing to file\n", argv0, file_name);
	exit(EXIT_FAILURE);
}
|
||||||
|
|
||||||
|
/* print input to output, one byte at a time, until input is exhausted
 *
 * returns 0 once getc reports EOF on input (end of file or a read error)
 * and 1 as soon as a byte cannot be written to output */
int
file_copy(FILE *input, FILE *output){
	/* getc returns an int: every unsigned char value plus the distinct,
	 * negative EOF. storing that in a char either makes the byte 0xFF
	 * look like EOF (where char is signed), which would truncate the
	 * copy, or makes EOF impossible to match (where char is unsigned),
	 * which would loop forever */
	int c;

	while((c = getc(input)) != EOF)
		if(putc(c, output) == EOF)
			return 1;

	return 0;
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char *argv[]){
|
||||||
|
/* the name of the file being printed (for diagnostics) */
|
||||||
|
char *file_name;
|
||||||
|
|
||||||
|
/* allocate this ahead of time */
|
||||||
|
char *stdin_file_name = STDIN_NAME;
|
||||||
|
|
||||||
|
/* the file pointer of the file being printed */
|
||||||
|
FILE *input;
|
||||||
|
|
||||||
|
/* this will always be stdout */
|
||||||
|
FILE *output = stdout;
|
||||||
|
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* whether or not options are being parsed */
|
||||||
|
int parsing_opts = 1;
|
||||||
|
|
||||||
|
/* usage with 0 arguments - print standard input to standard output */
|
||||||
|
if(argc == 1 && file_copy(stdin, stdout))
|
||||||
|
file_write_error(argv[0], STDOUT_NAME);
|
||||||
|
else if(argc == 1)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
for(i = 1; i < argc; ++i){
|
||||||
|
/* parsing options */
|
||||||
|
|
||||||
|
/* after `--`, interpret `--`, `-`, and `-u` as literal
|
||||||
|
* filenames */
|
||||||
|
if(parsing_opts && !strcmp(argv[i], "--")){
|
||||||
|
parsing_opts = 0;
|
||||||
|
continue;
|
||||||
|
/* ignore `-u` if still parsing options */
|
||||||
|
}else if(parsing_opts && !strcmp(argv[i], "-u"))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* take `-` to mean standard input if still parsing options */
|
||||||
|
else if(parsing_opts && !strcmp(argv[i], "-")){
|
||||||
|
file_name = stdin_file_name;
|
||||||
|
input = stdin;
|
||||||
|
|
||||||
|
/* non-option; open the file and make sure file_name points to
|
||||||
|
* the right string */
|
||||||
|
}else{
|
||||||
|
file_name = argv[i];
|
||||||
|
input = fopen(file_name, "r");
|
||||||
|
if(input == NULL)
|
||||||
|
file_access_error(argv[0], file_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* print input to output */
|
||||||
|
if(file_copy(input, output))
|
||||||
|
file_write_error(argv[0], STDOUT_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* exit successfully */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
</CODE></PRE>
|
||||||
|
|
||||||
|
<P>It’s worth noting that this concept of cat as a utility that sequentially prints given files to standard output means cat can be replaced by a simple shell script that does the same using dd and printf; cat as defined by POSIX is actually totally redundant to other POSIX utilities. Here’s the shell script:</P>
|
||||||
|
<PRE><CODE>
|
||||||
|
#!/bin/sh

# cat [-hu] [file...]
#
# prints every file operand, in order, to standard output using dd; the
# operand `-` means standard input. with no file operands at all (even if
# options were given) standard input is printed instead.
# exits 0 on success, or with the failing command's status otherwise

# some older systems will use the former POSIX_ME_HARDER rather than
# POSIXLY_CORRECT to request strict POSIX conformance
[ -n "$POSIX_ME_HARDER" ] && [ -z "$POSIXLY_CORRECT" ] \
	&& POSIXLY_CORRECT=1 \
	|| true

# for usage()
argv0="$0"

# set once `--` is seen; cleared here so that a value inherited from the
# environment can't switch option parsing off
DONT_PARSE_ARGS=

# how many file operands have been printed so far
operands=0

# dd_ is used so that dd can easily be re-defined to the unbuffered variant -
# dd bs=1
dd_() { dd "$@"; }

# if the system wants strict POSIX conformance, don't advertise `cat -h`
# because it's not defined by POSIX
usage() {
	if [ -z "$POSIXLY_CORRECT" ]; then
		printf "Usage: %s [-hu] [file...]\n" "$argv0"
	else
		printf "Usage: %s [-u] [file...]\n" "$argv0"
	fi
	exit 1
}

# count the remaining arguments rather than testing "$1", so that an
# empty-string operand isn't mistaken for the end of the argument list
while [ "$#" -gt 0 ]; do
	# parsing options
	if [ -z "$DONT_PARSE_ARGS" ]; then
		case "$1" in
		--)
			# after `--`, interpret `--`, `-`, `-h`, and `-u` as
			# literal filenames
			DONT_PARSE_ARGS=1
			shift
			continue
			;;
		-u)
			# enable unbuffered output
			# this dd is probably super slow. TODO?: find better way
			dd_() { dd bs=1 "$@"; }
			shift
			continue
			;;
		-h)
			# the `-h` flag isn't specified within POSIX, so only
			# honor it if the environment isn't strictly conforming
			# to POSIX; otherwise it falls through and is treated
			# as a filename
			[ -z "$POSIXLY_CORRECT" ] && usage
			;;
		-)
			# take `-` to mean standard input, which dd already
			# reads by default
			dd_ 2>/dev/null || exit $?
			operands=$((operands + 1))
			shift
			continue
			;;
		esac
	fi

	# print input to output
	dd_ <"$1" 2>/dev/null || exit $?
	operands=$((operands + 1))

	shift
done

# usage with no file operands (no arguments, or nothing but options) - print
# standard input to standard output
if [ "$operands" -eq 0 ]; then
	dd_ 2>/dev/null || exit $?
fi

# exit successfully
exit 0
|
||||||
|
</CODE></PRE>
|
||||||
|
|
||||||
|
<P>cat still has a purpose though. This shell script is relatively slow for short files and very slow for very large files (though dd itself should probably be used to copy large files from one medium to another anyway). This is provided for educational purposes (though I personally use this shell script in my system PATH; the C implementation provided compiles to a much larger binary using gcc 11.1.0, so this saves a couple kilobytes).</P>
|
||||||
|
|
||||||
|
<H2>Cited media and further reading</H2><UL>
|
||||||
|
<LI>Articles<UL>
|
||||||
|
<LI><A HREF="https://www.cs.dartmouth.edu/~doug/reader.pdf">McIlroy, M. Douglas - “A Research Unix Reader”</A></LI>
|
||||||
|
<LI><A HREF="https://en.wikipedia.org/wiki/POSIX#512-_vs_1024-byte_blocks">Wikipedia - “POSIX § 512- vs 1024-byte blocks”</A><UL>
|
||||||
|
<LI>As of 2021-06-19 the publicly editable section reads: <I>POSIX mandates 512-byte default block sizes for the df and du utilities, reflecting the typical size of blocks on disks. When Richard Stallman and the GNU team were implementing POSIX for the GNU operating system, they objected to this on the grounds that most people think in terms of 1024 byte (or 1 KiB) blocks. The environment variable POSIX_ME_HARDER was introduced to allow the user to force the standards-compliant behaviour. The variable name was later changed to POSIXLY_CORRECT. This variable is now also used for a number of other behaviour quirks.</I></LI>
|
||||||
|
</UL></LI>
|
||||||
|
</UL></LI>
|
||||||
|
<LI>Common cat implementations<UL>
|
||||||
|
<LI>busybox/busybox - coreutils/cat.c</LI>
|
||||||
|
<LI>GNU/coreutils - src/cat.c</LI>
|
||||||
|
</UL></LI>
|
||||||
|
<LI>Manual pages<UL>
|
||||||
|
<LI><A HREF="http://man.cat-v.org/unix-1st/1/cat">cat(1)</A> (UNIX v1)</LI>
|
||||||
|
<LI><A HREF="https://www.unix.com/man-page/posix/1posix/cat/">cat(1p)</A> (The Open Group, 2003)</LI>
|
||||||
|
<LI><A HREF="https://www.unix.com/man-page/posix/1posix/dd/">dd(1p)</A> (The Open Group, 2003)</LI>
|
||||||
|
<LI><A HREF="https://www.unix.com/man-page/POSIX/1posix/printf/">printf(1p)</A> (The Open Group, 2003)</LI>
|
||||||
|
</UL></LI>
|
||||||
|
</UL>
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
Loading…
Reference in New Issue
Block a user