<!DOCTYPE html>
|
||
<HTML LANG="en">
|
||
<HEAD>
|
||
<LINK HREF="http://www.trinity.moe/knowledge/cat" REL="canonical" />
|
||
<LINK HREF="https://raw.githubusercontent.com/devenblake/homepage/main/favicon.ico" REL="shortcut icon" TYPE="image/x-icon" />
|
||
<LINK HREF="/css/blank.css" ID="highlight" REL="stylesheet" />
|
||
<LINK HREF="/css/blank.css" ID="styling" REL="stylesheet" />
|
||
<META CHARSET="UTF-8" />
|
||
<META CONTENT="noindex" NAME="googlebot" /> <!-- FUCK GOOGLE -->
|
||
<META CONTENT="interest-cohort=()" HTTP-EQUIV="Permissions-Policy" /> <!-- FUCK GOOGLE -->
|
||
<META NAME="viewport" CONTENT="width=device-width, initial-scale=1" />
|
||
<TITLE>cat(1)</TITLE>
|
||
</HEAD>
|
||
<BODY>
|
||
<P><A HREF="/">~ Return to the rest of the site</A></P>
|
||
|
||
<SCRIPT SRC="/js/cookies.js" TYPE="application/javascript"></SCRIPT>
|
||
<SCRIPT SRC="/js/sheets.js" TYPE="application/javascript"></SCRIPT>
|
||
<SCRIPT TYPE="application/javascript">window.onload = window.initializesheets;</SCRIPT>
|
||
<SCRIPT ASYNC TYPE="application/javascript">
|
||
/* see on page: acknowledgements */

/* state for lazily loading highlight.js; kept on window so that it is shared
 * by every call to window.load_highlighting */
window.is_highlightjs_here = false; /* has the core library been requested? */
window.is_highlighted_languages = []; /* languages already highlighted */
window.to_be_highlighted = []; /* languages whose grammar is still loading */
window.is_highlight_stylesheet_here = false; /* is the stylesheet swapped in? */

/* Enable syntax highlighting for one language.
 *
 * language: a highlight.js language name (e.g. "c"); the button that triggers
 *           this call has the id language + "_toggle" and the code samples
 *           carry the class "language-" + language.
 *
 * On the first call the highlight.js stylesheet and core library are pulled
 * in; each call then fetches the grammar for `language` and, once it has
 * loaded, highlights every matching element. Returns nothing. */
window.load_highlighting = function(language){
	var cdn = "https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.0.1/";
	var toggle = document.getElementById(language + "_toggle");
	var script;

	/* stop tracking `language` as pending once its script has settled */
	var forget_pending = function(){
		var at = window.to_be_highlighted.indexOf(language);
		if(at != -1)
			window.to_be_highlighted.splice(at, 1);
	};

	/* the button is single-use; tolerate it already being gone */
	if(toggle)
		toggle.remove();

	if(!window.is_highlight_stylesheet_here){
		document.getElementById("highlight").setAttribute("href", cdn + "styles/default.min.css");
		window.is_highlight_stylesheet_here = true;
	}

	/* async = false makes inserted scripts execute in insertion order, so
	 * the core library always runs before any grammar added below */
	if(!window.is_highlightjs_here){
		script = document.createElement('script');
		script.async = false;
		script.src = cdn + "highlight.min.js";
		document.body.appendChild(script);
		window.is_highlightjs_here = true;
	}

	/* nothing more to do if this language is done or already on its way */
	if(window.is_highlighted_languages.indexOf(language) != -1
			|| window.to_be_highlighted.indexOf(language) != -1)
		return;

	window.to_be_highlighted.push(language);

	script = document.createElement('script');
	script.async = false;
	script.src = cdn + "languages/" + language + ".min.js";

	/* `language` comes from the enclosing call rather than from the head of
	 * the pending queue, so a grammar that failed to load cannot make this
	 * handler highlight the wrong language */
	script.onload = function(){
		var elements = document.getElementsByClassName("language-" + language);
		var i;

		forget_pending();

		for(i = 0; i < elements.length; ++i){
			try{
				hljs.highlightElement(elements[i]);
			}catch(err){
				console.log(err.message);
			}
		}

		window.is_highlighted_languages.push(language);
	};

	script.onerror = function(){
		forget_pending();
		console.log("could not load the highlight.js grammar for " + language);
	};

	document.body.appendChild(script);
};
|
||
</SCRIPT>
|
||
|
||
<H1>POSIX cat(1)</H1>
|
||
<H3>updated 2021-08-06</H3>
|
||
<HR ALIGN="left" SIZE="1" WIDTH="25%" />
|
||
<P><CODE>cat</CODE> on a POSIX or otherwise UNIX-like system is a program that exists to concatenate files; to “join” one file at its end to another at its start, and output that resulting file to standard output.</P>
|
||
<P>
|
||
<CODE>cat</CODE> was introduced in UNIX v1 to supersede the program <CODE>pr</CODE>, which printed the contents of a single file to the screen (McIlroy); its first-edition manual page described <CODE>cat</CODE> as "about the easiest way to print a file" ("cat(1)").
|
||
<CODE>cat</CODE>’s modern, typical use is more or less the same; it’s often introduced to UNIX beginners as a method to print the contents of a file to the screen, which is why many implementations of <CODE>cat</CODE> include options that, while possibly useful, can be redundant - see the often-included <CODE>cat</CODE> <CODE>-e</CODE>, <CODE>-t</CODE>, and <CODE>-v</CODE> that replace the ends of lines, tabs, and invisible characters respectively with printing portrayals ("cat(1p)").
|
||
|
||
</P>
|
||
<P>
|
||
The POSIX standard as of 2003 requires only the option <CODE>-u</CODE> to be implemented, which prevents <CODE>cat</CODE> from buffering its output. On some systems, <CODE>cat</CODE> buffers its output in 512-byte blocks (McIlroy), similarly to <CODE>dd</CODE>’s default as defined by POSIX (“dd(1p)”), though most currently popular <CODE>cat</CODE> implementations write unbuffered output by default and ignore the <CODE>-u</CODE> flag altogether (busybox, GNU coreutils).
|
||
POSIX doesn’t mandate buffering by default - specifically, <CODE>-u</CODE> <I>has</I> to guarantee that the output is unbuffered, but <CODE>cat</CODE> doesn't have to buffer it in the first place and can ignore <CODE>-u</CODE> in that case.
|
||
</P>
|
||
<P>This is a POSIX-compatible implementation of UNIX <CODE>cat</CODE> with no additional features nor buffered output in C:</P>
|
||
<INPUT ID="c_toggle" ONCLICK="window.load_highlighting('c');" TYPE="button" VALUE="Press this button to enable syntax highlighting within this code." />
|
||
<PRE><CODE CLASS="language-c" DATA-LANG="c">
|
||
#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;string.h&gt;

#define STDIN_NAME "&lt;stdin&gt;"
#define STDOUT_NAME "&lt;stdout&gt;"
|
||
|
||
/* the failures this program knows how to report; each one selects the
 * diagnostic that error() prints */
enum error_type{
	FILE_ACCESS, /* a file could not be opened */
	FILE_CLOSE,  /* a file could not be closed */
	FILE_WRITE   /* a file could not be written to */
};
|
||
|
||
/* this is an error function that will print to standard error the error that
|
||
* occurred in the program and exit */
|
||
void
|
||
error(enum error_type type, char *argv0, char *file_name){
|
||
switch(type){
|
||
case FILE_ACCESS:
|
||
fprintf(stderr, "%s: %s: cannot open file\n", argv0, file_name);
|
||
break;
|
||
case FILE_CLOSE:
|
||
fprintf(stderr, "%s: %s: cannot close file\n", argv0, file_name);
|
||
break;
|
||
case FILE_WRITE:
|
||
fprintf(stderr, "%s: %s: cannot write to file\n", argv0, file_name);
|
||
break;
|
||
}
|
||
exit(1);
|
||
}
|
||
|
||
/* copy everything readable from input to output, one character at a time
 *
 * returns 0 once getc() reports EOF on input and 1 as soon as a putc() to
 * output fails
 *
 * the character is held in an int, not a char: getc() returns an int so that
 * EOF can be told apart from every valid byte value. held in a char, the
 * byte 0xFF compares equal to EOF where char is signed (ending the copy
 * early) and nothing ever compares equal to EOF where char is unsigned
 * (so the loop never ends) */
int
file_copy(FILE *input, FILE *output){
	int c;

	while((c = getc(input)) != EOF)
		if(putc(c, output) == EOF)
			return 1;

	return 0;
}
|
||
|
||
int
|
||
main(int argc, char *argv[]){
|
||
/* the name of the file being printed (for diagnostics) */
|
||
char *file_name;
|
||
|
||
/* allocate this ahead of time */
|
||
char *stdin_file_name = STDIN_NAME;
|
||
|
||
/* the file pointer of the file being printed */
|
||
FILE *input;
|
||
|
||
/* this will always be stdout */
|
||
FILE *output = stdout;
|
||
|
||
int i;
|
||
|
||
/* whether or not options are being parsed */
|
||
int parsing_opts = 1;
|
||
|
||
/* usage with 0 arguments - print standard input to standard output */
|
||
if(argc == 1 && file_copy(stdin, stdout))
|
||
error(FILE_WRITE, argv[0], STDOUT_NAME);
|
||
else if(argc == 1)
|
||
return 0;
|
||
|
||
for(i = 1; i < argc; ++i){
|
||
/* parsing options */
|
||
|
||
/* after `--`, interpret `--`, `-`, and `-u` as literal
|
||
* filenames */
|
||
if(parsing_opts && !strcmp(argv[i], "--")){
|
||
parsing_opts = 0;
|
||
continue;
|
||
|
||
/* ignore `-u` if still parsing options */
|
||
}else if(parsing_opts && !strcmp(argv[i], "-u"))
|
||
continue;
|
||
|
||
/* take `-` to mean standard input if still parsing options */
|
||
else if(parsing_opts && !strcmp(argv[i], "-")){
|
||
file_name = stdin_file_name;
|
||
input = stdin;
|
||
|
||
/* non-option; open the file and make sure file_name points to
|
||
* the right string */
|
||
}else{
|
||
file_name = argv[i];
|
||
input = fopen(file_name, "r");
|
||
if(input == NULL)
|
||
error(FILE_ACCESS, argv[0], file_name);
|
||
}
|
||
|
||
/* print input to output */
|
||
if(file_copy(input, output))
|
||
error(FILE_WRITE, argv[0], STDOUT_NAME);
|
||
|
||
/* close input file if it's not stdin */
|
||
if(input != stdin && fclose(input))
|
||
error(FILE_CLOSE, argv[0], file_name);
|
||
}
|
||
|
||
/* exit successfully */
|
||
return 0;
|
||
}
|
||
</CODE></PRE>
|
||
|
||
<P>
|
||
This is also available at <A HREF="/knowledge/cat/cat.c">/knowledge/cat/cat.c on this website</A> as a plain .c file with which you can toy.
|
||
</P>
|
||
<P>It’s worth noting that this concept of cat as a utility that sequentially prints given files to standard output means <CODE>cat</CODE> can be replaced by a simple shell script that does the same using <CODE>dd</CODE> and <CODE>printf</CODE>; <CODE>cat</CODE> as defined by POSIX is actually totally redundant to other POSIX utilities. Here’s the shell script:</P>
|
||
<INPUT ID="shell_toggle" ONCLICK="window.load_highlighting('shell');" TYPE="button" VALUE="Press this button to enable syntax highlighting within this code." />
|
||
<PRE><CODE CLASS="language-shell" DATA-LANG="shell">
|
||
#!/bin/sh

# cat as a shell script: copy each file operand, in order, to standard
# output using dd. `-u` switches dd to one-byte blocks, `-` means standard
# input, and `--` ends option parsing. With no file operands, standard input
# is copied. Exits with dd's status on the first failure, otherwise 0.

# dd_ is used so that dd can easily be re-defined
dd_() { dd "$@"; }

# start from a known state instead of inheriting these from the environment
DONT_PARSE_ARGS=
FILES_COPIED=

# loop on the argument count, not on "$1" being non-empty, so that an
# empty-string operand is not mistaken for the end of the arguments
while [ "$#" -gt 0 ]; do
	# parsing options
	if [ -z "$DONT_PARSE_ARGS" ]; then
		case "$1" in
		--)
			# after `--`, interpret `--`, `-`, and `-u` as literal filenames
			DONT_PARSE_ARGS=1
			shift 1
			continue
			;;
		-u)
			# enable unbuffered output. this is kind of a hack and a bit
			# slow. technically it is buffered, just one byte at a time
			dd_() { dd bs=1 "$@"; }
			shift 1
			continue
			;;
		-)
			# take `-` to mean standard input, which dd reads by default
			dd_ 2>/dev/null || exit $?
			FILES_COPIED=1
			shift 1
			continue
			;;
		esac
	fi

	# print input to output
	dd_ < "$1" 2>/dev/null || exit $?
	FILES_COPIED=1

	shift 1
done

# usage with no file operands - print standard input to standard output
if [ -z "$FILES_COPIED" ]; then
	dd_ 2>/dev/null || exit $?
fi

# exit successfully
exit 0
|
||
</CODE></PRE>
|
||
|
||
<P>
|
||
It's worth noting that the <CODE>dd_</CODE> shell function in the above sample that allows for re-aliasing of <CODE>dd</CODE> to <CODE>dd bs=1</CODE> could be replaced with a shell variable <CODE>$DD</CODE> with the initial value <CODE>dd</CODE> and a changed value according to <CODE>-u</CODE> of <CODE>dd bs=1</CODE>.
|
||
However, <CODE>alias dd="dd bs=1"</CODE> would not work due to how shell aliases are parsed (ShellCheck).
|
||
</P>
|
||
|
||
<P>
|
||
<CODE>cat</CODE> doesn't work well as a shell script though.
|
||
The script is relatively slow for short files and very slow for very large files (though <CODE>dd</CODE> itself should probably be used to copy large files from one medium to another anyway).
|
||
This is provided for educational purposes.
|
||
</P>
|
||
|
||
<H2>Cited media and further reading</H2><UL>
|
||
<LI>Articles<UL>
|
||
<LI><A HREF="https://www.cs.dartmouth.edu/~doug/reader.pdf">McIlroy, M. Douglas - “A Research Unix Reader”</A></LI>
|
||
<LI><A HREF="https://github.com/koalaman/shellcheck/wiki/SC2262">ShellCheck wiki page SC2262 ("This alias can't be defined and used in the same parsing unit. Use a function instead.")</A></LI>
|
||
</UL></LI>
|
||
<LI>Manual pages<UL>
|
||
<LI><A HREF="http://man.cat-v.org/unix-1st/1/cat">cat(1)</A> (UNIX v1)</LI>
|
||
<LI><A HREF="https://www.unix.com/man-page/posix/1posix/cat/">cat(1p)</A> (The Open Group, 2003)</LI>
|
||
<LI><A HREF="https://www.unix.com/man-page/posix/1posix/dd/">dd(1p)</A> (The Open Group, 2003)</LI>
|
||
<LI><A HREF="https://www.unix.com/man-page/POSIX/1posix/printf/">printf(1p)</A> (The Open Group, 2003)</LI>
|
||
</UL></LI>
|
||
<LI>Notable cat implementations<UL>
|
||
<LI>1979 - <A HREF="http://www.bell-labs.com/">Bell Labs</A>/<A HREF="https://en.wikipedia.org/wiki/Unix">UNIX v7</A> - <A HREF="https://www.tuhs.org/cgi-bin/utree.pl?file=V7/usr/src/cmd/cat.c">/usr/src/cmd/cat.c</A></LI>
|
||
<LI>1992 - <A HREF="http://www.bell-labs.com/">Bell Labs</A>/<A HREF="https://9p.io/plan9/">Plan 9 from Bell Labs Fourth Edition</A> - <A HREF="https://github.com/plan9foundation/plan9/blob/main/sys/src/cmd/cat.c">/sys/src/cmd/cat.c</A></LI>
|
||
<LI><B>ongoing</B> started 1992 - <A HREF="https://www.gnu.org/">GNU</A>/<A HREF="https://www.gnu.org/software/coreutils/">coreutils</A> - <A HREF="https://git.savannah.gnu.org/cgit/coreutils.git/tree/src/cat.c">src/cat.c</A></LI>
|
||
<LI>1995 - <A HREF="https://www.berkeley.edu/">University of California, Berkeley</A>/<A HREF="https://en.wikipedia.org/wiki/Berkeley_Software_Distribution">4.4BSD-Lite2</A> - <A HREF="https://github.com/sergev/4.4BSD-Lite2/blob/master/usr/src/bin/cat/cat.c">/usr/src/bin/cat/cat.c</A></LI>
|
||
<LI><B>ongoing</B> forked from 4.4BSD-Lite2 - <A HREF="https://www.netbsd.org/">NetBSD</A>/<A HREF="https://github.com/NetBSD/src">src</A> - <A HREF="https://github.com/NetBSD/src/blob/trunk/bin/cat/cat.c">bin/cat/cat.c</A></LI>
|
||
<LI><B>ongoing</B> started 1995 - <A HREF="https://www.busybox.net/">busybox</A>/<A HREF="https://git.busybox.net/busybox/">busybox</A> - <A HREF="https://git.busybox.net/busybox/tree/coreutils/cat.c">coreutils/cat.c</A></LI>
|
||
</UL></LI>
|
||
</UL>
|
||
|
||
<H2>Acknowledgements</H2><UL>
|
||
<LI>Content help<UL>
|
||
<LI>Miles</LI>
|
||
<LI>WeedSmokingJew</LI>
|
||
</UL></LI>
|
||
<LI>JavaScript help<UL>
|
||
<LI>adamz01h</LI>
|
||
<LI>wiresToGround</LI>
|
||
</UL></LI>
|
||
<LI>JavaScript libraries used<UL>
|
||
<LI><A HREF="https://highlightjs.org/">highlight.js</A></LI>
|
||
</UL></LI>
|
||
<LI>Sample code help<UL>
|
||
<LI>Ando_Bando</LI>
|
||
<LI>Miles</LI>
|
||
<LI>u/oh5nxo</LI>
|
||
<LI>WeedSmokingJew</LI>
|
||
</UL></LI>
|
||
</UL>
|
||
|
||
</BODY>
|
||
</HTML>
|