diff --git a/homepage/cat/cat.c b/homepage/cat/cat.c deleted file mode 100644 index 7ee4c6a..0000000 --- a/homepage/cat/cat.c +++ /dev/null @@ -1,106 +0,0 @@ -#include -#include -#include - -#define STDIN_NAME "" -#define STDOUT_NAME "" - -/* these are the predicted errors that could occur */ -enum error_type{ - FILE_ACCESS, - FILE_CLOSE, - FILE_WRITE -}; - -/* this is an error function that will print to standard error the error that - * occurred in the program and exit */ -void -error(enum error_type type, char *argv0, char *file_name){ - switch(type){ - case FILE_ACCESS: - fprintf(stderr, "%s: %s: cannot open file\n", argv0, file_name); - break; - case FILE_CLOSE: - fprintf(stderr, "%s: %s: cannot close file\n", argv0, file_name); - break; - case FILE_WRITE: - fprintf(stderr, "%s: %s: cannot write to file\n", argv0, file_name); - break; - } - exit(1); -} - -/* print input to output, returns 0 if successful and 1 if unsuccessful */ -int -file_copy(FILE *input, FILE *output){ - int c; - while((c = getc(input)) != EOF) - if(putc(c, output) == EOF) - return 1; - return 0; -} - -int -main(int argc, char *argv[]){ - /* the name of the file being printed (for diagnostics) */ - char *file_name; - - /* allocate this ahead of time */ - char *stdin_file_name = STDIN_NAME; - - /* the file pointer of the file being printed */ - FILE *input; - - /* this will always be stdout */ - FILE *output = stdout; - - int i; - - /* whether or not options are being parsed */ - int parsing_opts = 1; - - /* usage with 0 arguments - print standard input to standard output */ - if(argc == 1 && file_copy(stdin, stdout)) - error(FILE_WRITE, argv[0], STDOUT_NAME); - else if(argc == 1) - return 0; - - for(i = 1; i < argc; ++i){ - /* parsing options */ - - /* after `--`, interpret `--`, `-`, and `-u` as literal - * filenames */ - if(parsing_opts && !strcmp(argv[i], "--")){ - parsing_opts = 0; - continue; - - /* ignore `-u` if still parsing options */ - }else if(parsing_opts && !strcmp(argv[i], 
"-u")) - setbuf(output, (char *)NULL); - - /* take `-` to mean standard input if still parsing options */ - else if(parsing_opts && !strcmp(argv[i], "-")){ - file_name = stdin_file_name; - input = stdin; - - /* non-option; open the file and make sure file_name points to - * the right string */ - }else{ - file_name = argv[i]; - input = fopen(file_name, "r"); - if(input == NULL) - error(FILE_ACCESS, argv[0], file_name); - } - - /* print input to output */ - if(file_copy(input, output)) - error(FILE_WRITE, argv[0], STDOUT_NAME); - - /* close input file if it's not stdin */ - if(input != stdin && fclose(input)) - error(FILE_CLOSE, argv[0], file_name); - } - - /* exit successfully */ - return 0; -} diff --git a/homepage/cat/index.html b/homepage/cat/index.html deleted file mode 100644 index 95ee681..0000000 --- a/homepage/cat/index.html +++ /dev/null @@ -1,290 +0,0 @@ - - - - - - - - - - - -cat(1) - - -

~ Return to the rest of the site

- - - - - - -

POSIX cat(1)

-

updated 2021-08-06

-
-

cat on a POSIX or otherwise UNIX-like system is a program that exists to concatenate files; to “join” one file at its end to another at its start, and output that resulting file to standard output.

-

-cat was introduced in UNIX v1 to supercede the program pr which printed the contents of a single file to the screen (McIlroy); its first-edition manual page described cat as "about the easiest way to print a file" ("cat(1)"). -cat’s modern, typical use is more or less the same; it’s often introduced to UNIX beginners as a method to print the contents of a file to the screen, which is why many implementations of cat include options that, while possibly useful, can be redundant - see the often-included cat -e, -t, and -v that replace the ends of lines, tabs, and invisible characters respectively with printing portrayals ("cat(1p)"). - -

-

-The POSIX standard as of 2003 requires only the option -u to be implemented, which prevents cat from buffering its output - on some systems, cat buffers its output in 512-byte blocks (McIlroy), similarly to dd’s default as defined by POSIX (“dd(1p)”), though most currently popular cat implementations do this by default and ignore the -u flag altogether (busybox, GNU coreutils). -POSIX doesn’t mandate buffering by default - specifically, -u has to guarantee that the output is unbuffered, but cat doesn't have to buffer it in the first place and can ignore -u in that case. -

-

This is a POSIX-compatible implementation of UNIX cat with no additional features nor buffered output in C:

- -

-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#define STDIN_NAME "<stdin>"
-#define STDOUT_NAME "<stdout>"
-
-/* these are the predicted errors that could occur */
-enum error_type{
-	FILE_ACCESS,
-	FILE_CLOSE,
-	FILE_WRITE
-};
-
-/* this is an error function that will print to standard error the error that
- * occurred in the program and exit */
-void
-error(enum error_type type, char *argv0, char *file_name){
-	switch(type){
-	case FILE_ACCESS:
-		fprintf(stderr, "%s: %s: cannot open file\n", argv0, file_name);
-		break;
-	case FILE_CLOSE:
-		fprintf(stderr, "%s: %s: cannot close file\n", argv0, file_name);
-		break;
-	case FILE_WRITE:
-		fprintf(stderr, "%s: %s: cannot write to file\n", argv0, file_name);
-		break;
-	}
-	exit(1);
-}
-
-/* print input to output, returns 0 if successful and 1 if unsuccessful */
-int
-file_copy(FILE *input, FILE *output){
-	char c;
-	while((c = getc(input)) != EOF)
-		if(putc(c, output) == EOF)
-			return 1;
-	return 0;
-}
-
-int
-main(int argc, char *argv[]){
-	/* the name of the file being printed (for diagnostics) */
-	char *file_name;
-
-	/* allocate this ahead of time */
-	char *stdin_file_name = STDIN_NAME;
-
-	/* the file pointer of the file being printed */
-	FILE *input;
-
-	/* this will always be stdout */
-	FILE *output = stdout;
-
-	int i;
-
-	/* whether or not options are being parsed */
-	int parsing_opts = 1;
-
-	/* usage with 0 arguments - print standard input to standard output */
-	if(argc == 1 && file_copy(stdin, stdout))
-		error(FILE_WRITE, argv[0], STDOUT_NAME);
-	else if(argc == 1)
-		return 0;
-
-	for(i = 1; i < argc; ++i){
-		/* parsing options */
-
-		/* after `--`, interpret `--`, `-`, and `-u` as literal
-		 * filenames */
-		if(parsing_opts && !strcmp(argv[i], "--")){
-			parsing_opts = 0;
-			continue;
-
-		/* ignore `-u` if still parsing options */
-		}else if(parsing_opts && !strcmp(argv[i], "-u"))
-			continue;
-
-		/* take `-` to mean standard input if still parsing options */
-		else if(parsing_opts && !strcmp(argv[i], "-")){
-			file_name = stdin_file_name;
-			input = stdin;
-
-		/* non-option; open the file and make sure file_name points to
-		 * the right string */
-		}else{
-			file_name = argv[i];
-			input = fopen(file_name, "r");
-			if(input == NULL)
-				error(FILE_ACCESS, argv[0], file_name);
-		}
-
-		/* print input to output */
-		if(file_copy(input, output))
-			error(FILE_WRITE, argv[0], STDOUT_NAME);
-
-		/* close input file if it's not stdin */
-		if(input != stdin && fclose(input))
-			error(FILE_CLOSE, argv[0], file_name);
-	}
-
-	/* exit successfully */
-	return 0;
-}
-
- -

-This is also available at /knowledge/cat/cat.c on this website as a plain .c file with which you can toy. -

-

It’s worth noting that this concept of cat as a utility that sequentially prints given files to standard output means cat can be replaced by a simple shell script that does the same using dd and printf; cat as defined by POSIX is actually totally redundant to other POSIX utilities. Here’s the shell script:

- -

-#!/bin/sh
-
-# dd_ is used so that dd can easily be re-defined
-dd_() { dd "$@"; }
-
-# usage with 0 arguments - print standard input to standard output
-if [ -z "$1" ]; then
-	dd_ 2>/dev/null
-	exit $?
-fi
-
-while [ -n "$1" ]; do
-	# parsing options
-
-	# after `--`, interpret `--`, `-`, and `-u` as literal filenames
-	[ "$1" = "--" ] && [ -z "$DONT_PARSE_ARGS" ] \
-		&& DONT_PARSE_ARGS=1 && shift 1 && continue \
-		|| true
-
-	# if `-u` specified and still parsing options, enable unbuffered output
-	# this is kind of a hack and a bit slow. technically it is buffered,
-	# just one byte at a time
-	[ "$1" = "-u" ] && [ -z "$DONT_PARSE_ARGS" ] \
-		&& dd_() { dd bs=1 "$@"; } && shift 1 && continue \
-		|| true
-
-	# take `-` to mean standard input if still parsing options
-	if [ "$1" = "-" ] && [ -z "$DONT_PARSE_ARGS" ]; then
-		dd_ </dev/stdin 2>/dev/null || exit $?
-		shift 1
-		continue
-	fi
-
-	# print input to output
-	dd_ <"$1" 2>/dev/null || exit $?
-
-	shift 1
-done
-
-# exit successfully
-exit 0
-
- -

-It's worth noting that the dd_ shell function in the above sample that allows for re-aliasing of dd to dd bs=1 could be replaced with a shell variable $DD with the initial value dd and a changed value according to -u of dd bs=1. -However, alias dd="dd bs=1" would not work due to how shell aliases are parsed (ShellCheck). -

- -

-cat doesn't work well as a shell script though. -The script is relatively slow for short files and very slow for very large files (though dd itself should probably be used to copy large files from one medium to another anyway). -This is provided for educational purposes. -

- -

Cited media and further reading

- -

Acknowledgements

    -
  • Content help
      -
    • Miles
    • -
    • WeedSmokingJew
    • -
  • -
  • JavaScript help
      -
    • adamz01h -
    • wiresToGround
    • -
  • -
  • JavaScript libraries used
  • -
  • Sample code help
      -
    • Ando_Bando
    • -
    • Miles
    • -
    • u/oh5nxo
    • -
    • WeedSmokingJew
    • -
  • -
- - - diff --git a/homepage/navigation.m4 b/homepage/navigation.m4 index 83dbdf2..bc3d5fd 100644 --- a/homepage/navigation.m4 +++ b/homepage/navigation.m4 @@ -9,7 +9,6 @@ blah/, thegame/; knowledge: -cat(1), c78, software, X200T; diff --git a/homepage/software/index.m4 b/homepage/software/index.m4 index f4edc1d..fc157ee 100644 --- a/homepage/software/index.m4 +++ b/homepage/software/index.m4 @@ -33,20 +33,103 @@ Non-standard, proprietary operating systems such as VMS, OS/2, Z/OS, and Microso Plan 9 from Bell Labs, due to its historical relationship with UNIX, will be mentioned when appropriate.

-

UNIX

+

UNIX

  • _hyperlink(`https://en.wikipedia.org/wiki/Unix', `Unix') (Wikipedia)
  • _hyperlink(`https://github.com/dspinellis/unix-history-repo', `unix-history-repo') (GitHub)
-

POSIX

+

POSIX

+
    +
  • _hyperlink(`https://en.wikipedia.org/wiki/POSIX', `POSIX') (Wikipedia)
  • +
+ +

cat(1)

+
    +
  • 4.4BSD-Lite2/usr/src/bin/cat/cat.c
  • +
  • busybox/coreutils/cat.c
  • +
  • _hyperlink(`http://man.cat-v.org/unix-1st/1/cat', `cat(1)') (UNIX v1)
  • +
  • _hyperlink(`https://www.unix.com/man-page/posix/1posix/cat/', `cat(1p)') (The Open Group, 2003)
  • +
  • _hyperlink(`http://harmful.cat-v.org/cat-v/', `UNIX Style, or cat -v Considered Harmful')
  • +
  • _hyperlink(`https://www.unix.com/man-page/posix/1posix/dd/', `dd(1p)') (The Open Group, 2003)
  • +
  • _hyperlink(`https://www.freebsd.org/', `FreeBSD')/_hyperlink(`https://github.com/freebsd/freebsd-src/blob/main/bin/cat/cat.c', `bin/cat/cat.c')
  • +
  • GNU coreutils/src/cat.c
  • +
  • _hyperlink(`https://lyngvaer.no/log/cat-v-history', `The history of why cat -v is considered harmful')
  • +
  • NetBSD/bin/cat/cat.c
  • +
  • Plan 9 from Bell Labs Fourth Edition/sys/src/cmd/cat.c
  • +
  • _hyperlink(`https://www.unix.com/man-page/POSIX/1posix/printf/', `printf(1p)') (The Open Group, 2003)
  • +
  • _hyperlink(`https://harmful.cat-v.org/cat-v/unix_prog_design.pdf', `Program Design in the UNIX Environment')
  • +
  • _hyperlink(`https://www.cs.dartmouth.edu/~doug/reader.pdf', `A Research Unix Reader')
  • +
  • UNIX v7/usr/src/cmd/cat.c
  • +
  • Thanks to Miles and WeedSmokingJew for help with content.
  • +
  • Thanks to adamz01h and wiresToGround for help with the JavaScript that used to accompany this article (to facilitate syntax highlighting in code samples using _hyperlink(`https://highlightjs.org/', `highlight.js')).
  • +
  • Thanks to Ando_Bando, Miles, u/oh5nxo, and WeedSmokingJew for help with the accompanying code samples.
  • +
+

+_code(`cat(1)') is a program that exists to catenate files; to "join" one file at its end to another at its start. +

+

+_code(`cat(1)') was introduced in UNIX's first edition to succeed _code(`pr(1)'), which prints the contents of a single file to the screen. +Most use of _code(`cat(1)') is similar; it's often introduced to beginners as a means to print the contents of a file to the screen, which is why many implementations include options that modify output to make it easier to read on a display. +POSIX requires only _code(`-u') to be implemented, which guarantees output is unbuffered - on some systems output is buffered in 512-byte blocks, which is also the default of _code(`dd(1)'), though most current implementations (busybox, GNU coreutils) don't buffer output regardless. +Various implementations include _code(`-s') to squeeze runs of adjacent blank lines into a single blank line (cat "$@" | sed '/^\s*$/d' is a rough substitute, though it deletes every blank line rather than only the repeated ones), +_code(`-n') to number lines (to which Pike and Kernighan offered awk '{ print NR "\t" $0 }' "$@" as a replacement), +_code(`-b') to number non-blank lines (both cases for which _code(`nl(1)') was later made), +and _code(`-v') to mark invisible characters. +

+

+Additions to _code(`cat(1)') are controversial; Rob Pike and Brian Kernighan explain this in _italic(`Program Design in the UNIX Environment'), the paper that accompanied Rob Pike's presentation _italic(`UNIX Style, or cat -v Considered Harmful') at the 1983 USENIX Summer Conference. +

+ +

+The following shell script is a POSIX-compliant implementation of _code(`cat(1)'): +

+ +
+#!/bin/sh
+set -e
+
+DD=dd
+
+# usage with 0 arguments - print standard input to standard output
+if test -z "$1"; then
+	dd 2>/dev/null
+	exit $?
+fi
+
+while test -n "$1"; do
+	# Parse options
+
+	if test -z "$DONT_PARSE_ARGS"
+		then case "$1" in
+		--)
+			DONT_PARSE_ARGS=1
+			shift; continue; ;;
+		-u)
+			DD="dd bs=1"
+			shift; continue; ;;
+		-)
+			$DD </dev/stdin 2>/dev/null
+			shift; continue; ;;
+		esac
+	fi
+
+	# Print input to output.
+	$DD <"$1" 2>/dev/null
+
+	shift
+done
+
+exit 0
+

echo(1)

  • _hyperlink(`https://en.wikipedia.org/wiki/Echo_(command)', `echo') (Wikipedia)
  • _hyperlink(`https://man7.org/linux/man-pages/man1/echo.1p.html', `echo(1p)') (man7)
  • NetBSD/bin/echo/echo.sh
  • -
  • Variations in echo implementations
  • +
  • _hyperlink(`#unix', `UNIX v5')/usr/source/s1/echo.c
  • +
  • _hyperlink(`https://www.in-ulm.de/~mascheck/various/echo+printf/', `Variations in echo implementations')

Don't use _code(`echo(1)'), use _code(`printf(1)'). @@ -63,12 +146,13 @@ The following is an implementation of _code(`echo(1)') in the C programming lang int main(int argc, char *argv[]) { int i; for(i = 1; ; ) { - printf("%s", argv[i]); - ++i; - if(i == argc) { - putchar('\n'); + if(i >= argc) break; - } else + printf("%s", argv[i]); + ++i; + if(i == argc) + putchar('\n'); + else putchar(' '); } return 0; @@ -79,6 +163,9 @@ The following is an implementation of _code(`echo(1)') in shell.

 while :; do
+	if test -z "$1"
+		then break
+	fi
 	printf "%s" "$1"
 	`shift'
 	if test -z "$1"; then
@@ -105,6 +192,14 @@ A traditional _code(`ed(1)') implementation is in plan9ports.
 I'm pretty sure some later UNIX-based OSes doubled the _code(`ed(1)') buffers, there's pretty much no downside to doing so in the modern era but it should be very easy to do yourself if it hasn't already been done (just double some of the array sizes in the beginning of _code(`ed.c')).
 

+

mkfifo(1)

+
    +
  • _hyperlink(`https://man.netbsd.org/mkfifo.1', `mkfifo(1)') (NetBSD)
  • +
  • _hyperlink(`https://man.netbsd.org/mkfifo.2', `mkfifo(2)') (NetBSD)
  • +
  • _hyperlink(`https://dev.to/0xbf/use-mkfifo-to-create-named-pipe-linux-tips-5bbk', `Use mkfifo to create named pipe')
  • +
  • _hyperlink(`https://unix.stackexchange.com/questions/433488/what-is-the-purpose-of-using-a-fifo-vs-a-temporary-file-or-a-pipe', `What is the purpose of using a FIFO vs a temporary file or a pipe?')
  • +
+

true(1)