1
0

finally release cat knowledgepage

This commit is contained in:
dtb 2021-08-06 20:48:07 -04:00
parent 7c4d991c5f
commit 85145ba1e2
3 changed files with 154 additions and 22 deletions

View File

@ -71,6 +71,7 @@ I'm vaccinated against COVID-19. Are you?
<A HREF="#privacy">#privacy</A>,
<A HREF="/thegame">/thegame</A>;
<B>knowledge</B>:
<A HREF="/knowledge/cat/">cat(1)</A>;
<A HREF="/knowledge/netbsd/">NetBSD</A>;
<A HREF="/knowledge/true">true(1)</A>;
<B>shilling</B>:

View File

@ -0,0 +1,106 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define STDIN_NAME "<stdin>"
#define STDOUT_NAME "<stdout>"
/* the kinds of failure this program reports; each value selects the
 * diagnostic message printed by error() */
enum error_type{
	FILE_ACCESS = 0,	/* a file could not be opened */
	FILE_CLOSE = 1,		/* a file could not be closed */
	FILE_WRITE = 2		/* a write to a file failed */
};
/* report a fatal error on standard error in the form
 * "<argv0>: <file_name>: <reason>", where the reason is chosen by type, then
 * terminate the program with exit status 1; this function never returns */
void
error(enum error_type type, char *argv0, char *file_name){
	if(type == FILE_ACCESS)
		fprintf(stderr, "%s: %s: cannot open file\n", argv0, file_name);
	else if(type == FILE_CLOSE)
		fprintf(stderr, "%s: %s: cannot close file\n", argv0, file_name);
	else if(type == FILE_WRITE)
		fprintf(stderr, "%s: %s: cannot write to file\n", argv0, file_name);
	/* any other value prints nothing, but the program still exits */
	exit(1);
}
/* copy bytes from input to output until getc reports EOF on input (end of
 * file or a read error); returns 0 once input is exhausted and 1 as soon as a
 * write to output fails */
int
file_copy(FILE *input, FILE *output){
	/* getc returns an int holding either an unsigned char value or EOF.
	 * keeping it in a char is wrong both ways: with a signed char the byte
	 * 0xFF compares equal to EOF and truncates the copy, and with an
	 * unsigned char EOF can never match so the loop never terminates */
	int c;
	while((c = getc(input)) != EOF)
		if(putc(c, output) == EOF)
			return 1;
	return 0;
}
int
main(int argc, char *argv[]){
/* the name of the file being printed (for diagnostics) */
char *file_name;
/* allocate this ahead of time */
char *stdin_file_name = STDIN_NAME;
/* the file pointer of the file being printed */
FILE *input;
/* this will always be stdout */
FILE *output = stdout;
int i;
/* whether or not options are being parsed */
int parsing_opts = 1;
/* usage with 0 arguments - print standard input to standard output */
if(argc == 1 && file_copy(stdin, stdout))
error(FILE_WRITE, argv[0], STDOUT_NAME);
else if(argc == 1)
return 0;
for(i = 1; i < argc; ++i){
/* parsing options */
/* after `--`, interpret `--`, `-`, and `-u` as literal
* filenames */
if(parsing_opts && !strcmp(argv[i], "--")){
parsing_opts = 0;
continue;
/* ignore `-u` if still parsing options */
}else if(parsing_opts && !strcmp(argv[i], "-u"))
continue;
/* take `-` to mean standard input if still parsing options */
else if(parsing_opts && !strcmp(argv[i], "-")){
file_name = stdin_file_name;
input = stdin;
/* non-option; open the file and make sure file_name points to
* the right string */
}else{
file_name = argv[i];
input = fopen(file_name, "r");
if(input == NULL)
error(FILE_ACCESS, argv[0], file_name);
}
/* print input to output */
if(file_copy(input, output))
error(FILE_WRITE, argv[0], STDOUT_NAME);
/* close input file if it's not stdin */
if(input != stdin && fclose(input))
error(FILE_CLOSE, argv[0], file_name);
}
/* exit successfully */
return 0;
}

View File

@ -60,8 +60,8 @@ window.load_highlighting = function(language){
}
</SCRIPT>
<H1>POSIX cat(1) WIP ARTICLE</H1>
<H3>updated 2021-06-21</H3>
<H1>POSIX cat(1)</H1>
<H3>updated 2021-08-06</H3>
<HR ALIGN="left" SIZE="1" WIDTH="25%" />
<P><CODE>cat</CODE> on a POSIX or otherwise UNIX-like system is a program that exists to concatenate files; to “join” one file at its end to another at its start, and output that resulting file to standard output.</P>
<P><CODE>cat</CODE> was introduced in UNIX v1 to supersede the program pr which printed the contents of a single file to the screen (McIlroy); its first-edition manual page described cat as “about the easiest way to print a file” (“cat(1)”). <CODE>cat</CODE>'s modern, typical use is more or less the same; it's often introduced to UNIX beginners as a method to print the contents of a file to the screen, which is why many implementations of <CODE>cat</CODE> include options that are technically redundant - see the often-included <CODE>cat</CODE> <CODE>-e</CODE>, <CODE>-t</CODE>, and <CODE>-v</CODE> that replace the ends of lines, tabs, and invisible characters respectively with printing portrayals (“cat(1p)”).</P>
@ -73,20 +73,31 @@ window.load_highlighting = function(language){
#include &lt;stdlib.h&gt;
#include &lt;string.h&gt;
#define STDIN_NAME "&lt;stdin&gt;"
#define STDOUT_NAME "&lt;stdout&gt;"
#define STDIN_NAME &quot;&lt;stdin&gt;&quot;
#define STDOUT_NAME &quot;&lt;stdout&gt;&quot;
/* these two errors will exit out of the program with an unsuccessful status,
* and print a diagnostic message to standard error */
void
file_access_error(char *argv0, char *file_name){
fprintf(stderr, "%s: %s: cannot open file\n", argv0, file_name);
exit(1);
}
/* these are the predicted errors that could occur */
enum error_type{
FILE_ACCESS,
FILE_CLOSE,
FILE_WRITE
};
/* this is an error function that will print to standard error the error that
* occurred in the program and exit */
void
file_write_error(char *argv0, char *file_name){
fprintf(stderr, "%s: %s: error writing to file\n", argv0, file_name);
error(enum error_type type, char *argv0, char *file_name){
switch(type){
case FILE_ACCESS:
fprintf(stderr, &quot;%s: %s: cannot open file\n&quot;, argv0, file_name);
break;
case FILE_CLOSE:
fprintf(stderr, &quot;%s: %s: cannot close file\n&quot;, argv0, file_name);
break;
case FILE_WRITE:
fprintf(stderr, &quot;%s: %s: cannot write to file\n&quot;, argv0, file_name);
break;
}
exit(1);
}
@ -121,7 +132,7 @@ main(int argc, char *argv[]){
/* usage with 0 arguments - print standard input to standard output */
if(argc == 1 &amp;&amp; file_copy(stdin, stdout))
file_write_error(argv[0], STDOUT_NAME);
error(FILE_WRITE, argv[0], STDOUT_NAME);
else if(argc == 1)
return 0;
@ -130,16 +141,16 @@ main(int argc, char *argv[]){
/* after `--`, interpret `--`, `-`, and `-u` as literal
* filenames */
if(parsing_opts &amp;&amp; !strcmp(argv[i], "--")){
if(parsing_opts &amp;&amp; !strcmp(argv[i], &quot;--&quot;)){
parsing_opts = 0;
continue;
/* ignore `-u` if still parsing options */
}else if(parsing_opts &amp;&amp; !strcmp(argv[i], "-u"))
}else if(parsing_opts &amp;&amp; !strcmp(argv[i], &quot;-u&quot;))
continue;
/* take `-` to mean standard input if still parsing options */
else if(parsing_opts &amp;&amp; !strcmp(argv[i], "-")){
else if(parsing_opts &amp;&amp; !strcmp(argv[i], &quot;-&quot;)){
file_name = stdin_file_name;
input = stdin;
@ -147,14 +158,18 @@ main(int argc, char *argv[]){
* the right string */
}else{
file_name = argv[i];
input = fopen(file_name, "r");
input = fopen(file_name, &quot;r&quot;);
if(input == NULL)
file_access_error(argv[0], file_name);
error(FILE_ACCESS, argv[0], file_name);
}
/* print input to output */
if(file_copy(input, output))
file_write_error(argv[0], STDOUT_NAME);
error(FILE_WRITE, argv[0], STDOUT_NAME);
/* close input file if it's not stdin */
if(input != stdin &amp;&amp; fclose(input))
error(FILE_CLOSE, argv[0], file_name);
}
/* exit successfully */
@ -162,6 +177,9 @@ main(int argc, char *argv[]){
}
</CODE></PRE>
<P>
This is also available at <A HREF="/knowledge/cat/cat.c">/knowledge/cat/cat.c on this website</A> as a plain .c file with which you can toy.
</P>
<P>It's worth noting that this concept of cat as a utility that sequentially prints given files to standard output means <CODE>cat</CODE> can be replaced by a simple shell script that does the same using <CODE>dd</CODE> and <CODE>printf</CODE>; <CODE>cat</CODE> as defined by POSIX is actually totally redundant to other POSIX utilities. Here's the shell script:</P>
<INPUT ID="shell_toggle" ONCLICK="window.load_highlighting('shell');" TYPE="button" VALUE="Press this button to enable syntax highlighting within this code." />
<PRE><CODE CLASS="language-shell" DATA-LANG="shell">
@ -208,9 +226,16 @@ done
exit 0
</CODE></PRE>
<P>It's worth noting that the <CODE>dd_</CODE> shell function in the above sample that allows for re-aliasing of <CODE>dd</CODE> to <CODE>dd bs=1</CODE> could be replaced with a shell variable <CODE>$DD</CODE> with the initial value <CODE>dd</CODE> and a changed value according to <CODE>-u</CODE> of <CODE>dd bs=1</CODE>. However, <CODE>alias dd="dd bs=1"</CODE> would not work due to how shell aliases are parsed (ShellCheck).</P>
<P>
It's worth noting that the <CODE>dd_</CODE> shell function in the above sample that allows for re-aliasing of <CODE>dd</CODE> to <CODE>dd bs=1</CODE> could be replaced with a shell variable <CODE>$DD</CODE> with the initial value <CODE>dd</CODE> and a changed value according to <CODE>-u</CODE> of <CODE>dd bs=1</CODE>.
However, <CODE>alias dd="dd bs=1"</CODE> would not work due to how shell aliases are parsed (ShellCheck).
</P>
<P><CODE>cat</CODE> doesn't work well as a shell script though. The script is relatively slow for short files and very slow for very large files (though <CODE>dd</CODE> itself should probably be used to copy large files from one medium to another anyway). This is provided for educational purposes (though I personally use this shell script in my system PATH; the C implementation provided compiles to a much larger binary using gcc 11.1.0, so this saves a couple kilobytes).</P>
<P>
<CODE>cat</CODE> doesn't work well as a shell script though.
The script is relatively slow for short files and very slow for very large files (though <CODE>dd</CODE> itself should probably be used to copy large files from one medium to another anyway).
This is provided for educational purposes.
</P>
<H2>Cited media and further reading</H2><UL>
<LI>Articles<UL>