From 4c36ec18070106d4a1690fedb3d24eeef837ace3 Mon Sep 17 00:00:00 2001 From: DTB Date: Wed, 31 Jul 2024 17:28:26 -0600 Subject: [PATCH] pg(1): more work on strtok_quoted --- src/pg.c | 104 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/src/pg.c b/src/pg.c index 72cdb2a..14c8da1 100644 --- a/src/pg.c +++ b/src/pg.c @@ -23,7 +23,7 @@ * fprintf(3), fputc(3), perror(3), stderr, stdin, stdout, * EOF, FILE, NULL */ #include <stddef.h> /* size_t */ -#include <string.h> /* strchr(3), strcmp(3), strerror(3), strtok(3) */ +#include <string.h> /* strchr(3), strcmp(3) */ #include <unistd.h> /* getopt(3) */ /* Commands start with cmd_. They take an argc and NULL-terminated argv, like @@ -36,6 +36,14 @@ static char *whitespace = " \n\r\t\v"; +//static +struct Tube { + char *name; // command line + FILE *in; // process stdin + FILE *out; // process stdout + size_t index; // in pipeline +}; + static struct { size_t quantity; enum { LINES = 0, BYTES = 1 } type; @@ -45,57 +53,69 @@ static char *prompt = ": "; static char *program_name = "pg"; static char * -permute_out(char *str, size_t i) { +permute_out(char *s, size_t i) { for ( ; - str[i - 1] != '\0'; - str[i - 1] = str[i], ++i + s[i] != '\0'; + s[i] = s[i + 1], ++i ); - return str; + return s; } /* strtok(3p), but supports double-quotes and escapes (but only for escaping - * quotes). UTF-8 is safe only in str. Unmatched quotes in str are considered - * literal. The behavior of strtok_quoted when '"' or '\\' are in sep is - * undefined. */ -/* TODO: Seems to only ever return NULL. */ + * quotes). Unmatched quotes in str are considered literal. The behavior of + * strtok_quoted when '\'', '"', or '\\' are in sep is undefined. Use of UTF-8 + * separators with strtok_quoted is undefined. 
*/ static char * strtok_quoted(char *str, char *sep) { static char *s; if (str != NULL) { s = str; } - while (strchr(sep, *s) == NULL) { - if(*++s == '\0') { return NULL; } /* no remaining except seps */ + while (strchr(sep, *s) != NULL) { // skip beginning whitespace + if(*++s == '\0') { return NULL; } // no remaining except seps } { - bool in_escape = 0; - int in_quotes = -1; /* index of quote, or -1 if none */ + bool in_escape = 0; // previous char was '\\' + char quote = '\0'; // quotation mark used, or '\0' if none - for (size_t i = 0; s[i] != '\0'; ++i) + for (int i = 0; s[i] != '\0'; ++i) switch (s[i]) { case '\\': - /* if literal \\, permute out a backslash */ - if (in_escape) { permute_out(s, i--); } + // if literal "\\", permute out a backslash + if (in_escape) { (void)permute_out(s, i--); } in_escape = !in_escape; break; - case '"': + case '\'': case '"': if (in_escape) { // \" s[i] = s[i - 1]; (void)permute_out(s, i--); // permute out backslash - } else if (in_quotes != -1) { - (void)permute_out(s, in_quotes); --i; // first " - (void)permute_out(s, i--); // second " - in_quotes = -1; - } else { in_quotes = i; } + } else if (s[i] == quote) { + quote = '\0'; + (void)permute_out(s, i--); // second quote + } else { + quote = s[i]; + if (strchr(&s[i + 1], quote) != NULL) { // has a match + (void)permute_out(s, i--); // permute out lquote + } + } break; case '\0': return s; default: - if (!in_escape && strchr(sep, s[i]) != NULL) { - s[i] = '\0'; - return s; + if (!in_escape + && quote == '\0' + && (strchr(sep, s[i]) != NULL || s[i] == '\0')) { + char *t; // start of current token + + t = s; + s = s[i] != '\0' + ? &t[i + 1] // store start of next token, + : &t[i]; // or the address of the nul if found + s[i] = '\0'; // NUL terminate current token + + return t; } } } @@ -173,19 +193,16 @@ builtins[] = { * command line. 
*/ static int cmdline_exec(struct CmdMap *map, char *cmdline, char **envp) { - /* Command line word splitting is naive and based on whitespace ONLY; no - * fancy quoting or escaping here. Adding that would (ideally) entail - * replacing strtok(3) with something specific to this task. */ static int argc; static char *argv[ARGV_MAX]; - if ((argv[(argc = 0)] = strtok(cmdline, whitespace)) == NULL) { + if ((argv[(argc = 0)] = strtok_quoted(cmdline, whitespace)) == NULL) { while (cmdline[0] != '\0') { cmdline = &cmdline[1]; } argv[argc] = cmdline; argv[++argc] = NULL; } else { while ( - (argv[++argc] = strtok(NULL, whitespace)) != NULL + (argv[++argc] = strtok_quoted(NULL, whitespace)) != NULL && argc < ARGV_MAX ); } @@ -231,33 +248,28 @@ int main(int argc, char *argv[]) { } } - if(argc > optind) { return usage(program_name); } + if (argc > optind) { return usage(program_name); } if ((t = fopen("/dev/tty", "rb")) == NULL) { - (void)fprintf( - stderr, - "%s: /dev/tty: %s\n", - program_name, - strerror(errno) - ); + perror(program_name); return EX_OSERR; } for (;;) { - if ( /* prompt and receive */ - fputs(prompt, stderr) == EOF - || fgets((char *)cmd, (sizeof cmd) / (sizeof *cmd), t) == NULL - ) { return ioerr(program_name); } + if (fputs(prompt, stderr) == EOF) { return ioerr(program_name); } - if (strchr((char *)cmd, '\n') == NULL) { /* fast-forward stream */ - int c; + // if the line... + if (fgets((char *)cmd, (sizeof cmd) / (sizeof *cmd), t) != NULL) { + if (strchr((char *)cmd, '\n') == NULL) { // was taken incompletely + int c; - while ((c = fgetc(t)) != '\n') { - if (c == EOF) { break; } + while ((c = fgetc(t)) != '\n') { // ...fast-forward stream + if (c == EOF) { break; } + } } - } + } else { fputc('\n', stdout); } // EOF at start of line; finish prompt if (feof(t)) { return EX_OK; }