pg(1): more work on strtok_quoted

This commit is contained in:
dtb 2024-07-31 17:28:26 -06:00
parent c01af89e52
commit 4c36ec1807
Signed by: trinity
GPG Key ID: 34C0543BBB6AF81B

View File

@ -23,7 +23,7 @@
* fprintf(3), fputc(3), perror(3), stderr, stdin, stdout,
* EOF, FILE, NULL */
#include <stdlib.h> /* size_t */
#include <string.h> /* strchr(3), strcmp(3), strerror(3), strtok(3) */
#include <string.h> /* strchr(3), strcmp(3) */
#include <unistd.h> /* getopt(3) */
/* Commands start with cmd_. They take an argc and NULL-terminated argv, like
@ -36,6 +36,14 @@
static char *whitespace = " \n\r\t\v";
//static
/* Bookkeeping for one process, per the field comments below.
 * NOTE(review): nothing in the visible hunks uses this struct yet, and the
 * commented-out `static` above suggests work in progress; confirm intent. */
struct Tube {
char *name; // command line
FILE *in; // process stdin
FILE *out; // process stdout
size_t index; // in pipeline
};
static struct {
size_t quantity;
enum { LINES = 0, BYTES = 1 } type;
@ -45,57 +53,69 @@ static char *prompt = ": ";
static char *program_name = "pg";
static char *
permute_out(char *str, size_t i) {
permute_out(char *s, size_t i) {
for (
;
str[i - 1] != '\0';
str[i - 1] = str[i], ++i
s[i] != '\0';
s[i] = s[i + 1], ++i
);
return str;
return s;
}
/* strtok(3p), but supports single/double quotes and escapes (only for escaping
* quotes). UTF-8 is safe only in str. Unmatched quotes in str are considered
* literal. The behavior of strtok_quoted when '"' or '\\' are in sep is
* undefined. */
/* TODO: Seems to only ever return NULL. */
* quotes). Unmatched quotes in str are considered literal. The behavior of
* strtok_quoted when '\'', '"', or '\\' are in sep is undefined. Use of UTF-8
* separators with strtok_quoted is undefined. */
static char *
strtok_quoted(char *str, char *sep) {
static char *s;
if (str != NULL) { s = str; }
while (strchr(sep, *s) == NULL) {
if(*++s == '\0') { return NULL; } /* no remaining except seps */
while (strchr(sep, *s) != NULL) { // skip beginning whitespace
if(*++s == '\0') { return NULL; } // no remaining except seps
}
{
bool in_escape = 0;
int in_quotes = -1; /* index of quote, or -1 if none */
bool in_escape = 0; // previous char was '\\'
char quote = '\0'; // quotation mark used, or '\0' if none
for (size_t i = 0; s[i] != '\0'; ++i)
for (int i = 0; s[i] != '\0'; ++i)
switch (s[i]) {
case '\\':
/* if literal \\, permute out a backslash */
if (in_escape) { permute_out(s, i--); }
// if literal "\\", permute out a backslash
if (in_escape) { (void)permute_out(s, i--); }
in_escape = !in_escape;
break;
case '"':
case '\'': case '"':
if (in_escape) { // \"
s[i] = s[i - 1];
(void)permute_out(s, i--); // permute out backslash
} else if (in_quotes != -1) {
(void)permute_out(s, in_quotes); --i; // first "
(void)permute_out(s, i--); // second "
in_quotes = -1;
} else { in_quotes = i; }
} else if (s[i] == quote) {
quote = '\0';
(void)permute_out(s, i--); // second quote
} else {
quote = s[i];
if (strchr(&s[i + 1], quote) != NULL) { // has a match
(void)permute_out(s, i--); // permute out lquote
}
}
break;
case '\0': return s;
default:
if (!in_escape && strchr(sep, s[i]) != NULL) {
s[i] = '\0';
return s;
if (!in_escape
&& quote == '\0'
&& (strchr(sep, s[i]) != NULL || s[i] == '\0')) {
char *t; // start of current token
t = s;
s = s[i] != '\0'
? &t[i + 1] // store start of next token,
: &t[i]; // or the address of the nul if found
s[i] = '\0'; // NUL terminate current token
return t;
}
}
}
@ -173,19 +193,16 @@ builtins[] = {
* command line. */
static int
cmdline_exec(struct CmdMap *map, char *cmdline, char **envp) {
/* Command line word splitting is naive and based on whitespace ONLY; no
* fancy quoting or escaping here. Adding that would (ideally) entail
* replacing strtok(3) with something specific to this task. */
static int argc;
static char *argv[ARGV_MAX];
if ((argv[(argc = 0)] = strtok(cmdline, whitespace)) == NULL) {
if ((argv[(argc = 0)] = strtok_quoted(cmdline, whitespace)) == NULL) {
while (cmdline[0] != '\0') { cmdline = &cmdline[1]; }
argv[argc] = cmdline;
argv[++argc] = NULL;
} else {
while (
(argv[++argc] = strtok(NULL, whitespace)) != NULL
(argv[++argc] = strtok_quoted(NULL, whitespace)) != NULL
&& argc < ARGV_MAX
);
}
@ -231,33 +248,28 @@ int main(int argc, char *argv[]) {
}
}
if(argc > optind) { return usage(program_name); }
if (argc > optind) { return usage(program_name); }
if ((t = fopen("/dev/tty", "rb")) == NULL) {
(void)fprintf(
stderr,
"%s: /dev/tty: %s\n",
program_name,
strerror(errno)
);
perror(program_name);
return EX_OSERR;
}
for (;;) {
if ( /* prompt and receive */
fputs(prompt, stderr) == EOF
|| fgets((char *)cmd, (sizeof cmd) / (sizeof *cmd), t) == NULL
) { return ioerr(program_name); }
if (fputs(prompt, stderr) == EOF) { return ioerr(program_name); }
if (strchr((char *)cmd, '\n') == NULL) { /* fast-forward stream */
// if the line...
if (fgets((char *)cmd, (sizeof cmd) / (sizeof *cmd), t) != NULL) {
if (strchr((char *)cmd, '\n') == NULL) { // was taken incompletely
int c;
while ((c = fgetc(t)) != '\n') {
while ((c = fgetc(t)) != '\n') { // ...fast-forward stream
if (c == EOF) { break; }
}
}
} else { fputc('\n', stdout); } // EOF at start of line; finish prompt
if (feof(t)) { return EX_OK; }