diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2022-08-24 16:42:27 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2022-08-24 16:42:27 +0300 |
commit | f3f5210cfac77479e6cf6ca2f21f8f08c012aa85 (patch) | |
tree | a537db0a38eebba9d37a673d4906cf318a1187ff | |
parent | 66a543eeadeed25d152a53c67ff0dd25ac017730 (diff) | |
download | one-true-awk-f3f5210cfac77479e6cf6ca2f21f8f08c012aa85.tar.gz |
Latest additions from BWK.
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | awk.h | 3 | ||||
-rwxr-xr-x[-rw-r--r--] | b.c | 9 | ||||
-rwxr-xr-x[-rw-r--r--] | lib.c | 62 | ||||
-rw-r--r-- | main.c | 2 | ||||
-rwxr-xr-x[-rw-r--r--] | run.c | 1 |
6 files changed, 71 insertions, 8 deletions
@@ -5,3 +5,5 @@ proctab.c
 ytab*
 testdir/foo*
 testdir/temp*
+*.pdf
+*.mail
@@ -225,7 +225,8 @@ extern int pairstack[], paircnt;
 
 /* structures used by regular expression matching machinery, mostly b.c: */
 
-#define NCHARS (256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
+#define NCHARS (1256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
+				/* BUG: some overflows (caught) if we use 256 */
 				/* watch out in match(), etc. */
 #define HAT (NCHARS+2) /* matches ^ in regular expr */
 #define NSTATES 32
@@ -441,8 +441,13 @@ int *cclenter(const char *argp) /* add a character class */
 				continue;
 			}
 		}
-		if (!adjbuf((char **) &buf, &bufsz, bp-buf+8, 100, (char **) &bp, "cclenter2"))
-			FATAL("out of space for character class [%.10s...] 3", p);
+		if (i >= bufsz) {
+			bufsz *= 2;
+			buf = (int *) realloc(buf, bufsz * sizeof(int));
+			if (buf == NULL)
+				FATAL("out of space for character class [%.10s...] 2", p);
+			bp = buf + i;
+		}
 		*bp++ = c;
 		i++;
 	}
@@ -301,6 +301,9 @@ void setclvar(char *s) /* set var=value from s */
 	Cell *q;
 	double result;
 
+/* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
+/* I don't understand why it was changed. */
+
 	for (p=s; *p != '='; p++)
 		;
 	e = p;
@@ -324,7 +327,7 @@ void fldbld(void) /* create fields from current record */
 	/* possibly with a final trailing \0 not associated with any field */
 	char *r, *fr, sep;
 	Cell *p;
-	int i, j, n;
+	int i, j, n, quote;
 
 	if (donefld)
 		return;
@@ -363,6 +366,57 @@ void fldbld(void) /* create fields from current record */
 			*fr++ = 0;
 		}
 		*fr = 0;
+	} else if ((sep = *inputFS) == ',') { /* CSV: handle quotes, \x, etc. */
+		for (i = 0; *r != '\0'; ) {
+			i++;
+			if (i > nfields)
+				growfldtab(i);
+			if (freeable(fldtab[i]))
+				xfree(fldtab[i]->sval);
+			fldtab[i]->sval = fr;
+			fldtab[i]->tval = FLD | STR | DONTFREE;
+
+/* printf("fldbld 1 [%s] [%d:] [%s]\n", r, i, fr); */
+
+			if (*r == '"' /* || *r == '\'' */ ) { /* "..."; do not include '...' */
+				quote = *r++;
+				for ( ; *r != '\0'; ) {
+/* printf("fldbld 2 [%s]\n", r); */
+					if (*r == quote && r[1] != '\0' && r[1] == quote) {
+						r += 2; /* doubled quote */
+						*fr++ = quote;
+					} else if (*r == '\\') { /* BUG: off end? */
+						r++; /* backslashes inside "..." ??? */
+						*fr++ = *r++;
+					} else if (*r == quote && (r[1] == '\0' || r[1] == ',')) {
+						r++;
+						if (*r == ',')
+							r++;
+						break;
+					} else {
+						*fr++ = *r++;
+					}
+				}
+				*fr++ = 0;
+				continue;
+			}
+
+			/* unquoted field */
+			for ( ; *r != '\0'; ) {
+				if (*r == ',') { /* bare comma ends field */
+					r++;
+					*fr++ = 0;
+					break;
+				} else if (*r == '\\') { /* BUG: could walk off end */
+					r++;
+					*fr++ = *r++;
+				} else {
+					*fr++ = *r++;
+				}
+			}
+			*fr++ = 0;
+		}
+		*fr = 0;
 	} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
 		for (i = 0; *r != '\0'; r += n) {
 			char buf[MB_LEN_MAX + 1];
@@ -797,11 +851,11 @@ bool is_valid_number(const char *s, bool trailing_stuff_ok,
 	while (isspace(*s))
 		s++;
 
-	// no hex floating point, sorry
+	/* no hex floating point, sorry */
 	if (s[0] == '0' && tolower(s[1]) == 'x')
 		return false;
 
-	// allow +nan, -nan, +inf, -inf, any other letter, no
+	/* allow +nan, -nan, +inf, -inf, any other letter, no */
 	if (s[0] == '+' || s[0] == '-') {
 		is_nan = (strncasecmp(s+1, "nan", 3) == 0);
 		is_inf = (strncasecmp(s+1, "inf", 3) == 0);
@@ -835,7 +889,7 @@ convert:
 	if (no_trailing != NULL)
 		*no_trailing = (*ep == '\0');
 
-	// return true if found the end, or trailing stuff is allowed
+	/* return true if found the end, or trailing stuff is allowed */
 	retval = *ep == '\0' || trailing_stuff_ok;
 
 	return retval;
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 THIS SOFTWARE.
 ****************************************************************/
 
-const char *version = "version 20220530";
+const char *version = "version 20220818";
 
 #define DEBUG
 #include <stdio.h>
@@ -1454,6 +1454,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
 		FATAL("illegal type of split");
 	sep = *fs;
 	ap = execute(a[1]); /* array name */
+/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
 	freesymtab(ap);
 	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
 	ap->tval &= ~STR;
|