about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2022-08-24 16:42:27 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2022-08-24 16:42:27 +0300
commit f3f5210cfac77479e6cf6ca2f21f8f08c012aa85 (patch)
tree a537db0a38eebba9d37a673d4906cf318a1187ff
parent 66a543eeadeed25d152a53c67ff0dd25ac017730 (diff)
download one-true-awk-f3f5210cfac77479e6cf6ca2f21f8f08c012aa85.tar.gz
Latest additions from BWK.
-rw-r--r-- .gitignore 2
-rw-r--r-- awk.h 3
-rwxr-xr-x[-rw-r--r--] b.c 9
-rwxr-xr-x[-rw-r--r--] lib.c 62
-rw-r--r-- main.c 2
-rwxr-xr-x[-rw-r--r--] run.c 1
6 files changed, 71 insertions, 8 deletions
diff --git a/.gitignore b/.gitignore
index 2e74ff7..96f6c40 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ proctab.c
ytab*
testdir/foo*
testdir/temp*
+*.pdf
+*.mail
diff --git a/awk.h b/awk.h
index 186b1c7..9a802c8 100644
--- a/awk.h
+++ b/awk.h
@@ -225,7 +225,8 @@ extern int pairstack[], paircnt;
/* structures used by regular expression matching machinery, mostly b.c: */
-#define NCHARS (256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
+#define NCHARS (1256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
+ /* BUG: some overflows (caught) if we use 256 */
/* watch out in match(), etc. */
#define HAT (NCHARS+2) /* matches ^ in regular expr */
#define NSTATES 32
diff --git a/b.c b/b.c
index bdf8907..77aa838 100644..100755
--- a/b.c
+++ b/b.c
@@ -441,8 +441,13 @@ int *cclenter(const char *argp) /* add a character class */
continue;
}
}
- if (!adjbuf((char **) &buf, &bufsz, bp-buf+8, 100, (char **) &bp, "cclenter2"))
- FATAL("out of space for character class [%.10s...] 3", p);
+ if (i >= bufsz) {
+ bufsz *= 2;
+ buf = (int *) realloc(buf, bufsz * sizeof(int));
+ if (buf == NULL)
+ FATAL("out of space for character class [%.10s...] 2", p);
+ bp = buf + i;
+ }
*bp++ = c;
i++;
}
diff --git a/lib.c b/lib.c
index af23554..66bc55f 100644..100755
--- a/lib.c
+++ b/lib.c
@@ -301,6 +301,9 @@ void setclvar(char *s) /* set var=value from s */
Cell *q;
double result;
+/* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
+/* I don't understand why it was changed. */
+
for (p=s; *p != '='; p++)
;
e = p;
@@ -324,7 +327,7 @@ void fldbld(void) /* create fields from current record */
/* possibly with a final trailing \0 not associated with any field */
char *r, *fr, sep;
Cell *p;
- int i, j, n;
+ int i, j, n, quote;
if (donefld)
return;
@@ -363,6 +366,57 @@ void fldbld(void) /* create fields from current record */
*fr++ = 0;
}
*fr = 0;
+ } else if ((sep = *inputFS) == ',') { /* CSV: handle quotes, \x, etc. */
+ for (i = 0; *r != '\0'; ) {
+ i++;
+ if (i > nfields)
+ growfldtab(i);
+ if (freeable(fldtab[i]))
+ xfree(fldtab[i]->sval);
+ fldtab[i]->sval = fr;
+ fldtab[i]->tval = FLD | STR | DONTFREE;
+
+/* printf("fldbld 1 [%s] [%d:] [%s]\n", r, i, fr); */
+
+ if (*r == '"' /* || *r == '\'' */ ) { /* "..."; do not include '...' */
+ quote = *r++;
+ for ( ; *r != '\0'; ) {
+/* printf("fldbld 2 [%s]\n", r); */
+ if (*r == quote && r[1] != '\0' && r[1] == quote) {
+ r += 2; /* doubled quote */
+ *fr++ = quote;
+ } else if (*r == '\\') { /* BUG: off end? */
+ r++; /* backslashes inside "..." ??? */
+ *fr++ = *r++;
+ } else if (*r == quote && (r[1] == '\0' || r[1] == ',')) {
+ r++;
+ if (*r == ',')
+ r++;
+ break;
+ } else {
+ *fr++ = *r++;
+ }
+ }
+ *fr++ = 0;
+ continue;
+ }
+
+ /* unquoted field */
+ for ( ; *r != '\0'; ) {
+ if (*r == ',') { /* bare comma ends field */
+ r++;
+ *fr++ = 0;
+ break;
+ } else if (*r == '\\') { /* BUG: could walk off end */
+ r++;
+ *fr++ = *r++;
+ } else {
+ *fr++ = *r++;
+ }
+ }
+ *fr++ = 0;
+ }
+ *fr = 0;
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
for (i = 0; *r != '\0'; r += n) {
char buf[MB_LEN_MAX + 1];
@@ -797,11 +851,11 @@ bool is_valid_number(const char *s, bool trailing_stuff_ok,
while (isspace(*s))
s++;
- // no hex floating point, sorry
+ /* no hex floating point, sorry */
if (s[0] == '0' && tolower(s[1]) == 'x')
return false;
- // allow +nan, -nan, +inf, -inf, any other letter, no
+ /* allow +nan, -nan, +inf, -inf, any other letter, no */
if (s[0] == '+' || s[0] == '-') {
is_nan = (strncasecmp(s+1, "nan", 3) == 0);
is_inf = (strncasecmp(s+1, "inf", 3) == 0);
@@ -835,7 +889,7 @@ convert:
if (no_trailing != NULL)
*no_trailing = (*ep == '\0');
- // return true if found the end, or trailing stuff is allowed
+ /* return true if found the end, or trailing stuff is allowed */
retval = *ep == '\0' || trailing_stuff_ok;
return retval;
diff --git a/main.c b/main.c
index 187ba99..7760de1 100644
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20220530";
+const char *version = "version 20220818";
#define DEBUG
#include <stdio.h>
diff --git a/run.c b/run.c
index 2b25dda..556fe7d 100644..100755
--- a/run.c
+++ b/run.c
@@ -1454,6 +1454,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
FATAL("illegal type of split");
sep = *fs;
ap = execute(a[1]); /* array name */
+/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
freesymtab(ap);
DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
ap->tval &= ~STR;