diff options
author | Haibo Huang <hhb@google.com> | 2021-01-06 20:07:49 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2021-01-06 20:07:49 +0000 |
commit | 216471a6dee31ecdd61186b2a1bb1dae42de26b9 (patch) | |
tree | a687cc9f4a26bcbd6b4613ac3bc013c8ceccf389 | |
parent | 8936461438b57a41e0de5ab10c818e81dbd90301 (diff) | |
parent | 2b34dbd4fc9a0ff73557d2a3c34de0d09fc3185b (diff) | |
download | one-true-awk-216471a6dee31ecdd61186b2a1bb1dae42de26b9.tar.gz |
Upgrade one-true-awk to 7d1848cfa6b7b3bb9a7c851339626982198a57bc am: a451bd9a44 am: 9f8a9cd5c8 am: 2b34dbd4fc
Original change: https://android-review.googlesource.com/c/platform/external/one-true-awk/+/1541457
MUST ONLY BE SUBMITTED BY AUTOMERGER
Change-Id: I104ca1ea4ef83944bbe83a668c0043c75e7dd966
-rw-r--r-- | FIXES | 23 | ||||
-rw-r--r-- | METADATA | 8 | ||||
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | awk.1 | 53 | ||||
-rw-r--r-- | bugs-fixed/inf-nan-torture.awk | 4 | ||||
-rw-r--r-- | bugs-fixed/inf-nan-torture.in | 1 | ||||
-rw-r--r-- | bugs-fixed/inf-nan-torture.ok | 16 | ||||
-rw-r--r-- | lex.c | 7 | ||||
-rw-r--r-- | lib.c | 97 | ||||
-rw-r--r-- | main.c | 2 | ||||
-rw-r--r-- | proto.h | 4 | ||||
-rw-r--r-- | run.c | 32 | ||||
-rw-r--r-- | tran.c | 39 |
13 files changed, 234 insertions, 58 deletions
@@ -25,6 +25,29 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +December 18, 2020: + Fix problems converting inf and NaN values in lib.c:is_valid_number. + Enhance number to string conversion to do the right thing for + NaN and inf values. Things are now pretty much the same as in + gawk. (Found a gawk bug while we're at it.) Added a torture + test for these values. Thanks to Arnold Robbins. Allows closing + of PR #101. + +December 15, 2020: + Merge PR #99, which gets the right header for strcasecmp. + Thanks to GitHub user michaelforney. + +December 8, 2020: + Merge PR #98: Disallow hex data. Allow only +nan, -nan, + +inf, -inf (case independent) to give NaN and infinity values. + Improve things so that string to double conversion is only + done once, yielding something of a speedup. This obviates + PR #95. Thanks to Arnold Robbins. + +December 3, 2020: + Fix to argument parsing to avoid printing spurious newlines. + Thanks to Todd Miller. Merges PR #97. + October 13, 2020: Add casts before all the calls to malloc/calloc/realloc in order to get it to compile with g++. Thanks to Arnold Robbins. @@ -5,11 +5,11 @@ third_party { type: GIT value: "https://github.com/onetrueawk/awk.git" } - version: "a2a41a8e359dca5295c43dc2c2408094b96c3e0c" + version: "7d1848cfa6b7b3bb9a7c851339626982198a57bc" license_type: NOTICE last_upgrade_date { - year: 2020 - month: 11 - day: 30 + year: 2021 + month: 1 + day: 5 } } @@ -114,10 +114,6 @@ basis. We try to get to issues and pull requests as quickly as we can. Unfortunately, however, keeping this program going is not at the top of our priority list. -_If_ you (yes, you!) are interested in taking over active maintenance of -`awk`, please open an issue to indicate that fact, and give us a little bit of -your background and some idea of your plans and dreams. Thanks! 
- #### Last Updated -Tue Oct 13 20:00:09 IDT 2020 +Fri Dec 25 16:53:34 EST 2020 @@ -577,3 +577,56 @@ The scope rules for variables in functions are a botch; the syntax is worse. .PP Only eight-bit character sets are handled correctly. +.SH UNUSUAL FLOATING-POINT VALUES +.I Awk +was designed before IEEE 754 arithmetic defined Not-A-Number (NaN) +and Infinity values, which are supported by all modern floating-point +hardware. +.PP +Because +.I awk +uses +.IR strtod (3) +and +.IR atof (3) +to convert string values to double-precision floating-point values, +modern C libraries also convert strings starting with +.B inf +and +.B nan +into infinity and NaN values respectively. This led to strange results, +with something like this: +.PP +.EX +.nf +echo nancy | awk '{ print $1 + 0 }' +.fi +.EE +.PP +printing +.B nan +instead of zero. +.PP +.I Awk +now follows GNU AWK, and prefilters string values before attempting +to convert them to numbers, as follows: +.TP +.I "Hexadecimal values" +Hexadecimal values (allowed since C99) convert to zero, as they did +prior to C99. +.TP +.I "NaN values" +The two strings +.B +nan +and +.B \-nan +(case independent) convert to NaN. No others do. +(NaNs can have signs.) +.TP +.I "Infinity values" +The two strings +.B +inf +and +.B \-inf +(case independent) convert to positive and negative infinity, respectively. +No others do. 
diff --git a/bugs-fixed/inf-nan-torture.awk b/bugs-fixed/inf-nan-torture.awk new file mode 100644 index 0000000..8d145f2 --- /dev/null +++ b/bugs-fixed/inf-nan-torture.awk @@ -0,0 +1,4 @@ +{ + for (i = 1; i <= NF; i++) + print i, $i, $i + 0 +} diff --git a/bugs-fixed/inf-nan-torture.in b/bugs-fixed/inf-nan-torture.in new file mode 100644 index 0000000..45dfdc8 --- /dev/null +++ b/bugs-fixed/inf-nan-torture.in @@ -0,0 +1 @@ +-inf -inform inform -nan -nancy nancy -123 0 123 +123 nancy +nancy +nan inform +inform +inf diff --git a/bugs-fixed/inf-nan-torture.ok b/bugs-fixed/inf-nan-torture.ok new file mode 100644 index 0000000..40d3194 --- /dev/null +++ b/bugs-fixed/inf-nan-torture.ok @@ -0,0 +1,16 @@ +1 -inf -inf +2 -inform 0 +3 inform 0 +4 -nan -nan +5 -nancy 0 +6 nancy 0 +7 -123 -123 +8 0 0 +9 123 123 +10 +123 123 +11 nancy 0 +12 +nancy 0 +13 +nan +nan +14 inform 0 +15 +inform 0 +16 +inf +inf @@ -191,7 +191,12 @@ int yylex(void) return word(buf); if (isdigit(c)) { char *cp = tostring(buf); - yylval.cp = setsymtab(buf, cp, atof(buf), CON|NUM, symtab); + double result; + + if (is_number(cp, & result)) + yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab); + else + yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab); free(cp); /* should this also have STR set? */ RET(NUMBER); @@ -25,11 +25,13 @@ THIS SOFTWARE. 
#define DEBUG #include <stdio.h> #include <string.h> +#include <strings.h> #include <ctype.h> #include <errno.h> #include <stdlib.h> #include <stdarg.h> #include <limits.h> +#include <math.h> #include "awk.h" char EMPTY[] = { '\0' }; @@ -181,12 +183,14 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record * innew = false; if (c != 0 || buf[0] != '\0') { /* normal record */ if (isrecord) { + double result; + if (freeable(fldtab[0])) xfree(fldtab[0]->sval); fldtab[0]->sval = buf; /* buf == record */ fldtab[0]->tval = REC | STR | DONTFREE; - if (is_number(fldtab[0]->sval)) { - fldtab[0]->fval = atof(fldtab[0]->sval); + if (is_number(fldtab[0]->sval, & result)) { + fldtab[0]->fval = result; fldtab[0]->tval |= NUM; } } @@ -293,6 +297,7 @@ void setclvar(char *s) /* set var=value from s */ { char *p; Cell *q; + double result; for (p=s; *p != '='; p++) ; @@ -300,8 +305,8 @@ void setclvar(char *s) /* set var=value from s */ p = qstring(p, '\0'); q = setsymtab(s, p, 0.0, STR, symtab); setsval(q, p); - if (is_number(q->sval)) { - q->fval = atof(q->sval); + if (is_number(q->sval, & result)) { + q->fval = result; q->tval |= NUM; } DPRINTF("command line set %s to |%s|\n", s, p); @@ -402,9 +407,11 @@ void fldbld(void) /* create fields from current record */ lastfld = i; donefld = true; for (j = 1; j <= lastfld; j++) { + double result; + p = fldtab[j]; - if(is_number(p->sval)) { - p->fval = atof(p->sval); + if(is_number(p->sval, & result)) { + p->fval = result; p->tval |= NUM; } } @@ -669,12 +676,11 @@ void error() fprintf(stderr, " source line number %d", curnode->lineno); else if (lineno) fprintf(stderr, " source line number %d", lineno); + if (compile_time == COMPILING && cursource() != NULL) + fprintf(stderr, " source file %s", cursource()); + fprintf(stderr, "\n"); + eprint(); } - - if (compile_time == COMPILING && cursource() != NULL) - fprintf(stderr, " source file %s", cursource()); - fprintf(stderr, "\n"); - eprint(); } void eprint(void) /* try 
to print context around error */ @@ -757,24 +763,69 @@ int isclvar(const char *s) /* is s of form var=something ? */ /* strtod is supposed to be a proper test of what's a valid number */ /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */ /* wrong: violates 4.10.1.4 of ansi C standard */ + /* well, not quite. As of C99, hex floating point is allowed. so this is - * a bit of a mess. + * a bit of a mess. We work around the mess by checking for a hexadecimal + * value and disallowing it. Similarly, we now follow gawk and allow only + * +nan, -nan, +inf, and -inf for NaN and infinity values. */ -#include <math.h> -int is_number(const char *s) +/* + * This routine now has a more complicated interface, the main point + * being to avoid the double conversion of a string to double, and + * also to convey out, if requested, the information that the numeric + * value was a leading string or is all of the string. The latter bit + * is used in getfval(). + */ + +bool is_valid_number(const char *s, bool trailing_stuff_ok, + bool *no_trailing, double *result) { double r; char *ep; + bool retval = false; + bool is_nan = false; + bool is_inf = false; + + if (no_trailing) + *no_trailing = false; + + while (isspace(*s)) + s++; + + // no hex floating point, sorry + if (s[0] == '0' && tolower(s[1]) == 'x') + return false; + + // allow +nan, -nan, +inf, -inf, any other letter, no + if (s[0] == '+' || s[0] == '-') { + is_nan = (strncasecmp(s+1, "nan", 3) == 0); + is_inf = (strncasecmp(s+1, "inf", 3) == 0); + if ((is_nan || is_inf) + && (isspace(s[4]) || s[4] == '\0')) + goto convert; + else if (! isdigit(s[1]) && s[1] != '.') + return false; + } + else if (! isdigit(s[0]) && s[0] != '.') + return false; + +convert: errno = 0; r = strtod(s, &ep); - if (ep == s || r == HUGE_VAL || errno == ERANGE) - return 0; - /* allow \r as well. windows files aren't going to go away. 
*/ - while (*ep == ' ' || *ep == '\t' || *ep == '\n' || *ep == '\r') - ep++; - if (*ep == '\0') - return 1; - else - return 0; + if (ep == s || errno == ERANGE) + return false; + + if (isnan(r) && s[0] == '-' && signbit(r) == 0) + r = -r; + + if (result != NULL) + *result = r; + + retval = (isspace(*ep) || *ep == '\0' || trailing_stuff_ok); + + if (no_trailing != NULL) + *no_trailing = (*ep == '\0'); + + return retval; } @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20201013"; +const char *version = "version 20201218"; #define DEBUG #include <stdio.h> @@ -146,7 +146,9 @@ extern void eprint(void); extern void bclass(int); extern double errcheck(double, const char *); extern int isclvar(const char *); -extern int is_number(const char *); +extern bool is_valid_number(const char *s, bool trailing_stuff_ok, + bool *no_trailing, double *result); +#define is_number(s, val) is_valid_number(s, false, NULL, val) extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what); extern void run(Node *); @@ -407,6 +407,7 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */ int bufsize = recsize; int mode; bool newflag; + double result; if ((buf = (char *) malloc(bufsize)) == NULL) FATAL("out of memory in getline"); @@ -429,15 +430,15 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */ } else if (a[0] != NULL) { /* getline var <file */ x = execute(a[0]); setsval(x, buf); - if (is_number(x->sval)) { - x->fval = atof(x->sval); + if (is_number(x->sval, & result)) { + x->fval = result; x->tval |= NUM; } tempfree(x); } else { /* getline <file */ setsval(fldtab[0], buf); - if (is_number(fldtab[0]->sval)) { - fldtab[0]->fval = atof(fldtab[0]->sval); + if (is_number(fldtab[0]->sval, & result)) { + fldtab[0]->fval = result; fldtab[0]->tval |= NUM; } } @@ -448,8 +449,8 @@ Cell 
*awkgetline(Node **a, int n) /* get next line from specific input */ n = getrec(&buf, &bufsize, false); x = execute(a[0]); setsval(x, buf); - if (is_number(x->sval)) { - x->fval = atof(x->sval); + if (is_number(x->sval, & result)) { + x->fval = result; x->tval |= NUM; } tempfree(x); @@ -726,7 +727,7 @@ Cell *indirect(Node **a, int n) /* $( a[0] ) */ if ((Awkfloat)INT_MAX < val) FATAL("trying to access out of range field %s", x->nval); m = (int) val; - if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ + if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ FATAL("illegal field $(%s), name \"%s\"", s, x->nval); /* BUG: can x->nval ever be null??? */ tempfree(x); @@ -1259,6 +1260,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ int sep; char temp, num[50]; int n, tempstat, arg3type; + double result; y = execute(a[0]); /* source string */ origs = s = strdup(getsval(y)); @@ -1303,8 +1305,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ snprintf(num, sizeof(num), "%d", n); temp = *patbeg; setptr(patbeg, '\0'); - if (is_number(s)) - setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); + if (is_number(s, & result)) + setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, s, 0.0, STR, (Array *) ap->sval); setptr(patbeg, temp); @@ -1322,8 +1324,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ } n++; snprintf(num, sizeof(num), "%d", n); - if (is_number(s)) - setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); + if (is_number(s, & result)) + setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, s, 0.0, STR, (Array *) ap->sval); spdone: @@ -1343,8 +1345,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ temp = *s; setptr(s, '\0'); snprintf(num, sizeof(num), "%d", n); - if (is_number(t)) - setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); + if (is_number(t, & result)) + 
setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); @@ -1372,8 +1374,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ temp = *s; setptr(s, '\0'); snprintf(num, sizeof(num), "%d", n); - if (is_number(t)) - setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); + if (is_number(t, & result)) + setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); @@ -129,9 +129,11 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */ free(cp->sval); cp->sval = (char *) ARGVtab; for (i = 0; i < ac; i++) { + double result; + sprintf(temp, "%d", i); - if (is_number(*av)) - setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab); + if (is_number(*av, & result)) + setsymtab(temp, *av, result, STR|NUM, ARGVtab); else setsymtab(temp, *av, 0.0, STR, ARGVtab); av++; @@ -148,13 +150,15 @@ void envinit(char **envp) /* set up ENVIRON variable */ free(cp->sval); cp->sval = (char *) ENVtab; for ( ; *envp; envp++) { + double result; + if ((p = strchr(*envp, '=')) == NULL) continue; if( p == *envp ) /* no left hand side name in env string */ continue; *p++ = 0; /* split into two strings at = */ - if (is_number(p)) - setsymtab(*envp, p, atof(p), STR|NUM, ENVtab); + if (is_number(p, & result)) + setsymtab(*envp, p, result, STR|NUM, ENVtab); else setsymtab(*envp, p, 0.0, STR, ENVtab); p[-1] = '='; /* restore in case env is passed down to a shell */ @@ -399,19 +403,36 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */ else if (isrec(vp) && !donerec) recbld(); if (!isnum(vp)) { /* not a number */ - vp->fval = atof(vp->sval); /* best guess */ - if (is_number(vp->sval) && !(vp->tval&CON)) - vp->tval |= NUM; /* make NUM only sparingly */ + double fval; + bool no_trailing; + + if (is_valid_number(vp->sval, true, & no_trailing, & fval)) { + vp->fval = fval; + if (no_trailing && !(vp->tval&CON)) + vp->tval |= NUM; /* make 
NUM only sparingly */ + } else + vp->fval = 0.0; } DPRINTF("getfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), vp->fval, vp->tval); return(vp->fval); } +static char *get_inf_nan(double d) +{ + if (isinf(d)) { + return (d < 0 ? "-inf" : "+inf"); + } else if (isnan(d)) { + return (signbit(d) != 0 ? "-nan" : "+nan"); + } else + return NULL; +} + static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */ { char s[256]; double dtemp; + char *p; if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "read value of"); @@ -448,7 +469,9 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel { \ if (freeable(vp)) \ xfree(vp->sval); \ - if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ + if ((p = get_inf_nan(vp->fval)) != NULL) \ + strcpy(s, p); \ + else if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ snprintf(s, sizeof (s), "%.30g", vp->fval); \ else \ snprintf(s, sizeof (s), *fmt, vp->fval); \ |