aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaibo Huang <hhb@google.com>2021-01-06 20:07:49 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2021-01-06 20:07:49 +0000
commit216471a6dee31ecdd61186b2a1bb1dae42de26b9 (patch)
treea687cc9f4a26bcbd6b4613ac3bc013c8ceccf389
parent8936461438b57a41e0de5ab10c818e81dbd90301 (diff)
parent2b34dbd4fc9a0ff73557d2a3c34de0d09fc3185b (diff)
downloadone-true-awk-216471a6dee31ecdd61186b2a1bb1dae42de26b9.tar.gz
Upgrade one-true-awk to 7d1848cfa6b7b3bb9a7c851339626982198a57bc am: a451bd9a44 am: 9f8a9cd5c8 am: 2b34dbd4fc
Original change: https://android-review.googlesource.com/c/platform/external/one-true-awk/+/1541457 MUST ONLY BE SUBMITTED BY AUTOMERGER Change-Id: I104ca1ea4ef83944bbe83a668c0043c75e7dd966
-rw-r--r--FIXES23
-rw-r--r--METADATA8
-rw-r--r--README.md6
-rw-r--r--awk.153
-rw-r--r--bugs-fixed/inf-nan-torture.awk4
-rw-r--r--bugs-fixed/inf-nan-torture.in1
-rw-r--r--bugs-fixed/inf-nan-torture.ok16
-rw-r--r--lex.c7
-rw-r--r--lib.c97
-rw-r--r--main.c2
-rw-r--r--proto.h4
-rw-r--r--run.c32
-rw-r--r--tran.c39
13 files changed, 234 insertions, 58 deletions
diff --git a/FIXES b/FIXES
index 2aeb8d0..20b4bd8 100644
--- a/FIXES
+++ b/FIXES
@@ -25,6 +25,29 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
+December 18, 2020:
+ Fix problems converting inf and NaN values in lib.c:is_valid_number.
+ Enhance number to string conversion to do the right thing for
+ NaN and inf values. Things are now pretty much the same as in
+ gawk. (Found a gawk bug while we're at it.) Added a torture
+ test for these values. Thanks to Arnold Robbins. Allows closing
+ of PR #101.
+
+December 15, 2020:
+ Merge PR #99, which gets the right header for strcasecmp.
+ Thanks to GitHub user michaelforney.
+
+December 8, 2020:
+ Merge PR #98: Disallow hex data. Allow only +nan, -nan,
+ +inf, -inf (case independent) to give NaN and infinity values.
+ Improve things so that string to double conversion is only
+ done once, yielding something of a speedup. This obviate
+ PR #95. Thanks to Arnold Robbins.
+
+December 3, 2020:
+ Fix to argument parsing to avoid printing spurious newlines.
+ Thanks to Todd Miller. Merges PR #97.
+
October 13, 2020:
Add casts before all the calls to malloc/calloc/realloc in order
to get it to compile with g++. Thanks to Arnold Robbins.
diff --git a/METADATA b/METADATA
index 0c29003..833688d 100644
--- a/METADATA
+++ b/METADATA
@@ -5,11 +5,11 @@ third_party {
type: GIT
value: "https://github.com/onetrueawk/awk.git"
}
- version: "a2a41a8e359dca5295c43dc2c2408094b96c3e0c"
+ version: "7d1848cfa6b7b3bb9a7c851339626982198a57bc"
license_type: NOTICE
last_upgrade_date {
- year: 2020
- month: 11
- day: 30
+ year: 2021
+ month: 1
+ day: 5
}
}
diff --git a/README.md b/README.md
index 94075fd..b8089b3 100644
--- a/README.md
+++ b/README.md
@@ -114,10 +114,6 @@ basis. We try to get to issues and pull requests as quickly
as we can. Unfortunately, however, keeping this program going
is not at the top of our priority list.
-_If_ you (yes, you!) are interested in taking over active maintenance of
-`awk`, please open an issue to indicate that fact, and give us a little bit of
-your background and some idea of your plans and dreams. Thanks!
-
#### Last Updated
-Tue Oct 13 20:00:09 IDT 2020
+Fri Dec 25 16:53:34 EST 2020
diff --git a/awk.1 b/awk.1
index 050d253..d27dbff 100644
--- a/awk.1
+++ b/awk.1
@@ -577,3 +577,56 @@ The scope rules for variables in functions are a botch;
the syntax is worse.
.PP
Only eight-bit characters sets are handled correctly.
+.SH UNUSUAL FLOATING-POINT VALUES
+.I Awk
+was designed before IEEE 754 arithmetic defined Not-A-Number (NaN)
+and Infinity values, which are supported by all modern floating-point
+hardware.
+.PP
+Because
+.I awk
+uses
+.IR strtod (3)
+and
+.IR atof (3)
+to convert string values to double-precision floating-point values,
+modern C libraries also convert strings starting with
+.B inf
+and
+.B nan
+into infinity and NaN values respectively. This led to strange results,
+with something like this:
+.PP
+.EX
+.nf
+echo nancy | awk '{ print $1 + 0 }'
+.fi
+.EE
+.PP
+printing
+.B nan
+instead of zero.
+.PP
+.I Awk
+now follows GNU AWK, and prefilters string values before attempting
+to convert them to numbers, as follows:
+.TP
+.I "Hexadecimal values"
+Hexadecimal values (allowed since C99) convert to zero, as they did
+prior to C99.
+.TP
+.I "NaN values"
+The two strings
+.B +nan
+and
+.B \-nan
+(case independent) convert to NaN. No others do.
+(NaNs can have signs.)
+.TP
+.I "Infinity values"
+The two strings
+.B +inf
+and
+.B \-inf
+(case independent) convert to positive and negative infinity, respectively.
+No others do.
diff --git a/bugs-fixed/inf-nan-torture.awk b/bugs-fixed/inf-nan-torture.awk
new file mode 100644
index 0000000..8d145f2
--- /dev/null
+++ b/bugs-fixed/inf-nan-torture.awk
@@ -0,0 +1,4 @@
+{
+ for (i = 1; i <= NF; i++)
+ print i, $i, $i + 0
+}
diff --git a/bugs-fixed/inf-nan-torture.in b/bugs-fixed/inf-nan-torture.in
new file mode 100644
index 0000000..45dfdc8
--- /dev/null
+++ b/bugs-fixed/inf-nan-torture.in
@@ -0,0 +1 @@
+-inf -inform inform -nan -nancy nancy -123 0 123 +123 nancy +nancy +nan inform +inform +inf
diff --git a/bugs-fixed/inf-nan-torture.ok b/bugs-fixed/inf-nan-torture.ok
new file mode 100644
index 0000000..40d3194
--- /dev/null
+++ b/bugs-fixed/inf-nan-torture.ok
@@ -0,0 +1,16 @@
+1 -inf -inf
+2 -inform 0
+3 inform 0
+4 -nan -nan
+5 -nancy 0
+6 nancy 0
+7 -123 -123
+8 0 0
+9 123 123
+10 +123 123
+11 nancy 0
+12 +nancy 0
+13 +nan +nan
+14 inform 0
+15 +inform 0
+16 +inf +inf
diff --git a/lex.c b/lex.c
index df39c8e..9d1ae06 100644
--- a/lex.c
+++ b/lex.c
@@ -191,7 +191,12 @@ int yylex(void)
return word(buf);
if (isdigit(c)) {
char *cp = tostring(buf);
- yylval.cp = setsymtab(buf, cp, atof(buf), CON|NUM, symtab);
+ double result;
+
+ if (is_number(cp, & result))
+ yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
+ else
+ yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
free(cp);
/* should this also have STR set? */
RET(NUMBER);
diff --git a/lib.c b/lib.c
index f55e897..e8310a9 100644
--- a/lib.c
+++ b/lib.c
@@ -25,11 +25,13 @@ THIS SOFTWARE.
#define DEBUG
#include <stdio.h>
#include <string.h>
+#include <strings.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <stdarg.h>
#include <limits.h>
+#include <math.h>
#include "awk.h"
char EMPTY[] = { '\0' };
@@ -181,12 +183,14 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record *
innew = false;
if (c != 0 || buf[0] != '\0') { /* normal record */
if (isrecord) {
+ double result;
+
if (freeable(fldtab[0]))
xfree(fldtab[0]->sval);
fldtab[0]->sval = buf; /* buf == record */
fldtab[0]->tval = REC | STR | DONTFREE;
- if (is_number(fldtab[0]->sval)) {
- fldtab[0]->fval = atof(fldtab[0]->sval);
+ if (is_number(fldtab[0]->sval, & result)) {
+ fldtab[0]->fval = result;
fldtab[0]->tval |= NUM;
}
}
@@ -293,6 +297,7 @@ void setclvar(char *s) /* set var=value from s */
{
char *p;
Cell *q;
+ double result;
for (p=s; *p != '='; p++)
;
@@ -300,8 +305,8 @@ void setclvar(char *s) /* set var=value from s */
p = qstring(p, '\0');
q = setsymtab(s, p, 0.0, STR, symtab);
setsval(q, p);
- if (is_number(q->sval)) {
- q->fval = atof(q->sval);
+ if (is_number(q->sval, & result)) {
+ q->fval = result;
q->tval |= NUM;
}
DPRINTF("command line set %s to |%s|\n", s, p);
@@ -402,9 +407,11 @@ void fldbld(void) /* create fields from current record */
lastfld = i;
donefld = true;
for (j = 1; j <= lastfld; j++) {
+ double result;
+
p = fldtab[j];
- if(is_number(p->sval)) {
- p->fval = atof(p->sval);
+ if(is_number(p->sval, & result)) {
+ p->fval = result;
p->tval |= NUM;
}
}
@@ -669,12 +676,11 @@ void error()
fprintf(stderr, " source line number %d", curnode->lineno);
else if (lineno)
fprintf(stderr, " source line number %d", lineno);
+ if (compile_time == COMPILING && cursource() != NULL)
+ fprintf(stderr, " source file %s", cursource());
+ fprintf(stderr, "\n");
+ eprint();
}
-
- if (compile_time == COMPILING && cursource() != NULL)
- fprintf(stderr, " source file %s", cursource());
- fprintf(stderr, "\n");
- eprint();
}
void eprint(void) /* try to print context around error */
@@ -757,24 +763,69 @@ int isclvar(const char *s) /* is s of form var=something ? */
/* strtod is supposed to be a proper test of what's a valid number */
/* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
/* wrong: violates 4.10.1.4 of ansi C standard */
+
/* well, not quite. As of C99, hex floating point is allowed. so this is
- * a bit of a mess.
+ * a bit of a mess. We work around the mess by checking for a hexadecimal
+ * value and disallowing it. Similarly, we now follow gawk and allow only
+ * +nan, -nan, +inf, and -inf for NaN and infinity values.
*/
-#include <math.h>
-int is_number(const char *s)
+/*
+ * This routine now has a more complicated interface, the main point
+ * being to avoid the double conversion of a string to double, and
+ * also to convey out, if requested, the information that the numeric
+ * value was a leading string or is all of the string. The latter bit
+ * is used in getfval().
+ */
+
+bool is_valid_number(const char *s, bool trailing_stuff_ok,
+ bool *no_trailing, double *result)
{
double r;
char *ep;
+ bool retval = false;
+ bool is_nan = false;
+ bool is_inf = false;
+
+ if (no_trailing)
+ *no_trailing = false;
+
+ while (isspace(*s))
+ s++;
+
+ // no hex floating point, sorry
+ if (s[0] == '0' && tolower(s[1]) == 'x')
+ return false;
+
+ // allow +nan, -nan, +inf, -inf, any other letter, no
+ if (s[0] == '+' || s[0] == '-') {
+ is_nan = (strncasecmp(s+1, "nan", 3) == 0);
+ is_inf = (strncasecmp(s+1, "inf", 3) == 0);
+ if ((is_nan || is_inf)
+ && (isspace(s[4]) || s[4] == '\0'))
+ goto convert;
+ else if (! isdigit(s[1]) && s[1] != '.')
+ return false;
+ }
+ else if (! isdigit(s[0]) && s[0] != '.')
+ return false;
+
+convert:
errno = 0;
r = strtod(s, &ep);
- if (ep == s || r == HUGE_VAL || errno == ERANGE)
- return 0;
- /* allow \r as well. windows files aren't going to go away. */
- while (*ep == ' ' || *ep == '\t' || *ep == '\n' || *ep == '\r')
- ep++;
- if (*ep == '\0')
- return 1;
- else
- return 0;
+ if (ep == s || errno == ERANGE)
+ return false;
+
+ if (isnan(r) && s[0] == '-' && signbit(r) == 0)
+ r = -r;
+
+ if (result != NULL)
+ *result = r;
+
+ retval = (isspace(*ep) || *ep == '\0' || trailing_stuff_ok);
+
+ if (no_trailing != NULL)
+ *no_trailing = (*ep == '\0');
+
+ return retval;
}
diff --git a/main.c b/main.c
index 6453981..5970cb4 100644
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20201013";
+const char *version = "version 20201218";
#define DEBUG
#include <stdio.h>
diff --git a/proto.h b/proto.h
index 71e3cd9..a64991b 100644
--- a/proto.h
+++ b/proto.h
@@ -146,7 +146,9 @@ extern void eprint(void);
extern void bclass(int);
extern double errcheck(double, const char *);
extern int isclvar(const char *);
-extern int is_number(const char *);
+extern bool is_valid_number(const char *s, bool trailing_stuff_ok,
+ bool *no_trailing, double *result);
+#define is_number(s, val) is_valid_number(s, false, NULL, val)
extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what);
extern void run(Node *);
diff --git a/run.c b/run.c
index 854463f..da4f555 100644
--- a/run.c
+++ b/run.c
@@ -407,6 +407,7 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
int bufsize = recsize;
int mode;
bool newflag;
+ double result;
if ((buf = (char *) malloc(bufsize)) == NULL)
FATAL("out of memory in getline");
@@ -429,15 +430,15 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
} else if (a[0] != NULL) { /* getline var <file */
x = execute(a[0]);
setsval(x, buf);
- if (is_number(x->sval)) {
- x->fval = atof(x->sval);
+ if (is_number(x->sval, & result)) {
+ x->fval = result;
x->tval |= NUM;
}
tempfree(x);
} else { /* getline <file */
setsval(fldtab[0], buf);
- if (is_number(fldtab[0]->sval)) {
- fldtab[0]->fval = atof(fldtab[0]->sval);
+ if (is_number(fldtab[0]->sval, & result)) {
+ fldtab[0]->fval = result;
fldtab[0]->tval |= NUM;
}
}
@@ -448,8 +449,8 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
n = getrec(&buf, &bufsize, false);
x = execute(a[0]);
setsval(x, buf);
- if (is_number(x->sval)) {
- x->fval = atof(x->sval);
+ if (is_number(x->sval, & result)) {
+ x->fval = result;
x->tval |= NUM;
}
tempfree(x);
@@ -726,7 +727,7 @@ Cell *indirect(Node **a, int n) /* $( a[0] ) */
if ((Awkfloat)INT_MAX < val)
FATAL("trying to access out of range field %s", x->nval);
m = (int) val;
- if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
+ if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */
FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
/* BUG: can x->nval ever be null??? */
tempfree(x);
@@ -1259,6 +1260,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
int sep;
char temp, num[50];
int n, tempstat, arg3type;
+ double result;
y = execute(a[0]); /* source string */
origs = s = strdup(getsval(y));
@@ -1303,8 +1305,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
snprintf(num, sizeof(num), "%d", n);
temp = *patbeg;
setptr(patbeg, '\0');
- if (is_number(s))
- setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
+ if (is_number(s, & result))
+ setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
setptr(patbeg, temp);
@@ -1322,8 +1324,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
}
n++;
snprintf(num, sizeof(num), "%d", n);
- if (is_number(s))
- setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
+ if (is_number(s, & result))
+ setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
spdone:
@@ -1343,8 +1345,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
temp = *s;
setptr(s, '\0');
snprintf(num, sizeof(num), "%d", n);
- if (is_number(t))
- setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+ if (is_number(t, & result))
+ setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
setptr(s, temp);
@@ -1372,8 +1374,8 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
temp = *s;
setptr(s, '\0');
snprintf(num, sizeof(num), "%d", n);
- if (is_number(t))
- setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+ if (is_number(t, & result))
+ setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
setptr(s, temp);
diff --git a/tran.c b/tran.c
index 79964cd..add9d85 100644
--- a/tran.c
+++ b/tran.c
@@ -129,9 +129,11 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */
free(cp->sval);
cp->sval = (char *) ARGVtab;
for (i = 0; i < ac; i++) {
+ double result;
+
sprintf(temp, "%d", i);
- if (is_number(*av))
- setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
+ if (is_number(*av, & result))
+ setsymtab(temp, *av, result, STR|NUM, ARGVtab);
else
setsymtab(temp, *av, 0.0, STR, ARGVtab);
av++;
@@ -148,13 +150,15 @@ void envinit(char **envp) /* set up ENVIRON variable */
free(cp->sval);
cp->sval = (char *) ENVtab;
for ( ; *envp; envp++) {
+ double result;
+
if ((p = strchr(*envp, '=')) == NULL)
continue;
if( p == *envp ) /* no left hand side name in env string */
continue;
*p++ = 0; /* split into two strings at = */
- if (is_number(p))
- setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
+ if (is_number(p, & result))
+ setsymtab(*envp, p, result, STR|NUM, ENVtab);
else
setsymtab(*envp, p, 0.0, STR, ENVtab);
p[-1] = '='; /* restore in case env is passed down to a shell */
@@ -399,19 +403,36 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */
else if (isrec(vp) && !donerec)
recbld();
if (!isnum(vp)) { /* not a number */
- vp->fval = atof(vp->sval); /* best guess */
- if (is_number(vp->sval) && !(vp->tval&CON))
- vp->tval |= NUM; /* make NUM only sparingly */
+ double fval;
+ bool no_trailing;
+
+ if (is_valid_number(vp->sval, true, & no_trailing, & fval)) {
+ vp->fval = fval;
+ if (no_trailing && !(vp->tval&CON))
+ vp->tval |= NUM; /* make NUM only sparingly */
+ } else
+ vp->fval = 0.0;
}
DPRINTF("getfval %p: %s = %g, t=%o\n",
(void*)vp, NN(vp->nval), vp->fval, vp->tval);
return(vp->fval);
}
+static char *get_inf_nan(double d)
+{
+ if (isinf(d)) {
+ return (d < 0 ? "-inf" : "+inf");
+ } else if (isnan(d)) {
+ return (signbit(d) != 0 ? "-nan" : "+nan");
+ } else
+ return NULL;
+}
+
static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */
{
char s[256];
double dtemp;
+ char *p;
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "read value of");
@@ -448,7 +469,9 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel
{ \
if (freeable(vp)) \
xfree(vp->sval); \
- if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \
+ if ((p = get_inf_nan(vp->fval)) != NULL) \
+ strcpy(s, p); \
+ else if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \
snprintf(s, sizeof (s), "%.30g", vp->fval); \
else \
snprintf(s, sizeof (s), *fmt, vp->fval); \