From 2976507cc10587a8d6d540c099b1d481547d7807 Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Fri, 10 Jan 2020 11:13:26 +0100 Subject: rename T.concat to T.csconcat to avoid case-insensitive conflict (#64) On case-insensitive file systems (i.e.: macOS), T.concat and t.concat are the same file, so these conflicted. This commit renames T.concat to avoid the conflict. --- testdir/T.concat | 29 ----------------------------- testdir/T.csconcat | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 29 deletions(-) delete mode 100755 testdir/T.concat create mode 100755 testdir/T.csconcat diff --git a/testdir/T.concat b/testdir/T.concat deleted file mode 100755 index c6bd016..0000000 --- a/testdir/T.concat +++ /dev/null @@ -1,29 +0,0 @@ -echo T.concat: test constant string concatentation - -awk=${awk-../a.out} - -$awk ' -BEGIN { - $0 = "aaa" - print "abcdef" " " $0 -} -BEGIN { print "hello" "world"; print helloworld } -BEGIN { - print " " "hello" - print "hello" " " - print "hello" " " "world" - print "hello" (" " "world") -} -' > foo1 - -cat << \EOF > foo2 -abcdef aaa -helloworld - - hello -hello -hello world -hello world -EOF - -diff foo1 foo2 || echo 'BAD: T.concat (1)' diff --git a/testdir/T.csconcat b/testdir/T.csconcat new file mode 100755 index 0000000..5199600 --- /dev/null +++ b/testdir/T.csconcat @@ -0,0 +1,29 @@ +echo T.csconcat: test constant string concatentation + +awk=${awk-../a.out} + +$awk ' +BEGIN { + $0 = "aaa" + print "abcdef" " " $0 +} +BEGIN { print "hello" "world"; print helloworld } +BEGIN { + print " " "hello" + print "hello" " " + print "hello" " " "world" + print "hello" (" " "world") +} +' > foo1 + +cat << \EOF > foo2 +abcdef aaa +helloworld + + hello +hello +hello world +hello world +EOF + +diff foo1 foo2 || echo 'BAD: T.csconcat (1)' -- cgit v1.2.3 From fed1a562c3d1f3cf3cac0dd1413679191ac43002 Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Fri, 17 Jan 2020 13:02:57 +0100 Subject: Make I/O errors fatal instead of mere 
warnings (#63) An input/output error indicates a fatal condition, even if it occurs when closing a file. Awk should not return success on I/O error, but treat I/O errors as it already treats write errors. Test case: $ (trap '' PIPE; awk 'BEGIN { print "hi"; }'; echo "E $?" >&2) | : awk: i/o error occurred while closing /dev/stdout source line number 1 E 2 The test case pipes a line into a dummy command that reads no input, with SIGPIPE ignored so we rely on awk's own I/O checking. No write error is detected, because the pipe is buffered; the broken pipe is only detected as an I/O error on closing stdout. Before this commit, "E 0" was printed (indicating status 0/success) because an I/O error merely produced a warning. A shell script was unable to detect the I/O error using the exit status. --- FIXES | 4 ++++ run.c | 8 ++++---- testdir/T.misc | 5 +++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/FIXES b/FIXES index 6889e30..aac4a4a 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,10 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +January 9, 2020: + Input/output errors on closing files are now fatal instead of + mere warnings. Thanks to Martijn Dekker . + January 5, 2020: Fix a bug in the concatentation of two string constants into one done in the grammar. Fixes GitHub issue #61. 
Thanks diff --git a/run.c b/run.c index a331449..136e20a 100644 --- a/run.c +++ b/run.c @@ -1769,13 +1769,13 @@ Cell *closefile(Node **a, int n) for (i = 0; i < nfiles; i++) { if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) { if (ferror(files[i].fp)) - WARNING( "i/o error occurred on %s", files[i].fname ); + FATAL( "i/o error occurred on %s", files[i].fname ); if (files[i].mode == '|' || files[i].mode == LE) stat = pclose(files[i].fp); else stat = fclose(files[i].fp); if (stat == EOF) - WARNING( "i/o error occurred closing %s", files[i].fname ); + FATAL( "i/o error occurred closing %s", files[i].fname ); if (i > 2) /* don't do /dev/std... */ xfree(files[i].fname); files[i].fname = NULL; /* watch out for ref thru this */ @@ -1795,13 +1795,13 @@ void closeall(void) for (i = 0; i < FOPEN_MAX; i++) { if (files[i].fp) { if (ferror(files[i].fp)) - WARNING( "i/o error occurred on %s", files[i].fname ); + FATAL( "i/o error occurred on %s", files[i].fname ); if (files[i].mode == '|' || files[i].mode == LE) stat = pclose(files[i].fp); else stat = fclose(files[i].fp); if (stat == EOF) - WARNING( "i/o error occurred while closing %s", files[i].fname ); + FATAL( "i/o error occurred while closing %s", files[i].fname ); } } } diff --git a/testdir/T.misc b/testdir/T.misc index 7fc196a..a191ae2 100755 --- a/testdir/T.misc +++ b/testdir/T.misc @@ -466,3 +466,8 @@ echo '' >foo0 $awk 'END { print NF, $0 }' foo0 >foo1 awk '{ print NF, $0 }' foo0| tail -1 >foo2 cmp -s foo1 foo2 || echo 'BAD: T.misc END must preserve $0' + +# Check for nonzero exit status on I/O error. +echo 'E 2' >foo1 +(trap '' PIPE; "$awk" 'BEGIN { print "hi"; }' 2>/dev/null; echo "E $?" >foo2) | : +cmp -s foo1 foo2 || echo 'BAD: T.misc exit status on I/O error' -- cgit v1.2.3 From 3ed74525f64cc60368bae56a6e3e40cafd9489f1 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Fri, 17 Jan 2020 14:03:52 +0200 Subject: Update date in version. 
--- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index abfa312..7a9732c 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20200105"; +const char *version = "version 20200109"; #define DEBUG #include -- cgit v1.2.3 From df6ccd29820f4e51d15eff6dab3c62014e877550 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Fri, 17 Jan 2020 14:08:59 +0200 Subject: Add TODO file. --- TODO | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 0000000..3067377 --- /dev/null +++ b/TODO @@ -0,0 +1,22 @@ +Fri Jan 17 14:04:29 IST 2020 +============================ + +Here are some things that it'd be nice to have volunteer +help on. + +1. Rework the test suite so that it's easier to maintain +and see exactly which tests fail: + A. Extract beebe.tar into separate file and update scripts + B. Split apart multiple tests into separate tests with input + and "ok" files for comparisons. + +2. Pull in more of the tests from gawk that only test standard features. + The beebe.tar file appears to be from sometime in the 1990s. + +3. Make the One True Awk valgrind clean. In particular add + a test suite target that runs valgrind on all the tests and + reports if there are any definite losses or any invalid reads + or writes (similar to gawk's test of this nature). + +4. Set the "close on exec" flag for file and pipe redirection + file descriptors. -- cgit v1.2.3 From de6284e0377e1c10f6249586df1a67311e9c5b2f Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Sun, 19 Jan 2020 20:37:33 +0200 Subject: Fix Issue 66; sub/gsub follow POSIX if POSIXLY_CORRECT in the environment. 
--- FIXES | 5 +++++ awk.1 | 10 ++++++++++ main.c | 2 +- run.c | 10 ++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/FIXES b/FIXES index aac4a4a..1369d5e 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,11 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +January 19, 2020: + If POSIXLY_CORRECT is set in the environment, then sub and gsub + use POSIX rules for multiple backslashes. This fixes Issue #66, + while maintaining backwards compatibility. + January 9, 2020: Input/output errors on closing files are now fatal instead of mere warnings. Thanks to Martijn Dekker . diff --git a/awk.1 b/awk.1 index 5c05b01..aa66f06 100644 --- a/awk.1 +++ b/awk.1 @@ -502,6 +502,16 @@ functions may be called recursively. Parameters are local to the function; all other variables are global. Thus local variables may be created by providing excess parameters in the function definition. +.SH ENVIRONMENT VARIABLES +If +.B POSIXLY_CORRECT +is set in the environment, then +.I awk +follows the POSIX rules for +.B sub +and +.B gsub +with respect to consecutive backslashes and ampersands. .SH EXAMPLES .TP .EX diff --git a/main.c b/main.c index 7a9732c..e92d17c 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
****************************************************************/ -const char *version = "version 20200109"; +const char *version = "version 20200119"; #define DEBUG #include diff --git a/run.c b/run.c index 136e20a..2bef988 100644 --- a/run.c +++ b/run.c @@ -1983,6 +1983,13 @@ void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ { /* sptr[0] == '\\' */ char *pb = *pb_ptr; const char *sptr = *sptr_ptr; + static bool first = true; + static bool do_posix = false; + + if (first) { + first = false; + do_posix = (getenv("POSIXLY_CORRECT") != NULL); + } if (sptr[1] == '\\') { if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ @@ -1992,6 +1999,9 @@ void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ } else if (sptr[2] == '&') { /* \\& -> \ + matched */ *pb++ = '\\'; sptr += 2; + } else if (do_posix) { /* \\x -> \x */ + sptr++; + *pb++ = *sptr++; } else { /* \\x -> \\x */ *pb++ = *sptr++; *pb++ = *sptr++; -- cgit v1.2.3 From 5a18f63b8dfc35fb7bcda4688661e354783d2bb7 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Wed, 22 Jan 2020 02:10:59 -0700 Subject: Set the close-on-exec flag for file and pipe redirections. --- ChangeLog | 5 +++++ TODO | 5 +---- run.c | 3 +++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1afd9de..4e95699 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2020-01-20 Arnold D. Robbins + + * run.c (openfile): Set the close-on-exec flag for file + and pipe redirections that aren't stdin/stdout/stderr. + 2020-01-06 Arnold D. Robbins Minor fixes. 
diff --git a/TODO b/TODO index 3067377..13f2925 100644 --- a/TODO +++ b/TODO @@ -1,4 +1,4 @@ -Fri Jan 17 14:04:29 IST 2020 +Wed Jan 22 02:10:35 MST 2020 ============================ Here are some things that it'd be nice to have volunteer @@ -17,6 +17,3 @@ and see exactly which tests fail: a test suite target that runs valgrind on all the tests and reports if there are any definite losses or any invalid reads or writes (similar to gawk's test of this nature). - -4. Set the "close on exec" flag for file and pipe redirection - file descriptors. diff --git a/run.c b/run.c index 2bef988..ad6432b 100644 --- a/run.c +++ b/run.c @@ -25,6 +25,7 @@ THIS SOFTWARE. #define DEBUG #include #include +#include #include #include #include @@ -1744,6 +1745,8 @@ FILE *openfile(int a, const char *us) files[i].fname = tostring(s); files[i].fp = fp; files[i].mode = m; + if (fp != stdin && fp != stdout && fp != stderr) + (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); } return fp; } -- cgit v1.2.3 From 6a8770929d4653725e75f4a7a3446f227c4c6817 Mon Sep 17 00:00:00 2001 From: zoulasc Date: Fri, 24 Jan 2020 04:11:59 -0500 Subject: Small fixes (#68) * sprinkle const, static * account for lineno in unput * Add an EMPTY string that is used when a non-const empty string is needed. * make inputFS static and dynamically allocated * Simplify and in the process avoid -Wwritable-strings * make fs const to avoid -Wwritable-strings --- awk.h | 3 +-- b.c | 4 ++-- lex.c | 12 +++++++----- lib.c | 24 +++++++++++++++++------- maketab.c | 8 ++------ run.c | 9 +++++---- 6 files changed, 34 insertions(+), 26 deletions(-) diff --git a/awk.h b/awk.h index bede1b0..7b8f564 100644 --- a/awk.h +++ b/awk.h @@ -60,6 +60,7 @@ extern bool safe; /* false => unsafe, true => safe */ #define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. 
*/ extern int recsize; /* size of current record, orig RECSIZE */ +extern char EMPTY[]; extern char **FS; extern char **RS; extern char **ORS; @@ -78,8 +79,6 @@ extern int lineno; /* line number in awk program */ extern int errorflag; /* 1 if error has occurred */ extern bool donefld; /* true if record broken into fields */ extern bool donerec; /* true if record is valid (no fld has changed */ -extern char inputFS[]; /* FS at time of input, for field splitting */ - extern int dbg; extern const char *patbeg; /* beginning of pattern matched */ diff --git a/b.c b/b.c index 5671796..34dfb20 100644 --- a/b.c +++ b/b.c @@ -873,7 +873,7 @@ int (xisblank)(int c) #endif -struct charclass { +static const struct charclass { const char *cc_name; int cc_namelen; int (*cc_func)(int); @@ -1017,7 +1017,7 @@ int relex(void) /* lexical analyzer for reparse */ static uschar *buf = NULL; static int bufsz = 100; uschar *bp; - struct charclass *cc; + const struct charclass *cc; int i; int num, m; bool commafound, digitfound; diff --git a/lex.c b/lex.c index d729516..503e41a 100644 --- a/lex.c +++ b/lex.c @@ -43,7 +43,7 @@ typedef struct Keyword { int type; } Keyword; -Keyword keywords[] ={ /* keep sorted: binary searched */ +const Keyword keywords[] = { /* keep sorted: binary searched */ { "BEGIN", XBEGIN, XBEGIN }, { "END", XEND, XEND }, { "NF", VARNF, VARNF }, @@ -91,14 +91,14 @@ Keyword keywords[] ={ /* keep sorted: binary searched */ #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } -int peek(void) +static int peek(void) { int c = input(); unput(c); return c; } -int gettok(char **pbuf, int *psz) /* get next input token */ +static int gettok(char **pbuf, int *psz) /* get next input token */ { int c, retc; char *buf = *pbuf; @@ -440,7 +440,7 @@ int string(void) } -int binsearch(char *w, Keyword *kp, int n) +static int binsearch(char *w, const Keyword *kp, int n) { int cond, low, mid, high; @@ -460,7 +460,7 @@ int binsearch(char *w, Keyword *kp, int n) int word(char 
*w) { - Keyword *kp; + const Keyword *kp; int c, n; n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0])); @@ -572,6 +572,8 @@ int input(void) /* get next lexical input character */ void unput(int c) /* put lexical character back on input */ { + if (c == '\n') + lineno--; if (yysptr >= yysbuf + sizeof(yysbuf)) FATAL("pushed back too much: %.20s...", yysbuf); *yysptr++ = c; diff --git a/lib.c b/lib.c index dbc09c3..d96b830 100644 --- a/lib.c +++ b/lib.c @@ -32,15 +32,17 @@ THIS SOFTWARE. #include "awk.h" #include "ytab.h" +char EMPTY[] = { '\0' }; FILE *infile = NULL; -char *file = ""; +char *file = EMPTY; char *record; int recsize = RECSIZE; char *fields; int fieldssize = RECSIZE; Cell **fldtab; /* pointers to Cells */ -char inputFS[100] = " "; +static size_t len_inputFS = 0; +static char *inputFS = NULL; /* FS at time of input, for field splitting */ #define MAXFLD 2 int nfields = MAXFLD; /* last allocated slot for $i */ @@ -52,8 +54,8 @@ int lastfld = 0; /* last used field */ int argno = 1; /* current input argument number */ extern Awkfloat *ARGC; -static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE }; -static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE }; +static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE }; +static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE }; void recinit(unsigned int n) { @@ -116,9 +118,17 @@ void initgetrec(void) */ void savefs(void) { - if (strlen(getsval(fsloc)) >= sizeof (inputFS)) + size_t len; + if ((len = strlen(getsval(fsloc))) < len_inputFS) { + strcpy(inputFS, *FS); /* for subsequent field splitting */ + return; + } + + len_inputFS = len + 1; + inputFS = realloc(inputFS, len_inputFS); + if (inputFS == NULL) FATAL("field separator %.10s... is too long", *FS); - strcpy(inputFS, *FS); + memcpy(inputFS, *FS, len_inputFS); } static bool firsttime = true; @@ -404,7 +414,7 @@ void cleanfld(int n1, int n2) /* clean out fields n1 .. 
n2 inclusive */ p = fldtab[i]; if (freeable(p)) xfree(p->sval); - p->sval = ""; + p->sval = EMPTY, p->tval = FLD | STR | DONTFREE; } } diff --git a/maketab.c b/maketab.c index c3ce5c6..9ac833e 100644 --- a/maketab.c +++ b/maketab.c @@ -122,8 +122,6 @@ int main(int argc, char *argv[]) printf("#include \n"); printf("#include \"awk.h\"\n"); printf("#include \"ytab.h\"\n\n"); - for (i = SIZE; --i >= 0; ) - names[i] = ""; if (argc != 2) { fprintf(stderr, "usage: maketab YTAB_H\n"); @@ -160,10 +158,8 @@ int main(int argc, char *argv[]) table[p->token-FIRSTTOKEN] = p->name; printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); for (i=0; i Date: Fri, 24 Jan 2020 11:15:30 +0200 Subject: Update version info. --- FIXES | 5 +++++ awk.h | 2 +- main.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/FIXES b/FIXES index 1369d5e..1299c18 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,11 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +January 24, 2020: + A number of small cleanups from Christos Zoulas. Add the close + on exec flag to files/pipes opened for redirection; courtesy of + Arnold Robbins. + January 19, 2020: If POSIXLY_CORRECT is set in the environment, then sub and gsub use POSIX rules for multiple backslashes. This fixes Issue #66, diff --git a/awk.h b/awk.h index 7b8f564..6865438 100644 --- a/awk.h +++ b/awk.h @@ -60,7 +60,7 @@ extern bool safe; /* false => unsafe, true => safe */ #define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */ extern int recsize; /* size of current record, orig RECSIZE */ -extern char EMPTY[]; +extern char EMPTY[]; /* this avoids -Wwritable-strings issues */ extern char **FS; extern char **RS; extern char **ORS; diff --git a/main.c b/main.c index e92d17c..efd6d1a 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
****************************************************************/ -const char *version = "version 20200119"; +const char *version = "version 20200124"; #define DEBUG #include -- cgit v1.2.3 From a3e9e8285e825e8d49570d9564b88b726dd30c2a Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Wed, 1 Jan 2020 22:47:29 +0200 Subject: Fix for a{0} bug. --- awkgram.y | 2 +- b.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/awkgram.y b/awkgram.y index 894f9b5..8fc1709 100644 --- a/awkgram.y +++ b/awkgram.y @@ -50,7 +50,7 @@ Node *arglist = 0; /* list of args for current function */ %token NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']' %token ARRAY %token MATCH NOTMATCH MATCHOP -%token FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE +%token FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO %token AND BOR APPEND EQ GE GT LE LT NE IN %token ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC %token SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE diff --git a/b.c b/b.c index 34dfb20..545fb7d 100644 --- a/b.c +++ b/b.c @@ -263,6 +263,8 @@ void penter(Node *p) /* set up parent pointers and leaf indices */ parent(left(p)) = p; parent(right(p)) = p; break; + case ZERO: + break; default: /* can't happen */ FATAL("can't happen: unknown type %d in penter", type(p)); break; @@ -277,6 +279,7 @@ void freetr(Node *p) /* free parse tree */ xfree(p); break; UNARY + case ZERO: freetr(left(p)); xfree(p); break; @@ -436,6 +439,8 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo cfoll(f,left(v)); cfoll(f,right(v)); break; + case ZERO: + break; default: /* can't happen */ FATAL("can't happen: unknown type %d in cfoll", type(v)); } @@ -479,6 +484,8 @@ int first(Node *p) /* collects initially active leaves of p into setvec */ b = first(right(p)); if (first(left(p)) == 0 || b == 0) return(0); return(1); + case ZERO: + return 0; } FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 
return(-1); @@ -838,6 +845,9 @@ Node *unary(Node *np) case QUEST: rtok = relex(); return (unary(op2(QUEST, np, NIL))); + case ZERO: + rtok = relex(); + return (unary(op2(ZERO, np, NIL))); default: return (np); } @@ -1191,7 +1201,7 @@ rescan: if (repeat(starttok, prestr-starttok, lastatom, startreptok - lastatom, n, m) > 0) { if (n == 0 && m == 0) { - return EMPTYRE; + return ZERO; } /* must rescan input for next token */ goto rescan; -- cgit v1.2.3 From 69325710b145823367d4c21c8b55c0657305bd1c Mon Sep 17 00:00:00 2001 From: Michael Forney Date: Thu, 30 Jan 2020 22:23:34 -0800 Subject: Use MB_LEN_MAX instead of MB_CUR_MAX to avoid VLA (#70) MB_CUR_MAX is the maximum number of bytes in a multibyte character for the current locale, and might not be a constant expression. MB_LEN_MAX is the maximum number of bytes in a multibyte character for any locale, and always expands to a constant-expression. --- lib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib.c b/lib.c index d96b830..0485e5a 100644 --- a/lib.c +++ b/lib.c @@ -29,6 +29,7 @@ THIS SOFTWARE. #include #include #include +#include #include "awk.h" #include "ytab.h" @@ -343,14 +344,14 @@ void fldbld(void) /* create fields from current record */ *fr = 0; } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */ for (i = 0; *r != '\0'; r += n) { - char buf[MB_CUR_MAX + 1]; + char buf[MB_LEN_MAX + 1]; i++; if (i > nfields) growfldtab(i); if (freeable(fldtab[i])) xfree(fldtab[i]->sval); - n = mblen(r, MB_CUR_MAX); + n = mblen(r, MB_LEN_MAX); if (n < 0) n = 1; memcpy(buf, r, n); -- cgit v1.2.3 From 78c79c06d07735d5881ea08cf40e014a41d341af Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Fri, 31 Jan 2020 08:40:11 +0200 Subject: Fix a{0}, update tests. 
--- FIXES | 11 ++++++++--- main.c | 2 +- testdir/T.int-expr | 42 ++++++++++++++++++++++++++++++++++++++++++ testdir/T.misc | 2 +- 4 files changed, 52 insertions(+), 5 deletions(-) diff --git a/FIXES b/FIXES index 1299c18..8854044 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,11 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +January 31, 2020: + Merge PR #70, which avoids use of variable length arrays. Thanks + to GitHub user michaelforney. Fix issue #60 ({0} in interval + expressions doesn't work). Thanks to Arnold Robbins. + January 24, 2020: A number of small cleanups from Christos Zoulas. Add the close on exec flag to files/pipes opened for redirection; courtesy of @@ -77,13 +82,13 @@ October 25, 2019: October 24, 2019: Import second round of code cleanups from NetBSD. Much thanks - to Christos Zoulas (Github user zoulasc). Merges PR 53. + to Christos Zoulas (GitHub user zoulasc). Merges PR 53. Add an optimization for string concatenation, also from Christos. October 17, 2019: Import code cleanups from NetBSD. Much thanks to Christos - Zoulas (Github user zoulasc). Merges PR 51. + Zoulas (GitHub user zoulasc). Merges PR 51. October 6, 2019: Import code from NetBSD awk that implements RS as a regular @@ -91,7 +96,7 @@ October 6, 2019: September 10, 2019: Fixes for various array / memory overruns found via gcc's - -fsanitize=unknown. Thanks to Alexander Richardson (Github + -fsanitize=unknown. Thanks to Alexander Richardson (GitHub user arichardson). Merges PRs 47 and 48. July 28, 2019: diff --git a/main.c b/main.c index efd6d1a..2476320 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
****************************************************************/ -const char *version = "version 20200124"; +const char *version = "version 20200131"; #define DEBUG #include diff --git a/testdir/T.int-expr b/testdir/T.int-expr index e71a075..de77532 100755 --- a/testdir/T.int-expr +++ b/testdir/T.int-expr @@ -18,6 +18,34 @@ pat ab{0}c ac 1 abc 0 +pat a(b{0})c +ac 1 +abc 0 + +pat ab{0}*c +ac 1 +abc 0 + +pat a(b{0})*c +ac 1 +abc 0 + +pat ab{0,}c +ac 1 +abc 1 + +pat a(b{0,})c +ac 1 +abc 1 + +pat ab{0,}*c +ac 1 +abc 1 + +pat a(b{0,})*c +ac 1 +abc 1 + pat ab{1}c ac 0 abc 1 @@ -53,6 +81,20 @@ EOF cat << \EOF > foo1 ac ~ /ab{0}c/ -> should be 1, is 1 abc ~ /ab{0}c/ -> should be 0, is 0 +ac ~ /a(b{0})c/ -> should be 1, is 1 +abc ~ /a(b{0})c/ -> should be 0, is 0 +ac ~ /ab{0}*c/ -> should be 1, is 1 +abc ~ /ab{0}*c/ -> should be 0, is 0 +ac ~ /a(b{0})*c/ -> should be 1, is 1 +abc ~ /a(b{0})*c/ -> should be 0, is 0 +ac ~ /ab{0,}c/ -> should be 1, is 1 +abc ~ /ab{0,}c/ -> should be 1, is 1 +ac ~ /a(b{0,})c/ -> should be 1, is 1 +abc ~ /a(b{0,})c/ -> should be 1, is 1 +ac ~ /ab{0,}*c/ -> should be 1, is 1 +abc ~ /ab{0,}*c/ -> should be 1, is 1 +ac ~ /a(b{0,})*c/ -> should be 1, is 1 +abc ~ /a(b{0,})*c/ -> should be 1, is 1 ac ~ /ab{1}c/ -> should be 0, is 0 abc ~ /ab{1}c/ -> should be 1, is 1 abbc ~ /ab{1}c/ -> should be 0, is 0 diff --git a/testdir/T.misc b/testdir/T.misc index a191ae2..3903606 100755 --- a/testdir/T.misc +++ b/testdir/T.misc @@ -382,7 +382,7 @@ $awk 'BEGIN \ print "hello, world" } }}}' >foo1 2>foo2 -grep 'source line 5' foo2 >/dev/null 2>&1 || echo 'BAD: T.misc continuation line number' +grep 'source line 4' foo2 >/dev/null 2>&1 || echo 'BAD: T.misc continuation line number' echo 111 222 333 >foo -- cgit v1.2.3 From 768d6b58865eb9b0ff5ebd4ef430b767d2108224 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Fri, 31 Jan 2020 08:54:10 +0200 Subject: Get tests working again. 
--- FIXES | 3 ++- lex.c | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/FIXES b/FIXES index 8854044..19afa0b 100644 --- a/FIXES +++ b/FIXES @@ -28,7 +28,8 @@ was sent to the printers in August, 1987. January 31, 2020: Merge PR #70, which avoids use of variable length arrays. Thanks to GitHub user michaelforney. Fix issue #60 ({0} in interval - expressions doesn't work). Thanks to Arnold Robbins. + expressions doesn't work). Also get all tests working again. + Thanks to Arnold Robbins. January 24, 2020: A number of small cleanups from Christos Zoulas. Add the close diff --git a/lex.c b/lex.c index 503e41a..1c23212 100644 --- a/lex.c +++ b/lex.c @@ -210,6 +210,11 @@ int yylex(void) while ((c = input()) != '\n' && c != 0) ; unput(c); + /* + * Next line is a hack, it compensates for + * unput's treatment of \n. + */ + lineno++; break; case ';': RET(';'); -- cgit v1.2.3 From 110bdc6b3e807bd672bf3b1113d01d71744a797a Mon Sep 17 00:00:00 2001 From: zoulasc Date: Thu, 6 Feb 2020 14:25:36 -0500 Subject: misc fixes (#69) * Add a test for German case folding. * Add a function to copy a string with a larger allocation (to be used by the case folding routines) * Add printf attributes to the printf-like functions and fix one format warning * Cleanup the tempfree macro * make more functions static * rename fp to frp (FRame Pointer) to avoid shadowing with fp (File Pointer). * add more const * fix indent in UPLUS case * add locale-aware case folding * make nfiles size_t * fix bugs in file closing: - compare fclose to EOF and pclose to -1 - use nfiles instead of FOPEN_MAX in closeall - don't close files we did not open (0,1,2) fpurge/fflush instead * - use NUL instead of 0 for char comparisons - add ISWS() macro - use continue; instead of ; * Check for existence of the German locale before using it. * Add missing parentheses, thanks Arnold. 
--- proto.h | 10 +- run.c | 276 +++++++++++++++++++++++++++++++++--------------------- testdir/T.builtin | 8 ++ tran.c | 13 ++- 4 files changed, 196 insertions(+), 111 deletions(-) diff --git a/proto.h b/proto.h index 6918e1f..aac2547 100644 --- a/proto.h +++ b/proto.h @@ -111,6 +111,7 @@ extern double getfval(Cell *); extern char *getsval(Cell *); extern char *getpssval(Cell *); /* for print */ extern char *tostring(const char *); +extern char *tostringN(const char *, size_t); extern char *qstring(const char *, int); extern Cell *catstr(Cell *, Cell *); @@ -135,9 +136,12 @@ extern void yyerror(const char *); extern void fpecatch(int); extern void bracecheck(void); extern void bcheck2(int, int, int); -extern void SYNTAX(const char *, ...); -extern void FATAL(const char *, ...) __attribute__((__noreturn__)); -extern void WARNING(const char *, ...); +extern void SYNTAX(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +extern void FATAL(const char *, ...) + __attribute__((__format__(__printf__, 1, 2), __noreturn__)); +extern void WARNING(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); extern void error(void); extern void eprint(void); extern void bclass(int); diff --git a/run.c b/run.c index 6eafc0b..0d5e6b4 100644 --- a/run.c +++ b/run.c @@ -25,6 +25,8 @@ THIS SOFTWARE. #define DEBUG #include #include +#include +#include #include #include #include @@ -37,11 +39,12 @@ THIS SOFTWARE. #include "awk.h" #include "ytab.h" -#define tempfree(x) if (istemp(x)) tfree(x); else - -/* -#undef tempfree +static void stdinit(void); +static void flush_all(void); +#if 1 +#define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) +#else void tempfree(Cell *p) { if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { WARNING("bad csub %d in Cell %d %s", @@ -50,7 +53,7 @@ void tempfree(Cell *p) { if (istemp(p)) tfree(p); } -*/ +#endif /* do we really need these? 
*/ /* #ifdef _NFILE */ @@ -131,7 +134,6 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, void run(Node *a) /* execution of parse tree starts here */ { - extern void stdinit(void); stdinit(); execute(a); @@ -220,11 +222,11 @@ struct Frame { /* stack frame for awk function calls */ struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ int nframe = 0; /* number of frames allocated */ -struct Frame *fp = NULL; /* frame pointer. bottom level unused */ +struct Frame *frp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ { - static Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL }; + static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL }; int i, ncall, ndef; int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ Node *x; @@ -237,21 +239,21 @@ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ if (!isfcn(fcn)) FATAL("calling undefined function %s", s); if (frame == NULL) { - fp = frame = calloc(nframe += 100, sizeof(*frame)); + frp = frame = calloc(nframe += 100, sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames calling %s", s); } for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ ncall++; ndef = (int) fcn->fval; /* args in defn */ - dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) ); + dprintf( ("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)) ); if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", s, ncall, ndef); if (ncall + ndef > NARGS) FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ - dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) ); + dprintf( ("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)) ); y = execute(x); oargs[i] = y; dprintf( ("args[%d]: %s %f <%s>, t=%o\n", @@ -268,25 +270,25 @@ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ args[i] = gettemp(); *args[i] = newcopycell; } - fp++; /* now ok to up frame */ - if (fp >= frame + nframe) { - int dfp = fp - frame; /* old index */ + frp++; /* now ok to up frame */ + if (frp >= frame + nframe) { + int dfp = frp - frame; /* old index */ frame = realloc(frame, (nframe += 100) * sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames in %s", s); - fp = frame + dfp; + frp = frame + dfp; } - fp->fcncell = fcn; - fp->args = args; - fp->nargs = ndef; /* number defined with (excess are locals) */ - fp->retval = gettemp(); + frp->fcncell = fcn; + frp->args = args; + frp->nargs = ndef; /* number defined with (excess are locals) */ + frp->retval = gettemp(); - dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + dprintf( ("start exec of %s, frp=%d\n", s, (int) (frp-frame)) ); y = execute((Node *)(fcn->sval)); /* execute body */ - dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + dprintf( ("finished exec of %s, frp=%d\n", s, (int) (frp-frame)) ); for (i = 0; i < ndef; i++) { - Cell *t = fp->args[i]; + Cell *t = frp->args[i]; if (isarr(t)) { if (t->csub == CCOPY) { if (i >= ncall) { @@ -315,9 +317,9 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ if (freed == 0) { tempfree(y); /* don't free twice! 
*/ } - z = fp->retval; /* return value */ + z = frp->retval; /* return value */ dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); - fp--; + frp--; return(z); } @@ -344,11 +346,11 @@ Cell *arg(Node **a, int n) /* nth argument of a function */ { n = ptoi(a[0]); /* argument number, counting from 0 */ - dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) ); - if (n+1 > fp->nargs) + dprintf( ("arg(%d), frp->nargs=%d\n", n, frp->nargs) ); + if (n+1 > frp->nargs) FATAL("argument #%d of function %s was not supplied", - n+1, fp->fcncell->nval); - return fp->args[n]; + n+1, frp->fcncell->nval); + return frp->args[n]; } Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ @@ -367,14 +369,14 @@ Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ if (a[0] != NULL) { y = execute(a[0]); if ((y->tval & (STR|NUM)) == (STR|NUM)) { - setsval(fp->retval, getsval(y)); - fp->retval->fval = getfval(y); - fp->retval->tval |= NUM; + setsval(frp->retval, getsval(y)); + frp->retval->fval = getfval(y); + frp->retval->tval |= NUM; } else if (y->tval & STR) - setsval(fp->retval, getsval(y)); + setsval(frp->retval, getsval(y)); else if (y->tval & NUM) - setfval(fp->retval, getfval(y)); + setfval(frp->retval, getfval(y)); else /* can't happen */ FATAL("bad type variable %d", y->tval); tempfree(y); @@ -793,8 +795,8 @@ Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ z = gettemp(); for (p1 = s1; *p1 != '\0'; p1++) { - for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++) - ; + for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) + continue; if (*p2 == '\0') { v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ break; @@ -1064,7 +1066,7 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. 
also -a[0] */ case UMINUS: i = -i; break; - case UPLUS: /* handled by getfval(), above */ + case UPLUS: /* handled by getfval(), above */ break; case POWER: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ @@ -1301,7 +1303,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ setsymtab(num, s, 0.0, STR, (Array *) ap->sval); setptr(patbeg, temp); s = patbeg + patlen; - if (*(patbeg+patlen-1) == 0 || *s == 0) { + if (*(patbeg+patlen-1) == '\0' || *s == '\0') { n++; snprintf(num, sizeof(num), "%d", n); setsymtab(num, "", 0.0, STR, (Array *) ap->sval); @@ -1322,15 +1324,16 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ pfa = NULL; } else if (sep == ' ') { for (n = 0; ; ) { - while (*s == ' ' || *s == '\t' || *s == '\n') +#define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') + while (ISWS(*s)) s++; - if (*s == 0) + if (*s == '\0') break; n++; t = s; do s++; - while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0'); + while (*s != '\0' && !ISWS(*s)); temp = *s; setptr(s, '\0'); snprintf(num, sizeof(num), "%d", n); @@ -1339,22 +1342,22 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); - if (*s != 0) + if (*s != '\0') s++; } } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ - for (n = 0; *s != 0; s++) { + for (n = 0; *s != '\0'; s++) { char buf[2]; n++; snprintf(num, sizeof(num), "%d", n); buf[0] = *s; - buf[1] = 0; + buf[1] = '\0'; if (isdigit((uschar)buf[0])) setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); else setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); } - } else if (*s != 0) { + } else if (*s != '\0') { for (;;) { n++; t = s; @@ -1368,7 +1371,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); - if (*s++ == 0) + if (*s++ == '\0') break; } } @@ -1505,16 +1508,73 @@ Cell 
*instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ return True; } +static char *nawk_convert(const char *s, int (*fun_c)(int), + wint_t (*fun_wc)(wint_t)) +{ + char *buf = NULL; + char *pbuf = NULL; + const char *ps = NULL; + size_t n = 0; + mbstate_t mbs, mbs2; + wchar_t wc; + size_t sz = MB_CUR_MAX; + + if (sz == 1) { + buf = tostring(s); + + for (pbuf = buf; *pbuf; pbuf++) + *pbuf = fun_c((uschar)*pbuf); + + return buf; + } else { + /* upper/lower character may be shorter/longer */ + buf = tostringN(s, strlen(s) * sz + 1); + + memset(&mbs, 0, sizeof(mbs)); + memset(&mbs2, 0, sizeof(mbs2)); + + ps = s; + pbuf = buf; + while (n = mbrtowc(&wc, ps, sz, &mbs), + n > 0 && n != (size_t)-1 && n != (size_t)-2) + { + ps += n; + + n = wcrtomb(pbuf, fun_wc(wc), &mbs2); + if (n == (size_t)-1) + FATAL("illegal wide character %s", s); + + pbuf += n; + } + + *pbuf = '\0'; + + if (n) + FATAL("illegal byte sequence %s", s); + + return buf; + } +} + +static char *nawk_toupper(const char *s) +{ + return nawk_convert(s, toupper, towupper); +} + +static char *nawk_tolower(const char *s) +{ + return nawk_convert(s, tolower, towlower); +} + Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ { Cell *x, *y; Awkfloat u; int t; Awkfloat tmp; - char *p, *buf; + char *buf; Node *nextarg; FILE *fp; - void flush_all(void); int status = 0; t = ptoi(a[0]); @@ -1585,16 +1645,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis break; case FTOUPPER: case FTOLOWER: - buf = tostring(getsval(x)); - if (t == FTOUPPER) { - for (p = buf; *p; p++) - if (islower((uschar) *p)) - *p = toupper((uschar)*p); - } else { - for (p = buf; *p; p++) - if (isupper((uschar) *p)) - *p = tolower((uschar)*p); - } + if (t == FTOUPPER) + buf = nawk_toupper(getsval(x)); + else + buf = nawk_tolower(getsval(x)); tempfree(x); x = gettemp(); setsval(x, buf); @@ -1677,14 +1731,14 @@ struct files { int mode; /* '|', 'a', 'w' => LE/LT, GT */ } *files; -int nfiles; +size_t nfiles; -void stdinit(void) /* in case stdin, etc., are not constants */ +static void stdinit(void) /* in case stdin, etc., are not constants */ { nfiles = FOPEN_MAX; files = calloc(nfiles, sizeof(*files)); if (files == NULL) - FATAL("can't allocate file memory for %u files", nfiles); + FATAL("can't allocate file memory for %zu files", nfiles); files[0].fp = stdin; files[0].fname = "/dev/stdin"; files[0].mode = LT; @@ -1699,12 +1753,13 @@ void stdinit(void) /* in case stdin, etc., are not constants */ FILE *openfile(int a, const char *us) { const char *s = us; - int i, m; + size_t i; + int m; FILE *fp = NULL; if (*s == '\0') FATAL("null file name in print or getline"); - for (i=0; i < nfiles; i++) + for (i = 0; i < nfiles; i++) if (files[i].fname && strcmp(s, files[i].fname) == 0) { if (a == files[i].mode || (a==APPEND && files[i].mode==GT)) return files[i].fp; @@ -1714,15 +1769,15 @@ FILE *openfile(int a, const char *us) if (a == FFLUSH) /* didn't find it, so don't create it! 
*/ return NULL; - for (i=0; i < nfiles; i++) + for (i = 0; i < nfiles; i++) if (files[i].fp == NULL) break; if (i >= nfiles) { struct files *nf; - int nnf = nfiles + FOPEN_MAX; + size_t nnf = nfiles + FOPEN_MAX; nf = realloc(files, nnf * sizeof(*nf)); if (nf == NULL) - FATAL("cannot grow files for %s and %d files", s, nnf); + FATAL("cannot grow files for %s and %zu files", s, nnf); memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); nfiles = nnf; files = nf; @@ -1754,7 +1809,7 @@ FILE *openfile(int a, const char *us) const char *filename(FILE *fp) { - int i; + size_t i; for (i = 0; i < nfiles; i++) if (fp == files[i].fp) @@ -1765,52 +1820,59 @@ const char *filename(FILE *fp) Cell *closefile(Node **a, int n) { Cell *x; - int i, stat; + size_t i; + bool stat; x = execute(a[0]); getsval(x); - stat = -1; + stat = true; for (i = 0; i < nfiles; i++) { - if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) { - if (ferror(files[i].fp)) - FATAL( "i/o error occurred on %s", files[i].fname ); - if (files[i].mode == '|' || files[i].mode == LE) - stat = pclose(files[i].fp); - else - stat = fclose(files[i].fp); - if (stat == EOF) - FATAL( "i/o error occurred closing %s", files[i].fname ); - if (i > 2) /* don't do /dev/std... */ - xfree(files[i].fname); - files[i].fname = NULL; /* watch out for ref thru this */ - files[i].fp = NULL; - } + if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) + continue; + if (ferror(files[i].fp)) + FATAL("i/o error occurred on %s", files[i].fname); + if (files[i].mode == '|' || files[i].mode == LE) + stat = pclose(files[i].fp) == -1; + else + stat = fclose(files[i].fp) == EOF; + if (stat) + FATAL("i/o error occurred closing %s", files[i].fname); + if (i > 2) /* don't do /dev/std... */ + xfree(files[i].fname); + files[i].fname = NULL; /* watch out for ref thru this */ + files[i].fp = NULL; } tempfree(x); x = gettemp(); - setfval(x, (Awkfloat) stat); + setfval(x, (Awkfloat) (stat ? 
-1 : 0)); return(x); } void closeall(void) { - int i, stat; - - for (i = 0; i < FOPEN_MAX; i++) { - if (files[i].fp) { - if (ferror(files[i].fp)) - FATAL( "i/o error occurred on %s", files[i].fname ); - if (files[i].mode == '|' || files[i].mode == LE) - stat = pclose(files[i].fp); - else - stat = fclose(files[i].fp); - if (stat == EOF) - FATAL( "i/o error occurred while closing %s", files[i].fname ); - } + size_t i; + bool stat; + + for (i = 0; i < nfiles; i++) { + if (!files[i].fp) + continue; + if (ferror(files[i].fp)) + FATAL("i/o error occurred on %s", files[i].fname); + if (i == 0) + stat = fpurge(files[i].fp) == EOF; + else if (i <= 2) + stat = fflush(files[i].fp) == EOF; + else if (files[i].mode == '|' || files[i].mode == LE) + stat = pclose(files[i].fp) == -1; + else + stat = fclose(files[i].fp) == EOF; + if (stat) + FATAL("i/o error occurred while closing %s", + files[i].fname); } } -void flush_all(void) +static void flush_all(void) { int i; @@ -1849,7 +1911,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */ while (sptr < patbeg) *pb++ = *sptr++; sptr = getsval(y); - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1867,8 +1929,8 @@ Cell *sub(Node **a, int nnn) /* substitute command */ sptr = patbeg + patlen; if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); - while ((*pb++ = *sptr++) != 0) - ; + while ((*pb++ = *sptr++) != '\0') + continue; } if (pb > buf + bufsz) FATAL("sub result2 %.30s too big; can't happen", buf); @@ -1911,11 +1973,11 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ pb = buf; rptr = getsval(y); do { - if (patlen == 0 && *patbeg != 0) { /* matched empty string */ + if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ if (mflag == 0) { /* can replace empty */ num++; sptr = rptr; - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 
5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1928,7 +1990,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb++ = *sptr++; } } - if (*t == 0) /* at end */ + if (*t == '\0') /* at end */ goto done; adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); *pb++ = *t++; @@ -1943,7 +2005,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ while (sptr < patbeg) *pb++ = *sptr++; sptr = rptr; - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1956,7 +2018,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb++ = *sptr++; } t = patbeg + patlen; - if (patlen == 0 || *t == 0 || *(t-1) == 0) + if (patlen == 0 || *t == '\0' || *(t-1) == '\0') goto done; if (pb > buf + bufsz) FATAL("gsub result1 %.30s too big; can't happen", buf); @@ -1965,8 +2027,8 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ } while (pmatch(pfa,t)); sptr = t; adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); - while ((*pb++ = *sptr++) != 0) - ; + while ((*pb++ = *sptr++) != '\0') + continue; done: if (pb < buf + bufsz) *pb = '\0'; else if (*(pb-1) != '\0') diff --git a/testdir/T.builtin b/testdir/T.builtin index 411a5e5..b36f6cb 100755 --- a/testdir/T.builtin +++ b/testdir/T.builtin @@ -29,6 +29,14 @@ $awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}' >foo1 echo 'hello, world!|HELLO, WORLD!|hello, WORLD!' >foo2 diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower)' + +if locale -a | grep -qsi de_DE.UTF-8; then + (export LANG=de_DE.UTF-8 && echo 'Dürst' | + $awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1 + echo 'dürst|DÜRST|Dürst' >foo2 + diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8' +fi + $awk 'BEGIN { j = 1; sprintf("%d", 99, ++j) # does j get incremented? 
if (j != 2) diff --git a/tran.c b/tran.c index d659cfa..4efaa21 100644 --- a/tran.c +++ b/tran.c @@ -344,7 +344,7 @@ void funnyvar(Cell *vp, const char *rw) if (vp->tval & FCN) FATAL("can't %s %s; it's a function.", rw, vp->nval); WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o", - vp, vp->nval, vp->sval, vp->fval, vp->tval); + (void *)vp, vp->nval, vp->sval, vp->fval, vp->tval); } char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ @@ -516,6 +516,17 @@ char *tostring(const char *s) /* make a copy of string s */ return(p); } +char *tostringN(const char *s, size_t n) /* make a copy of string s */ +{ + char *p; + + p = malloc(n); + if (p == NULL) + FATAL("out of space in tostring on %s", s); + strcpy(p, s); + return(p); +} + Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */ { Cell *c; -- cgit v1.2.3 From 3c755f73f48c4a80e6014cc99ff4acc6dd91dd82 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Thu, 6 Feb 2020 21:45:46 +0200 Subject: Fix closeall for portability. --- run.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/run.c b/run.c index 0d5e6b4..22f4814 100644 --- a/run.c +++ b/run.c @@ -1858,9 +1858,8 @@ void closeall(void) continue; if (ferror(files[i].fp)) FATAL("i/o error occurred on %s", files[i].fname); - if (i == 0) - stat = fpurge(files[i].fp) == EOF; - else if (i <= 2) + + if (i > 0 && i < 3) stat = fflush(files[i].fp) == EOF; else if (files[i].mode == '|' || files[i].mode == LE) stat = pclose(files[i].fp) == -1; -- cgit v1.2.3 From 8447cc9d47e7d2d829cb5318824c3c07002b9ab7 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Thu, 6 Feb 2020 21:47:31 +0200 Subject: Update version and FIXES. --- FIXES | 4 ++++ main.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/FIXES b/FIXES index 19afa0b..598608a 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,10 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. 
+February 6, 2020: + Additional small cleanups from Christos Zoulas. awk is now + a little more robust about reporting I/O errors upon exit. + January 31, 2020: Merge PR #70, which avoids use of variable length arrays. Thanks to GitHub user michaelforney. Fix issue #60 ({0} in interval diff --git a/main.c b/main.c index 2476320..832d971 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20200131"; +const char *version = "version 20200206"; #define DEBUG #include -- cgit v1.2.3 From d7a7e4d1479ac90299401df3d5de513d1190b39f Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Thu, 6 Feb 2020 22:08:20 +0200 Subject: Revert zoulas changes until we can keep tests passing. --- FIXES | 4 - main.c | 2 +- proto.h | 10 +- run.c | 275 +++++++++++++++++++++--------------------------------- testdir/T.builtin | 8 -- tran.c | 13 +-- 6 files changed, 112 insertions(+), 200 deletions(-) diff --git a/FIXES b/FIXES index 598608a..19afa0b 100644 --- a/FIXES +++ b/FIXES @@ -25,10 +25,6 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. -February 6, 2020: - Additional small cleanups from Christos Zoulas. awk is now - a little more robust about reporting I/O errors upon exit. - January 31, 2020: Merge PR #70, which avoids use of variable length arrays. Thanks to GitHub user michaelforney. Fix issue #60 ({0} in interval diff --git a/main.c b/main.c index 832d971..2476320 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
****************************************************************/ -const char *version = "version 20200206"; +const char *version = "version 20200131"; #define DEBUG #include diff --git a/proto.h b/proto.h index aac2547..6918e1f 100644 --- a/proto.h +++ b/proto.h @@ -111,7 +111,6 @@ extern double getfval(Cell *); extern char *getsval(Cell *); extern char *getpssval(Cell *); /* for print */ extern char *tostring(const char *); -extern char *tostringN(const char *, size_t); extern char *qstring(const char *, int); extern Cell *catstr(Cell *, Cell *); @@ -136,12 +135,9 @@ extern void yyerror(const char *); extern void fpecatch(int); extern void bracecheck(void); extern void bcheck2(int, int, int); -extern void SYNTAX(const char *, ...) - __attribute__((__format__(__printf__, 1, 2))); -extern void FATAL(const char *, ...) - __attribute__((__format__(__printf__, 1, 2), __noreturn__)); -extern void WARNING(const char *, ...) - __attribute__((__format__(__printf__, 1, 2))); +extern void SYNTAX(const char *, ...); +extern void FATAL(const char *, ...) __attribute__((__noreturn__)); +extern void WARNING(const char *, ...); extern void error(void); extern void eprint(void); extern void bclass(int); diff --git a/run.c b/run.c index 22f4814..6eafc0b 100644 --- a/run.c +++ b/run.c @@ -25,8 +25,6 @@ THIS SOFTWARE. #define DEBUG #include #include -#include -#include #include #include #include @@ -39,12 +37,11 @@ THIS SOFTWARE. #include "awk.h" #include "ytab.h" -static void stdinit(void); -static void flush_all(void); +#define tempfree(x) if (istemp(x)) tfree(x); else + +/* +#undef tempfree -#if 1 -#define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) -#else void tempfree(Cell *p) { if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { WARNING("bad csub %d in Cell %d %s", @@ -53,7 +50,7 @@ void tempfree(Cell *p) { if (istemp(p)) tfree(p); } -#endif +*/ /* do we really need these? 
*/ /* #ifdef _NFILE */ @@ -134,6 +131,7 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, void run(Node *a) /* execution of parse tree starts here */ { + extern void stdinit(void); stdinit(); execute(a); @@ -222,11 +220,11 @@ struct Frame { /* stack frame for awk function calls */ struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ int nframe = 0; /* number of frames allocated */ -struct Frame *frp = NULL; /* frame pointer. bottom level unused */ +struct Frame *fp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ { - static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL }; + static Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL }; int i, ncall, ndef; int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ Node *x; @@ -239,21 +237,21 @@ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ if (!isfcn(fcn)) FATAL("calling undefined function %s", s); if (frame == NULL) { - frp = frame = calloc(nframe += 100, sizeof(*frame)); + fp = frame = calloc(nframe += 100, sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames calling %s", s); } for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ ncall++; ndef = (int) fcn->fval; /* args in defn */ - dprintf( ("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)) ); + dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) ); if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", s, ncall, ndef); if (ncall + ndef > NARGS) FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ - dprintf( ("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)) ); + dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) ); y = execute(x); oargs[i] = y; dprintf( ("args[%d]: %s %f <%s>, t=%o\n", @@ -270,25 +268,25 @@ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ args[i] = gettemp(); *args[i] = newcopycell; } - frp++; /* now ok to up frame */ - if (frp >= frame + nframe) { - int dfp = frp - frame; /* old index */ + fp++; /* now ok to up frame */ + if (fp >= frame + nframe) { + int dfp = fp - frame; /* old index */ frame = realloc(frame, (nframe += 100) * sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames in %s", s); - frp = frame + dfp; + fp = frame + dfp; } - frp->fcncell = fcn; - frp->args = args; - frp->nargs = ndef; /* number defined with (excess are locals) */ - frp->retval = gettemp(); + fp->fcncell = fcn; + fp->args = args; + fp->nargs = ndef; /* number defined with (excess are locals) */ + fp->retval = gettemp(); - dprintf( ("start exec of %s, frp=%d\n", s, (int) (frp-frame)) ); + dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) ); y = execute((Node *)(fcn->sval)); /* execute body */ - dprintf( ("finished exec of %s, frp=%d\n", s, (int) (frp-frame)) ); + dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) ); for (i = 0; i < ndef; i++) { - Cell *t = frp->args[i]; + Cell *t = fp->args[i]; if (isarr(t)) { if (t->csub == CCOPY) { if (i >= ncall) { @@ -317,9 +315,9 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ if (freed == 0) { tempfree(y); /* don't free twice! 
*/ } - z = frp->retval; /* return value */ + z = fp->retval; /* return value */ dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); - frp--; + fp--; return(z); } @@ -346,11 +344,11 @@ Cell *arg(Node **a, int n) /* nth argument of a function */ { n = ptoi(a[0]); /* argument number, counting from 0 */ - dprintf( ("arg(%d), frp->nargs=%d\n", n, frp->nargs) ); - if (n+1 > frp->nargs) + dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) ); + if (n+1 > fp->nargs) FATAL("argument #%d of function %s was not supplied", - n+1, frp->fcncell->nval); - return frp->args[n]; + n+1, fp->fcncell->nval); + return fp->args[n]; } Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ @@ -369,14 +367,14 @@ Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ if (a[0] != NULL) { y = execute(a[0]); if ((y->tval & (STR|NUM)) == (STR|NUM)) { - setsval(frp->retval, getsval(y)); - frp->retval->fval = getfval(y); - frp->retval->tval |= NUM; + setsval(fp->retval, getsval(y)); + fp->retval->fval = getfval(y); + fp->retval->tval |= NUM; } else if (y->tval & STR) - setsval(frp->retval, getsval(y)); + setsval(fp->retval, getsval(y)); else if (y->tval & NUM) - setfval(frp->retval, getfval(y)); + setfval(fp->retval, getfval(y)); else /* can't happen */ FATAL("bad type variable %d", y->tval); tempfree(y); @@ -795,8 +793,8 @@ Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ z = gettemp(); for (p1 = s1; *p1 != '\0'; p1++) { - for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) - continue; + for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++) + ; if (*p2 == '\0') { v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ break; @@ -1066,7 +1064,7 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. 
also -a[0] */ case UMINUS: i = -i; break; - case UPLUS: /* handled by getfval(), above */ + case UPLUS: /* handled by getfval(), above */ break; case POWER: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ @@ -1303,7 +1301,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ setsymtab(num, s, 0.0, STR, (Array *) ap->sval); setptr(patbeg, temp); s = patbeg + patlen; - if (*(patbeg+patlen-1) == '\0' || *s == '\0') { + if (*(patbeg+patlen-1) == 0 || *s == 0) { n++; snprintf(num, sizeof(num), "%d", n); setsymtab(num, "", 0.0, STR, (Array *) ap->sval); @@ -1324,16 +1322,15 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ pfa = NULL; } else if (sep == ' ') { for (n = 0; ; ) { -#define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') - while (ISWS(*s)) + while (*s == ' ' || *s == '\t' || *s == '\n') s++; - if (*s == '\0') + if (*s == 0) break; n++; t = s; do s++; - while (*s != '\0' && !ISWS(*s)); + while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0'); temp = *s; setptr(s, '\0'); snprintf(num, sizeof(num), "%d", n); @@ -1342,22 +1339,22 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); - if (*s != '\0') + if (*s != 0) s++; } } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ - for (n = 0; *s != '\0'; s++) { + for (n = 0; *s != 0; s++) { char buf[2]; n++; snprintf(num, sizeof(num), "%d", n); buf[0] = *s; - buf[1] = '\0'; + buf[1] = 0; if (isdigit((uschar)buf[0])) setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); else setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); } - } else if (*s != '\0') { + } else if (*s != 0) { for (;;) { n++; t = s; @@ -1371,7 +1368,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); - if (*s++ == '\0') + if (*s++ == 0) break; } } @@ -1508,73 +1505,16 @@ Cell 
*instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ return True; } -static char *nawk_convert(const char *s, int (*fun_c)(int), - wint_t (*fun_wc)(wint_t)) -{ - char *buf = NULL; - char *pbuf = NULL; - const char *ps = NULL; - size_t n = 0; - mbstate_t mbs, mbs2; - wchar_t wc; - size_t sz = MB_CUR_MAX; - - if (sz == 1) { - buf = tostring(s); - - for (pbuf = buf; *pbuf; pbuf++) - *pbuf = fun_c((uschar)*pbuf); - - return buf; - } else { - /* upper/lower character may be shorter/longer */ - buf = tostringN(s, strlen(s) * sz + 1); - - memset(&mbs, 0, sizeof(mbs)); - memset(&mbs2, 0, sizeof(mbs2)); - - ps = s; - pbuf = buf; - while (n = mbrtowc(&wc, ps, sz, &mbs), - n > 0 && n != (size_t)-1 && n != (size_t)-2) - { - ps += n; - - n = wcrtomb(pbuf, fun_wc(wc), &mbs2); - if (n == (size_t)-1) - FATAL("illegal wide character %s", s); - - pbuf += n; - } - - *pbuf = '\0'; - - if (n) - FATAL("illegal byte sequence %s", s); - - return buf; - } -} - -static char *nawk_toupper(const char *s) -{ - return nawk_convert(s, toupper, towupper); -} - -static char *nawk_tolower(const char *s) -{ - return nawk_convert(s, tolower, towlower); -} - Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ { Cell *x, *y; Awkfloat u; int t; Awkfloat tmp; - char *buf; + char *p, *buf; Node *nextarg; FILE *fp; + void flush_all(void); int status = 0; t = ptoi(a[0]); @@ -1645,10 +1585,16 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis break; case FTOUPPER: case FTOLOWER: - if (t == FTOUPPER) - buf = nawk_toupper(getsval(x)); - else - buf = nawk_tolower(getsval(x)); + buf = tostring(getsval(x)); + if (t == FTOUPPER) { + for (p = buf; *p; p++) + if (islower((uschar) *p)) + *p = toupper((uschar)*p); + } else { + for (p = buf; *p; p++) + if (isupper((uschar) *p)) + *p = tolower((uschar)*p); + } tempfree(x); x = gettemp(); setsval(x, buf); @@ -1731,14 +1677,14 @@ struct files { int mode; /* '|', 'a', 'w' => LE/LT, GT */ } *files; -size_t nfiles; +int nfiles; -static void stdinit(void) /* in case stdin, etc., are not constants */ +void stdinit(void) /* in case stdin, etc., are not constants */ { nfiles = FOPEN_MAX; files = calloc(nfiles, sizeof(*files)); if (files == NULL) - FATAL("can't allocate file memory for %zu files", nfiles); + FATAL("can't allocate file memory for %u files", nfiles); files[0].fp = stdin; files[0].fname = "/dev/stdin"; files[0].mode = LT; @@ -1753,13 +1699,12 @@ static void stdinit(void) /* in case stdin, etc., are not constants */ FILE *openfile(int a, const char *us) { const char *s = us; - size_t i; - int m; + int i, m; FILE *fp = NULL; if (*s == '\0') FATAL("null file name in print or getline"); - for (i = 0; i < nfiles; i++) + for (i=0; i < nfiles; i++) if (files[i].fname && strcmp(s, files[i].fname) == 0) { if (a == files[i].mode || (a==APPEND && files[i].mode==GT)) return files[i].fp; @@ -1769,15 +1714,15 @@ FILE *openfile(int a, const char *us) if (a == FFLUSH) /* didn't find it, so don't create it! 
*/ return NULL; - for (i = 0; i < nfiles; i++) + for (i=0; i < nfiles; i++) if (files[i].fp == NULL) break; if (i >= nfiles) { struct files *nf; - size_t nnf = nfiles + FOPEN_MAX; + int nnf = nfiles + FOPEN_MAX; nf = realloc(files, nnf * sizeof(*nf)); if (nf == NULL) - FATAL("cannot grow files for %s and %zu files", s, nnf); + FATAL("cannot grow files for %s and %d files", s, nnf); memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); nfiles = nnf; files = nf; @@ -1809,7 +1754,7 @@ FILE *openfile(int a, const char *us) const char *filename(FILE *fp) { - size_t i; + int i; for (i = 0; i < nfiles; i++) if (fp == files[i].fp) @@ -1820,58 +1765,52 @@ const char *filename(FILE *fp) Cell *closefile(Node **a, int n) { Cell *x; - size_t i; - bool stat; + int i, stat; x = execute(a[0]); getsval(x); - stat = true; + stat = -1; for (i = 0; i < nfiles; i++) { - if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) - continue; - if (ferror(files[i].fp)) - FATAL("i/o error occurred on %s", files[i].fname); - if (files[i].mode == '|' || files[i].mode == LE) - stat = pclose(files[i].fp) == -1; - else - stat = fclose(files[i].fp) == EOF; - if (stat) - FATAL("i/o error occurred closing %s", files[i].fname); - if (i > 2) /* don't do /dev/std... */ - xfree(files[i].fname); - files[i].fname = NULL; /* watch out for ref thru this */ - files[i].fp = NULL; + if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) { + if (ferror(files[i].fp)) + FATAL( "i/o error occurred on %s", files[i].fname ); + if (files[i].mode == '|' || files[i].mode == LE) + stat = pclose(files[i].fp); + else + stat = fclose(files[i].fp); + if (stat == EOF) + FATAL( "i/o error occurred closing %s", files[i].fname ); + if (i > 2) /* don't do /dev/std... */ + xfree(files[i].fname); + files[i].fname = NULL; /* watch out for ref thru this */ + files[i].fp = NULL; + } } tempfree(x); x = gettemp(); - setfval(x, (Awkfloat) (stat ? 
-1 : 0)); + setfval(x, (Awkfloat) stat); return(x); } void closeall(void) { - size_t i; - bool stat; - - for (i = 0; i < nfiles; i++) { - if (!files[i].fp) - continue; - if (ferror(files[i].fp)) - FATAL("i/o error occurred on %s", files[i].fname); - - if (i > 0 && i < 3) - stat = fflush(files[i].fp) == EOF; - else if (files[i].mode == '|' || files[i].mode == LE) - stat = pclose(files[i].fp) == -1; - else - stat = fclose(files[i].fp) == EOF; - if (stat) - FATAL("i/o error occurred while closing %s", - files[i].fname); + int i, stat; + + for (i = 0; i < FOPEN_MAX; i++) { + if (files[i].fp) { + if (ferror(files[i].fp)) + FATAL( "i/o error occurred on %s", files[i].fname ); + if (files[i].mode == '|' || files[i].mode == LE) + stat = pclose(files[i].fp); + else + stat = fclose(files[i].fp); + if (stat == EOF) + FATAL( "i/o error occurred while closing %s", files[i].fname ); + } } } -static void flush_all(void) +void flush_all(void) { int i; @@ -1910,7 +1849,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */ while (sptr < patbeg) *pb++ = *sptr++; sptr = getsval(y); - while (*sptr != '\0') { + while (*sptr != 0) { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1928,8 +1867,8 @@ Cell *sub(Node **a, int nnn) /* substitute command */ sptr = patbeg + patlen; if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); - while ((*pb++ = *sptr++) != '\0') - continue; + while ((*pb++ = *sptr++) != 0) + ; } if (pb > buf + bufsz) FATAL("sub result2 %.30s too big; can't happen", buf); @@ -1972,11 +1911,11 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ pb = buf; rptr = getsval(y); do { - if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ + if (patlen == 0 && *patbeg != 0) { /* matched empty string */ if (mflag == 0) { /* can replace empty */ num++; sptr = rptr; - while (*sptr != '\0') { + while (*sptr != 0) { adjbuf(&buf, &bufsz, 5+pb-buf, 
recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1989,7 +1928,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb++ = *sptr++; } } - if (*t == '\0') /* at end */ + if (*t == 0) /* at end */ goto done; adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); *pb++ = *t++; @@ -2004,7 +1943,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ while (sptr < patbeg) *pb++ = *sptr++; sptr = rptr; - while (*sptr != '\0') { + while (*sptr != 0) { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -2017,7 +1956,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb++ = *sptr++; } t = patbeg + patlen; - if (patlen == 0 || *t == '\0' || *(t-1) == '\0') + if (patlen == 0 || *t == 0 || *(t-1) == 0) goto done; if (pb > buf + bufsz) FATAL("gsub result1 %.30s too big; can't happen", buf); @@ -2026,8 +1965,8 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ } while (pmatch(pfa,t)); sptr = t; adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); - while ((*pb++ = *sptr++) != '\0') - continue; + while ((*pb++ = *sptr++) != 0) + ; done: if (pb < buf + bufsz) *pb = '\0'; else if (*(pb-1) != '\0') diff --git a/testdir/T.builtin b/testdir/T.builtin index b36f6cb..411a5e5 100755 --- a/testdir/T.builtin +++ b/testdir/T.builtin @@ -29,14 +29,6 @@ $awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}' >foo1 echo 'hello, world!|HELLO, WORLD!|hello, WORLD!' >foo2 diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower)' - -if locale -a | grep -qsi de_DE.UTF-8; then - (export LANG=de_DE.UTF-8 && echo 'Dürst' | - $awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1 - echo 'dürst|DÜRST|Dürst' >foo2 - diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8' -fi - $awk 'BEGIN { j = 1; sprintf("%d", 99, ++j) # does j get incremented? 
if (j != 2) diff --git a/tran.c b/tran.c index 4efaa21..d659cfa 100644 --- a/tran.c +++ b/tran.c @@ -344,7 +344,7 @@ void funnyvar(Cell *vp, const char *rw) if (vp->tval & FCN) FATAL("can't %s %s; it's a function.", rw, vp->nval); WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o", - (void *)vp, vp->nval, vp->sval, vp->fval, vp->tval); + vp, vp->nval, vp->sval, vp->fval, vp->tval); } char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ @@ -516,17 +516,6 @@ char *tostring(const char *s) /* make a copy of string s */ return(p); } -char *tostringN(const char *s, size_t n) /* make a copy of string s */ -{ - char *p; - - p = malloc(n); - if (p == NULL) - FATAL("out of space in tostring on %s", s); - strcpy(p, s); - return(p); -} - Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */ { Cell *c; -- cgit v1.2.3 From 5068d20ef6ec53686488f1e48bd99e35e98efa40 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Thu, 6 Feb 2020 22:27:31 +0200 Subject: Restore zoulas fixes, step 1. --- FIXES | 4 ++ main.c | 2 +- proto.h | 10 ++- run.c | 207 ++++++++++++++++++++++++++++++++++-------------------- testdir/T.builtin | 8 +++ tran.c | 13 +++- 6 files changed, 163 insertions(+), 81 deletions(-) diff --git a/FIXES b/FIXES index 19afa0b..598608a 100644 --- a/FIXES +++ b/FIXES @@ -25,6 +25,10 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +February 6, 2020: + Additional small cleanups from Christos Zoulas. awk is now + a little more robust about reporting I/O errors upon exit. + January 31, 2020: Merge PR #70, which avoids use of variable length arrays. Thanks to GitHub user michaelforney. Fix issue #60 ({0} in interval diff --git a/main.c b/main.c index 2476320..832d971 100644 --- a/main.c +++ b/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
****************************************************************/ -const char *version = "version 20200131"; +const char *version = "version 20200206"; #define DEBUG #include diff --git a/proto.h b/proto.h index 6918e1f..aac2547 100644 --- a/proto.h +++ b/proto.h @@ -111,6 +111,7 @@ extern double getfval(Cell *); extern char *getsval(Cell *); extern char *getpssval(Cell *); /* for print */ extern char *tostring(const char *); +extern char *tostringN(const char *, size_t); extern char *qstring(const char *, int); extern Cell *catstr(Cell *, Cell *); @@ -135,9 +136,12 @@ extern void yyerror(const char *); extern void fpecatch(int); extern void bracecheck(void); extern void bcheck2(int, int, int); -extern void SYNTAX(const char *, ...); -extern void FATAL(const char *, ...) __attribute__((__noreturn__)); -extern void WARNING(const char *, ...); +extern void SYNTAX(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +extern void FATAL(const char *, ...) + __attribute__((__format__(__printf__, 1, 2), __noreturn__)); +extern void WARNING(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); extern void error(void); extern void eprint(void); extern void bclass(int); diff --git a/run.c b/run.c index 6eafc0b..6abb8a3 100644 --- a/run.c +++ b/run.c @@ -25,6 +25,8 @@ THIS SOFTWARE. #define DEBUG #include #include +#include +#include #include #include #include @@ -37,11 +39,12 @@ THIS SOFTWARE. #include "awk.h" #include "ytab.h" -#define tempfree(x) if (istemp(x)) tfree(x); else - -/* -#undef tempfree +static void stdinit(void); +static void flush_all(void); +#if 1 +#define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) +#else void tempfree(Cell *p) { if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { WARNING("bad csub %d in Cell %d %s", @@ -50,7 +53,7 @@ void tempfree(Cell *p) { if (istemp(p)) tfree(p); } -*/ +#endif /* do we really need these? 
*/ /* #ifdef _NFILE */ @@ -131,7 +134,6 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, void run(Node *a) /* execution of parse tree starts here */ { - extern void stdinit(void); stdinit(); execute(a); @@ -220,11 +222,11 @@ struct Frame { /* stack frame for awk function calls */ struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ int nframe = 0; /* number of frames allocated */ -struct Frame *fp = NULL; /* frame pointer. bottom level unused */ +struct Frame *frp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ { - static Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL }; + static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL }; int i, ncall, ndef; int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ Node *x; @@ -237,21 +239,21 @@ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ if (!isfcn(fcn)) FATAL("calling undefined function %s", s); if (frame == NULL) { - fp = frame = calloc(nframe += 100, sizeof(*frame)); + frp = frame = calloc(nframe += 100, sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames calling %s", s); } for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ ncall++; ndef = (int) fcn->fval; /* args in defn */ - dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) ); + dprintf( ("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)) ); if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", s, ncall, ndef); if (ncall + ndef > NARGS) FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ - dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) ); + dprintf( ("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)) ); y = execute(x); oargs[i] = y; dprintf( ("args[%d]: %s %f <%s>, t=%o\n", @@ -268,25 +270,25 @@ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ args[i] = gettemp(); *args[i] = newcopycell; } - fp++; /* now ok to up frame */ - if (fp >= frame + nframe) { - int dfp = fp - frame; /* old index */ + frp++; /* now ok to up frame */ + if (frp >= frame + nframe) { + int dfp = frp - frame; /* old index */ frame = realloc(frame, (nframe += 100) * sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames in %s", s); - fp = frame + dfp; + frp = frame + dfp; } - fp->fcncell = fcn; - fp->args = args; - fp->nargs = ndef; /* number defined with (excess are locals) */ - fp->retval = gettemp(); + frp->fcncell = fcn; + frp->args = args; + frp->nargs = ndef; /* number defined with (excess are locals) */ + frp->retval = gettemp(); - dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + dprintf( ("start exec of %s, frp=%d\n", s, (int) (frp-frame)) ); y = execute((Node *)(fcn->sval)); /* execute body */ - dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + dprintf( ("finished exec of %s, frp=%d\n", s, (int) (frp-frame)) ); for (i = 0; i < ndef; i++) { - Cell *t = fp->args[i]; + Cell *t = frp->args[i]; if (isarr(t)) { if (t->csub == CCOPY) { if (i >= ncall) { @@ -315,9 +317,9 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ if (freed == 0) { tempfree(y); /* don't free twice! 
*/ } - z = fp->retval; /* return value */ + z = frp->retval; /* return value */ dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); - fp--; + frp--; return(z); } @@ -344,11 +346,11 @@ Cell *arg(Node **a, int n) /* nth argument of a function */ { n = ptoi(a[0]); /* argument number, counting from 0 */ - dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) ); - if (n+1 > fp->nargs) + dprintf( ("arg(%d), frp->nargs=%d\n", n, frp->nargs) ); + if (n+1 > frp->nargs) FATAL("argument #%d of function %s was not supplied", - n+1, fp->fcncell->nval); - return fp->args[n]; + n+1, frp->fcncell->nval); + return frp->args[n]; } Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ @@ -367,14 +369,14 @@ Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ if (a[0] != NULL) { y = execute(a[0]); if ((y->tval & (STR|NUM)) == (STR|NUM)) { - setsval(fp->retval, getsval(y)); - fp->retval->fval = getfval(y); - fp->retval->tval |= NUM; + setsval(frp->retval, getsval(y)); + frp->retval->fval = getfval(y); + frp->retval->tval |= NUM; } else if (y->tval & STR) - setsval(fp->retval, getsval(y)); + setsval(frp->retval, getsval(y)); else if (y->tval & NUM) - setfval(fp->retval, getfval(y)); + setfval(frp->retval, getfval(y)); else /* can't happen */ FATAL("bad type variable %d", y->tval); tempfree(y); @@ -793,8 +795,8 @@ Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ z = gettemp(); for (p1 = s1; *p1 != '\0'; p1++) { - for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++) - ; + for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) + continue; if (*p2 == '\0') { v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ break; @@ -1064,7 +1066,7 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. 
also -a[0] */ case UMINUS: i = -i; break; - case UPLUS: /* handled by getfval(), above */ + case UPLUS: /* handled by getfval(), above */ break; case POWER: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ @@ -1301,7 +1303,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ setsymtab(num, s, 0.0, STR, (Array *) ap->sval); setptr(patbeg, temp); s = patbeg + patlen; - if (*(patbeg+patlen-1) == 0 || *s == 0) { + if (*(patbeg+patlen-1) == '\0' || *s == '\0') { n++; snprintf(num, sizeof(num), "%d", n); setsymtab(num, "", 0.0, STR, (Array *) ap->sval); @@ -1322,15 +1324,16 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ pfa = NULL; } else if (sep == ' ') { for (n = 0; ; ) { - while (*s == ' ' || *s == '\t' || *s == '\n') +#define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') + while (ISWS(*s)) s++; - if (*s == 0) + if (*s == '\0') break; n++; t = s; do s++; - while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0'); + while (*s != '\0' && !ISWS(*s)); temp = *s; setptr(s, '\0'); snprintf(num, sizeof(num), "%d", n); @@ -1339,22 +1342,22 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); - if (*s != 0) + if (*s != '\0') s++; } } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ - for (n = 0; *s != 0; s++) { + for (n = 0; *s != '\0'; s++) { char buf[2]; n++; snprintf(num, sizeof(num), "%d", n); buf[0] = *s; - buf[1] = 0; + buf[1] = '\0'; if (isdigit((uschar)buf[0])) setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); else setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); } - } else if (*s != 0) { + } else if (*s != '\0') { for (;;) { n++; t = s; @@ -1368,7 +1371,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); setptr(s, temp); - if (*s++ == 0) + if (*s++ == '\0') break; } } @@ -1505,16 +1508,73 @@ Cell 
*instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ return True; } +static char *nawk_convert(const char *s, int (*fun_c)(int), + wint_t (*fun_wc)(wint_t)) +{ + char *buf = NULL; + char *pbuf = NULL; + const char *ps = NULL; + size_t n = 0; + mbstate_t mbs, mbs2; + wchar_t wc; + size_t sz = MB_CUR_MAX; + + if (sz == 1) { + buf = tostring(s); + + for (pbuf = buf; *pbuf; pbuf++) + *pbuf = fun_c((uschar)*pbuf); + + return buf; + } else { + /* upper/lower character may be shorter/longer */ + buf = tostringN(s, strlen(s) * sz + 1); + + memset(&mbs, 0, sizeof(mbs)); + memset(&mbs2, 0, sizeof(mbs2)); + + ps = s; + pbuf = buf; + while (n = mbrtowc(&wc, ps, sz, &mbs), + n > 0 && n != (size_t)-1 && n != (size_t)-2) + { + ps += n; + + n = wcrtomb(pbuf, fun_wc(wc), &mbs2); + if (n == (size_t)-1) + FATAL("illegal wide character %s", s); + + pbuf += n; + } + + *pbuf = '\0'; + + if (n) + FATAL("illegal byte sequence %s", s); + + return buf; + } +} + +static char *nawk_toupper(const char *s) +{ + return nawk_convert(s, toupper, towupper); +} + +static char *nawk_tolower(const char *s) +{ + return nawk_convert(s, tolower, towlower); +} + Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ { Cell *x, *y; Awkfloat u; int t; Awkfloat tmp; - char *p, *buf; + char *buf; Node *nextarg; FILE *fp; - void flush_all(void); int status = 0; t = ptoi(a[0]); @@ -1585,16 +1645,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis break; case FTOUPPER: case FTOLOWER: - buf = tostring(getsval(x)); - if (t == FTOUPPER) { - for (p = buf; *p; p++) - if (islower((uschar) *p)) - *p = toupper((uschar)*p); - } else { - for (p = buf; *p; p++) - if (isupper((uschar) *p)) - *p = tolower((uschar)*p); - } + if (t == FTOUPPER) + buf = nawk_toupper(getsval(x)); + else + buf = nawk_tolower(getsval(x)); tempfree(x); x = gettemp(); setsval(x, buf); @@ -1677,14 +1731,14 @@ struct files { int mode; /* '|', 'a', 'w' => LE/LT, GT */ } *files; -int nfiles; +size_t nfiles; -void stdinit(void) /* in case stdin, etc., are not constants */ +static void stdinit(void) /* in case stdin, etc., are not constants */ { nfiles = FOPEN_MAX; files = calloc(nfiles, sizeof(*files)); if (files == NULL) - FATAL("can't allocate file memory for %u files", nfiles); + FATAL("can't allocate file memory for %zu files", nfiles); files[0].fp = stdin; files[0].fname = "/dev/stdin"; files[0].mode = LT; @@ -1699,12 +1753,13 @@ void stdinit(void) /* in case stdin, etc., are not constants */ FILE *openfile(int a, const char *us) { const char *s = us; - int i, m; + size_t i; + int m; FILE *fp = NULL; if (*s == '\0') FATAL("null file name in print or getline"); - for (i=0; i < nfiles; i++) + for (i = 0; i < nfiles; i++) if (files[i].fname && strcmp(s, files[i].fname) == 0) { if (a == files[i].mode || (a==APPEND && files[i].mode==GT)) return files[i].fp; @@ -1714,15 +1769,15 @@ FILE *openfile(int a, const char *us) if (a == FFLUSH) /* didn't find it, so don't create it! 
*/ return NULL; - for (i=0; i < nfiles; i++) + for (i = 0; i < nfiles; i++) if (files[i].fp == NULL) break; if (i >= nfiles) { struct files *nf; - int nnf = nfiles + FOPEN_MAX; + size_t nnf = nfiles + FOPEN_MAX; nf = realloc(files, nnf * sizeof(*nf)); if (nf == NULL) - FATAL("cannot grow files for %s and %d files", s, nnf); + FATAL("cannot grow files for %s and %zu files", s, nnf); memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); nfiles = nnf; files = nf; @@ -1754,7 +1809,7 @@ FILE *openfile(int a, const char *us) const char *filename(FILE *fp) { - int i; + size_t i; for (i = 0; i < nfiles; i++) if (fp == files[i].fp) @@ -1849,7 +1904,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */ while (sptr < patbeg) *pb++ = *sptr++; sptr = getsval(y); - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1867,8 +1922,8 @@ Cell *sub(Node **a, int nnn) /* substitute command */ sptr = patbeg + patlen; if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); - while ((*pb++ = *sptr++) != 0) - ; + while ((*pb++ = *sptr++) != '\0') + continue; } if (pb > buf + bufsz) FATAL("sub result2 %.30s too big; can't happen", buf); @@ -1911,11 +1966,11 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ pb = buf; rptr = getsval(y); do { - if (patlen == 0 && *patbeg != 0) { /* matched empty string */ + if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ if (mflag == 0) { /* can replace empty */ num++; sptr = rptr; - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1928,7 +1983,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb++ = *sptr++; } } - if (*t == 0) /* at end */ + if (*t == '\0') /* at end */ goto done; adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); *pb++ = *t++; @@ -1943,7 +1998,7 @@ Cell 
*gsub(Node **a, int nnn) /* global substitute */ while (sptr < patbeg) *pb++ = *sptr++; sptr = rptr; - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1956,7 +2011,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb++ = *sptr++; } t = patbeg + patlen; - if (patlen == 0 || *t == 0 || *(t-1) == 0) + if (patlen == 0 || *t == '\0' || *(t-1) == '\0') goto done; if (pb > buf + bufsz) FATAL("gsub result1 %.30s too big; can't happen", buf); @@ -1965,8 +2020,8 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ } while (pmatch(pfa,t)); sptr = t; adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); - while ((*pb++ = *sptr++) != 0) - ; + while ((*pb++ = *sptr++) != '\0') + continue; done: if (pb < buf + bufsz) *pb = '\0'; else if (*(pb-1) != '\0') diff --git a/testdir/T.builtin b/testdir/T.builtin index 411a5e5..b36f6cb 100755 --- a/testdir/T.builtin +++ b/testdir/T.builtin @@ -29,6 +29,14 @@ $awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}' >foo1 echo 'hello, world!|HELLO, WORLD!|hello, WORLD!' >foo2 diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower)' + +if locale -a | grep -qsi de_DE.UTF-8; then + (export LANG=de_DE.UTF-8 && echo 'Dürst' | + $awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1 + echo 'dürst|DÜRST|Dürst' >foo2 + diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8' +fi + $awk 'BEGIN { j = 1; sprintf("%d", 99, ++j) # does j get incremented? 
if (j != 2) diff --git a/tran.c b/tran.c index d659cfa..4efaa21 100644 --- a/tran.c +++ b/tran.c @@ -344,7 +344,7 @@ void funnyvar(Cell *vp, const char *rw) if (vp->tval & FCN) FATAL("can't %s %s; it's a function.", rw, vp->nval); WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o", - vp, vp->nval, vp->sval, vp->fval, vp->tval); + (void *)vp, vp->nval, vp->sval, vp->fval, vp->tval); } char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ @@ -516,6 +516,17 @@ char *tostring(const char *s) /* make a copy of string s */ return(p); } +char *tostringN(const char *s, size_t n) /* make a copy of string s */ +{ + char *p; + + p = malloc(n); + if (p == NULL) + FATAL("out of space in tostring on %s", s); + strcpy(p, s); + return(p); +} + Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */ { Cell *c; -- cgit v1.2.3 From cd552112a7c9d4d8892190fef4b45c646d43ad5d Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Thu, 6 Feb 2020 22:32:55 +0200 Subject: Restore zoulas fixes, stages 2. --- run.c | 59 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/run.c b/run.c index 6abb8a3..0354ed2 100644 --- a/run.c +++ b/run.c @@ -1817,35 +1817,36 @@ const char *filename(FILE *fp) return "???"; } -Cell *closefile(Node **a, int n) -{ - Cell *x; - int i, stat; - - x = execute(a[0]); - getsval(x); - stat = -1; - for (i = 0; i < nfiles; i++) { - if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) { - if (ferror(files[i].fp)) - FATAL( "i/o error occurred on %s", files[i].fname ); - if (files[i].mode == '|' || files[i].mode == LE) - stat = pclose(files[i].fp); - else - stat = fclose(files[i].fp); - if (stat == EOF) - FATAL( "i/o error occurred closing %s", files[i].fname ); - if (i > 2) /* don't do /dev/std... 
*/ - xfree(files[i].fname); - files[i].fname = NULL; /* watch out for ref thru this */ - files[i].fp = NULL; - } - } - tempfree(x); - x = gettemp(); - setfval(x, (Awkfloat) stat); - return(x); -} + Cell *closefile(Node **a, int n) + { + Cell *x; + size_t i; + bool stat; + + x = execute(a[0]); + getsval(x); + stat = true; + for (i = 0; i < nfiles; i++) { + if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) + continue; + if (ferror(files[i].fp)) + FATAL("i/o error occurred on %s", files[i].fname); + if (files[i].mode == '|' || files[i].mode == LE) + stat = pclose(files[i].fp) == -1; + else + stat = fclose(files[i].fp) == EOF; + if (stat) + FATAL("i/o error occurred closing %s", files[i].fname); + if (i > 2) /* don't do /dev/std... */ + xfree(files[i].fname); + files[i].fname = NULL; /* watch out for ref thru this */ + files[i].fp = NULL; + } + tempfree(x); + x = gettemp(); + setfval(x, (Awkfloat) (stat ? -1 : 0)); + return(x); + } void closeall(void) { -- cgit v1.2.3 From e6fe674b404151c66440fd76bca7708cf3a2fe37 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Thu, 6 Feb 2020 22:38:30 +0200 Subject: Restore zoulas fixes, stage 3. --- run.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/run.c b/run.c index 0354ed2..4270e54 100644 --- a/run.c +++ b/run.c @@ -1850,23 +1850,24 @@ const char *filename(FILE *fp) void closeall(void) { - int i, stat; - - for (i = 0; i < FOPEN_MAX; i++) { - if (files[i].fp) { - if (ferror(files[i].fp)) - FATAL( "i/o error occurred on %s", files[i].fname ); - if (files[i].mode == '|' || files[i].mode == LE) - stat = pclose(files[i].fp); - else - stat = fclose(files[i].fp); - if (stat == EOF) - FATAL( "i/o error occurred while closing %s", files[i].fname ); - } + size_t i; + bool stat = false; + + for (i = 0; i < nfiles; i++) { + if (! 
files[i].fp) + continue; + if (ferror(files[i].fp)) + FATAL( "i/o error occurred on %s", files[i].fname ); + if (files[i].mode == '|' || files[i].mode == LE) + stat = pclose(files[i].fp) == -1; + else + stat = fclose(files[i].fp) == EOF; + if (stat) + FATAL( "i/o error occurred while closing %s", files[i].fname ); } } -void flush_all(void) +static void flush_all(void) { int i; -- cgit v1.2.3 From e9c99065fd31253a4db4a6bce673decd143f7a3e Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Fri, 7 Feb 2020 09:32:41 +0200 Subject: Update README.md PR instructions. --- README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 77701d7..ab6aae1 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,19 @@ Thanks. ## Submitting Pull Requests -Pull requests are welcome. However, please create them with a request +Pull requests are welcome. Some guidelines: + +* Please do not use functions or facilities that are not standard (e.g., +`strlcpy()`, `fpurge()`). + +* Please run the test suite and make sure that your changes pass before +posting the pull request. To do so: + + 1. Save the previous version of `awk` somewhere in your path. Call it `nawk` (for example). + 1. Run `oldawk=nawk make check > check.out 2>&1`. + 1. Search for `BAD` or `error` in the result. In general, look over it manually to make sure there are no errors. + +* Please create the pull request with a request to merge into the `staging` branch instead of into the `master` branch. This allows us to do testing, and to make any additional edits or changes after the merge but before merging to `master`. -- cgit v1.2.3