about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: zoulasc <zoulasc@users.noreply.github.com> 2020-02-28 06:23:54 -0500
committer: GitHub <noreply@github.com> 2020-02-28 13:23:54 +0200
commit: ffee7780fe08fa77f662a0903477545d9e26334f (patch)
tree: bdeafcbb782b3892f53cad636082292f1aae773a
parent: 91eaf7f7015ba2223e993532f5d65dfda4d1f33f (diff)
download: one-true-awk-ffee7780fe08fa77f662a0903477545d9e26334f.tar.gz
3 more fixes (#75)
* LC_NUMERIC radix issue. According to https://pubs.opengroup.org/onlinepubs/7990989775/xcu/awk.html, the period character is the character recognized in processing awk programs. Make it so that during output we also print the period character, since this is what other awk implementations do, and it makes sense from an interoperability point of view.
* Print "T.builtin" in the error message.
* Fix backslash continuation line handling.
* Keep track of RS processing so we apply the regex properly only once per record.
-rw-r--r--  lex.c              1
-rw-r--r--  lib.c             17
-rw-r--r--  main.c             1
-rw-r--r--  proto.h            4
-rw-r--r--  run.c             24
-rwxr-xr-x  testdir/T.builtin 18
-rwxr-xr-x  testdir/T.misc     8
7 files changed, 57 insertions, 16 deletions
diff --git a/lex.c b/lex.c
index 1c23212..9ff7069 100644
--- a/lex.c
+++ b/lex.c
@@ -388,6 +388,7 @@ int string(void)
case '\\':
c = input();
switch (c) {
+ case '\n': break;
case '"': *bp++ = '"'; break;
case 'n': *bp++ = '\n'; break;
case 't': *bp++ = '\t'; break;
diff --git a/lib.c b/lib.c
index 9351e16..9665244 100644
--- a/lib.c
+++ b/lib.c
@@ -35,6 +35,7 @@ THIS SOFTWARE.
char EMPTY[] = { '\0' };
FILE *infile = NULL;
+bool innew; /* true = infile has not been read by readrec */
char *file = EMPTY;
char *record;
int recsize = RECSIZE;
@@ -106,6 +107,7 @@ void initgetrec(void)
argno++;
}
infile = stdin; /* no filenames, so use stdin */
+ innew = true;
}
/*
@@ -175,7 +177,9 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record *
FATAL("can't open file %s", file);
setfval(fnrloc, 0.0);
}
- c = readrec(&buf, &bufsize, infile);
+ c = readrec(&buf, &bufsize, infile, innew);
+ if (innew)
+ innew = false;
if (c != 0 || buf[0] != '\0') { /* normal record */
if (isrecord) {
if (freeable(fldtab[0]))
@@ -213,7 +217,7 @@ void nextfile(void)
argno++;
}
-int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
+int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
{
int sep, c, isrec;
char *rr, *buf = *pbuf;
@@ -224,7 +228,14 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf *
bool found;
fa *pfa = makedfa(rs, 1);
- found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+ if (newflag)
+ found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+ else {
+ int tempstat = pfa->initstat;
+ pfa->initstat = 2;
+ found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+ pfa->initstat = tempstat;
+ }
if (found)
setptr(patbeg, '\0');
} else {
diff --git a/main.c b/main.c
index cc223f2..836783c 100644
--- a/main.c
+++ b/main.c
@@ -214,7 +214,6 @@ int main(int argc, char *argv[])
if (!safe)
envinit(environ);
yyparse();
- setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
if (fs)
*FS = qstring(fs, '\0');
dprintf( ("errorflag=%d\n", errorflag) );
diff --git a/proto.h b/proto.h
index 14e2d5f..5426ba3 100644
--- a/proto.h
+++ b/proto.h
@@ -122,7 +122,7 @@ extern void growfldtab(int n);
extern void savefs(void);
extern int getrec(char **, int *, bool);
extern void nextfile(void);
-extern int readrec(char **buf, int *bufsize, FILE *inf);
+extern int readrec(char **buf, int *bufsize, FILE *inf, bool isnew);
extern char *getargv(int);
extern void setclvar(char *);
extern void fldbld(void);
@@ -191,7 +191,7 @@ extern Cell *bltin(Node **, int);
extern Cell *printstat(Node **, int);
extern Cell *nullproc(Node **, int);
extern FILE *redirect(int, Node *);
-extern FILE *openfile(int, const char *);
+extern FILE *openfile(int, const char *, bool *);
extern const char *filename(FILE *);
extern Cell *closefile(Node **, int);
extern void closeall(void);
diff --git a/run.c b/run.c
index caab5ed..26e8c4f 100644
--- a/run.c
+++ b/run.c
@@ -405,6 +405,7 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
char *buf;
int bufsize = recsize;
int mode;
+ bool newflag;
if ((buf = malloc(bufsize)) == NULL)
FATAL("out of memory in getline");
@@ -416,12 +417,12 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */
mode = ptoi(a[1]);
if (mode == '|') /* input pipe */
mode = LE; /* arbitrary flag */
- fp = openfile(mode, getsval(x));
+ fp = openfile(mode, getsval(x), &newflag);
tempfree(x);
if (fp == NULL)
n = -1;
else
- n = readrec(&buf, &bufsize, fp);
+ n = readrec(&buf, &bufsize, fp, newflag);
if (n <= 0) {
;
} else if (a[0] != NULL) { /* getline var <file */
@@ -1658,7 +1659,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
if (isrec(x) || strlen(getsval(x)) == 0) {
flush_all(); /* fflush() or fflush("") -> all */
u = 0;
- } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
+ } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
u = EOF;
else
u = fflush(fp);
@@ -1718,7 +1719,7 @@ FILE *redirect(int a, Node *b) /* set up all i/o redirections */
x = execute(b);
fname = getsval(x);
- fp = openfile(a, fname);
+ fp = openfile(a, fname, NULL);
if (fp == NULL)
FATAL("can't open file %s", fname);
tempfree(x);
@@ -1750,7 +1751,7 @@ static void stdinit(void) /* in case stdin, etc., are not constants */
files[2].mode = GT;
}
-FILE *openfile(int a, const char *us)
+FILE *openfile(int a, const char *us, bool *pnewflag)
{
const char *s = us;
size_t i;
@@ -1760,11 +1761,12 @@ FILE *openfile(int a, const char *us)
if (*s == '\0')
FATAL("null file name in print or getline");
for (i = 0; i < nfiles; i++)
- if (files[i].fname && strcmp(s, files[i].fname) == 0) {
- if (a == files[i].mode || (a==APPEND && files[i].mode==GT))
- return files[i].fp;
- if (a == FFLUSH)
- return files[i].fp;
+ if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
+ (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
+ a == FFLUSH)) {
+ if (pnewflag)
+ *pnewflag = false;
+ return files[i].fp;
}
if (a == FFLUSH) /* didn't find it, so don't create it! */
return NULL;
@@ -1801,6 +1803,8 @@ FILE *openfile(int a, const char *us)
files[i].fname = tostring(s);
files[i].fp = fp;
files[i].mode = m;
+ if (pnewflag)
+ *pnewflag = true;
if (fp != stdin && fp != stdout && fp != stderr)
(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
}
diff --git a/testdir/T.builtin b/testdir/T.builtin
index b36f6cb..ef0e3bd 100755
--- a/testdir/T.builtin
+++ b/testdir/T.builtin
@@ -35,6 +35,9 @@ if locale -a | grep -qsi de_DE.UTF-8; then
$awk '{ printf("%s|%s|%s\n", tolower($0), toupper($0), $0)}') >foo1
echo 'dürst|DÜRST|Dürst' >foo2
diff foo1 foo2 || echo 'BAD: T.builtin (toupper/tolower) for utf-8'
+ (export LC_NUMERIC=de_DE.UTF-8 && $awk 'BEGIN { print 0.01 }' /dev/null) >foo1
+ echo "0.01" >foo2
+ diff foo1 foo2 || echo 'BAD: T.builtin LC_NUMERIC radix (.) handling'
fi
$awk 'BEGIN {
@@ -70,3 +73,18 @@ echo '1
3' >foo1
$awk '{ n = split($0, x); print length(x) }' <foo0 >foo2
diff foo1 foo2 || echo 'BAD: T.builtin length array'
+
+# Test for backslash handling
+cat << \EOF >foo0
+BEGIN {
+ print "A\
+B";
+ print "CD"
+}
+EOF
+$awk -f foo0 /dev/null >foo1
+cat << \EOF >foo2
+AB
+CD
+EOF
+diff foo1 foo2 || echo 'BAD: T.builtin continuation handling (backslash)'
diff --git a/testdir/T.misc b/testdir/T.misc
index 3903606..dff57db 100755
--- a/testdir/T.misc
+++ b/testdir/T.misc
@@ -186,6 +186,14 @@ BEGIN { RS = ""
}' >foo1
$awk 'END {print NR}' foo1 | grep 4 >/dev/null || echo 'BAD: T.misc abcdef fails'
+# Test for RS regex being reapplied
+echo aaa1a2a | $awk 1 RS='^a' >foo1
+cat << \EOF > foo2
+
+aa1a2a
+
+EOF
+diff foo1 foo2 || echo 'BAD: T.misc ^regex reapplied fails'
# The following should not produce a warning about changing a constant
# nor about a curdled tempcell list