about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Elliott Hughes <enh@google.com> 2023-11-07 19:40:14 +0000
committer Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2023-11-07 19:40:14 +0000
commit 9e300e71309db98a1d7de7bb2b50ef4a05e1bf43 (patch)
tree cf19b7bc55e300532fc24f31a0a42ad5a072d521
parent 2ac7127bb7379161d8e0f09c5facac4fdf3a222c (diff)
parent 607a5c4fd26135a13599d0fa0942aa42dbaee12c (diff)
download one-true-awk-9e300e71309db98a1d7de7bb2b50ef4a05e1bf43.tar.gz
Upgrade one-true-awk to d801514094d1140dfc9f8571b9821082ddddf107 am: 607a5c4fd2
Original change: https://android-review.googlesource.com/c/platform/external/one-true-awk/+/2817805
Change-Id: I0408e7c274a9ba2778ced8aa268ebcf94697a08f
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r-- FIXES 9
-rw-r--r-- METADATA 6
-rw-r--r-- README.md 4
-rw-r--r-- awk.h 2
-rw-r--r-- b.c 8
-rw-r--r-- main.c 5
-rw-r--r-- run.c 16
7 files changed, 32 insertions, 18 deletions
diff --git a/FIXES b/FIXES
index e3dedac..a13ca50 100644
--- a/FIXES
+++ b/FIXES
@@ -25,6 +25,15 @@ THIS SOFTWARE.
This file lists all bug fixes, changes, etc., made since the
second edition of the AWK book was published in September 2023.
+Oct 30, 2023:
+ multiple fixes and a minor code cleanup.
+ disabled utf-8 for non-multibyte locales, such as C or POSIX.
+ fixed a bad char * cast that causes incorrect results on big-endian
+ systems. also fixed an out-of-bounds read for empty CCL.
+ fixed a buffer overflow in substr with utf-8 strings.
+ many thanks to Todd C Miller.
+
+
Sep 24, 2023:
fnematch and getrune have been overhauled to solve issues around
unicode FS and RS. also fixed gsub null match issue with unicode.
diff --git a/METADATA b/METADATA
index 2d4a73c..6ea18b5 100644
--- a/METADATA
+++ b/METADATA
@@ -9,11 +9,11 @@ third_party {
type: GIT
value: "https://github.com/onetrueawk/awk.git"
}
- version: "d8e4368964e4471a54a755823004f2b1aabc0f80"
+ version: "d801514094d1140dfc9f8571b9821082ddddf107"
license_type: NOTICE
last_upgrade_date {
year: 2023
- month: 10
- day: 5
+ month: 11
+ day: 6
}
}
diff --git a/README.md b/README.md
index ff8c103..daace23 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ when expanded.
The option `--csv` turns on CSV processing of input:
fields are separated by commas, fields may be quoted with
-double-quote (`"`) characters, fields may contain embedded newlines.
+double-quote (`"`) characters, quoted fields may contain embedded newlines.
In CSV mode, `FS` is ignored.
If no explicit separator argument is provided,
@@ -145,4 +145,4 @@ is not at the top of our priority list.
#### Last Updated
-Sun Sep 3 09:26:43 EDT 2023
+Sun 15 Oct 2023 06:28:36 IDT
diff --git a/awk.h b/awk.h
index 49b5dfc..217319c 100644
--- a/awk.h
+++ b/awk.h
@@ -64,6 +64,8 @@ extern bool safe; /* false => unsafe, true => safe */
#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
extern int recsize; /* size of current record, orig RECSIZE */
+extern size_t awk_mb_cur_max; /* max size of a multi-byte character */
+
extern char EMPTY[]; /* this avoid -Wwritable-strings issues */
extern char **FS;
extern char **RS;
diff --git a/b.c b/b.c
index 55b320e..aa07d59 100644
--- a/b.c
+++ b/b.c
@@ -529,7 +529,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */
setvec[lp] = 1;
setcnt++;
}
- if (type(p) == CCL && (*(char *) right(p)) == '\0')
+ if (type(p) == CCL && (*(int *) right(p)) == 0)
return(0); /* empty CCL */
return(1);
case PLUS:
@@ -778,7 +778,7 @@ struct runedata getrune(FILE *fp)
c = getc(fp);
if (c == EOF)
return result; // result.rune == 0 --> EOF
- else if (c < 128) {
+ else if (c < 128 || awk_mb_cur_max == 1) {
result.bytes[0] = c;
result.len = 1;
result.rune = c;
@@ -970,7 +970,7 @@ Node *primary(void)
rtok = relex();
if (rtok == ')') { /* special pleading for () */
rtok = relex();
- return unary(op2(CCL, NIL, (Node *) tostring("")));
+ return unary(op2(CCL, NIL, (Node *) cclenter("")));
}
np = regexp();
if (rtok == ')') {
@@ -993,7 +993,7 @@ Node *concat(Node *np)
return (concat(op2(CAT, np, primary())));
case EMPTYRE:
rtok = relex();
- return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
+ return (concat(op2(CAT, op2(CCL, NIL, (Node *) cclenter("")),
primary())));
}
return (np);
diff --git a/main.c b/main.c
index d500729..3a205c8 100644
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20231001";
+const char *version = "version 20231030";
#define DEBUG
#include <stdio.h>
@@ -53,6 +53,8 @@ bool CSV = false; /* true for csv input */
bool safe = false; /* true => "safe" mode */
+size_t awk_mb_cur_max = 1;
+
static noreturn void fpecatch(int n
#ifdef SA_SIGINFO
, siginfo_t *si, void *uc
@@ -116,6 +118,7 @@ int main(int argc, char *argv[])
setlocale(LC_CTYPE, "");
setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
+ awk_mb_cur_max = MB_CUR_MAX;
cmdname = argv[0];
if (argc == 1) {
fprintf(stderr,
diff --git a/run.c b/run.c
index 0424a39..a9ef242 100644
--- a/run.c
+++ b/run.c
@@ -605,7 +605,7 @@ int u8_isutf(const char *s)
unsigned char c;
c = s[0];
- if (c < 128)
+ if (c < 128 || awk_mb_cur_max == 1)
return 1; /* what if it's 0? */
n = strlen(s);
@@ -632,7 +632,7 @@ int u8_rune(int *rune, const char *s)
unsigned char c;
c = s[0];
- if (c < 128) {
+ if (c < 128 || awk_mb_cur_max == 1) {
*rune = c;
return 1;
}
@@ -679,7 +679,7 @@ int u8_strlen(const char *s)
totlen = 0;
for (i = 0; i < n; i += len) {
c = s[i];
- if (c < 128) {
+ if (c < 128 || awk_mb_cur_max == 1) {
len = 1;
} else {
len = u8_nextlen(&s[i]);
@@ -985,7 +985,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
if (a[2] != NULL)
z = execute(a[2]);
s = getsval(x);
- k = strlen(s) + 1;
+ k = u8_strlen(s) + 1;
if (k <= 1) {
tempfree(x);
tempfree(y);
@@ -1289,7 +1289,7 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
int charval = (int) getfval(x);
if (charval != 0) {
- if (charval < 128)
+ if (charval < 128 || awk_mb_cur_max == 1)
snprintf(p, BUFSZ(p), fmt, charval);
else {
// possible unicode character
@@ -1349,7 +1349,7 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
int i;
if (ljust) { // print one char from t, then pad blanks
- for (int i = 0; i < n; i++)
+ for (i = 0; i < n; i++)
*p++ = t[i];
for (i = 0; i < pad; i++) {
//printf(" ");
@@ -1360,7 +1360,7 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
//printf(" ");
*p++ = ' ';
}
- for (int i = 0; i < n; i++)
+ for (i = 0; i < n; i++)
*p++ = t[i];
}
*p = 0;
@@ -1977,7 +1977,7 @@ static char *nawk_convert(const char *s, int (*fun_c)(int),
const char *ps = NULL;
size_t n = 0;
wchar_t wc;
- size_t sz = MB_CUR_MAX;
+ const size_t sz = awk_mb_cur_max;
int unused;
if (sz == 1) {