about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Philip Hazel <Philip.Hazel@gmail.com> 2023-12-01 16:49:59 +0000
committer Philip Hazel <Philip.Hazel@gmail.com> 2023-12-01 16:49:59 +0000
commit afce00e484cff118a824dac498e8044680dac401 (patch)
tree 832aef5e5b7e40bf0b0d63c2aaa4e6b1af04d34e
parent 0820852df64a8236684759fc7e80298d4fdc70bd (diff)
download pcre-afce00e484cff118a824dac498e8044680dac401.tar.gz
Fix compile loop in 32-bit mode for characters above the Unicode limit when caseless and ucp are set.
-rw-r--r-- ChangeLog 12
-rw-r--r-- src/pcre2_compile.c 6
-rw-r--r-- testdata/testinput12 4
-rw-r--r-- testdata/testoutput12-16 5
-rw-r--r-- testdata/testoutput12-32 5
5 files changed, 27 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index b274adf1..2523b6c8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -174,10 +174,14 @@ undefined behaviour.
that its end is handled similarly to other recursions. This has altered the
behaviour of /|(?0)./endanchored which was previously not right.
-48. Improved the test for looping recursion by checking the last referenced
-character as well as the current character. This allows some patterns that
-previously triggered the check to run to completion instead of giving the loop
-error.
+48. Improved the test for looping recursion by checking the last referenced
+character as well as the current character. This allows some patterns that
+previously triggered the check to run to completion instead of giving the loop
+error.
+
+49. In 32-bit mode, the compiler looped for the pattern /[\x{ffffffff}]/ when
+PCRE2_CASELESS and PCRE2_UCP (but not PCRE2_UTF) were set. Fixed by not trying
+to look for other cases for characters above the Unicode range.
Version 10.42 11-December-2022
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 7b522c5b..1935e769 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -5155,10 +5155,14 @@ unsigned int co;
/* Find the first character that has an other case. If it has multiple other
cases, return its case offset value. When CASELESS_RESTRICT is set, ignore the
-multi-case entries that begin with ASCII values. */
+multi-case entries that begin with ASCII values. In 32-bit mode, a value
+greater than the Unicode maximum ends the range. */
for (c = *cptr; c <= d; c++)
{
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT) return -1;
+#endif
if ((co = UCD_CASESET(c)) != 0 &&
(!restricted || PRIV(ucd_caseless_sets)[co] > 127))
{
diff --git a/testdata/testinput12 b/testdata/testinput12
index a6678bb1..de3d4067 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -573,4 +573,8 @@
/\X++/
a\x{110000}\x{ffffffff}
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+ \x{ffffffff}xyz
+
# End of testinput12
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index f3b40a35..9fa93fa1 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1823,4 +1823,9 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
** Truncation will probably give the wrong result.
0: a\x00\x{ffff}
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+ \x{ffffffff}xyz
+
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index dd42f868..721d8bce 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1817,4 +1817,9 @@ No match
a\x{110000}\x{ffffffff}
0: a\x{110000}\x{ffffffff}
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+ \x{ffffffff}xyz
+ 0: \x{ffffffff}
+
# End of testinput12