diff options
author | Philip Hazel <Philip.Hazel@gmail.com> | 2023-12-01 16:49:59 +0000 |
---|---|---|
committer | Philip Hazel <Philip.Hazel@gmail.com> | 2023-12-01 16:49:59 +0000 |
commit | afce00e484cff118a824dac498e8044680dac401 (patch) | |
tree | 832aef5e5b7e40bf0b0d63c2aaa4e6b1af04d34e | |
parent | 0820852df64a8236684759fc7e80298d4fdc70bd (diff) | |
download | pcre-afce00e484cff118a824dac498e8044680dac401.tar.gz |
Fix compile loop in 32-bit mode for characters above the Unicode limit when caseless and ucp are set.
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | src/pcre2_compile.c | 6 | ||||
-rw-r--r-- | testdata/testinput12 | 4 | ||||
-rw-r--r-- | testdata/testoutput12-16 | 5 | ||||
-rw-r--r-- | testdata/testoutput12-32 | 5 |
5 files changed, 27 insertions, 5 deletions
```diff
@@ -174,10 +174,14 @@ undefined behaviour.
 that its end is handled similarly to other recursions. This has altered the
 behaviour of /|(?0)./endanchored which was previously not right.

-48. Improved the test for looping recursion by checking the last referenced
-character as well as the current character. This allows some patterns that
-previously triggered the check to run to completion instead of giving the loop
-error.
+48. Improved the test for looping recursion by checking the last referenced
+character as well as the current character. This allows some patterns that
+previously triggered the check to run to completion instead of giving the loop
+error.
+
+49. In 32-bit mode, the compiler looped for the pattern /[\x{ffffffff}]/ when
+PCRE2_CASELESS and PCRE2_UCP (but not PCRE2_UTF) were set. Fixed by not trying
+to look for other cases for characters above the Unicode range.

 Version 10.42 11-December-2022
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 7b522c5b..1935e769 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -5155,10 +5155,14 @@ unsigned int co;

 /* Find the first character that has an other case. If it has multiple other
 cases, return its case offset value. When CASELESS_RESTRICT is set, ignore the
-multi-case entries that begin with ASCII values. */
+multi-case entries that begin with ASCII values. In 32-bit mode, a value
+greater than the Unicode maximum ends the range. */

 for (c = *cptr; c <= d; c++)
   {
+#if PCRE2_CODE_UNIT_WIDTH == 32
+  if (c > MAX_UTF_CODE_POINT) return -1;
+#endif
   if ((co = UCD_CASESET(c)) != 0 &&
       (!restricted || PRIV(ucd_caseless_sets)[co] > 127))
     {
diff --git a/testdata/testinput12 b/testdata/testinput12
index a6678bb1..de3d4067 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -573,4 +573,8 @@
 /\X++/
     a\x{110000}\x{ffffffff}

+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+    \x{ffffffff}xyz
+
 # End of testinput12
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index f3b40a35..9fa93fa1 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1823,4 +1823,9 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
 ** Truncation will probably give the wrong result.
  0: a\x00\x{ffff}

+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+    \x{ffffffff}xyz
+
 # End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index dd42f868..721d8bce 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1817,4 +1817,9 @@ No match
     a\x{110000}\x{ffffffff}
  0: a\x{110000}\x{ffffffff}

+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+    \x{ffffffff}xyz
+ 0: \x{ffffffff}
+
 # End of testinput12
```