diff options
author | Philip Hazel <Philip.Hazel@gmail.com> | 2023-12-04 16:11:41 +0000 |
---|---|---|
committer | Philip Hazel <Philip.Hazel@gmail.com> | 2023-12-04 16:11:41 +0000 |
commit | ad73148dfb6d06280a4d87f322991762aff90a55 (patch) | |
tree | 48245ad586fadb6581ba3556629b665c9bb4367f | |
parent | 014c82d7bcc2873cdb1f3abc5e5348587f477ba4 (diff) | |
download | pcre-ad73148dfb6d06280a4d87f322991762aff90a55.tar.gz |
Fix incorrect matching of 0xffffffff to any character with more than one other case in caseless 32-bit UCP (but not UTF) mode.
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | src/pcre2_dfa_match.c | 28 | ||||
-rw-r--r-- | src/pcre2_match.c | 43 | ||||
-rw-r--r-- | testdata/testinput12 | 26 | ||||
-rw-r--r-- | testdata/testinput14 | 27 | ||||
-rw-r--r-- | testdata/testoutput12-16 | 37 | ||||
-rw-r--r-- | testdata/testoutput12-32 | 33 | ||||
-rw-r--r-- | testdata/testoutput14-16 | 38 | ||||
-rw-r--r-- | testdata/testoutput14-32 | 34 | ||||
-rw-r--r-- | testdata/testoutput14-8 | 38 |
10 files changed, 302 insertions, 6 deletions
@@ -183,6 +183,10 @@ error. PCRE2_CASELESS and PCRE2_UCP (but not PCRE2_UTF) were set. Fixed by not trying to look for other cases for characters above the Unicode range. +50. In caseless 32-bit mode with UCP (but not UTF) set, the character +0xffffffff incorrectly matched any character that has more than one other case, +in particular k and s. + Version 10.42 11-December-2022 ------------------------------ diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index 1c48ad67..caae6524 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -1241,6 +1241,13 @@ for (;;) break; case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif cp = PRIV(ucd_caseless_sets) + code[2]; for (;;) { @@ -1516,6 +1523,13 @@ for (;;) break; case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif cp = PRIV(ucd_caseless_sets) + code[3]; for (;;) { @@ -1774,6 +1788,13 @@ for (;;) break; case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif cp = PRIV(ucd_caseless_sets) + code[3]; for (;;) { @@ -2058,6 +2079,13 @@ for (;;) break; case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2]; for (;;) { diff --git a/src/pcre2_match.c b/src/pcre2_match.c index d162e707..b2e1f23b 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -2565,6 +2565,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (notmatch) break;; + RRETURN(MATCH_NOMATCH); + } +#endif cp = PRIV(ucd_caseless_sets) + Fecode[2]; for (;;) { @@ -2885,6 +2892,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); +#if 
PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (notmatch) continue; + RRETURN(MATCH_NOMATCH); + } +#endif cp = PRIV(ucd_caseless_sets) + Lpropvalue; for (;;) { @@ -3698,6 +3712,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (Lctype == OP_NOTPROP) continue; + RRETURN(MATCH_NOMATCH); + } +#endif cp = PRIV(ucd_caseless_sets) + Lpropvalue; for (;;) { @@ -4278,14 +4299,24 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, break; } GETCHARLENTEST(fc, Feptr, len); - cp = PRIV(ucd_caseless_sets) + Lpropvalue; - for (;;) +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) { - if (fc < *cp) - { if (notmatch) break; else goto GOT_MAX; } - if (fc == *cp++) - { if (notmatch) goto GOT_MAX; else break; } + if (!notmatch) goto GOT_MAX; } + else +#endif + { + cp = PRIV(ucd_caseless_sets) + Lpropvalue; + for (;;) + { + if (fc < *cp) + { if (notmatch) break; else goto GOT_MAX; } + if (fc == *cp++) + { if (notmatch) goto GOT_MAX; else break; } + } + } + Feptr += len; } GOT_MAX: diff --git a/testdata/testinput12 b/testdata/testinput12 index de3d4067..85550c3b 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 @@ -576,5 +576,31 @@ # This used to loop in 32-bit mode; it will fail in 16-bit mode. /[\x{ffffffff}]/caseless,ucp \x{ffffffff}xyz + +# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They +# will give errors in 16-bit mode. 
+ +/k*\x{ffffffff}/caseless,ucp + \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z + +# --------------------------------------------------------- # End of testinput12 diff --git a/testdata/testinput14 b/testdata/testinput14 index 8a17ae73..8880b5ca 100644 --- a/testdata/testinput14 +++ b/testdata/testinput14 @@ -78,4 +78,31 @@ # ---------------------------------------------------- +# ---------------------------------------------------- +# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit +# mode; for the other widths they will fail. + +/k*\x{ffffffff}/caseless,ucp + \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z + +# ---------------------------------------------------- + # End of testinput14 diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 index 9fa93fa1..616d6930 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 @@ -1827,5 +1827,42 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to /[\x{ffffffff}]/caseless,ucp Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large \x{ffffffff}xyz + +# 
These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They +# will give errors in 16-bit mode. + +/k*\x{ffffffff}/caseless,ucp +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +No match + +# --------------------------------------------------------- # End of testinput12 diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 index 721d8bce..3c9586e4 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 @@ -1821,5 +1821,38 @@ No match /[\x{ffffffff}]/caseless,ucp \x{ffffffff}xyz 0: \x{ffffffff} + +# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They +# will give errors in 16-bit mode. 
+ +/k*\x{ffffffff}/caseless,ucp + \x{ffffffff} + 0: \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} + 0: K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} +No match + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} +No match + +/k\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} + 0: K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} +No match + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +No match + +# --------------------------------------------------------- # End of testinput12 diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16 index 61541f61..dd1a977a 100644 --- a/testdata/testoutput14-16 +++ b/testdata/testoutput14-16 @@ -122,4 +122,42 @@ No match # ---------------------------------------------------- +# ---------------------------------------------------- +# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit +# mode; for the other widths they will fail. 
+ +/k*\x{ffffffff}/caseless,ucp +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled. +** Truncation will probably give the wrong result. +No match + +# ---------------------------------------------------- + # End of testinput14 diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32 index f1f65b74..dc21569c 100644 --- a/testdata/testoutput14-32 +++ b/testdata/testoutput14-32 @@ -122,4 +122,38 @@ No match # ---------------------------------------------------- +# ---------------------------------------------------- +# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit +# mode; for the other widths they will fail. 
+ +/k*\x{ffffffff}/caseless,ucp + \x{ffffffff} + 0: \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} + 0: K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} +No match + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} +No match + +/k\x{ffffffff}/caseless,ucp,no_start_optimize + K\x{ffffffff} + 0: K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} +No match + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +No match + +# ---------------------------------------------------- + # End of testinput14 diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8 index aa624141..69285db2 100644 --- a/testdata/testoutput14-8 +++ b/testdata/testoutput14-8 @@ -122,4 +122,42 @@ Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too # ---------------------------------------------------- +# ---------------------------------------------------- +# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit +# mode; for the other widths they will fail. 
+ +/k*\x{ffffffff}/caseless,ucp +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + \x{ffffffff} + +/k+\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff} + +/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k\x{ffffffff}/caseless,ucp,no_start_optimize +Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large + K\x{ffffffff} +\= Expect no match + \x{ffffffff}\x{ffffffff}\x{ffffffff} + +/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess +\= Expect no match + Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z +** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. +** Truncation will probably give the wrong result. +** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled. +** Truncation will probably give the wrong result. +No match + +# ---------------------------------------------------- + # End of testinput14 |