diff options
author | jshin@chromium.org <jshin@chromium.org> | 2014-11-14 19:41:00 +0000 |
---|---|---|
committer | jshin@chromium.org <jshin@chromium.org> | 2014-11-14 19:41:00 +0000 |
commit | 3af4ce5982311035e5f36803d547c0befa576c8c (patch) | |
tree | c8805f0eb6dd61eb86dcf9b1013b3d86a2939f19 | |
parent | 54a65bb30412a0c82b52af355590a966ee9e2d82 (diff) | |
download | icu-3af4ce5982311035e5f36803d547c0befa576c8c.tar.gz |
Cherry-pick two upstream patches to the ICU regex engine
The patches for the following two upstream bugs are cherry-picked:
http://bugs.icu-project.org/trac/ticket/11369
http://bugs.icu-project.org/trac/ticket/11370
BUG=422824,430353
TEST=See the bugs.
TBR=mbarbella
Review URL: https://codereview.chromium.org/732743002
git-svn-id: http://src.chromium.org/svn/trunk/deps/third_party/icu52@292943 4ff67af0-8c30-449e-8e8b-ad334ec8d88c
-rw-r--r-- | README.chromium | 6 | ||||
-rw-r--r-- | source/i18n/regexcmp.cpp | 16 | ||||
-rw-r--r-- | source/i18n/regexcmp.h | 4 | ||||
-rw-r--r-- | source/test/testdata/regextst.txt | 18 |
4 files changed, 43 insertions, 1 deletions
diff --git a/README.chromium b/README.chromium index 66dd3d8..d271b5c 100644 --- a/README.chromium +++ b/README.chromium @@ -238,3 +238,9 @@ This directory contains the source code of ICU 52.1 for C/C++ 11. Cherry-pick an upstream patch to fix a bug in bidi. - patches/bidi.patch - upstream bug : http://bugs.icu-project.org/trac/ticket/11054 + +12. Apply the following patch for regex + - patches/regex.patch + - upstream bugs : http://bugs.icu-project.org/trac/ticket/11369 + http://bugs.icu-project.org/trac/ticket/11370 + diff --git a/source/i18n/regexcmp.cpp b/source/i18n/regexcmp.cpp index 0ec6154..8f1504f 100644 --- a/source/i18n/regexcmp.cpp +++ b/source/i18n/regexcmp.cpp @@ -2133,6 +2133,10 @@ void RegexCompile::handleCloseParen() { int32_t patEnd = fRXPat->fCompiledPat->size() - 1; int32_t minML = minMatchLength(fMatchOpenParen, patEnd); int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); + if (URX_TYPE(maxML) != 0) { + error(U_REGEX_LOOK_BEHIND_LIMIT); + break; + } if (maxML == INT32_MAX) { error(U_REGEX_LOOK_BEHIND_LIMIT); break; @@ -2166,6 +2170,10 @@ void RegexCompile::handleCloseParen() { int32_t patEnd = fRXPat->fCompiledPat->size() - 1; int32_t minML = minMatchLength(fMatchOpenParen, patEnd); int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); + if (URX_TYPE(maxML) != 0) { + error(U_REGEX_LOOK_BEHIND_LIMIT); + break; + } if (maxML == INT32_MAX) { error(U_REGEX_LOOK_BEHIND_LIMIT); break; @@ -2329,7 +2337,15 @@ UBool RegexCompile::compileInlineInterval() { int32_t topOfBlock = blockTopLoc(FALSE); if (fIntervalUpper == 0) { // Pathological case. Attempt no matches, as if the block doesn't exist. + // Discard the generated code for the block. + // If the block included parens, discard the info pertaining to them as well. 
fRXPat->fCompiledPat->setSize(topOfBlock); + if (fMatchOpenParen >= topOfBlock) { + fMatchOpenParen = -1; + } + if (fMatchCloseParen >= topOfBlock) { + fMatchCloseParen = -1; + } return TRUE; } diff --git a/source/i18n/regexcmp.h b/source/i18n/regexcmp.h index 0041beb..5d526be 100644 --- a/source/i18n/regexcmp.h +++ b/source/i18n/regexcmp.h @@ -182,7 +182,9 @@ private: int32_t fMatchOpenParen; // The position in the compiled pattern // of the slot reserved for a state save // at the start of the most recently processed - // parenthesized block. + // parenthesized block. Updated when processing + // a close to the location for the corresponding open. + int32_t fMatchCloseParen; // The position in the pattern of the first // location after the most recently processed // parenthesized block. diff --git a/source/test/testdata/regextst.txt b/source/test/testdata/regextst.txt index 5716ab5..f0b00ab 100644 --- a/source/test/testdata/regextst.txt +++ b/source/test/testdata/regextst.txt @@ -1173,6 +1173,24 @@ "(?<=(?:){11})bc" "<0>bc</0>" # Empty (?:) expression. +# Bug 11369 +# Incorrect optimization of patterns with a zero length quantifier {0} + +"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE" +"(|b)ab(c)" "<0><1></1>ab<2>c</2></0>" +"(|b){0}a{3}(D*)" "<0>aaa<2></2></0>" +"(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>" +"((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>" + +# Bug 11370 +# Max match length computation of look-behind expression gives result that is too big to fit in the +# in the 24 bit operand portion of the compiled code. Expressions should fail to compile +# (Look-behind match length must be bounded. This case is treated as unbounded, an error.) + +"(?<!(0123456789a){10000000})x" E "no match" +"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match" + + # Random debugging, Temporary # #"^(?:a?b?)*$" "a--" |