aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTorne (Richard Coles) <torne@google.com>2014-11-27 15:41:26 +0000
committerTorne (Richard Coles) <torne@google.com>2014-11-27 15:41:26 +0000
commite318d1364cbed337f3c792dc0727677bf0886c77 (patch)
tree02011651eea5971e03c08055bf301d5307eb18af
parentcf50e1e1bced13a28c8f78c0c2eed5f6ce5713f9 (diff)
parent6242e2fbb36f486f2c0addd1c3cef67fc4ed33fb (diff)
downloadicu-e318d1364cbed337f3c792dc0727677bf0886c77.tar.gz
Merge third_party/icu from https://chromium.googlesource.com/chromium/deps/icu52.git at 6242e2fbb36f486f2c0addd1c3cef67fc4ed33fb
This commit was generated by merge_from_chromium.py. Change-Id: I714bb374c019b5d2ba7b7cfbbba844387f285078
-rw-r--r--README.chromium6
-rw-r--r--icu.gyp38
-rw-r--r--patches/regex.patch86
-rw-r--r--source/i18n/regexcmp.cpp16
-rw-r--r--source/i18n/regexcmp.h4
-rw-r--r--source/test/testdata/regextst.txt18
6 files changed, 130 insertions, 38 deletions
diff --git a/README.chromium b/README.chromium
index 66dd3d8..d271b5c 100644
--- a/README.chromium
+++ b/README.chromium
@@ -238,3 +238,9 @@ This directory contains the source code of ICU 52.1 for C/C++
11. Cherry-pick an upstream patch to fix a bug in bidi.
- patches/bidi.patch
- upstream bug : http://bugs.icu-project.org/trac/ticket/11054
+
+12. Apply the following patch for regex
+ - patches/regex.patch
+ - upstream bugs : http://bugs.icu-project.org/trac/ticket/11369
+ http://bugs.icu-project.org/trac/ticket/11370
+
diff --git a/icu.gyp b/icu.gyp
index 21aa82d..94ab910 100644
--- a/icu.gyp
+++ b/icu.gyp
@@ -247,17 +247,6 @@
},
},
}],
- ['OS == "android" and use_system_stlport == 1', {
- 'target_conditions': [
- ['_toolset == "target"', {
- # ICU requires RTTI, which is not present in the system's
- # stlport, so we have to include gabi++.
- 'include_dirs': [
- '<(android_src)/abi/cpp/include',
- ],
- }],
- ],
- }],
], # conditions
},
{
@@ -354,17 +343,6 @@
},
},
}],
- ['OS == "android" and use_system_stlport == 1', {
- 'target_conditions': [
- ['_toolset == "target"', {
- # ICU requires RTTI, which is not present in the system's
- # stlport, so we have to include gabi++.
- 'include_dirs': [
- '<(android_src)/abi/cpp/include',
- ],
- }],
- ],
- }],
], # conditions
},
], # targets
@@ -375,20 +353,6 @@
'target_name': 'system_icu',
'type': 'none',
'conditions': [
- ['OS=="android"', {
- 'direct_dependent_settings': {
- 'include_dirs': [
- '<(android_src)/external/icu/icu4c/source/common',
- '<(android_src)/external/icu/icu4c/source/i18n',
- ],
- },
- 'link_settings': {
- 'libraries': [
- '-licui18n',
- '-licuuc',
- ],
- },
- }],
['OS=="qnx"', {
'link_settings': {
'libraries': [
@@ -397,7 +361,7 @@
],
},
}],
- ['OS!="android" and OS!="qnx"', {
+ ['OS!="qnx"', {
'link_settings': {
'ldflags': [
'<!@(icu-config --ldflags)',
diff --git a/patches/regex.patch b/patches/regex.patch
new file mode 100644
index 0000000..4f74ee1
--- /dev/null
+++ b/patches/regex.patch
@@ -0,0 +1,86 @@
+Index: source/i18n/regexcmp.h
+===================================================================
+--- source/i18n/regexcmp.h (revision 292476)
++++ source/i18n/regexcmp.h (working copy)
+@@ -182,7 +182,9 @@
+ int32_t fMatchOpenParen; // The position in the compiled pattern
+ // of the slot reserved for a state save
+ // at the start of the most recently processed
+- // parenthesized block.
++ // parenthesized block. Updated when processing a close
++ // parenthesis to the location of the corresponding open parenthesis.
++
+ int32_t fMatchCloseParen; // The position in the pattern of the first
+ // location after the most recently processed
+ // parenthesized block.
+Index: source/i18n/regexcmp.cpp
+===================================================================
+--- source/i18n/regexcmp.cpp (revision 292476)
++++ source/i18n/regexcmp.cpp (working copy)
+@@ -2133,6 +2133,10 @@
+ int32_t patEnd = fRXPat->fCompiledPat->size() - 1;
+ int32_t minML = minMatchLength(fMatchOpenParen, patEnd);
+ int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd);
++ if (URX_TYPE(maxML) != 0) {
++ error(U_REGEX_LOOK_BEHIND_LIMIT);
++ break;
++ }
+ if (maxML == INT32_MAX) {
+ error(U_REGEX_LOOK_BEHIND_LIMIT);
+ break;
+@@ -2166,6 +2170,10 @@
+ int32_t patEnd = fRXPat->fCompiledPat->size() - 1;
+ int32_t minML = minMatchLength(fMatchOpenParen, patEnd);
+ int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd);
++ if (URX_TYPE(maxML) != 0) {
++ error(U_REGEX_LOOK_BEHIND_LIMIT);
++ break;
++ }
+ if (maxML == INT32_MAX) {
+ error(U_REGEX_LOOK_BEHIND_LIMIT);
+ break;
+@@ -2329,7 +2337,15 @@
+ int32_t topOfBlock = blockTopLoc(FALSE);
+ if (fIntervalUpper == 0) {
+ // Pathological case. Attempt no matches, as if the block doesn't exist.
++ // Discard the generated code for the block.
++ // If the block included parens, discard the info pertaining to them as well.
+ fRXPat->fCompiledPat->setSize(topOfBlock);
++ if (fMatchOpenParen >= topOfBlock) {
++ fMatchOpenParen = -1;
++ }
++ if (fMatchCloseParen >= topOfBlock) {
++ fMatchCloseParen = -1;
++ }
+ return TRUE;
+ }
+
+Index: source/test/testdata/regextst.txt
+===================================================================
+--- source/test/testdata/regextst.txt (revision 292476)
++++ source/test/testdata/regextst.txt (working copy)
+@@ -1173,6 +1173,24 @@
+ "(?<=(?:){11})bc" "<0>bc</0>" # Empty (?:) expression.
+
+
++# Bug 11369
++# Incorrect optimization of patterns with a zero length quantifier {0}
++
++"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE"
++"(|b)ab(c)" "<0><1></1>ab<2>c</2></0>"
++"(|b){0}a{3}(D*)" "<0>aaa<2></2></0>"
++"(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>"
++"((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>"
++
++# Bug 11370
++# Max match length computation of look-behind expression gives a result that is too big to fit
++# in the 24 bit operand portion of the compiled code. Expressions should fail to compile
++# (Look-behind match length must be bounded. This case is treated as unbounded, an error.)
++
++"(?<!(0123456789a){10000000})x" E "no match"
++"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match"
++
++
+ # Random debugging, Temporary
+ #
+ #"^(?:a?b?)*$" "a--"
diff --git a/source/i18n/regexcmp.cpp b/source/i18n/regexcmp.cpp
index 0ec6154..8f1504f 100644
--- a/source/i18n/regexcmp.cpp
+++ b/source/i18n/regexcmp.cpp
@@ -2133,6 +2133,10 @@ void RegexCompile::handleCloseParen() {
int32_t patEnd = fRXPat->fCompiledPat->size() - 1;
int32_t minML = minMatchLength(fMatchOpenParen, patEnd);
int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd);
+ if (URX_TYPE(maxML) != 0) {
+ error(U_REGEX_LOOK_BEHIND_LIMIT);
+ break;
+ }
if (maxML == INT32_MAX) {
error(U_REGEX_LOOK_BEHIND_LIMIT);
break;
@@ -2166,6 +2170,10 @@ void RegexCompile::handleCloseParen() {
int32_t patEnd = fRXPat->fCompiledPat->size() - 1;
int32_t minML = minMatchLength(fMatchOpenParen, patEnd);
int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd);
+ if (URX_TYPE(maxML) != 0) {
+ error(U_REGEX_LOOK_BEHIND_LIMIT);
+ break;
+ }
if (maxML == INT32_MAX) {
error(U_REGEX_LOOK_BEHIND_LIMIT);
break;
@@ -2329,7 +2337,15 @@ UBool RegexCompile::compileInlineInterval() {
int32_t topOfBlock = blockTopLoc(FALSE);
if (fIntervalUpper == 0) {
// Pathological case. Attempt no matches, as if the block doesn't exist.
+ // Discard the generated code for the block.
+ // If the block included parens, discard the info pertaining to them as well.
fRXPat->fCompiledPat->setSize(topOfBlock);
+ if (fMatchOpenParen >= topOfBlock) {
+ fMatchOpenParen = -1;
+ }
+ if (fMatchCloseParen >= topOfBlock) {
+ fMatchCloseParen = -1;
+ }
return TRUE;
}
diff --git a/source/i18n/regexcmp.h b/source/i18n/regexcmp.h
index 0041beb..5d526be 100644
--- a/source/i18n/regexcmp.h
+++ b/source/i18n/regexcmp.h
@@ -182,7 +182,9 @@ private:
int32_t fMatchOpenParen; // The position in the compiled pattern
// of the slot reserved for a state save
// at the start of the most recently processed
- // parenthesized block.
+ // parenthesized block. Updated when processing a close
+ // parenthesis to the location of the corresponding open parenthesis.
+
int32_t fMatchCloseParen; // The position in the pattern of the first
// location after the most recently processed
// parenthesized block.
diff --git a/source/test/testdata/regextst.txt b/source/test/testdata/regextst.txt
index 5716ab5..f0b00ab 100644
--- a/source/test/testdata/regextst.txt
+++ b/source/test/testdata/regextst.txt
@@ -1173,6 +1173,24 @@
"(?<=(?:){11})bc" "<0>bc</0>" # Empty (?:) expression.
+# Bug 11369
+# Incorrect optimization of patterns with a zero length quantifier {0}
+
+"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE"
+"(|b)ab(c)" "<0><1></1>ab<2>c</2></0>"
+"(|b){0}a{3}(D*)" "<0>aaa<2></2></0>"
+"(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>"
+"((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>"
+
+# Bug 11370
+# Max match length computation of look-behind expression gives a result that is too big to fit
+# in the 24 bit operand portion of the compiled code. Expressions should fail to compile
+# (Look-behind match length must be bounded. This case is treated as unbounded, an error.)
+
+"(?<!(0123456789a){10000000})x" E "no match"
+"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match"
+
+
# Random debugging, Temporary
#
#"^(?:a?b?)*$" "a--"