summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVictor Chang <vichang@google.com>2018-03-13 17:07:52 +0000
committerVictor Chang <vichang@google.com>2018-03-15 13:27:49 +0000
commit717b543c71a3b673925479489f491b9726946d42 (patch)
tree41b99a53097826910ad0105be32a8915f47e23b3
parent86f3927eb3a0686ef285fa5990b66afcb5657615 (diff)
downloadicu-717b543c71a3b673925479489f491b9726946d42.tar.gz
Cherry-pick: ticket:13631 Regex Address Sanitizer fix
http://bugs.icu-project.org/trac/changeset/41086 Bug: 73612062 Test: RegexTest passes with "make check" Change-Id: I4a3e09d9a4ea946aaf8bc093bc31997e12b83597
-rw-r--r--icu4c/source/i18n/rematch.cpp10
-rw-r--r--icu4c/source/test/intltest/regextst.cpp26
-rw-r--r--icu4c/source/test/intltest/regextst.h1
3 files changed, 32 insertions, 5 deletions
diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp
index d01117f05..f25218220 100644
--- a/icu4c/source/i18n/rematch.cpp
+++ b/icu4c/source/i18n/rematch.cpp
@@ -438,7 +438,7 @@ RegexMatcher &RegexMatcher::appendReplacement(UText *dest,
status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
}
}
-
+
} else if (u_isdigit(nextChar)) {
// $n Scan for a capture group number
int32_t numCaptureGroups = fPattern->fGroupMap->size();
@@ -459,7 +459,7 @@ RegexMatcher &RegexMatcher::appendReplacement(UText *dest,
break;
}
(void)UTEXT_NEXT32(replacement);
- groupNum=groupNum*10 + nextDigitVal;
+ groupNum=groupNum*10 + nextDigitVal;
++numDigits;
}
} else {
@@ -2187,7 +2187,7 @@ int32_t RegexMatcher::split(UText *input,
break;
}
i++;
- dest[i] = utext_extract_replace(fInputText, dest[i],
+ dest[i] = utext_extract_replace(fInputText, dest[i],
start64(groupNum, status), end64(groupNum, status), &status);
}
@@ -5469,7 +5469,7 @@ GC_Done:
if (lbStartIdx < 0) {
// First time through loop.
lbStartIdx = fp->fInputIdx - minML;
- if (lbStartIdx > 0) {
+ if (lbStartIdx > 0 && lbStartIdx < fInputLength) {
U16_SET_CP_START(inputBuf, 0, lbStartIdx);
}
} else {
@@ -5546,7 +5546,7 @@ GC_Done:
if (lbStartIdx < 0) {
// First time through loop.
lbStartIdx = fp->fInputIdx - minML;
- if (lbStartIdx > 0) {
+ if (lbStartIdx > 0 && lbStartIdx < fInputLength) {
U16_SET_CP_START(inputBuf, 0, lbStartIdx);
}
} else {
diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp
index 398bc68e2..b1d75537c 100644
--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp
@@ -39,6 +39,7 @@
#include "unicode/ustring.h"
#include "unicode/utext.h"
#include "unicode/utf16.h"
+#include "cstr.h"
#include "regextst.h"
#include "regexcmp.h"
#include "uvector.h"
@@ -101,6 +102,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
TESTCASE_AUTO(NamedCapture);
TESTCASE_AUTO(NamedCaptureLimits);
TESTCASE_AUTO(TestBug12884);
+ TESTCASE_AUTO(TestBug13631);
TESTCASE_AUTO_END;
}
@@ -5806,4 +5808,28 @@ void RegexTest::TestBug12884() {
REGEX_ASSERT(status == U_REGEX_TIME_OUT);
}
+// Bug 13631. A find() of a pattern with a zero length look-behind assertion
+// can cause a read past the end of the input text.
+// The failure is seen when running this test with Clang's Address Sanitizer.
+
+void RegexTest::TestBug13631() {
+ const UChar *pats[] = { u"(?<!^)",
+ u"(?<=^)",
+ nullptr  // sentinel: terminates the loop over pats below
+ };
+ for (const UChar **pat=pats; *pat; ++pat) {  // run once per pattern, stopping at the nullptr sentinel
+ UErrorCode status = U_ZERO_ERROR;  // fresh status for each pattern
+ UnicodeString upat(*pat);
+ RegexMatcher matcher(upat, 0, status);
+ const UChar s =u'a';  // the input is a single code unit; nothing is declared after it
+ UText *ut = utext_openUChars(nullptr, &s, 1, &status);  // explicit length 1, so the text consists of exactly s
+ REGEX_CHECK_STATUS;  // NOTE(review): presumably reports the failure and returns if status is an error -- confirm against the macro
+ matcher.reset(ut);
+ while (matcher.find()) {  // match results are deliberately ignored; the loop only exercises find()
+ }
+ utext_close(ut);  // close the UText opened for this iteration
+ }
+}
+
+
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
diff --git a/icu4c/source/test/intltest/regextst.h b/icu4c/source/test/intltest/regextst.h
index 3f90de055..7e98cd622 100644
--- a/icu4c/source/test/intltest/regextst.h
+++ b/icu4c/source/test/intltest/regextst.h
@@ -57,6 +57,7 @@ public:
virtual void TestBug11371();
virtual void TestBug11480();
virtual void TestBug12884();
+ virtual void TestBug13631();
// The following functions are internal to the regexp tests.
virtual void assertUText(const char *expected, UText *actual, const char *file, int line);