diff options
author | Victor Chang <vichang@google.com> | 2018-03-13 17:07:52 +0000 |
---|---|---|
committer | Victor Chang <vichang@google.com> | 2018-03-15 13:27:49 +0000 |
commit | 717b543c71a3b673925479489f491b9726946d42 (patch) | |
tree | 41b99a53097826910ad0105be32a8915f47e23b3 | |
parent | 86f3927eb3a0686ef285fa5990b66afcb5657615 (diff) | |
download | icu-717b543c71a3b673925479489f491b9726946d42.tar.gz |
Cherry-pick: ticket:13631 Regex Address Sanitizer fix
http://bugs.icu-project.org/trac/changeset/41086
Bug: 73612062
Test: RegexTest passes with "make check"
Change-Id: I4a3e09d9a4ea946aaf8bc093bc31997e12b83597
-rw-r--r-- | icu4c/source/i18n/rematch.cpp | 10 | ||||
-rw-r--r-- | icu4c/source/test/intltest/regextst.cpp | 26 | ||||
-rw-r--r-- | icu4c/source/test/intltest/regextst.h | 1 |
3 files changed, 32 insertions, 5 deletions
diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp index d01117f05..f25218220 100644 --- a/icu4c/source/i18n/rematch.cpp +++ b/icu4c/source/i18n/rematch.cpp @@ -438,7 +438,7 @@ RegexMatcher &RegexMatcher::appendReplacement(UText *dest, status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; } } - + } else if (u_isdigit(nextChar)) { // $n Scan for a capture group number int32_t numCaptureGroups = fPattern->fGroupMap->size(); @@ -459,7 +459,7 @@ RegexMatcher &RegexMatcher::appendReplacement(UText *dest, break; } (void)UTEXT_NEXT32(replacement); - groupNum=groupNum*10 + nextDigitVal; + groupNum=groupNum*10 + nextDigitVal; ++numDigits; } } else { @@ -2187,7 +2187,7 @@ int32_t RegexMatcher::split(UText *input, break; } i++; - dest[i] = utext_extract_replace(fInputText, dest[i], + dest[i] = utext_extract_replace(fInputText, dest[i], start64(groupNum, status), end64(groupNum, status), &status); } @@ -5469,7 +5469,7 @@ GC_Done: if (lbStartIdx < 0) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; - if (lbStartIdx > 0) { + if (lbStartIdx > 0 && lbStartIdx < fInputLength) { U16_SET_CP_START(inputBuf, 0, lbStartIdx); } } else { @@ -5546,7 +5546,7 @@ GC_Done: if (lbStartIdx < 0) { // First time through loop. 
lbStartIdx = fp->fInputIdx - minML; - if (lbStartIdx > 0) { + if (lbStartIdx > 0 && lbStartIdx < fInputLength) { U16_SET_CP_START(inputBuf, 0, lbStartIdx); } } else { diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp index 398bc68e2..b1d75537c 100644 --- a/icu4c/source/test/intltest/regextst.cpp +++ b/icu4c/source/test/intltest/regextst.cpp @@ -39,6 +39,7 @@ #include "unicode/ustring.h" #include "unicode/utext.h" #include "unicode/utf16.h" +#include "cstr.h" #include "regextst.h" #include "regexcmp.h" #include "uvector.h" @@ -101,6 +102,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch TESTCASE_AUTO(NamedCapture); TESTCASE_AUTO(NamedCaptureLimits); TESTCASE_AUTO(TestBug12884); + TESTCASE_AUTO(TestBug13631); TESTCASE_AUTO_END; } @@ -5806,4 +5808,28 @@ void RegexTest::TestBug12884() { REGEX_ASSERT(status == U_REGEX_TIME_OUT); } +// Bug 13631. A find() of a pattern with a zero-length look-behind assertion +// can cause a read past the end of the input text. +// The failure is seen when running this test with Clang's Address Sanitizer. 
+ +void RegexTest::TestBug13631() { + const UChar *pats[] = { u"(?<!^)", + u"(?<=^)", + nullptr + }; + for (const UChar **pat=pats; *pat; ++pat) { + UErrorCode status = U_ZERO_ERROR; + UnicodeString upat(*pat); + RegexMatcher matcher(upat, 0, status); + const UChar s =u'a'; + UText *ut = utext_openUChars(nullptr, &s, 1, &status); + REGEX_CHECK_STATUS; + matcher.reset(ut); + while (matcher.find()) { + } + utext_close(ut); + } +} + + #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ diff --git a/icu4c/source/test/intltest/regextst.h b/icu4c/source/test/intltest/regextst.h index 3f90de055..7e98cd622 100644 --- a/icu4c/source/test/intltest/regextst.h +++ b/icu4c/source/test/intltest/regextst.h @@ -57,6 +57,7 @@ public: virtual void TestBug11371(); virtual void TestBug11480(); virtual void TestBug12884(); + virtual void TestBug13631(); // The following functions are internal to the regexp tests. virtual void assertUText(const char *expected, UText *actual, const char *file, int line); |