diff options
author | Jean-Baptiste Queru <jbq@google.com> | 2009-07-17 17:41:09 -0700 |
---|---|---|
committer | Jean-Baptiste Queru <jbq@google.com> | 2009-07-17 17:41:09 -0700 |
commit | a09443ea25f9d6e6cfae4b8e438c7a6ced388a06 (patch) | |
tree | 09477b4e0d3b30fe64209b11524b50ae312991a7 /i18n | |
parent | de821ad1a223bf1b670cfa0205e431bef3f860d3 (diff) | |
download | icu4c-a09443ea25f9d6e6cfae4b8e438c7a6ced388a06.tar.gz |
import cl @41443
Diffstat (limited to 'i18n')
-rw-r--r-- | i18n/rematch.cpp | 6 | ||||
-rw-r--r-- | i18n/unicode/regex.h | 6 | ||||
-rw-r--r-- | i18n/unicode/uregex.h | 60 | ||||
-rw-r--r-- | i18n/uregex.cpp | 28 |
4 files changed, 83 insertions, 17 deletions
diff --git a/i18n/rematch.cpp b/i18n/rematch.cpp index 47bb1cc8..2fe46a96 100644 --- a/i18n/rematch.cpp +++ b/i18n/rematch.cpp @@ -612,8 +612,8 @@ UBool RegexMatcher::lookingAt(UErrorCode &status) { status = fDeferredStatus; return FALSE; } - reset(); - MatchAt(0, FALSE, status); + resetPreserveRegion(); + MatchAt(fRegionStart, FALSE, status); return fMatch; } @@ -627,7 +627,7 @@ UBool RegexMatcher::lookingAt(int32_t start, UErrorCode &status) { return FALSE; } reset(); - if (start < 0 || start > fRegionLimit) { + if (start < fRegionStart || start > fRegionLimit) { status = U_INDEX_OUTOFBOUNDS_ERROR; return FALSE; } diff --git a/i18n/unicode/regex.h b/i18n/unicode/regex.h index 798ecfac..e2af3085 100644 --- a/i18n/unicode/regex.h +++ b/i18n/unicode/regex.h @@ -703,7 +703,7 @@ public: * and to cause subsequent find() operations to begin at * the specified position in the input string. * <p> - * The matcher's region is reset to its default, wich is the entire + * The matcher's region is reset to its default, which is the entire * input string. * <p> * An alternative to this function is to set a match region @@ -1034,8 +1034,10 @@ private: RegexMatcher &operator =(const RegexMatcher &rhs); friend class RegexPattern; friend class RegexCImpl; - +public: + /** @internal */ void resetPreserveRegion(); // Reset matcher state, but preserve any region. +private: // // MatchAt This is the internal interface to the match engine itself. diff --git a/i18n/unicode/uregex.h b/i18n/unicode/uregex.h index 0bb565d8..660076ac 100644 --- a/i18n/unicode/uregex.h +++ b/i18n/unicode/uregex.h @@ -59,12 +59,32 @@ typedef enum URegexpFlag{ /** If set, '.' matches line terminators, otherwise '.' matching stops at line end. * @stable ICU 2.4 */ UREGEX_DOTALL = 32, + + /** If set, treat the entire pattern as a literal string. + * Metacharacters or escape sequences in the input sequence will be given + * no special meaning. + * + * The flags CASE_INSENSITIVE and UNICODE_CASE retain their impact + * on matching when used in conjunction with this flag. + * The other flags become superfluous. + * TODO: say which escapes are still handled; anything Java does + * early (\u) we should still do. + * @draft ICU 4.0 + */ + UREGEG_LITERAL = 16, /** Control behavior of "$" and "^" * If set, recognize line terminators within string, * otherwise, match only at start and end of input string. * @stable ICU 2.4 */ UREGEX_MULTILINE = 8, + + /** Unix-only line endings. + * When this mode is enabled, only \u000a is recognized as a line ending + * in the behavior of ., ^, and $. + * @draft ICU 4.0 + */ + URGEGX_UNIX_LINES = 1, /** Unicode word boundaries. * If set, \b uses the Unicode TR 29 definition of word boundaries. @@ -261,11 +281,21 @@ uregex_getText(URegularExpression *regexp, UErrorCode *status); /** - * Attempts to match the input string, beginning at startIndex, against the pattern. - * To succeed, the match must extend to the end of the input string. + * Attempts to match the input string against the pattern. + * To succeed, the match must extend to the end of the string, + * or cover the complete match region. + * + * If startIndex >= zero the match operation starts at the specified + * index and must extend to the end of the input string. Any region + * that has been specified is reset. + * + * If startIndex == -1 the match must cover the input region, or the entire + * input string if no region has been set. This directly corresponds to + * Matcher.matches() in Java * * @param regexp The compiled regular expression. - * @param startIndex The input string index at which to begin matching. + * @param startIndex The input string index at which to begin matching, or -1 + * to match the input Region. * @param status Receives errors detected by this function. * @return TRUE if there is a match * @stable ICU 3.0 @@ -280,12 +310,20 @@ uregex_matches(URegularExpression *regexp, * The match may be of any length, and is not required to extend to the end * of the input string. Contrast with uregex_matches(). * + * <p>If startIndex is >= 0 any input region that was set for this + * URegularExpression is reset before the operation begins. + * + * <p>If the specified starting index == -1 the match begins at the start of the input + * region, or at the start of the full string if no region has been specified. + * This corresponds directly with Matcher.lookingAt() in Java. + * * <p>If the match succeeds then more information can be obtained via the * <code>uregexp_start()</code>, <code>uregexp_end()</code>, * and <code>uregexp_group()</code> functions.</p> * * @param regexp The compiled regular expression. - * @param startIndex The input string index at which to begin matching. + * @param startIndex The input string index at which to begin matching, or + * -1 to match the Input Region * @param status A reference to a UErrorCode to receive any errors. * @return TRUE if there is a match. * @stable ICU 3.0 @@ -297,12 +335,22 @@ uregex_lookingAt(URegularExpression *regexp, /** * Find the first matching substring of the input string that matches the pattern. - * The search for a match begins at the specified index. + * If startIndex is >= zero the search for a match begins at the specified index, + * and any match region is reset. This corresponds directly with + * Matcher.find(startIndex) in Java. + * + * If startIndex == -1 the search begins at the start of the input region, + * or at the start of the full string if no region has been specified. + * TODO: Or, should this be the same as findNext()? + * Consistency between the C functions (as it is), or + * consistency with -1 meaning match Java? + * * If a match is found, <code>uregex_start(), uregex_end()</code>, and * <code>uregex_group()</code> will provide more information regarding the match. * * @param regexp The compiled regular expression. - * @param startIndex The position in the input string to begin the search + * @param startIndex The position in the input string to begin the search, or + * -1 to search within the Input Region. * @param status A reference to a UErrorCode to receive any errors. * @return TRUE if a match is found. * @stable ICU 3.0 diff --git a/i18n/uregex.cpp b/i18n/uregex.cpp index f869d2e9..705d0385 100644 --- a/i18n/uregex.cpp +++ b/i18n/uregex.cpp @@ -304,10 +304,15 @@ U_CAPI UBool U_EXPORT2 uregex_matches(URegularExpression *regexp, int32_t startIndex, UErrorCode *status) { + UBool result = FALSE; if (validateRE(regexp, status) == FALSE) { - return FALSE; + return result; + } + if (startIndex == -1) { + result = regexp->fMatcher->matches(*status); + } else { + result = regexp->fMatcher->matches(startIndex, *status); } - UBool result = regexp->fMatcher->matches(startIndex, *status); return result; } @@ -322,10 +327,15 @@ U_CAPI UBool U_EXPORT2 uregex_lookingAt(URegularExpression *regexp, int32_t startIndex, UErrorCode *status) { + UBool result = FALSE; if (validateRE(regexp, status) == FALSE) { - return FALSE; + return result; + } + if (startIndex == -1) { + result = regexp->fMatcher->lookingAt(*status); + } else { + result = regexp->fMatcher->lookingAt(startIndex, *status); } - UBool result = regexp->fMatcher->lookingAt(startIndex, *status); return result; } @@ -340,10 +350,16 @@ U_CAPI UBool U_EXPORT2 uregex_find(URegularExpression *regexp, int32_t startIndex, UErrorCode *status) { + UBool result = FALSE; if (validateRE(regexp, status) == FALSE) { - return FALSE; + return result; + } + if (startIndex == -1) { + regexp->fMatcher->resetPreserveRegion(); + result = regexp->fMatcher->find(); + } else { + result = regexp->fMatcher->find(startIndex, *status); } - UBool result = regexp->fMatcher->find(startIndex, *status); return result; } |