diff options
Diffstat (limited to 'src/main/java/org/apache/commons/lang3/CharSequenceUtils.java')
-rw-r--r-- | src/main/java/org/apache/commons/lang3/CharSequenceUtils.java | 389 |
1 files changed, 389 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java new file mode 100644 index 000000000..f63b4248c --- /dev/null +++ b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java @@ -0,0 +1,389 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.lang3; + +/** + * Operations on {@link CharSequence} that are + * {@code null} safe. + * + * @see CharSequence + * @since 3.0 + */ +public class CharSequenceUtils { + + private static final int NOT_FOUND = -1; + + static final int TO_STRING_LIMIT = 16; + + private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) { + for (int i = 1, j = len2 - 1; i <= j; i++, j--) { + if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) { + return false; + } + } + return true; + } + + /** + * Used by the indexOf(CharSequence methods) as a green implementation of indexOf. + * + * @param cs the {@link CharSequence} to be processed + * @param searchChar the {@link CharSequence} to be searched for + * @param start the start index + * @return the index where the search sequence was found + */ + static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) { + if (cs instanceof String) { + return ((String) cs).indexOf(searchChar.toString(), start); + } + if (cs instanceof StringBuilder) { + return ((StringBuilder) cs).indexOf(searchChar.toString(), start); + } + if (cs instanceof StringBuffer) { + return ((StringBuffer) cs).indexOf(searchChar.toString(), start); + } + return cs.toString().indexOf(searchChar.toString(), start); +// if (cs instanceof String && searchChar instanceof String) { +// // TODO: Do we assume searchChar is usually relatively small; +// // If so then calling toString() on it is better than reverting to +// // the green implementation in the else block +// return ((String) cs).indexOf((String) searchChar, start); +// } else { +// // TODO: Implement rather than convert to String +// return cs.toString().indexOf(searchChar.toString(), start); +// } + } + + /** + * Returns the index within {@code cs} of the first occurrence of the + * specified character, starting the search at the specified index. + * <p> + * If a character with value {@code searchChar} occurs in the + * character sequence represented by the {@code cs} + * object at an index no smaller than {@code start}, then + * the index of the first such occurrence is returned. For values + * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive), + * this is the smallest value <i>k</i> such that: + * </p> + * <blockquote><pre> + * (this.charAt(<i>k</i>) == searchChar) && (<i>k</i> >= start) + * </pre></blockquote> + * is true. For other values of {@code searchChar}, it is the + * smallest value <i>k</i> such that: + * <blockquote><pre> + * (this.codePointAt(<i>k</i>) == searchChar) && (<i>k</i> >= start) + * </pre></blockquote> + * <p> + * is true. In either case, if no such character occurs inm {@code cs} + * at or after position {@code start}, then + * {@code -1} is returned. + * </p> + * <p> + * There is no restriction on the value of {@code start}. If it + * is negative, it has the same effect as if it were zero: the entire + * {@link CharSequence} may be searched. If it is greater than + * the length of {@code cs}, it has the same effect as if it were + * equal to the length of {@code cs}: {@code -1} is returned. + * </p> + * <p>All indices are specified in {@code char} values + * (Unicode code units). + * </p> + * + * @param cs the {@link CharSequence} to be processed, not null + * @param searchChar the char to be searched for + * @param start the start index, negative starts at the string start + * @return the index where the search char was found, -1 if not found + * @since 3.6 updated to behave more like {@link String} + */ + static int indexOf(final CharSequence cs, final int searchChar, int start) { + if (cs instanceof String) { + return ((String) cs).indexOf(searchChar, start); + } + final int sz = cs.length(); + if (start < 0) { + start = 0; + } + if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { + for (int i = start; i < sz; i++) { + if (cs.charAt(i) == searchChar) { + return i; + } + } + return NOT_FOUND; + } + //supplementary characters (LANG1300) + if (searchChar <= Character.MAX_CODE_POINT) { + final char[] chars = Character.toChars(searchChar); + for (int i = start; i < sz - 1; i++) { + final char high = cs.charAt(i); + final char low = cs.charAt(i + 1); + if (high == chars[0] && low == chars[1]) { + return i; + } + } + } + return NOT_FOUND; + } + + /** + * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf + * + * @param cs the {@link CharSequence} to be processed + * @param searchChar the {@link CharSequence} to find + * @param start the start index + * @return the index where the search sequence was found + */ + static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) { + if (searchChar == null || cs == null) { + return NOT_FOUND; + } + if (searchChar instanceof String) { + if (cs instanceof String) { + return ((String) cs).lastIndexOf((String) searchChar, start); + } + if (cs instanceof StringBuilder) { + return ((StringBuilder) cs).lastIndexOf((String) searchChar, start); + } + if (cs instanceof StringBuffer) { + return ((StringBuffer) cs).lastIndexOf((String) searchChar, start); + } + } + + final int len1 = cs.length(); + final int len2 = searchChar.length(); + + if (start > len1) { + start = len1; + } + + if (start < 0 || len2 > len1) { + return NOT_FOUND; + } + + if (len2 == 0) { + return start; + } + + if (len2 <= TO_STRING_LIMIT) { + if (cs instanceof String) { + return ((String) cs).lastIndexOf(searchChar.toString(), start); + } + if (cs instanceof StringBuilder) { + return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start); + } + if (cs instanceof StringBuffer) { + return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start); + } + } + + if (start + len2 > len1) { + start = len1 - len2; + } + + final char char0 = searchChar.charAt(0); + + int i = start; + while (true) { + while (cs.charAt(i) != char0) { + i--; + if (i < 0) { + return NOT_FOUND; + } + } + if (checkLaterThan1(cs, searchChar, len2, i)) { + return i; + } + i--; + if (i < 0) { + return NOT_FOUND; + } + } + } + + /** + * Returns the index within {@code cs} of the last occurrence of + * the specified character, searching backward starting at the + * specified index. For values of {@code searchChar} in the range + * from 0 to 0xFFFF (inclusive), the index returned is the largest + * value <i>k</i> such that: + * <blockquote><pre> + * (this.charAt(<i>k</i>) == searchChar) && (<i>k</i> <= start) + * </pre></blockquote> + * is true. For other values of {@code searchChar}, it is the + * largest value <i>k</i> such that: + * <blockquote><pre> + * (this.codePointAt(<i>k</i>) == searchChar) && (<i>k</i> <= start) + * </pre></blockquote> + * is true. In either case, if no such character occurs in {@code cs} + * at or before position {@code start}, then {@code -1} is returned. + * + * <p> + * All indices are specified in {@code char} values + * (Unicode code units). + * </p> + * + * @param cs the {@link CharSequence} to be processed + * @param searchChar the char to be searched for + * @param start the start index, negative returns -1, beyond length starts at end + * @return the index where the search char was found, -1 if not found + * @since 3.6 updated to behave more like {@link String} + */ + static int lastIndexOf(final CharSequence cs, final int searchChar, int start) { + if (cs instanceof String) { + return ((String) cs).lastIndexOf(searchChar, start); + } + final int sz = cs.length(); + if (start < 0) { + return NOT_FOUND; + } + if (start >= sz) { + start = sz - 1; + } + if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { + for (int i = start; i >= 0; --i) { + if (cs.charAt(i) == searchChar) { + return i; + } + } + return NOT_FOUND; + } + //supplementary characters (LANG1300) + //NOTE - we must do a forward traversal for this to avoid duplicating code points + if (searchChar <= Character.MAX_CODE_POINT) { + final char[] chars = Character.toChars(searchChar); + //make sure it's not the last index + if (start == sz - 1) { + return NOT_FOUND; + } + for (int i = start; i >= 0; i--) { + final char high = cs.charAt(i); + final char low = cs.charAt(i + 1); + if (chars[0] == high && chars[1] == low) { + return i; + } + } + } + return NOT_FOUND; + } + + /** + * Green implementation of regionMatches. + * + * @param cs the {@link CharSequence} to be processed + * @param ignoreCase whether or not to be case-insensitive + * @param thisStart the index to start on the {@code cs} CharSequence + * @param substring the {@link CharSequence} to be looked for + * @param start the index to start on the {@code substring} CharSequence + * @param length character length of the region + * @return whether the region matched + */ + static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, + final CharSequence substring, final int start, final int length) { + if (cs instanceof String && substring instanceof String) { + return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length); + } + int index1 = thisStart; + int index2 = start; + int tmpLen = length; + + // Extract these first so we detect NPEs the same as the java.lang.String version + final int srcLen = cs.length() - thisStart; + final int otherLen = substring.length() - start; + + // Check for invalid parameters + if (thisStart < 0 || start < 0 || length < 0) { + return false; + } + + // Check that the regions are long enough + if (srcLen < length || otherLen < length) { + return false; + } + + while (tmpLen-- > 0) { + final char c1 = cs.charAt(index1++); + final char c2 = substring.charAt(index2++); + + if (c1 == c2) { + continue; + } + + if (!ignoreCase) { + return false; + } + + // The real same check as in String.regionMatches(): + final char u1 = Character.toUpperCase(c1); + final char u2 = Character.toUpperCase(c2); + if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) { + return false; + } + } + + return true; + } + + /** + * Returns a new {@link CharSequence} that is a subsequence of this + * sequence starting with the {@code char} value at the specified index. + * + * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}. + * The length (in {@code char}) of the returned sequence is {@code length() - start}, + * so if {@code start == end} then an empty sequence is returned.</p> + * + * @param cs the specified subsequence, null returns null + * @param start the start index, inclusive, valid + * @return a new subsequence, may be null + * @throws IndexOutOfBoundsException if {@code start} is negative or if + * {@code start} is greater than {@code length()} + */ + public static CharSequence subSequence(final CharSequence cs, final int start) { + return cs == null ? null : cs.subSequence(start, cs.length()); + } + + /** + * Converts the given CharSequence to a char[]. + * + * @param source the {@link CharSequence} to be processed. + * @return the resulting char array, never null. + * @since 3.11 + */ + public static char[] toCharArray(final CharSequence source) { + final int len = StringUtils.length(source); + if (len == 0) { + return ArrayUtils.EMPTY_CHAR_ARRAY; + } + if (source instanceof String) { + return ((String) source).toCharArray(); + } + final char[] array = new char[len]; + for (int i = 0; i < len; i++) { + array[i] = source.charAt(i); + } + return array; + } + + /** + * {@link CharSequenceUtils} instances should NOT be constructed in + * standard programming. + * + * <p>This constructor is public to permit tools that require a JavaBean + * instance to operate.</p> + */ + public CharSequenceUtils() { + } +} |