1 files changed, 389 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
new file mode 100644
index 000000000..f63b4248c
--- /dev/null
+++ b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java
@@ -0,0 +1,389 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.lang3;
+
+/**
+ * Operations on {@link CharSequence} that are
+ * {@code null} safe.
+ *
+ * @see CharSequence
+ * @since 3.0
+ */
+public class CharSequenceUtils {
+
+    private static final int NOT_FOUND = -1;
+
+    static final int TO_STRING_LIMIT = 16;
+
+    private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
+        for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
+            if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
+     *
+     * @param cs the {@link CharSequence} to be processed
+     * @param searchChar the {@link CharSequence} to be searched for
+     * @param start the start index
+     * @return the index where the search sequence was found
+     */
+    static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
+        if (cs instanceof String) {
+            return ((String) cs).indexOf(searchChar.toString(), start);
+        }
+        if (cs instanceof StringBuilder) {
+            return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
+        }
+        if (cs instanceof StringBuffer) {
+            return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
+        }
+        return cs.toString().indexOf(searchChar.toString(), start);
+//        if (cs instanceof String && searchChar instanceof String) {
+//            // TODO: Do we assume searchChar is usually relatively small;
+//            //       If so then calling toString() on it is better than reverting to
+//            //       the green implementation in the else block
+//            return ((String) cs).indexOf((String) searchChar, start);
+//        } else {
+//            // TODO: Implement rather than convert to String
+//            return cs.toString().indexOf(searchChar.toString(), start);
+//        }
+    }
+
+    /**
+     * Returns the index within {@code cs} of the first occurrence of the
+     * specified character, starting the search at the specified index.
+     * <p>
+     * If a character with value {@code searchChar} occurs in the
+     * character sequence represented by the {@code cs}
+     * object at an index no smaller than {@code start}, then
+     * the index of the first such occurrence is returned. For values
+     * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
+     * this is the smallest value <i>k</i> such that:
+     * </p>
+     * <blockquote><pre>
+     * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
+     * </pre></blockquote>
+     * is true. For other values of {@code searchChar}, it is the
+     * smallest value <i>k</i> such that:
+     * <blockquote><pre>
+     * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
+     * </pre></blockquote>
+     * <p>
+     * is true. In either case, if no such character occurs inm {@code cs}
+     * at or after position {@code start}, then
+     * {@code -1} is returned.
+     * </p>
+     * <p>
+     * There is no restriction on the value of {@code start}. If it
+     * is negative, it has the same effect as if it were zero: the entire
+     * {@link CharSequence} may be searched. If it is greater than
+     * the length of {@code cs}, it has the same effect as if it were
+     * equal to the length of {@code cs}: {@code -1} is returned.
+     * </p>
+     * <p>All indices are specified in {@code char} values
+     * (Unicode code units).
+     * </p>
+     *
+     * @param cs  the {@link CharSequence} to be processed, not null
+     * @param searchChar  the char to be searched for
+     * @param start  the start index, negative starts at the string start
+     * @return the index where the search char was found, -1 if not found
+     * @since 3.6 updated to behave more like {@link String}
+     */
+    static int indexOf(final CharSequence cs, final int searchChar, int start) {
+        if (cs instanceof String) {
+            return ((String) cs).indexOf(searchChar, start);
+        }
+        final int sz = cs.length();
+        if (start < 0) {
+            start = 0;
+        }
+        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+            for (int i = start; i < sz; i++) {
+                if (cs.charAt(i) == searchChar) {
+                    return i;
+                }
+            }
+            return NOT_FOUND;
+        }
+        //supplementary characters (LANG1300)
+        if (searchChar <= Character.MAX_CODE_POINT) {
+            final char[] chars = Character.toChars(searchChar);
+            for (int i = start; i < sz - 1; i++) {
+                final char high = cs.charAt(i);
+                final char low = cs.charAt(i + 1);
+                if (high == chars[0] && low == chars[1]) {
+                    return i;
+                }
+            }
+        }
+        return NOT_FOUND;
+    }
+
+    /**
+     * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
+     *
+     * @param cs the {@link CharSequence} to be processed
+     * @param searchChar the {@link CharSequence} to find
+     * @param start the start index
+     * @return the index where the search sequence was found
+     */
+    static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
+        if (searchChar == null || cs == null) {
+            return NOT_FOUND;
+        }
+        if (searchChar instanceof String) {
+            if (cs instanceof String) {
+                return ((String) cs).lastIndexOf((String) searchChar, start);
+            }
+            if (cs instanceof StringBuilder) {
+                return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
+            }
+            if (cs instanceof StringBuffer) {
+                return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
+            }
+        }
+
+        final int len1 = cs.length();
+        final int len2 = searchChar.length();
+
+        if (start > len1) {
+            start = len1;
+        }
+
+        if (start < 0 || len2 > len1) {
+            return NOT_FOUND;
+        }
+
+        if (len2 == 0) {
+            return start;
+        }
+
+        if (len2 <= TO_STRING_LIMIT) {
+            if (cs instanceof String) {
+                return ((String) cs).lastIndexOf(searchChar.toString(), start);
+            }
+            if (cs instanceof StringBuilder) {
+                return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
+            }
+            if (cs instanceof StringBuffer) {
+                return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
+            }
+        }
+
+        if (start + len2 > len1) {
+            start = len1 - len2;
+        }
+
+        final char char0 = searchChar.charAt(0);
+
+        int i = start;
+        while (true) {
+            while (cs.charAt(i) != char0) {
+                i--;
+                if (i < 0) {
+                    return NOT_FOUND;
+                }
+            }
+            if (checkLaterThan1(cs, searchChar, len2, i)) {
+                return i;
+            }
+            i--;
+            if (i < 0) {
+                return NOT_FOUND;
+            }
+        }
+    }
+
+    /**
+     * Returns the index within {@code cs} of the last occurrence of
+     * the specified character, searching backward starting at the
+     * specified index. For values of {@code searchChar} in the range
+     * from 0 to 0xFFFF (inclusive), the index returned is the largest
+     * value <i>k</i> such that:
+     * <blockquote><pre>
+     * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
+     * </pre></blockquote>
+     * is true. For other values of {@code searchChar}, it is the
+     * largest value <i>k</i> such that:
+     * <blockquote><pre>
+     * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
+     * </pre></blockquote>
+     * is true. In either case, if no such character occurs in {@code cs}
+     * at or before position {@code start}, then {@code -1} is returned.
+     *
+     * <p>
+     * All indices are specified in {@code char} values
+     * (Unicode code units).
+     * </p>
+     *
+     * @param cs  the {@link CharSequence} to be processed
+     * @param searchChar  the char to be searched for
+     * @param start  the start index, negative returns -1, beyond length starts at end
+     * @return the index where the search char was found, -1 if not found
+     * @since 3.6 updated to behave more like {@link String}
+     */
+    static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
+        if (cs instanceof String) {
+            return ((String) cs).lastIndexOf(searchChar, start);
+        }
+        final int sz = cs.length();
+        if (start < 0) {
+            return NOT_FOUND;
+        }
+        if (start >= sz) {
+            start = sz - 1;
+        }
+        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+            for (int i = start; i >= 0; --i) {
+                if (cs.charAt(i) == searchChar) {
+                    return i;
+                }
+            }
+            return NOT_FOUND;
+        }
+        //supplementary characters (LANG1300)
+        //NOTE - we must do a forward traversal for this to avoid duplicating code points
+        if (searchChar <= Character.MAX_CODE_POINT) {
+            final char[] chars = Character.toChars(searchChar);
+            //make sure it's not the last index
+            if (start == sz - 1) {
+                return NOT_FOUND;
+            }
+            for (int i = start; i >= 0; i--) {
+                final char high = cs.charAt(i);
+                final char low = cs.charAt(i + 1);
+                if (chars[0] == high && chars[1] == low) {
+                    return i;
+                }
+            }
+        }
+        return NOT_FOUND;
+    }
+
+    /**
+     * Green implementation of regionMatches.
+     *
+     * @param cs the {@link CharSequence} to be processed
+     * @param ignoreCase whether or not to be case-insensitive
+     * @param thisStart the index to start on the {@code cs} CharSequence
+     * @param substring the {@link CharSequence} to be looked for
+     * @param start the index to start on the {@code substring} CharSequence
+     * @param length character length of the region
+     * @return whether the region matched
+     */
+    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
+            final CharSequence substring, final int start, final int length)    {
+        if (cs instanceof String && substring instanceof String) {
+            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
+        }
+        int index1 = thisStart;
+        int index2 = start;
+        int tmpLen = length;
+
+        // Extract these first so we detect NPEs the same as the java.lang.String version
+        final int srcLen = cs.length() - thisStart;
+        final int otherLen = substring.length() - start;
+
+        // Check for invalid parameters
+        if (thisStart < 0 || start < 0 || length < 0) {
+            return false;
+        }
+
+        // Check that the regions are long enough
+        if (srcLen < length || otherLen < length) {
+            return false;
+        }
+
+        while (tmpLen-- > 0) {
+            final char c1 = cs.charAt(index1++);
+            final char c2 = substring.charAt(index2++);
+
+            if (c1 == c2) {
+                continue;
+            }
+
+            if (!ignoreCase) {
+                return false;
+            }
+
+            // The real same check as in String.regionMatches():
+            final char u1 = Character.toUpperCase(c1);
+            final char u2 = Character.toUpperCase(c2);
+            if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Returns a new {@link CharSequence} that is a subsequence of this
+     * sequence starting with the {@code char} value at the specified index.
+     *
+     * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}.
+     * The length (in {@code char}) of the returned sequence is {@code length() - start},
+     * so if {@code start == end} then an empty sequence is returned.</p>
+     *
+     * @param cs  the specified subsequence, null returns null
+     * @param start  the start index, inclusive, valid
+     * @return a new subsequence, may be null
+     * @throws IndexOutOfBoundsException if {@code start} is negative or if
+     *  {@code start} is greater than {@code length()}
+     */
+    public static CharSequence subSequence(final CharSequence cs, final int start) {
+        return cs == null ? null : cs.subSequence(start, cs.length());
+    }
+
+    /**
+     * Converts the given CharSequence to a char[].
+     *
+     * @param source the {@link CharSequence} to be processed.
+     * @return the resulting char array, never null.
+     * @since 3.11
+     */
+    public static char[] toCharArray(final CharSequence source) {
+        final int len = StringUtils.length(source);
+        if (len == 0) {
+            return ArrayUtils.EMPTY_CHAR_ARRAY;
+        }
+        if (source instanceof String) {
+            return ((String) source).toCharArray();
+        }
+        final char[] array = new char[len];
+        for (int i = 0; i < len; i++) {
+            array[i] = source.charAt(i);
+        }
+        return array;
+    }
+
+    /**
+     * {@link CharSequenceUtils} instances should NOT be constructed in
+     * standard programming.
+     *
+     * <p>This constructor is public to permit tools that require a JavaBean
+     * instance to operate.</p>
+     */
+    public CharSequenceUtils() {
+    }
+}