diff options
Diffstat (limited to 'jimfs/src/test/java/com/google/common/jimfs/PathNormalizationTest.java')
-rw-r--r-- | jimfs/src/test/java/com/google/common/jimfs/PathNormalizationTest.java | 351 |
1 files changed, 351 insertions, 0 deletions
diff --git a/jimfs/src/test/java/com/google/common/jimfs/PathNormalizationTest.java b/jimfs/src/test/java/com/google/common/jimfs/PathNormalizationTest.java new file mode 100644 index 0000000..23b28b4 --- /dev/null +++ b/jimfs/src/test/java/com/google/common/jimfs/PathNormalizationTest.java @@ -0,0 +1,351 @@ +/* + * Copyright 2013 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.common.jimfs; + +import static com.google.common.jimfs.PathNormalization.CASE_FOLD_ASCII; +import static com.google.common.jimfs.PathNormalization.CASE_FOLD_UNICODE; +import static com.google.common.jimfs.PathNormalization.NFC; +import static com.google.common.jimfs.PathNormalization.NFD; +import static com.google.common.jimfs.TestUtils.assertNotEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import com.google.common.collect.ImmutableSet; +import java.util.regex.Pattern; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for {@link PathNormalization}. + * + * @author Colin Decker + */ +@RunWith(JUnit4.class) +public class PathNormalizationTest { + + private ImmutableSet<PathNormalization> normalizations; + + @Test + public void testNone() { + normalizations = ImmutableSet.of(); + + assertNormalizedEqual("foo", "foo"); + assertNormalizedUnequal("Foo", "foo"); + assertNormalizedUnequal("\u00c5", "\u212b"); + assertNormalizedUnequal("Am\u00e9lie", "Ame\u0301lie"); + } + + private static final String[][] CASE_FOLD_TEST_DATA = { + {"foo", "fOo", "foO", "Foo", "FOO"}, + {"efficient", "efficient", "efficient", "Efficient", "EFFICIENT"}, + {"flour", "flour", "flour", "Flour", "FLOUR"}, + {"poſt", "post", "poſt", "Poſt", "POST"}, + {"poſt", "post", "poſt", "Poſt", "POST"}, + {"ſtop", "stop", "ſtop", "Stop", "STOP"}, + {"tschüß", "tschüss", "tschüß", "Tschüß", "TSCHÜSS"}, + {"weiß", "weiss", "weiß", "Weiß", "WEISS"}, + {"WEIẞ", "weiss", "weiß", "Weiß", "WEIẞ"}, + {"στιγμας", "στιγμασ", "στιγμας", "Στιγμας", "ΣΤΙΓΜΑΣ"}, + {"ᾲ στο διάολο", "ὰι στο διάολο", "ᾲ στο διάολο", "Ὰͅ Στο Διάολο", "ᾺΙ ΣΤΟ ΔΙΆΟΛΟ"}, + {"Henry Ⅷ", "henry ⅷ", "henry ⅷ", "Henry Ⅷ", "HENRY Ⅷ"}, + {"I Work At Ⓚ", "i work at ⓚ", "i work at ⓚ", "I Work At Ⓚ", "I WORK AT Ⓚ"}, + {"ʀᴀʀᴇ", "ʀᴀʀᴇ", "ʀᴀʀᴇ", "Ʀᴀʀᴇ", "ƦᴀƦᴇ"}, + {"Ὰͅ", "ὰι", "ᾲ", "Ὰͅ", "ᾺΙ"} + }; + + @Test + public void testCaseFold() { + normalizations = ImmutableSet.of(CASE_FOLD_UNICODE); + + for (String[] row : CASE_FOLD_TEST_DATA) { + for (int i = 0; i < row.length; i++) { + for (int j = i; j < row.length; j++) { + assertNormalizedEqual(row[i], row[j]); + } + } + } + } + + @Test + public void testCaseInsensitiveAscii() { + normalizations = ImmutableSet.of(CASE_FOLD_ASCII); + + String[] row = {"foo", "FOO", "fOo", "Foo"}; + for (int i = 0; i < row.length; i++) { + for (int j = i; j < row.length; j++) { + assertNormalizedEqual(row[i], row[j]); + } + } + + assertNormalizedUnequal("weiß", "weiss"); + } + + private static final String[][] NORMALIZE_TEST_DATA = { + {"\u00c5", "\u212b"}, // two forms of Å (one code point each) + {"Am\u00e9lie", "Ame\u0301lie"} // two forms of Amélie (one composed, one decomposed) + }; + + @Test + public void testNormalizeNfc() { + normalizations = ImmutableSet.of(NFC); + + for (String[] row : NORMALIZE_TEST_DATA) { + for (int i = 0; i < row.length; i++) { + for (int j = i; j < row.length; j++) { + assertNormalizedEqual(row[i], row[j]); + } + } + } + } + + @Test + public void testNormalizeNfd() { + normalizations = ImmutableSet.of(NFD); + + for (String[] row : NORMALIZE_TEST_DATA) { + for (int i = 0; i < row.length; i++) { + for (int j = i; j < row.length; j++) { + assertNormalizedEqual(row[i], row[j]); + } + } + } + } + + private static final String[][] NORMALIZE_CASE_FOLD_TEST_DATA = { + {"\u00c5", "\u00e5", "\u212b"}, + {"Am\u00e9lie", "Am\u00c9lie", "Ame\u0301lie", "AME\u0301LIE"} + }; + + @Test + public void testNormalizeNfcCaseFold() { + normalizations = ImmutableSet.of(NFC, CASE_FOLD_UNICODE); + + for (String[] row : NORMALIZE_CASE_FOLD_TEST_DATA) { + for (int i = 0; i < row.length; i++) { + for (int j = i; j < row.length; j++) { + assertNormalizedEqual(row[i], row[j]); + } + } + } + } + + @Test + public void testNormalizeNfdCaseFold() { + normalizations = ImmutableSet.of(NFD, CASE_FOLD_UNICODE); + + for (String[] row : NORMALIZE_CASE_FOLD_TEST_DATA) { + for (int i = 0; i < row.length; i++) { + for (int j = i; j < row.length; j++) { + assertNormalizedEqual(row[i], row[j]); + } + } + } + } + + private static final String[][] NORMALIZED_CASE_INSENSITIVE_ASCII_TEST_DATA = { + {"\u00e5", "\u212b"}, + {"Am\u00e9lie", "AME\u0301LIE"} + }; + + @Test + public void testNormalizeNfcCaseFoldAscii() { + normalizations = ImmutableSet.of(NFC, CASE_FOLD_ASCII); + + for (String[] row : NORMALIZED_CASE_INSENSITIVE_ASCII_TEST_DATA) { + for (int i = 0; i < row.length; i++) { + for (int j = i + 1; j < row.length; j++) { + assertNormalizedUnequal(row[i], row[j]); + } + } + } + } + + @Test + public void testNormalizeNfdCaseFoldAscii() { + normalizations = ImmutableSet.of(NFD, CASE_FOLD_ASCII); + + for (String[] row : NORMALIZED_CASE_INSENSITIVE_ASCII_TEST_DATA) { + for (int i = 0; i < row.length; i++) { + for (int j = i + 1; j < row.length; j++) { + // since decomposition happens before case folding, the strings are equal when the + // decomposed ASCII letter is folded + assertNormalizedEqual(row[i], row[j]); + } + } + } + } + + // regex patterns offer loosely similar matching, but that's all + + @Test + public void testNone_pattern() { + normalizations = ImmutableSet.of(); + assertNormalizedPatternMatches("foo", "foo"); + assertNormalizedPatternDoesNotMatch("foo", "FOO"); + assertNormalizedPatternDoesNotMatch("FOO", "foo"); + } + + @Test + public void testCaseFold_pattern() { + normalizations = ImmutableSet.of(CASE_FOLD_UNICODE); + assertNormalizedPatternMatches("foo", "foo"); + assertNormalizedPatternMatches("foo", "FOO"); + assertNormalizedPatternMatches("FOO", "foo"); + assertNormalizedPatternMatches("Am\u00e9lie", "AM\u00c9LIE"); + assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE"); + assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "Ame\u0301lie"); + assertNormalizedPatternDoesNotMatch("AM\u00c9LIE", "AME\u0301LIE"); + assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AME\u0301LIE"); + } + + @Test + public void testCaseFoldAscii_pattern() { + normalizations = ImmutableSet.of(CASE_FOLD_ASCII); + assertNormalizedPatternMatches("foo", "foo"); + assertNormalizedPatternMatches("foo", "FOO"); + assertNormalizedPatternMatches("FOO", "foo"); + assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE"); + assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AM\u00c9LIE"); + assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "Ame\u0301lie"); + assertNormalizedPatternDoesNotMatch("AM\u00c9LIE", "AME\u0301LIE"); + assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AME\u0301LIE"); + } + + @Test + public void testNormalizeNfc_pattern() { + normalizations = ImmutableSet.of(NFC); + assertNormalizedPatternMatches("foo", "foo"); + assertNormalizedPatternDoesNotMatch("foo", "FOO"); + assertNormalizedPatternDoesNotMatch("FOO", "foo"); + assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie"); + assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AME\u0301LIE"); + } + + @Test + public void testNormalizeNfd_pattern() { + normalizations = ImmutableSet.of(NFD); + assertNormalizedPatternMatches("foo", "foo"); + assertNormalizedPatternDoesNotMatch("foo", "FOO"); + assertNormalizedPatternDoesNotMatch("FOO", "foo"); + assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie"); + assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AME\u0301LIE"); + } + + @Test + public void testNormalizeNfcCaseFold_pattern() { + normalizations = ImmutableSet.of(NFC, CASE_FOLD_UNICODE); + assertNormalizedPatternMatches("foo", "foo"); + assertNormalizedPatternMatches("foo", "FOO"); + assertNormalizedPatternMatches("FOO", "foo"); + assertNormalizedPatternMatches("Am\u00e9lie", "AM\u00c9LIE"); + assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE"); + assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie"); + assertNormalizedPatternMatches("AM\u00c9LIE", "AME\u0301LIE"); + assertNormalizedPatternMatches("Am\u00e9lie", "AME\u0301LIE"); + } + + @Test + public void testNormalizeNfdCaseFold_pattern() { + normalizations = ImmutableSet.of(NFD, CASE_FOLD_UNICODE); + assertNormalizedPatternMatches("foo", "foo"); + assertNormalizedPatternMatches("foo", "FOO"); + assertNormalizedPatternMatches("FOO", "foo"); + assertNormalizedPatternMatches("Am\u00e9lie", "AM\u00c9LIE"); + assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE"); + assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie"); + assertNormalizedPatternMatches("AM\u00c9LIE", "AME\u0301LIE"); + assertNormalizedPatternMatches("Am\u00e9lie", "AME\u0301LIE"); + } + + @Test + public void testNormalizeNfcCaseFoldAscii_pattern() { + normalizations = ImmutableSet.of(NFC, CASE_FOLD_ASCII); + assertNormalizedPatternMatches("foo", "foo"); + assertNormalizedPatternMatches("foo", "FOO"); + assertNormalizedPatternMatches("FOO", "foo"); + + // these are all a bit fuzzy as when CASE_INSENSITIVE is present but not UNICODE_CASE, ASCII + // only strings are expected + assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE"); + assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AM\u00c9LIE"); + assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie"); + assertNormalizedPatternMatches("AM\u00c9LIE", "AME\u0301LIE"); + } + + @Test + public void testNormalizeNfdCaseFoldAscii_pattern() { + normalizations = ImmutableSet.of(NFD, CASE_FOLD_ASCII); + assertNormalizedPatternMatches("foo", "foo"); + assertNormalizedPatternMatches("foo", "FOO"); + assertNormalizedPatternMatches("FOO", "foo"); + + // these are all a bit fuzzy as when CASE_INSENSITIVE is present but not UNICODE_CASE, ASCII + // only strings are expected + assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE"); + assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AM\u00c9LIE"); + assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie"); + assertNormalizedPatternMatches("AM\u00c9LIE", "AME\u0301LIE"); + } + + /** Asserts that the given strings normalize to the same string using the current normalizer. */ + private void assertNormalizedEqual(String first, String second) { + assertEquals( + PathNormalization.normalize(first, normalizations), + PathNormalization.normalize(second, normalizations)); + } + + /** Asserts that the given strings normalize to different strings using the current normalizer. */ + private void assertNormalizedUnequal(String first, String second) { + assertNotEquals( + PathNormalization.normalize(first, normalizations), + PathNormalization.normalize(second, normalizations)); + } + + /** + * Asserts that the given strings match when one is compiled as a regex pattern using the current + * normalizer and matched against the other. + */ + private void assertNormalizedPatternMatches(String first, String second) { + Pattern pattern = PathNormalization.compilePattern(first, normalizations); + assertTrue( + "pattern '" + pattern + "' does not match '" + second + "'", + pattern.matcher(second).matches()); + + pattern = PathNormalization.compilePattern(second, normalizations); + assertTrue( + "pattern '" + pattern + "' does not match '" + first + "'", + pattern.matcher(first).matches()); + } + + /** + * Asserts that the given strings do not match when one is compiled as a regex pattern using the + * current normalizer and matched against the other. + */ + private void assertNormalizedPatternDoesNotMatch(String first, String second) { + Pattern pattern = PathNormalization.compilePattern(first, normalizations); + assertFalse( + "pattern '" + pattern + "' should not match '" + second + "'", + pattern.matcher(second).matches()); + + pattern = PathNormalization.compilePattern(second, normalizations); + assertFalse( + "pattern '" + pattern + "' should not match '" + first + "'", + pattern.matcher(first).matches()); + } +} |