diff options
Diffstat (limited to 'src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java')
-rw-r--r-- | src/test/java/org/apache/commons/lang3/StringEscapeUtilsTest.java | 573 |
1 files changed, 573 insertions, 0 deletions
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.io.StringWriter;
import java.lang.reflect.Constructor;
import java.lang.reflect.Modifier;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link StringEscapeUtils}.
 * <p>
 * Each escape family (Java, ECMAScript, HTML 4, XML, CSV, JSON) is exercised both through the
 * static {@code String} convenience method and through the corresponding translator constant
 * writing to a {@link StringWriter}.
 * </p>
 */
@Deprecated
public class StringEscapeUtilsTest extends AbstractLangTest {

    private static final String FOO = "foo";

    /**
     * HTML fixtures shared by {@link #testEscapeHtml()} and {@link #testUnescapeHtml4()}.
     * <p>
     * Each row is {@code {message, escaped form, unescaped form}}: index 0 is the assertion
     * message, index 1 the text containing entities and index 2 the plain text.
     * </p>
     */
    private static final String[][] HTML_ESCAPES = {
        {"no escaping", "plain text", "plain text"},
        {"empty string", "", ""},
        {"null", null, null},
        {"ampersand", "bread &amp; butter", "bread & butter"},
        {"quotes", "&quot;bread&quot; &amp; butter", "\"bread\" & butter"},
        {"final character only", "greater than &gt;", "greater than >"},
        {"first character only", "&lt; less than", "< less than"},
        {"apostrophe", "Huntington's chorea", "Huntington's chorea"},
        {"languages", "English,Fran&ccedil;ais,\u65E5\u672C\u8A9E (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
        {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"},
    };

    /**
     * Checks that the class exposes exactly one public constructor and is public and non-final.
     */
    @Test
    public void testConstructor() {
        assertNotNull(new StringEscapeUtils());
        final Constructor<?>[] cons = StringEscapeUtils.class.getDeclaredConstructors();
        assertEquals(1, cons.length);
        assertTrue(Modifier.isPublic(cons[0].getModifiers()));
        assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers()));
        assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers()));
    }

    @Test
    public void testEscapeJava() throws IOException {
        assertNull(StringEscapeUtils.escapeJava(null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JAVA.translate(null, null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JAVA.translate("", null));

        assertEscapeJava("empty string", "", "");
        assertEscapeJava(FOO, FOO);
        assertEscapeJava("tab", "\\t", "\t");
        assertEscapeJava("backslash", "\\\\", "\\");
        assertEscapeJava("single quote should not be escaped", "'", "'");
        assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r");
        assertEscapeJava("\\u1234", "\u1234");
        assertEscapeJava("\\u0234", "\u0234");
        assertEscapeJava("\\u00EF", "\u00ef");
        assertEscapeJava("\\u0001", "\u0001");
        assertEscapeJava("Should use capitalized Unicode hex", "\\uABCD", "\uabcd");

        assertEscapeJava("He didn't say, \\\"stop!\\\"",
                "He didn't say, \"stop!\"");
        assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00A0",
                "This space is non-breaking:\u00a0");
        assertEscapeJava("\\uABCD\\u1234\\u012C",
                "\uABCD\u1234\u012C");
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-421
     */
    @Test
    public void testEscapeJavaWithSlash() {
        final String input = "String with a slash (/) in it";

        final String expected = input;
        final String actual = StringEscapeUtils.escapeJava(input);

        /*
         * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape
         * in a Java string.
         */
        assertEquals(expected, actual);
    }

    /**
     * Asserts that {@code original} escapes to {@code escaped}, without a custom message.
     *
     * @param escaped the expected escaped form
     * @param original the raw input
     * @throws IOException if the translator fails to write
     */
    private void assertEscapeJava(final String escaped, final String original) throws IOException {
        assertEscapeJava(null, escaped, original);
    }

    /**
     * Asserts that {@code original} escapes to {@code expected} through both
     * {@link StringEscapeUtils#escapeJava(String)} and the {@code ESCAPE_JAVA} translator.
     *
     * @param message optional detail appended to the failure message, may be {@code null}
     * @param expected the expected escaped form
     * @param original the raw input
     * @throws IOException if the translator fails to write
     */
    private void assertEscapeJava(final String message, final String expected, final String original) throws IOException {
        final String detail = message == null ? "" : ": " + message;

        final String converted = StringEscapeUtils.escapeJava(original);
        assertEquals(expected, converted, "escapeJava(String) failed" + detail);

        final StringWriter writer = new StringWriter();
        StringEscapeUtils.ESCAPE_JAVA.translate(original, writer);
        assertEquals(expected, writer.toString(), "ESCAPE_JAVA.translate(CharSequence, Writer) failed" + detail);
    }

    @Test
    public void testUnescapeJava() throws IOException {
        assertNull(StringEscapeUtils.unescapeJava(null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JAVA.translate(null, null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JAVA.translate("", null));
        assertThrows(RuntimeException.class, () -> StringEscapeUtils.unescapeJava("\\u02-3"));

        assertUnescapeJava("", "");
        assertUnescapeJava("test", "test");
        assertUnescapeJava("\ntest\b", "\\ntest\\b");
        assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b");
        assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r");
        assertUnescapeJava("", "\\");
        assertUnescapeJava("lowercase Unicode", "\uABCDx", "\\uabcdx");
        assertUnescapeJava("uppercase Unicode", "\uABCDx", "\\uABCDx");
        assertUnescapeJava("Unicode as final character", "\uABCD", "\\uabcd");
    }

    /**
     * Asserts that {@code original} unescapes to {@code unescaped}, without a custom message.
     *
     * @param unescaped the expected raw form
     * @param original the escaped input
     * @throws IOException if the translator fails to write
     */
    private void assertUnescapeJava(final String unescaped, final String original) throws IOException {
        assertUnescapeJava(null, unescaped, original);
    }

    /**
     * Asserts that {@code original} unescapes to {@code unescaped} through both
     * {@link StringEscapeUtils#unescapeJava(String)} and the {@code UNESCAPE_JAVA} translator.
     *
     * @param message optional detail included in the failure message, may be {@code null}
     * @param unescaped the expected raw form
     * @param original the escaped input
     * @throws IOException if the translator fails to write
     */
    private void assertUnescapeJava(final String message, final String unescaped, final String original) throws IOException {
        final String detail = message == null ? "" : ": " + message;
        final String actual = StringEscapeUtils.unescapeJava(original);

        // Both values are re-escaped so that control characters stay readable in the failure message.
        assertEquals(unescaped, actual,
                "unescape(String) failed"
                        + detail
                        + ": expected '" + StringEscapeUtils.escapeJava(unescaped)
                        + "' actual '" + StringEscapeUtils.escapeJava(actual) + "'");

        final StringWriter writer = new StringWriter();
        StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer);
        assertEquals(unescaped, writer.toString(), "UNESCAPE_JAVA.translate(CharSequence, Writer) failed" + detail);
    }

    @Test
    public void testEscapeEcmaScript() {
        assertNull(StringEscapeUtils.escapeEcmaScript(null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null));

        assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\""));
        assertEquals("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';",
                StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';"));
    }

    @Test
    public void testUnescapeEcmaScript() {
        // The null check must target the unescape method, not escapeEcmaScript.
        assertNull(StringEscapeUtils.unescapeEcmaScript(null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_ECMASCRIPT.translate(null, null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_ECMASCRIPT.translate("", null));

        assertEquals("He didn't say, \"stop!\"", StringEscapeUtils.unescapeEcmaScript("He didn\\'t say, \\\"stop!\\\""));
        assertEquals("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';",
                StringEscapeUtils.unescapeEcmaScript("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';"));
    }

    /**
     * Escapes index 2 of every {@link #HTML_ESCAPES} row and expects index 1.
     */
    @Test
    public void testEscapeHtml() throws IOException {
        for (final String[] element : HTML_ESCAPES) {
            final String message = element[0];
            final String expected = element[1];
            final String original = element[2];
            assertEquals(expected, StringEscapeUtils.escapeHtml4(original), message);

            final StringWriter sw = new StringWriter();
            StringEscapeUtils.ESCAPE_HTML4.translate(original, sw);
            // For the null row the writer is compared as null rather than by its contents.
            final String actual = original == null ? null : sw.toString();
            assertEquals(expected, actual, message);
        }
    }

    /**
     * Unescapes index 1 of every {@link #HTML_ESCAPES} row and expects index 2.
     */
    @Test
    public void testUnescapeHtml4() throws IOException {
        for (final String[] element : HTML_ESCAPES) {
            final String message = element[0];
            final String expected = element[2];
            final String original = element[1];
            assertEquals(expected, StringEscapeUtils.unescapeHtml4(original), message);

            final StringWriter sw = new StringWriter();
            StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw);
            final String actual = original == null ? null : sw.toString();
            assertEquals(expected, actual, message);
        }
        // \u00E7 is a cedilla (c with wiggle under)
        // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly
        // on some locales
        assertEquals("Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"), "funny chars pass through OK");

        // Malformed entities are expected to pass through unchanged.
        assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World"));
        assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World"));
        assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World"));
        assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World"));
    }

    @Test
    public void testUnescapeHexCharsHtml() {
        // Simple easy to grok test, with lower-case and upper-case hex markers
        assertEquals("\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;"), "hex number unescape");
        assertEquals("\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;"), "hex number unescape");
        // Test all Character values:
        for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) {
            final Character c1 = Character.valueOf(i);
            final Character c2 = Character.valueOf((char) (i + 1));
            final String expected = c1.toString() + c2.toString();
            final String escapedC1 = "&#x" + Integer.toHexString(c1.charValue()) + ";";
            final String escapedC2 = "&#x" + Integer.toHexString(c2.charValue()) + ";";
            assertEquals(expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2), "hex number unescape index " + (int) i);
        }
    }

    @Test
    public void testUnescapeUnknownEntity() {
        assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;"));
    }

    @Test
    public void testEscapeHtmlVersions() {
        assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392"));
        assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;"));

        // TODO: refine API for escaping/unescaping specific HTML versions
    }

    @Test
    public void testEscapeXml() throws Exception {
        assertEquals("&lt;abc&gt;", StringEscapeUtils.escapeXml("<abc>"));
        assertEquals("<abc>", StringEscapeUtils.unescapeXml("&lt;abc&gt;"));

        assertEquals("\u00A1", StringEscapeUtils.escapeXml("\u00A1"), "XML should not escape >0x7f values");
        assertEquals("\u00A0", StringEscapeUtils.unescapeXml("&#160;"), "XML should be able to unescape >0x7f values");
        assertEquals("\u00A0", StringEscapeUtils.unescapeXml("&#0160;"),
                "XML should be able to unescape >0x7f values with one leading 0");
        assertEquals("\u00A0", StringEscapeUtils.unescapeXml("&#00160;"),
                "XML should be able to unescape >0x7f values with two leading 0s");
        assertEquals("\u00A0", StringEscapeUtils.unescapeXml("&#000160;"),
                "XML should be able to unescape >0x7f values with three leading 0s");

        assertEquals("ain't", StringEscapeUtils.unescapeXml("ain&apos;t"));
        assertEquals("ain&apos;t", StringEscapeUtils.escapeXml("ain't"));
        assertEquals("", StringEscapeUtils.escapeXml(""));
        assertNull(StringEscapeUtils.escapeXml(null));
        assertNull(StringEscapeUtils.unescapeXml(null));

        StringWriter sw = new StringWriter();
        StringEscapeUtils.ESCAPE_XML.translate("<abc>", sw);
        assertEquals("&lt;abc&gt;", sw.toString(), "XML was escaped incorrectly");

        sw = new StringWriter();
        StringEscapeUtils.UNESCAPE_XML.translate("&lt;abc&gt;", sw);
        assertEquals("<abc>", sw.toString(), "XML was unescaped incorrectly");
    }

    @Test
    public void testEscapeXml10() {
        assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml10("a<b>c\"d'e&f"));
        assertEquals("a\tb\rc\nd", StringEscapeUtils.escapeXml10("a\tb\rc\nd"), "XML 1.0 should not escape \t \n \r");
        assertEquals("ab", StringEscapeUtils.escapeXml10("a\u0000\u0001\u0008\u000b\u000c\u000e\u001fb"),
                "XML 1.0 should omit most #x0-x8 | #xb | #xc | #xe-#x19");
        // Dropping the two lone surrogates leaves the two separating spaces adjacent.
        assertEquals("a\ud7ff  \ue000b", StringEscapeUtils.escapeXml10("a\ud7ff\ud800 \udfff \ue000b"),
                "XML 1.0 should omit #xd800-#xdfff");
        assertEquals("a\ufffdb", StringEscapeUtils.escapeXml10("a\ufffd\ufffe\uffffb"),
                "XML 1.0 should omit #xfffe | #xffff");
        assertEquals("a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b",
                StringEscapeUtils.escapeXml10("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"),
                "XML 1.0 should escape #x7f-#x84 | #x86 - #x9f, for XML 1.1 compatibility");
    }

    @Test
    public void testEscapeXml11() {
        assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml11("a<b>c\"d'e&f"));
        assertEquals("a\tb\rc\nd", StringEscapeUtils.escapeXml11("a\tb\rc\nd"), "XML 1.1 should not escape \t \n \r");
        assertEquals("ab", StringEscapeUtils.escapeXml11("a\u0000b"), "XML 1.1 should omit #x0");
        assertEquals("a&#1;&#8;&#11;&#12;&#14;&#31;b",
                StringEscapeUtils.escapeXml11("a\u0001\u0008\u000b\u000c\u000e\u001fb"),
                "XML 1.1 should escape #x1-x8 | #xb | #xc | #xe-#x19");
        assertEquals("a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b",
                StringEscapeUtils.escapeXml11("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"),
                "XML 1.1 should escape #x7F-#x84 | #x86-#x9F");
        // Dropping the two lone surrogates leaves the two separating spaces adjacent.
        assertEquals("a\ud7ff  \ue000b", StringEscapeUtils.escapeXml11("a\ud7ff\ud800 \udfff \ue000b"),
                "XML 1.1 should omit #xd800-#xdfff");
        assertEquals("a\ufffdb", StringEscapeUtils.escapeXml11("a\ufffd\ufffe\uffffb"),
                "XML 1.1 should omit #xfffe | #xffff");
    }

    /**
     * Tests Supplementary characters.
     * <p>
     * From https://www.w3.org/International/questions/qa-escapes
     * </p>
     * <blockquote>
     * Supplementary characters are those Unicode characters that have code points higher than the characters in
     * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
     * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
     * - you must use the single, code point value for that character. For example, use &amp;#x233B4; rather than
     * &amp;#xD84C;&amp;#xDFB4;.
     * </blockquote>
     * @see <a href="https://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
     * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
     */
    @Test
    public void testEscapeXmlSupplementaryCharacters() {
        final CharSequenceTranslator escapeXml =
                StringEscapeUtils.ESCAPE_XML.with(NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));

        // U+233B4 is 144308 in decimal.
        assertEquals("&#144308;", escapeXml.translate("\uD84C\uDFB4"),
                "Supplementary character must be represented using a single escape");

        assertEquals("a b c &#144308;", escapeXml.translate("a b c \uD84C\uDFB4"),
                "Supplementary characters mixed with basic characters should be encoded correctly");
    }

    @Test
    public void testEscapeXmlAllCharacters() {
        // https://www.w3.org/TR/xml/#charsets says:
        // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character,
        // excluding the surrogate blocks, FFFE, and FFFF. */
        final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML
                .with(NumericEntityEscaper.below(9), NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19),
                        NumericEntityEscaper.between(0xD800, 0xDFFF), NumericEntityEscaper.between(0xFFFE, 0xFFFF), NumericEntityEscaper.above(0x110000));

        assertEquals("&#0;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;", escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"));
        assertEquals("\t", escapeXml.translate("\t")); // 0x9
        assertEquals("\n", escapeXml.translate("\n")); // 0xA
        assertEquals("&#11;&#12;", escapeXml.translate("\u000B\u000C"));
        assertEquals("\r", escapeXml.translate("\r")); // 0xD
        assertEquals("Hello World! Ain&apos;t this great?", escapeXml.translate("Hello World! Ain't this great?"));
        assertEquals("&#14;&#15;&#24;&#25;", escapeXml.translate("\u000E\u000F\u0018\u0019"));
    }

    /**
     * Reverse of {@link #testEscapeXmlSupplementaryCharacters()}.
     *
     * @see <a href="https://issues.apache.org/jira/browse/LANG-729">LANG-729</a>
     */
    @Test
    public void testUnescapeXmlSupplementaryCharacters() {
        assertEquals("\uD84C\uDFB4", StringEscapeUtils.unescapeXml("&#144308;"),
                "Supplementary character must be represented using a single escape");

        assertEquals("a b c \uD84C\uDFB4", StringEscapeUtils.unescapeXml("a b c &#144308;"),
                "Supplementary characters mixed with basic characters should be decoded correctly");
    }

    // Tests issue LANG-150
    // https://issues.apache.org/jira/browse/LANG-150
    @Test
    public void testStandaloneAmphersand() {
        assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("&lt;P&O&gt;"));
        assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & &lt;"));
        assertEquals("<P&O>", StringEscapeUtils.unescapeXml("&lt;P&O&gt;"));
        assertEquals("test & <", StringEscapeUtils.unescapeXml("test & &lt;"));
    }

    @Test
    public void testLang313() {
        assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &amp;"));
    }

    @Test
    public void testEscapeCsvString() {
        assertEquals("foo.bar", StringEscapeUtils.escapeCsv("foo.bar"));
        assertEquals("\"foo,bar\"", StringEscapeUtils.escapeCsv("foo,bar"));
        assertEquals("\"foo\nbar\"", StringEscapeUtils.escapeCsv("foo\nbar"));
        assertEquals("\"foo\rbar\"", StringEscapeUtils.escapeCsv("foo\rbar"));
        assertEquals("\"foo\"\"bar\"", StringEscapeUtils.escapeCsv("foo\"bar"));
        assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
        assertEquals("", StringEscapeUtils.escapeCsv(""));
        assertNull(StringEscapeUtils.escapeCsv(null));
    }

    @Test
    public void testEscapeCsvWriter() throws Exception {
        checkCsvEscapeWriter("foo.bar", "foo.bar");
        checkCsvEscapeWriter("\"foo,bar\"", "foo,bar");
        checkCsvEscapeWriter("\"foo\nbar\"", "foo\nbar");
        checkCsvEscapeWriter("\"foo\rbar\"", "foo\rbar");
        checkCsvEscapeWriter("\"foo\"\"bar\"", "foo\"bar");
        checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
        checkCsvEscapeWriter("", null);
        checkCsvEscapeWriter("", "");
    }

    /**
     * Runs {@code value} through {@code ESCAPE_CSV} into a writer and compares the output.
     *
     * @param expected the expected writer contents
     * @param value the input, may be {@code null}
     * @throws IOException if the translator fails to write
     */
    private void checkCsvEscapeWriter(final String expected, final String value) throws IOException {
        final StringWriter writer = new StringWriter();
        StringEscapeUtils.ESCAPE_CSV.translate(value, writer);
        assertEquals(expected, writer.toString());
    }

    @Test
    public void testEscapeCsvIllegalStateException() {
        final StringWriter writer = new StringWriter();
        assertThrows(IllegalStateException.class, () -> StringEscapeUtils.ESCAPE_CSV.translate("foo", -1, writer));
    }

    @Test
    public void testUnescapeCsvString() {
        assertEquals("foo.bar", StringEscapeUtils.unescapeCsv("foo.bar"));
        assertEquals("foo,bar", StringEscapeUtils.unescapeCsv("\"foo,bar\""));
        assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
        assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
        assertEquals("foo\"bar", StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
        assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
        assertEquals("", StringEscapeUtils.unescapeCsv(""));
        assertNull(StringEscapeUtils.unescapeCsv(null));

        assertEquals("\"foo.bar\"", StringEscapeUtils.unescapeCsv("\"foo.bar\""));
    }

    @Test
    public void testUnescapeCsvWriter() throws Exception {
        checkCsvUnescapeWriter("foo.bar", "foo.bar");
        checkCsvUnescapeWriter("foo,bar", "\"foo,bar\"");
        checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\"");
        checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\"");
        checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\"");
        checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
        checkCsvUnescapeWriter("", null);
        checkCsvUnescapeWriter("", "");

        checkCsvUnescapeWriter("\"foo.bar\"", "\"foo.bar\"");
    }

    /**
     * Runs {@code value} through {@code UNESCAPE_CSV} into a writer and compares the output.
     *
     * @param expected the expected writer contents
     * @param value the input, may be {@code null}
     * @throws IOException if the translator fails to write
     */
    private void checkCsvUnescapeWriter(final String expected, final String value) throws IOException {
        final StringWriter writer = new StringWriter();
        StringEscapeUtils.UNESCAPE_CSV.translate(value, writer);
        assertEquals(expected, writer.toString());
    }

    @Test
    public void testUnescapeCsvIllegalStateException() {
        final StringWriter writer = new StringWriter();
        assertThrows(IllegalStateException.class, () -> StringEscapeUtils.UNESCAPE_CSV.translate("foo", -1, writer));
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-480
     */
    @Test
    public void testEscapeHtmlHighUnicode() {
        // this is the utf8 representation of the character:
        // COUNTING ROD UNIT DIGIT THREE
        // in Unicode
        // code point: U+1D362
        final byte[] data = {(byte) 0xF0, (byte) 0x9D, (byte) 0x8D, (byte) 0xA2};

        final String original = new String(data, StandardCharsets.UTF_8);

        final String escaped = StringEscapeUtils.escapeHtml4(original);
        assertEquals(original, escaped, "High Unicode should not have been escaped");

        final String unescaped = StringEscapeUtils.unescapeHtml4(escaped);
        assertEquals(original, unescaped, "High Unicode should have been unchanged");

        // TODO: I think this should hold, needs further investigation
        //        String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "&#119650;" );
        //        assertEquals( "High Unicode should have been unescaped", original, unescapedFromEntity);
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-339
     */
    @Test
    public void testEscapeHiragana() {
        // Some random Japanese Unicode characters
        final String original = "\u304B\u304C\u3068";
        final String escaped = StringEscapeUtils.escapeHtml4(original);
        assertEquals(original, escaped,
                "Hiragana character Unicode behavior should not be being escaped by escapeHtml4");

        final String unescaped = StringEscapeUtils.unescapeHtml4(escaped);

        assertEquals(escaped, unescaped, "Hiragana character Unicode behavior has changed - expected no unescaping");
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-708
     *
     * @throws IOException
     *             if an I/O error occurs
     */
    @Test
    public void testLang708() throws IOException {
        final byte[] inputBytes = Files.readAllBytes(Paths.get("src/test/resources/lang-708-input.txt"));
        final String input = new String(inputBytes, StandardCharsets.UTF_8);
        final String escaped = StringEscapeUtils.escapeEcmaScript(input);
        // just the end:
        assertTrue(escaped.endsWith("}]"), escaped);
        // a little more:
        assertTrue(escaped.endsWith("\"valueCode\\\":\\\"\\\"}]"), escaped);
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-720
     */
    @Test
    public void testLang720() {
        final String input = "\ud842\udfb7" + "A";
        final String escaped = StringEscapeUtils.escapeXml(input);
        assertEquals(input, escaped);
    }

    /**
     * Tests https://issues.apache.org/jira/browse/LANG-911
     */
    @Test
    public void testLang911() {
        final String bellsTest = "\ud83d\udc80\ud83d\udd14";
        final String value = StringEscapeUtils.escapeJava(bellsTest);
        final String valueTest = StringEscapeUtils.unescapeJava(value);
        assertEquals(bellsTest, valueTest);
    }

    @Test
    public void testEscapeJson() {
        assertNull(StringEscapeUtils.escapeJson(null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JSON.translate(null, null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.ESCAPE_JSON.translate("", null));

        assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\""));

        final String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
        final String input = "\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";

        assertEquals(expected, StringEscapeUtils.escapeJson(input));
    }

    @Test
    public void testUnescapeJson() {
        assertNull(StringEscapeUtils.unescapeJson(null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JSON.translate(null, null));
        assertThrows(NullPointerException.class, () -> StringEscapeUtils.UNESCAPE_JSON.translate("", null));

        assertEquals("He didn't say, \"stop!\"", StringEscapeUtils.unescapeJson("He didn't say, \\\"stop!\\\""));

        final String expected = "\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
        final String input = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";

        assertEquals(expected, StringEscapeUtils.unescapeJson(input));
    }
}