aboutsummaryrefslogtreecommitdiff
path: root/dictionaries/utf8.dict
diff options
context:
space:
mode:
Diffstat (limited to 'dictionaries/utf8.dict')
-rw-r--r--dictionaries/utf8.dict73
1 files changed, 73 insertions, 0 deletions
diff --git a/dictionaries/utf8.dict b/dictionaries/utf8.dict
new file mode 100644
index 00000000..ab0d6e35
--- /dev/null
+++ b/dictionaries/utf8.dict
@@ -0,0 +1,73 @@
+# https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
+
+
+# Defines byte order and endianness
+byte_order="\xFE\xFF"
+
+# Reorder the display of text for RTL reading
+right_to_left="\x20\x2E"
+
+# Mongolian Vowel Separator: invisible and has the whitespace property
+invisible_separator="\x18\x03"
+
+# Invisible zero-width character.
+word_join="\x20\x60"
+
+# Reserved code point
+reserved="\xfe\xfe"
+
+# Invalid code point
+invalid1="\xff\xff"
+invalid2="\x01\xff\xff"
+invalid3="\xfd\xd0"
+
+# unassigned code point
+unassigned="\x0f\xed"
+
+# illegal low half-surrogate
+illegal_low="\xde\xad"
+
+# illegal high half-surrogate
+illegal_high="\xda\xad"
+
+# Private Use Area code point used by Apple for its logo
+apple="\xf8\xff"
+
+# hostname normalization
+fullwidth_solidus="\xff\x0f"
+
+# numerical mapping and a value
+bold_eight="\x01\xd7\xd6"
+
+# U+00DF normalizes to "ss" during IDNA2003's mapping phase,
+# different from its IDNA2008 mapping. See http://www.unicode.org/reports/tr46/
+weird="\x00\xdf"
+
+# U+FDFD expands by 11x (UTF-8) and 18x (UTF-16) under NFKC/NFKD
+expansion="\xfd\xfd"
+
+# U+0390 expands by 3x (UTF-8) under NFD
+expansion2="\x03\x90"
+
+# U+1F82 expands by 4x (UTF-16) under NFD
+expansion3= "\x1F\x82"
+
+# U+FB2C expands by 3x (UTF-16) under NFC
+expansion4="\xFB\x2C"
+
+# Lowercase expansion: https://twitter.com/jifa/status/625776454479970304
+low_exp1="\x02\x3a"
+low_exp2="\x02\x3e"
+low_exp3="\x00\xdf"
+low_exp4="\x1e\x9e"
+
+# Null byte
+null="\x00\x00"
+"\xfc\x80\x80\x80\x80\x80"
+"\xfc\x80\x80\x80\x80\xaf"
+
+# Confusing new lines
+"\x00\x1b"
+"\x00\x85"
+"\x20\x28"
+"\x20\x29"