diff options
author | Jakub Kotur <qtr@google.com> | 2021-03-16 18:39:58 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2021-03-16 18:39:58 +0000 |
commit | 8606fb7c0b0a0f0796422a479a589b8c4f3ccddb (patch) | |
tree | db39b49c7217032e6b3ab6255f27b3d84122bad9 | |
parent | 9a565aa594c047b0553c5f1dd0be3c7aee131747 (diff) | |
parent | dd59cf911d21e566a33af399b71d59f7392cae29 (diff) | |
download | bstr-8606fb7c0b0a0f0796422a479a589b8c4f3ccddb.tar.gz |
Initial import of bstr-0.2.14. am: dd59cf911d
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/bstr/+/1620788
Change-Id: I2bb71b56d1b40a1d21cdd30d0c78da9ef2716653
70 files changed, 16312 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json new file mode 100644 index 0000000..2ecf829 --- /dev/null +++ b/.cargo_vcs_info.json @@ -0,0 +1,5 @@ +{ + "git": { + "sha1": "7f0ad15d9628c0abec8cf6b7585539cae63e6d5b" + } +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..42cacb3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.*.swp +tags +target +/Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..b4215b4 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,68 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +name = "bstr" +version = "0.2.14" +authors = ["Andrew Gallant <jamslam@gmail.com>"] +exclude = ["/.github"] +description = "A string type that is not required to be valid UTF-8." 
+homepage = "https://github.com/BurntSushi/bstr" +documentation = "https://docs.rs/bstr" +readme = "README.md" +keywords = ["string", "str", "byte", "bytes", "text"] +categories = ["text-processing", "encoding"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/BurntSushi/bstr" +[profile.release] +debug = true + +[lib] +bench = false +[dependencies.lazy_static] +version = "1.2" +optional = true + +[dependencies.memchr] +version = "2.1.2" +default-features = false + +[dependencies.regex-automata] +version = "0.1.5" +optional = true +default-features = false + +[dependencies.serde] +version = "1.0.85" +optional = true +default-features = false +[dev-dependencies.quickcheck] +version = "0.8.1" +default-features = false + +[dev-dependencies.ucd-parse] +version = "0.1.3" + +[dev-dependencies.unicode-segmentation] +version = "1.2.1" + +[features] +default = ["std", "unicode"] +serde1 = ["std", "serde1-nostd", "serde/std"] +serde1-nostd = ["serde"] +std = ["memchr/use_std"] +unicode = ["lazy_static", "regex-automata"] +[badges.appveyor] +repository = "BurntSushi/bstr" + +[badges.travis-ci] +repository = "BurntSushi/bstr" diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..63388bc --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,41 @@ +[package] +name = "bstr" +version = "0.2.14" #:version +authors = ["Andrew Gallant <jamslam@gmail.com>"] +description = "A string type that is not required to be valid UTF-8." 
+documentation = "https://docs.rs/bstr" +homepage = "https://github.com/BurntSushi/bstr" +repository = "https://github.com/BurntSushi/bstr" +readme = "README.md" +keywords = ["string", "str", "byte", "bytes", "text"] +license = "MIT OR Apache-2.0" +categories = ["text-processing", "encoding"] +exclude = ["/.github"] + +[badges] +travis-ci = { repository = "BurntSushi/bstr" } +appveyor = { repository = "BurntSushi/bstr" } + +[lib] +bench = false + +[features] +default = ["std", "unicode"] +std = ["memchr/use_std"] +unicode = ["lazy_static", "regex-automata"] +serde1 = ["std", "serde1-nostd", "serde/std"] +serde1-nostd = ["serde"] + +[dependencies] +memchr = { version = "2.1.2", default-features = false } +lazy_static = { version = "1.2", optional = true } +regex-automata = { version = "0.1.5", default-features = false, optional = true } +serde = { version = "1.0.85", default-features = false, optional = true } + +[dev-dependencies] +quickcheck = { version = "0.8.1", default-features = false } +ucd-parse = "0.1.3" +unicode-segmentation = "1.2.1" + +[profile.release] +debug = true diff --git a/README.md b/README.md new file mode 100644 index 0000000..3e4ef8b --- /dev/null +++ b/README.md @@ -0,0 +1,254 @@ +bstr +==== +This crate provides extension traits for `&[u8]` and `Vec<u8>` that enable +their use as byte strings, where byte strings are _conventionally_ UTF-8. This +differs from the standard library's `String` and `str` types in that they are +not required to be valid UTF-8, but may be fully or partially valid UTF-8. + +[![Build status](https://github.com/BurntSushi/bstr/workflows/ci/badge.svg)](https://github.com/BurntSushi/bstr/actions) +[![](https://meritbadge.herokuapp.com/bstr)](https://crates.io/crates/bstr) + + +### Documentation + +https://docs.rs/bstr + + +### When should I use byte strings? + +See this part of the documentation for more details: +https://docs.rs/bstr/0.2.*/bstr/#when-should-i-use-byte-strings. 
+ +The short story is that byte strings are useful when it is inconvenient or +incorrect to require valid UTF-8. + + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +bstr = "0.2" +``` + + +### Examples + +The following two examples exhibit both the API features of byte strings and +the I/O convenience functions provided for reading line-by-line quickly. + +This first example simply shows how to efficiently iterate over lines in +stdin, and print out lines containing a particular substring: + +```rust +use std::error::Error; +use std::io::{self, Write}; + +use bstr::{ByteSlice, io::BufReadExt}; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut stdout = io::BufWriter::new(io::stdout()); + + stdin.lock().for_byte_line_with_terminator(|line| { + if line.contains_str("Dimension") { + stdout.write_all(line)?; + } + Ok(true) + })?; + Ok(()) +} +``` + +This example shows how to count all of the words (Unicode-aware) in stdin, +line-by-line: + +```rust +use std::error::Error; +use std::io; + +use bstr::{ByteSlice, io::BufReadExt}; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut words = 0; + stdin.lock().for_byte_line_with_terminator(|line| { + words += line.words().count(); + Ok(true) + })?; + println!("{}", words); + Ok(()) +} +``` + +This example shows how to convert a stream on stdin to uppercase without +performing UTF-8 validation _and_ amortizing allocation. On standard ASCII +text, this is quite a bit faster than what you can (easily) do with standard +library APIs. (N.B. Any invalid UTF-8 bytes are passed through unchanged.) 
+ +```rust +use std::error::Error; +use std::io::{self, Write}; + +use bstr::{ByteSlice, io::BufReadExt}; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut stdout = io::BufWriter::new(io::stdout()); + + let mut upper = vec![]; + stdin.lock().for_byte_line_with_terminator(|line| { + upper.clear(); + line.to_uppercase_into(&mut upper); + stdout.write_all(&upper)?; + Ok(true) + })?; + Ok(()) +} +``` + +This example shows how to extract the first 10 visual characters (as grapheme +clusters) from each line, where invalid UTF-8 sequences are generally treated +as a single character and are passed through correctly: + +```rust +use std::error::Error; +use std::io::{self, Write}; + +use bstr::{ByteSlice, io::BufReadExt}; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut stdout = io::BufWriter::new(io::stdout()); + + stdin.lock().for_byte_line_with_terminator(|line| { + let end = line + .grapheme_indices() + .map(|(_, end, _)| end) + .take(10) + .last() + .unwrap_or(line.len()); + stdout.write_all(line[..end].trim_end())?; + stdout.write_all(b"\n")?; + Ok(true) + })?; + Ok(()) +} +``` + + +### Cargo features + +This crate comes with a few features that control standard library, serde +and Unicode support. + +* `std` - **Enabled** by default. This provides APIs that require the standard + library, such as `Vec<u8>`. +* `unicode` - **Enabled** by default. This provides APIs that require sizable + Unicode data compiled into the binary. This includes, but is not limited to, + grapheme/word/sentence segmenters. When this is disabled, basic support such + as UTF-8 decoding is still included. +* `serde1` - **Disabled** by default. Enables implementations of serde traits + for the `BStr` and `BString` types. +* `serde1-nostd` - **Disabled** by default. Enables implementations of serde + traits for the `BStr` type only, intended for use without the standard + library.
Generally, you either want `serde1` or `serde1-nostd`, not both. + + +### Minimum Rust version policy + +This crate's minimum supported `rustc` version (MSRV) is `1.28.0`. + +In general, this crate will be conservative with respect to the minimum +supported version of Rust. MSRV may be bumped in minor version releases. + + +### Future work + +Since this is meant to be a core crate, getting a `1.0` release is a priority. +My hope is to move to `1.0` within the next year and commit to its API so that +`bstr` can be used as a public dependency. + +A large part of the API surface area was taken from the standard library, so +from an API design perspective, a good portion of this crate should be mature. +The main differences from the standard library are in how the various substring +search routines work. The standard library provides generic infrastructure for +supporting different types of searches with a single method, whereas this +library prefers to define new methods for each type of search and drop the +generic infrastructure. + +Some _probable_ future considerations for APIs include, but are not limited to: + +* A convenience layer on top of the `aho-corasick` crate. +* Unicode normalization. +* More sophisticated support for dealing with Unicode case, perhaps by + combining the use cases supported by [`caseless`](https://docs.rs/caseless) + and [`unicase`](https://docs.rs/unicase). +* Add facilities for dealing with OS strings and file paths, probably via + simple conversion routines. + +Here are some examples that are _probably_ out of scope for this crate: + +* Regular expressions. +* Unicode collation. + +The exact scope isn't quite clear, but I expect we can iterate on it. + +In general, as stated below, this crate is an experiment in bringing lots of +related APIs together into a single crate while simultaneously attempting to +keep the total number of dependencies low. Indeed, every dependency of `bstr`, +except for `memchr`, is optional.
+ + +### High level motivation + +Strictly speaking, the `bstr` crate provides very little that can't already be +achieved with the standard library `Vec<u8>`/`&[u8]` APIs and the ecosystem of +library crates. For example: + +* The standard library's + [`Utf8Error`](https://doc.rust-lang.org/std/str/struct.Utf8Error.html) + can be used for incremental lossy decoding of `&[u8]`. +* The + [`unicode-segmentation`](https://unicode-rs.github.io/unicode-segmentation/unicode_segmentation/index.html) + crate can be used for iterating over graphemes (or words), but is only + implemented for `&str` types. One could use `Utf8Error` above to implement + grapheme iteration with the same semantics as what `bstr` provides (automatic + Unicode replacement codepoint substitution). +* The [`twoway`](https://docs.rs/twoway) crate can be used for + fast substring searching on `&[u8]`. + +So why create `bstr`? Part of the point of the `bstr` crate is to provide a +uniform API of coupled components instead of relying on users to piece together +loosely coupled components from the crate ecosystem. For example, if you wanted +to perform a search and replace in a `Vec<u8>`, then writing the code to do +that with the `twoway` crate is not that difficult, but it's still additional +glue code you have to write. This work adds up depending on what you're doing. +Consider, for example, trimming and splitting, along with their different +variants. + +In other words, `bstr` is partially a way of pushing back against the +micro-crate ecosystem that appears to be evolving. It's not clear to me whether +this experiment will be successful or not, but it is definitely a goal of +`bstr` to keep its dependency list lightweight. For example, `serde` is an +optional dependency because there is no feasible alternative, but `twoway` is +not, where we instead prefer to implement our own substring search. In service +of this philosophy, currently, the only required dependency of `bstr` is +`memchr`. 
+ + +### License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + https://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + https://opensource.org/licenses/MIT) + +at your option. + +The data in `src/unicode/data/` is licensed under the Unicode License Agreement +([LICENSE-UNICODE](https://www.unicode.org/copyright.html#License)), although +this data is only used in tests. diff --git a/examples/graphemes-std.rs b/examples/graphemes-std.rs new file mode 100644 index 0000000..3522736 --- /dev/null +++ b/examples/graphemes-std.rs @@ -0,0 +1,28 @@ +extern crate unicode_segmentation; + +use std::error::Error; +use std::io::{self, BufRead, Write}; + +use unicode_segmentation::UnicodeSegmentation; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut stdin = stdin.lock(); + let mut stdout = io::BufWriter::new(io::stdout()); + + let mut line = String::new(); + while stdin.read_line(&mut line)? 
> 0 { + let end = line + .grapheme_indices(true) + .map(|(start, g)| start + g.len()) + .take(10) + .last() + .unwrap_or(line.len()); + #[allow(deprecated)] // for Rust 1.28.0 + stdout.write_all(line[..end].trim_right().as_bytes())?; + stdout.write_all(b"\n")?; + + line.clear(); + } + Ok(()) +} diff --git a/examples/graphemes.rs b/examples/graphemes.rs new file mode 100644 index 0000000..2372490 --- /dev/null +++ b/examples/graphemes.rs @@ -0,0 +1,24 @@ +extern crate bstr; + +use std::error::Error; +use std::io::{self, Write}; + +use bstr::{io::BufReadExt, ByteSlice}; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut stdout = io::BufWriter::new(io::stdout()); + + stdin.lock().for_byte_line_with_terminator(|line| { + let end = line + .grapheme_indices() + .map(|(_, end, _)| end) + .take(10) + .last() + .unwrap_or(line.len()); + stdout.write_all(line[..end].trim_end())?; + stdout.write_all(b"\n")?; + Ok(true) + })?; + Ok(()) +} diff --git a/examples/lines-std.rs b/examples/lines-std.rs new file mode 100644 index 0000000..69fc6a5 --- /dev/null +++ b/examples/lines-std.rs @@ -0,0 +1,17 @@ +use std::error::Error; +use std::io::{self, BufRead, Write}; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut stdin = stdin.lock(); + let mut stdout = io::BufWriter::new(io::stdout()); + + let mut line = String::new(); + while stdin.read_line(&mut line)? 
> 0 { + if line.contains("Dimension") { + stdout.write_all(line.as_bytes())?; + } + line.clear(); + } + Ok(()) +} diff --git a/examples/lines.rs b/examples/lines.rs new file mode 100644 index 0000000..4b1045f --- /dev/null +++ b/examples/lines.rs @@ -0,0 +1,19 @@ +extern crate bstr; + +use std::error::Error; +use std::io::{self, Write}; + +use bstr::{io::BufReadExt, ByteSlice}; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut stdout = io::BufWriter::new(io::stdout()); + + stdin.lock().for_byte_line_with_terminator(|line| { + if line.contains_str("Dimension") { + stdout.write_all(line)?; + } + Ok(true) + })?; + Ok(()) +} diff --git a/examples/uppercase-std.rs b/examples/uppercase-std.rs new file mode 100644 index 0000000..672bd71 --- /dev/null +++ b/examples/uppercase-std.rs @@ -0,0 +1,15 @@ +use std::error::Error; +use std::io::{self, BufRead, Write}; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut stdin = stdin.lock(); + let mut stdout = io::BufWriter::new(io::stdout()); + + let mut line = String::new(); + while stdin.read_line(&mut line)? 
> 0 { + stdout.write_all(line.to_uppercase().as_bytes())?; + line.clear(); + } + Ok(()) +} diff --git a/examples/uppercase.rs b/examples/uppercase.rs new file mode 100644 index 0000000..f9771e0 --- /dev/null +++ b/examples/uppercase.rs @@ -0,0 +1,20 @@ +extern crate bstr; + +use std::error::Error; +use std::io::{self, Write}; + +use bstr::{io::BufReadExt, ByteSlice}; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut stdout = io::BufWriter::new(io::stdout()); + + let mut upper = vec![]; + stdin.lock().for_byte_line_with_terminator(|line| { + upper.clear(); + line.to_uppercase_into(&mut upper); + stdout.write_all(&upper)?; + Ok(true) + })?; + Ok(()) +} diff --git a/examples/words-std.rs b/examples/words-std.rs new file mode 100644 index 0000000..7eae116 --- /dev/null +++ b/examples/words-std.rs @@ -0,0 +1,20 @@ +extern crate unicode_segmentation; + +use std::error::Error; +use std::io::{self, BufRead}; + +use unicode_segmentation::UnicodeSegmentation; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut stdin = stdin.lock(); + + let mut words = 0; + let mut line = String::new(); + while stdin.read_line(&mut line)? 
> 0 { + words += line.unicode_words().count(); + line.clear(); + } + println!("{}", words); + Ok(()) +} diff --git a/examples/words.rs b/examples/words.rs new file mode 100644 index 0000000..eb20c0d --- /dev/null +++ b/examples/words.rs @@ -0,0 +1,17 @@ +extern crate bstr; + +use std::error::Error; +use std::io; + +use bstr::{io::BufReadExt, ByteSlice}; + +fn main() -> Result<(), Box<dyn Error>> { + let stdin = io::stdin(); + let mut words = 0; + stdin.lock().for_byte_line_with_terminator(|line| { + words += line.words().count(); + Ok(true) + })?; + println!("{}", words); + Ok(()) +} diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..aa37a21 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,2 @@ +max_width = 79 +use_small_heuristics = "max" diff --git a/scripts/generate-unicode-data b/scripts/generate-unicode-data new file mode 100755 index 0000000..6b59fae --- /dev/null +++ b/scripts/generate-unicode-data @@ -0,0 +1,149 @@ +#!/bin/sh + +set -e +D="$(dirname "$0")" + +# Convenience function for checking that a command exists. +requires() { + cmd="$1" + if ! command -v "$cmd" > /dev/null 2>&1; then + echo "DEPENDENCY MISSING: $cmd must be installed" >&2 + exit 1 + fi +} + +# Test if an array ($2) contains a particular element ($1). 
+array_exists() { + needle="$1" + shift + + for el in "$@"; do + if [ "$el" = "$needle" ]; then + return 0 + fi + done + return 1 +} + +graphemes() { + regex="$(sh "$D/regex/grapheme.sh")" + + echo "generating forward grapheme DFA" + ucd-generate dfa \ + --name GRAPHEME_BREAK_FWD \ + --sparse --minimize --anchored --state-size 2 \ + src/unicode/fsm/ \ + "$regex" + + echo "generating reverse grapheme DFA" + ucd-generate dfa \ + --name GRAPHEME_BREAK_REV \ + --reverse --longest \ + --sparse --minimize --anchored --state-size 2 \ + src/unicode/fsm/ \ + "$regex" +} + +words() { + regex="$(sh "$D/regex/word.sh")" + + echo "generating forward word DFA (this can take a while)" + ucd-generate dfa \ + --name WORD_BREAK_FWD \ + --sparse --minimize --anchored --state-size 4 \ + src/unicode/fsm/ \ + "$regex" +} + +sentences() { + regex="$(sh "$D/regex/sentence.sh")" + + echo "generating forward sentence DFA (this can take a while)" + ucd-generate dfa \ + --name SENTENCE_BREAK_FWD \ + --minimize \ + --sparse --anchored --state-size 4 \ + src/unicode/fsm/ \ + "$regex" +} + +regional_indicator() { + # For finding all occurrences of region indicators. This is used to handle + # regional indicators as a special case for the reverse grapheme iterator + # and the reverse word iterator. 
+ echo "generating regional indicator DFA" + ucd-generate dfa \ + --name REGIONAL_INDICATOR_REV \ + --reverse \ + --classes --minimize --anchored --premultiply --state-size 1 \ + src/unicode/fsm/ \ + "\p{gcb=Regional_Indicator}" +} + +simple_word() { + echo "generating forward simple word DFA" + ucd-generate dfa \ + --name SIMPLE_WORD_FWD \ + --sparse --minimize --state-size 2 \ + src/unicode/fsm/ \ + "\w" +} + +whitespace() { + echo "generating forward whitespace DFA" + ucd-generate dfa \ + --name WHITESPACE_ANCHORED_FWD \ + --anchored --classes --premultiply --minimize --state-size 1 \ + src/unicode/fsm/ \ + "\s+" + + echo "generating reverse whitespace DFA" + ucd-generate dfa \ + --name WHITESPACE_ANCHORED_REV \ + --reverse \ + --anchored --classes --premultiply --minimize --state-size 1 \ + src/unicode/fsm/ \ + "\s+" +} + +main() { + if array_exists "-h" "$@" || array_exists "--help" "$@"; then + echo "Usage: $(basename "$0") [--list-commands] [<command>] ..." >&2 + exit + fi + + commands=" + graphemes + sentences + words + regional-indicator + simple-word + whitespace + " + if array_exists "--list-commands" "$@"; then + for cmd in $commands; do + echo "$cmd" + done + exit + fi + + # ucd-generate is used to compile regexes into DFAs. + requires ucd-generate + + mkdir -p src/unicode/fsm/ + + cmds=$* + if [ $# -eq 0 ] || array_exists "all" "$@"; then + cmds=$commands + fi + for cmd in $cmds; do + if array_exists "$cmd" $commands; then + fun="$(echo "$cmd" | sed 's/-/_/g')" + eval "$fun" + else + echo "unrecognized command: $cmd" >&2 + fi + done +} + +main "$@" diff --git a/scripts/regex/grapheme.sh b/scripts/regex/grapheme.sh new file mode 100644 index 0000000..0b2b54d --- /dev/null +++ b/scripts/regex/grapheme.sh @@ -0,0 +1,50 @@ +#!/bin/sh + +# vim: indentexpr= nosmartindent autoindent +# vim: tabstop=2 shiftwidth=2 softtabstop=2 + +# This regex was manually written, derived from the rules in UAX #29. 
+# Particularly, from Table 1c, which lays out a regex for grapheme clusters. + +CR="\p{gcb=CR}" +LF="\p{gcb=LF}" +Control="\p{gcb=Control}" +Prepend="\p{gcb=Prepend}" +L="\p{gcb=L}" +V="\p{gcb=V}" +LV="\p{gcb=LV}" +LVT="\p{gcb=LVT}" +T="\p{gcb=T}" +RI="\p{gcb=RI}" +Extend="\p{gcb=Extend}" +ZWJ="\p{gcb=ZWJ}" +SpacingMark="\p{gcb=SpacingMark}" + +Any="\p{any}" +ExtendPict="\p{Extended_Pictographic}" + +echo "(?x) +$CR $LF +| +$Control +| +$Prepend* +( + ( + ($L* ($V+ | $LV $V* | $LVT) $T*) + | + $L+ + | + $T+ + ) + | + $RI $RI + | + $ExtendPict ($Extend* $ZWJ $ExtendPict)* + | + [^$Control $CR $LF] +) +[$Extend $ZWJ $SpacingMark]* +| +$Any +" diff --git a/scripts/regex/sentence.sh b/scripts/regex/sentence.sh new file mode 100644 index 0000000..689d184 --- /dev/null +++ b/scripts/regex/sentence.sh @@ -0,0 +1,176 @@ +#!/bin/sh + +# vim: indentexpr= nosmartindent autoindent +# vim: tabstop=2 shiftwidth=2 softtabstop=2 + +# This is a regex that I reverse engineered from the sentence boundary chain +# rules in UAX #29. Unlike the grapheme regex, which is essentially provided +# for us in UAX #29, no such sentence regex exists. +# +# I looked into how ICU achieves this, since UAX #29 hints that producing +# finite state machines for grapheme/sentence/word/line breaking is possible, +# but only easy to do for graphemes. ICU does this by implementing their own +# DSL for describing the break algorithms in terms of the chaining rules +# directly. You can see an example for sentences in +# icu4c/source/data/brkitr/rules/sent.txt. ICU then builds a finite state +# machine from those rules in a mostly standard way, but implements the +# "chaining" aspect of the rules by connecting overlapping end and start +# states. For example, given SB7: +# +# (Upper | Lower) ATerm x Upper +# +# Then the naive way to convert this into a regex would be something like +# +# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper} +# +# Unfortunately, this is incorrect. Why? 
Well, consider an example like so: +# +# U.S.A. +# +# A correct implementation of the sentence breaking algorithm should not insert +# any breaks here, exactly in accordance with repeatedly applying rule SB7 as +# given above. Our regex fails to do this because it will first match `U.S` +# without breaking them---which is correct---but will then start looking for +# its next rule beginning with a full stop (in ATerm) and followed by an +# uppercase letter (A). This will wind up triggering rule SB11 (without +# matching `A`), which inserts a break. +# +# The reason why this happens is because our initial application of rule SB7 +# "consumes" the next uppercase letter (S), which we want to reuse as a prefix +# in the next rule application. A natural way to express this would be with +# look-around, although it's not clear that works in every case since you +# ultimately might want to consume that ending uppercase letter. In any case, +# we can't use look-around in our truly regular regexes, so we must fix this. +# The approach we take is to explicitly repeat rules when a suffix of a rule +# is a prefix of another rule. In the case of SB7, the end of the rule, an +# uppercase letter, also happens to match the beginning of the rule. This can +# in turn be repeated indefinitely. Thus, our actual translation to a regex is: +# +# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper}(\p{sb=ATerm}\p{sb=Upper})* +# +# It turns out that this is exactly what ICU does, but in their case, they do +# it automatically. In our case, we connect the chaining rules manually. It's +# tedious. With that said, we do not implement Unicode line breaking with this +# approach, which is a far scarier beast. In that case, it would probably be +# worth writing the code to do what ICU does. +# +# In the case of sentence breaks, there aren't *too* many overlaps of this +# nature.
We list them out exhaustively to make this clear, because it's +# essentially impossible to easily observe this in the regex. (It took me a +# full day to figure all of this out.) Rules marked with N/A mean that they +# specify a break, and this strategy only really applies to stringing together +# non-breaks. +# +# SB1 - N/A +# SB2 - N/A +# SB3 - None +# SB4 - N/A +# SB5 - None +# SB6 - None +# SB7 - End overlaps with beginning of SB7 +# SB8 - End overlaps with beginning of SB7 +# SB8a - End overlaps with beginning of SB6, SB8, SB8a, SB9, SB10, SB11 +# SB9 - None +# SB10 - None +# SB11 - None +# SB998 - N/A +# +# SB8a is in particular quite tricky to get right without look-ahead, since it +# allows ping-ponging between match rules SB8a and SB9-11, where SB9-11 +# otherwise indicate that a break has been found. In the regex below, we tackle +# this by only permitting part of SB8a to match inside our core non-breaking +# repetition. In particular, we only allow the parts of SB8a to match that +# permit the non-breaking components to continue. If a part of SB8a matches +# that guarantees a pop out to SB9-11, (like `STerm STerm`), then we let it +# happen. This still isn't correct because an SContinue might be seen which +# would allow moving back into SB998 and thus the non-breaking repetition, so +# we handle that case as well. +# +# Finally, the last complication here is the sprinkling of $Ex* everywhere. +# This essentially corresponds to the implementation of SB5 by following +# UAX #29's recommendation in S6.2. Essentially, we use it to avoid ever breaking +# in the middle of a grapheme cluster.
+ +CR="\p{sb=CR}" +LF="\p{sb=LF}" +Sep="\p{sb=Sep}" +Close="\p{sb=Close}" +Sp="\p{sb=Sp}" +STerm="\p{sb=STerm}" +ATerm="\p{sb=ATerm}" +SContinue="\p{sb=SContinue}" +Numeric="\p{sb=Numeric}" +Upper="\p{sb=Upper}" +Lower="\p{sb=Lower}" +OLetter="\p{sb=OLetter}" + +Ex="[\p{sb=Extend}\p{sb=Format}]" +ParaSep="[$Sep $CR $LF]" +SATerm="[$STerm $ATerm]" + +LetterSepTerm="[$OLetter $Upper $Lower $ParaSep $SATerm]" + +echo "(?x) +( + # SB6 + $ATerm $Ex* + $Numeric + | + # SB7 + [$Upper $Lower] $Ex* $ATerm $Ex* + $Upper $Ex* + # overlap with SB7 + ($ATerm $Ex* $Upper $Ex*)* + | + # SB8 + $ATerm $Ex* $Close* $Ex* $Sp* $Ex* + ([^$LetterSepTerm] $Ex*)* $Lower $Ex* + # overlap with SB7 + ($ATerm $Ex* $Upper $Ex*)* + | + # SB8a + $SATerm $Ex* $Close* $Ex* $Sp* $Ex* + ( + $SContinue + | + $ATerm $Ex* + # Permit repetition of SB8a + (($Close $Ex*)* ($Sp $Ex*)* $SATerm)* + # In order to continue non-breaking matching, we now must observe + # a match with a rule that keeps us in SB6-8a. Otherwise, we've entered + # one of SB9-11 and know that a break must follow. + ( + # overlap with SB6 + $Numeric + | + # overlap with SB8 + ($Close $Ex*)* ($Sp $Ex*)* + ([^$LetterSepTerm] $Ex*)* $Lower $Ex* + # overlap with SB7 + ($ATerm $Ex* $Upper $Ex*)* + | + # overlap with SB8a + ($Close $Ex*)* ($Sp $Ex*)* $SContinue + ) + | + $STerm $Ex* + # Permit repetition of SB8a + (($Close $Ex*)* ($Sp $Ex*)* $SATerm)* + # As with ATerm above, in order to continue non-breaking matching, we + # must now observe a match with a rule that keeps us out of SB9-11. + # For STerm, the only such possibility is to see an SContinue. Anything + # else will result in a break. + ($Close $Ex*)* ($Sp $Ex*)* $SContinue + ) + | + # SB998 + # The logic behind this catch-all is that if we get to this point and + # see a Sep, CR, LF, STerm or ATerm, then it has to fall into one of + # SB9, SB10 or SB11. 
In the cases of SB9-11, we always find a break since + # SB11 acts as a catch-all to induce a break following a SATerm that isn't + # handled by rules SB6-SB8a. + [^$ParaSep $SATerm] +)* +# The following collapses rules SB3, SB4, part of SB8a, SB9, SB10 and SB11. +($SATerm $Ex* ($Close $Ex*)* ($Sp $Ex*)*)* ($CR $LF | $ParaSep)? +" diff --git a/scripts/regex/word.sh b/scripts/regex/word.sh new file mode 100644 index 0000000..78c7a05 --- /dev/null +++ b/scripts/regex/word.sh @@ -0,0 +1,111 @@ +#!/bin/sh + +# vim: indentexpr= nosmartindent autoindent +# vim: tabstop=2 shiftwidth=2 softtabstop=2 + +# See the comments in regex/sentence.sh for the general approach to how this +# regex was written. +# +# Writing the regex for this was *hard*. It took me two days of hacking to get +# this far, and that was after I had finished the sentence regex, so my brain +# was fully cached on this. Unlike the sentence regex, the rules in the regex +# below don't correspond as nicely to the rules in UAX #29. In particular, the +# UAX #29 rules have a ton of overlap with each other, which requires crazy +# stuff in the regex. I'm not even sure the regex below is 100% correct or even +# minimal, however, I did compare this with the ICU word segmenter on a few +# different corpora, and it produces identical results. (In addition to of +# course passing the UCD tests.) +# +# In general, I consider this approach to be a failure. Firstly, this is +# clearly a write-only regex. Secondly, building the minimized DFA for this is +# incredibly slow. Thirdly, the DFA is itself very large (~240KB). Fourthly, +# reversing this regex (for reverse word iteration) results in a >19MB DFA. +# Yes. That's MB. Wat. And it took 5 minutes to build. +# +# I think we might consider changing our approach to this problem. The normal +# path I've seen, I think, is to decode codepoints one at a time, and then +# thread them through a state machine in the code itself. 
We could take this +# approach, or possibly combine it with a DFA that tells us which Word_Break +# value a codepoint has. I'd prefer the latter approach, but it requires adding +# RegexSet support to regex-automata. Something that should definitely be done, +# but is a fair amount of work. +# +# Gah. + +CR="\p{wb=CR}" +LF="\p{wb=LF}" +Newline="\p{wb=Newline}" +ZWJ="\p{wb=ZWJ}" +RI="\p{wb=Regional_Indicator}" +Katakana="\p{wb=Katakana}" +HebrewLet="\p{wb=HebrewLetter}" +ALetter="\p{wb=ALetter}" +SingleQuote="\p{wb=SingleQuote}" +DoubleQuote="\p{wb=DoubleQuote}" +MidNumLet="\p{wb=MidNumLet}" +MidLetter="\p{wb=MidLetter}" +MidNum="\p{wb=MidNum}" +Numeric="\p{wb=Numeric}" +ExtendNumLet="\p{wb=ExtendNumLet}" +WSegSpace="\p{wb=WSegSpace}" + +Any="\p{any}" +Ex="[\p{wb=Extend} \p{wb=Format} $ZWJ]" +ExtendPict="\p{Extended_Pictographic}" +AHLetter="[$ALetter $HebrewLet]" +MidNumLetQ="[$MidNumLet $SingleQuote]" + +AHLetterRepeat="$AHLetter $Ex* ([$MidLetter $MidNumLetQ] $Ex* $AHLetter $Ex*)*" +NumericRepeat="$Numeric $Ex* ([$MidNum $MidNumLetQ] $Ex* $Numeric $Ex*)*" + +echo "(?x) +$CR $LF +| +[$Newline $CR $LF] +| +$WSegSpace $WSegSpace+ +| +( + ([^$Newline $CR $LF]? 
$Ex* $ZWJ $ExtendPict $Ex*)+ + | + ($ExtendNumLet $Ex*)* $AHLetter $Ex* + ( + ( + ($NumericRepeat | $ExtendNumLet $Ex*)* + | + [$MidLetter $MidNumLetQ] $Ex* + ) + $AHLetter $Ex* + )+ + ($NumericRepeat | $ExtendNumLet $Ex*)* + | + ($ExtendNumLet $Ex*)* $AHLetter $Ex* ($NumericRepeat | $ExtendNumLet $Ex*)+ + | + ($ExtendNumLet $Ex*)* $Numeric $Ex* + ( + ( + ($AHLetterRepeat | $ExtendNumLet $Ex*)* + | + [$MidNum $MidNumLetQ] $Ex* + ) + $Numeric $Ex* + )+ + ($AHLetterRepeat | $ExtendNumLet $Ex*)* + | + ($ExtendNumLet $Ex*)* $Numeric $Ex* ($AHLetterRepeat | $ExtendNumLet $Ex*)+ + | + $Katakana $Ex* + (($Katakana | $ExtendNumLet) $Ex*)+ + | + $ExtendNumLet $Ex* + (($ExtendNumLet | $AHLetter | $Numeric | $Katakana) $Ex*)+ +)+ +| +$HebrewLet $Ex* $SingleQuote $Ex* +| +($HebrewLet $Ex* $DoubleQuote $Ex*)+ $HebrewLet $Ex* +| +$RI $Ex* $RI $Ex* +| +$Any $Ex* +" diff --git a/src/ascii.rs b/src/ascii.rs new file mode 100644 index 0000000..bb2b679 --- /dev/null +++ b/src/ascii.rs @@ -0,0 +1,336 @@ +use core::mem; + +// The following ~400 lines of code exists for exactly one purpose, which is +// to optimize this code: +// +// byte_slice.iter().position(|&b| b > 0x7F).unwrap_or(byte_slice.len()) +// +// Yes... Overengineered is a word that comes to mind, but this is effectively +// a very similar problem to memchr, and virtually nobody has been able to +// resist optimizing the crap out of that (except for perhaps the BSD and MUSL +// folks). In particular, this routine makes a very common case (ASCII) very +// fast, which seems worth it. We do stop short of adding AVX variants of the +// code below in order to retain our sanity and also to avoid needing to deal +// with runtime target feature detection. RESIST! 
+// +// In order to understand the SIMD version below, it would be good to read this +// comment describing how my memchr routine works: +// https://github.com/BurntSushi/rust-memchr/blob/b0a29f267f4a7fad8ffcc8fe8377a06498202883/src/x86/sse2.rs#L19-L106 +// +// The primary difference with memchr is that for ASCII, we can do a bit less +// work. In particular, we don't need to detect the presence of a specific +// byte, but rather, whether any byte has its most significant bit set. That +// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to +// _mm_movemask_epi8. + +#[cfg(any(test, not(target_arch = "x86_64")))] +const USIZE_BYTES: usize = mem::size_of::<usize>(); +#[cfg(any(test, not(target_arch = "x86_64")))] +const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES; + +// This is a mask where the most significant bit of each byte in the usize +// is set. We test this bit to determine whether a character is ASCII or not. +// Namely, a single byte is regarded as an ASCII codepoint if and only if its +// most significant bit is not set. +#[cfg(any(test, not(target_arch = "x86_64")))] +const ASCII_MASK_U64: u64 = 0x8080808080808080; +#[cfg(any(test, not(target_arch = "x86_64")))] +const ASCII_MASK: usize = ASCII_MASK_U64 as usize; + +/// Returns the index of the first non ASCII byte in the given slice. +/// +/// If slice only contains ASCII bytes, then the length of the slice is +/// returned. 
+pub fn first_non_ascii_byte(slice: &[u8]) -> usize { + #[cfg(not(target_arch = "x86_64"))] + { + first_non_ascii_byte_fallback(slice) + } + + #[cfg(target_arch = "x86_64")] + { + first_non_ascii_byte_sse2(slice) + } +} + +#[cfg(any(test, not(target_arch = "x86_64")))] +fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize { + let align = USIZE_BYTES - 1; + let start_ptr = slice.as_ptr(); + let end_ptr = slice[slice.len()..].as_ptr(); + let mut ptr = start_ptr; + + unsafe { + if slice.len() < USIZE_BYTES { + return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr); + } + + let chunk = read_unaligned_usize(ptr); + let mask = chunk & ASCII_MASK; + if mask != 0 { + return first_non_ascii_byte_mask(mask); + } + + ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align)); + debug_assert!(ptr > start_ptr); + debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr); + if slice.len() >= FALLBACK_LOOP_SIZE { + while ptr <= ptr_sub(end_ptr, FALLBACK_LOOP_SIZE) { + debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); + + let a = *(ptr as *const usize); + let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize); + if (a | b) & ASCII_MASK != 0 { + // What a kludge. We wrap the position finding code into + // a non-inlineable function, which makes the codegen in + // the tight loop above a bit better by avoiding a + // couple extra movs. We pay for it by two additional + // stores, but only in the case of finding a non-ASCII + // byte. 
+ #[inline(never)] + unsafe fn findpos( + start_ptr: *const u8, + ptr: *const u8, + ) -> usize { + let a = *(ptr as *const usize); + let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize); + + let mut at = sub(ptr, start_ptr); + let maska = a & ASCII_MASK; + if maska != 0 { + return at + first_non_ascii_byte_mask(maska); + } + + at += USIZE_BYTES; + let maskb = b & ASCII_MASK; + debug_assert!(maskb != 0); + return at + first_non_ascii_byte_mask(maskb); + } + return findpos(start_ptr, ptr); + } + ptr = ptr_add(ptr, FALLBACK_LOOP_SIZE); + } + } + first_non_ascii_byte_slow(start_ptr, end_ptr, ptr) + } +} + +#[cfg(target_arch = "x86_64")] +fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize { + use core::arch::x86_64::*; + + const VECTOR_SIZE: usize = mem::size_of::<__m128i>(); + const VECTOR_ALIGN: usize = VECTOR_SIZE - 1; + const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE; + + let start_ptr = slice.as_ptr(); + let end_ptr = slice[slice.len()..].as_ptr(); + let mut ptr = start_ptr; + + unsafe { + if slice.len() < VECTOR_SIZE { + return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr); + } + + let chunk = _mm_loadu_si128(ptr as *const __m128i); + let mask = _mm_movemask_epi8(chunk); + if mask != 0 { + return mask.trailing_zeros() as usize; + } + + ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); + debug_assert!(ptr > start_ptr); + debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr); + if slice.len() >= VECTOR_LOOP_SIZE { + while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) { + debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); + + let a = _mm_load_si128(ptr as *const __m128i); + let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); + let c = + _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i); + let d = + _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i); + + let or1 = _mm_or_si128(a, b); + let or2 = _mm_or_si128(c, d); + let or3 = _mm_or_si128(or1, or2); + if _mm_movemask_epi8(or3) != 0 { + let mut at = sub(ptr, start_ptr); + 
+ let mask = _mm_movemask_epi8(a); + if mask != 0 { + return at + mask.trailing_zeros() as usize; + } + + at += VECTOR_SIZE; + let mask = _mm_movemask_epi8(b); + if mask != 0 { + return at + mask.trailing_zeros() as usize; + } + + at += VECTOR_SIZE; + let mask = _mm_movemask_epi8(c); + if mask != 0 { + return at + mask.trailing_zeros() as usize; + } + + at += VECTOR_SIZE; + let mask = _mm_movemask_epi8(d); + debug_assert!(mask != 0); + return at + mask.trailing_zeros() as usize; + } + ptr = ptr_add(ptr, VECTOR_LOOP_SIZE); + } + } + while ptr <= end_ptr.sub(VECTOR_SIZE) { + debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE); + + let chunk = _mm_loadu_si128(ptr as *const __m128i); + let mask = _mm_movemask_epi8(chunk); + if mask != 0 { + return sub(ptr, start_ptr) + mask.trailing_zeros() as usize; + } + ptr = ptr.add(VECTOR_SIZE); + } + first_non_ascii_byte_slow(start_ptr, end_ptr, ptr) + } +} + +#[inline(always)] +unsafe fn first_non_ascii_byte_slow( + start_ptr: *const u8, + end_ptr: *const u8, + mut ptr: *const u8, +) -> usize { + debug_assert!(start_ptr <= ptr); + debug_assert!(ptr <= end_ptr); + + while ptr < end_ptr { + if *ptr > 0x7F { + return sub(ptr, start_ptr); + } + ptr = ptr.offset(1); + } + sub(end_ptr, start_ptr) +} + +/// Compute the position of the first non-ASCII byte in the given mask. +/// +/// The mask should be computed by `chunk & ASCII_MASK`, where `chunk` is +/// 8 contiguous bytes of the slice being checked where *at least* one of those +/// bytes is not an ASCII byte. +/// +/// The position returned is always in the inclusive range [0, 7]. +#[cfg(any(test, not(target_arch = "x86_64")))] +fn first_non_ascii_byte_mask(mask: usize) -> usize { + #[cfg(target_endian = "little")] + { + mask.trailing_zeros() as usize / 8 + } + #[cfg(target_endian = "big")] + { + mask.leading_zeros() as usize / 8 + } +} + +/// Increment the given pointer by the given amount. 
+unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 { + debug_assert!(amt < ::core::isize::MAX as usize); + ptr.offset(amt as isize) +} + +/// Decrement the given pointer by the given amount. +unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 { + debug_assert!(amt < ::core::isize::MAX as usize); + ptr.offset((amt as isize).wrapping_neg()) +} + +#[cfg(any(test, not(target_arch = "x86_64")))] +unsafe fn read_unaligned_usize(ptr: *const u8) -> usize { + use core::ptr; + + let mut n: usize = 0; + ptr::copy_nonoverlapping(ptr, &mut n as *mut _ as *mut u8, USIZE_BYTES); + n +} + +/// Subtract `b` from `a` and return the difference. `a` should be greater than +/// or equal to `b`. +fn sub(a: *const u8, b: *const u8) -> usize { + debug_assert!(a >= b); + (a as usize) - (b as usize) +} + +#[cfg(test)] +mod tests { + use super::*; + + // Our testing approach here is to try and exhaustively test every case. + // This includes the position at which a non-ASCII byte occurs in addition + // to the alignment of the slice that we're searching. 
+ + #[test] + fn positive_fallback_forward() { + for i in 0..517 { + let s = "a".repeat(i); + assert_eq!( + i, + first_non_ascii_byte_fallback(s.as_bytes()), + "i: {:?}, len: {:?}, s: {:?}", + i, + s.len(), + s + ); + } + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn positive_sse2_forward() { + for i in 0..517 { + let b = "a".repeat(i).into_bytes(); + assert_eq!(b.len(), first_non_ascii_byte_sse2(&b)); + } + } + + #[test] + fn negative_fallback_forward() { + for i in 0..517 { + for align in 0..65 { + let mut s = "a".repeat(i); + s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃"); + let s = s.get(align..).unwrap_or(""); + assert_eq!( + i.saturating_sub(align), + first_non_ascii_byte_fallback(s.as_bytes()), + "i: {:?}, align: {:?}, len: {:?}, s: {:?}", + i, + align, + s.len(), + s + ); + } + } + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn negative_sse2_forward() { + for i in 0..517 { + for align in 0..65 { + let mut s = "a".repeat(i); + s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃"); + let s = s.get(align..).unwrap_or(""); + assert_eq!( + i.saturating_sub(align), + first_non_ascii_byte_sse2(s.as_bytes()), + "i: {:?}, align: {:?}, len: {:?}, s: {:?}", + i, + align, + s.len(), + s + ); + } + } + } +} diff --git a/src/bstr.rs b/src/bstr.rs new file mode 100644 index 0000000..1e3c91b --- /dev/null +++ b/src/bstr.rs @@ -0,0 +1,74 @@ +use core::mem; + +/// A wrapper for `&[u8]` that provides convenient string oriented trait impls. +/// +/// If you need ownership or a growable byte string buffer, then use +/// [`BString`](struct.BString.html). +/// +/// Using a `&BStr` is just like using a `&[u8]`, since `BStr` +/// implements `Deref` to `[u8]`. So all methods available on `[u8]` +/// are also available on `BStr`. +/// +/// # Representation +/// +/// A `&BStr` has the same representation as a `&str`. That is, a `&BStr` is +/// a fat pointer which consists of a pointer to some bytes and a length. 
+/// +/// # Trait implementations +/// +/// The `BStr` type has a number of trait implementations, and in particular, +/// defines equality and ordinal comparisons between `&BStr`, `&str` and +/// `&[u8]` for convenience. +/// +/// The `Debug` implementation for `BStr` shows its bytes as a normal string. +/// For invalid UTF-8, hex escape sequences are used. +/// +/// The `Display` implementation behaves as if `BStr` were first lossily +/// converted to a `str`. Invalid UTF-8 bytes are substituted with the Unicode +/// replacement codepoint, which looks like this: �. +#[derive(Hash)] +#[repr(transparent)] +pub struct BStr { + pub(crate) bytes: [u8], +} + +impl BStr { + #[inline] + pub(crate) fn new<B: ?Sized + AsRef<[u8]>>(bytes: &B) -> &BStr { + BStr::from_bytes(bytes.as_ref()) + } + + #[inline] + pub(crate) fn new_mut<B: ?Sized + AsMut<[u8]>>( + bytes: &mut B, + ) -> &mut BStr { + BStr::from_bytes_mut(bytes.as_mut()) + } + + #[inline] + pub(crate) fn from_bytes(slice: &[u8]) -> &BStr { + unsafe { mem::transmute(slice) } + } + + #[inline] + pub(crate) fn from_bytes_mut(slice: &mut [u8]) -> &mut BStr { + unsafe { mem::transmute(slice) } + } + + #[inline] + #[cfg(feature = "std")] + pub(crate) fn from_boxed_bytes(slice: Box<[u8]>) -> Box<BStr> { + unsafe { Box::from_raw(Box::into_raw(slice) as _) } + } + + #[inline] + #[cfg(feature = "std")] + pub(crate) fn into_boxed_bytes(slice: Box<BStr>) -> Box<[u8]> { + unsafe { Box::from_raw(Box::into_raw(slice) as _) } + } + + #[inline] + pub(crate) fn as_bytes(&self) -> &[u8] { + &self.bytes + } +} diff --git a/src/bstring.rs b/src/bstring.rs new file mode 100644 index 0000000..f04c651 --- /dev/null +++ b/src/bstring.rs @@ -0,0 +1,59 @@ +use bstr::BStr; + +/// A wrapper for `Vec<u8>` that provides convenient string oriented trait +/// impls. +/// +/// A `BString` has ownership over its contents and corresponds to +/// a growable or shrinkable buffer. 
Its borrowed counterpart is a +/// [`BStr`](struct.BStr.html), called a byte string slice. +/// +/// Using a `BString` is just like using a `Vec<u8>`, since `BString` +/// implements `Deref` to `Vec<u8>`. So all methods available on `Vec<u8>` +/// are also available on `BString`. +/// +/// # Examples +/// +/// You can create a new `BString` from a `Vec<u8>` via a `From` impl: +/// +/// ``` +/// use bstr::BString; +/// +/// let s = BString::from("Hello, world!"); +/// ``` +/// +/// # Deref +/// +/// The `BString` type implements `Deref` and `DerefMut`, where the target +/// types are `&Vec<u8>` and `&mut Vec<u8>`, respectively. `Deref` permits all of the +/// methods defined on `Vec<u8>` to be implicitly callable on any `BString`. +/// +/// For more information about how deref works, see the documentation for the +/// [`std::ops::Deref`](https://doc.rust-lang.org/std/ops/trait.Deref.html) +/// trait. +/// +/// # Representation +/// +/// A `BString` has the same representation as a `Vec<u8>` and a `String`. +/// That is, it is made up of three word sized components: a pointer to a +/// region of memory containing the bytes, a length and a capacity. 
+#[derive(Clone, Hash)] +pub struct BString { + pub(crate) bytes: Vec<u8>, +} + +impl BString { + #[inline] + pub(crate) fn as_bytes(&self) -> &[u8] { + &self.bytes + } + + #[inline] + pub(crate) fn as_bstr(&self) -> &BStr { + BStr::new(&self.bytes) + } + + #[inline] + pub(crate) fn as_mut_bstr(&mut self) -> &mut BStr { + BStr::new_mut(&mut self.bytes) + } +} diff --git a/src/byteset/mod.rs b/src/byteset/mod.rs new file mode 100644 index 0000000..969b0e3 --- /dev/null +++ b/src/byteset/mod.rs @@ -0,0 +1,115 @@ +use memchr::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3}; +mod scalar; + +#[inline] +fn build_table(byteset: &[u8]) -> [u8; 256] { + let mut table = [0u8; 256]; + for &b in byteset { + table[b as usize] = 1; + } + table +} + +#[inline] +pub(crate) fn find(haystack: &[u8], byteset: &[u8]) -> Option<usize> { + match byteset.len() { + 0 => return None, + 1 => memchr(byteset[0], haystack), + 2 => memchr2(byteset[0], byteset[1], haystack), + 3 => memchr3(byteset[0], byteset[1], byteset[2], haystack), + _ => { + let table = build_table(byteset); + scalar::forward_search_bytes(haystack, |b| table[b as usize] != 0) + } + } +} + +#[inline] +pub(crate) fn rfind(haystack: &[u8], byteset: &[u8]) -> Option<usize> { + match byteset.len() { + 0 => return None, + 1 => memrchr(byteset[0], haystack), + 2 => memrchr2(byteset[0], byteset[1], haystack), + 3 => memrchr3(byteset[0], byteset[1], byteset[2], haystack), + _ => { + let table = build_table(byteset); + scalar::reverse_search_bytes(haystack, |b| table[b as usize] != 0) + } + } +} + +#[inline] +pub(crate) fn find_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> { + if haystack.is_empty() { + return None; + } + match byteset.len() { + 0 => return Some(0), + 1 => scalar::inv_memchr(byteset[0], haystack), + 2 => scalar::forward_search_bytes(haystack, |b| { + b != byteset[0] && b != byteset[1] + }), + 3 => scalar::forward_search_bytes(haystack, |b| { + b != byteset[0] && b != byteset[1] && b != byteset[2] + 
}), + _ => { + let table = build_table(byteset); + scalar::forward_search_bytes(haystack, |b| table[b as usize] == 0) + } + } +} +#[inline] +pub(crate) fn rfind_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> { + if haystack.is_empty() { + return None; + } + match byteset.len() { + 0 => return Some(haystack.len() - 1), + 1 => scalar::inv_memrchr(byteset[0], haystack), + 2 => scalar::reverse_search_bytes(haystack, |b| { + b != byteset[0] && b != byteset[1] + }), + 3 => scalar::reverse_search_bytes(haystack, |b| { + b != byteset[0] && b != byteset[1] && b != byteset[2] + }), + _ => { + let table = build_table(byteset); + scalar::reverse_search_bytes(haystack, |b| table[b as usize] == 0) + } + } +} + +#[cfg(test)] +mod tests { + + quickcheck! { + fn qc_byteset_forward_matches_naive( + haystack: Vec<u8>, + needles: Vec<u8> + ) -> bool { + super::find(&haystack, &needles) + == haystack.iter().position(|b| needles.contains(b)) + } + fn qc_byteset_backwards_matches_naive( + haystack: Vec<u8>, + needles: Vec<u8> + ) -> bool { + super::rfind(&haystack, &needles) + == haystack.iter().rposition(|b| needles.contains(b)) + } + fn qc_byteset_forward_not_matches_naive( + haystack: Vec<u8>, + needles: Vec<u8> + ) -> bool { + super::find_not(&haystack, &needles) + == haystack.iter().position(|b| !needles.contains(b)) + } + fn qc_byteset_backwards_not_matches_naive( + haystack: Vec<u8>, + needles: Vec<u8> + ) -> bool { + super::rfind_not(&haystack, &needles) + == haystack.iter().rposition(|b| !needles.contains(b)) + } + } +} diff --git a/src/byteset/scalar.rs b/src/byteset/scalar.rs new file mode 100644 index 0000000..3fe1f53 --- /dev/null +++ b/src/byteset/scalar.rs @@ -0,0 +1,295 @@ +// This is adapted from `fallback.rs` from rust-memchr. It's modified to return +// the 'inverse' query of memchr, e.g. finding the first byte not in the provided +// set. This is simple for the 1-byte case. 
+ +use core::cmp; +use core::usize; + +#[cfg(target_pointer_width = "32")] +const USIZE_BYTES: usize = 4; + +#[cfg(target_pointer_width = "64")] +const USIZE_BYTES: usize = 8; + +// The number of bytes to loop at in one iteration of memchr/memrchr. +const LOOP_SIZE: usize = 2 * USIZE_BYTES; + +/// Repeat the given byte into a word size number. That is, every 8 bits +/// is equivalent to the given byte. For example, if `b` is `\x4E` or +/// `01001110` in binary, then the returned value on a 32-bit system would be: +/// `01001110_01001110_01001110_01001110`. +#[inline(always)] +fn repeat_byte(b: u8) -> usize { + (b as usize) * (usize::MAX / 255) +} + +pub fn inv_memchr(n1: u8, haystack: &[u8]) -> Option<usize> { + let vn1 = repeat_byte(n1); + let confirm = |byte| byte != n1; + let loop_size = cmp::min(LOOP_SIZE, haystack.len()); + let align = USIZE_BYTES - 1; + let start_ptr = haystack.as_ptr(); + let end_ptr = haystack[haystack.len()..].as_ptr(); + let mut ptr = start_ptr; + + unsafe { + if haystack.len() < USIZE_BYTES { + return forward_search(start_ptr, end_ptr, ptr, confirm); + } + + let chunk = read_unaligned_usize(ptr); + if (chunk ^ vn1) != 0 { + return forward_search(start_ptr, end_ptr, ptr, confirm); + } + + ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); + debug_assert!(ptr > start_ptr); + debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); + while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { + debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); + + let a = *(ptr as *const usize); + let b = *(ptr.add(USIZE_BYTES) as *const usize); + let eqa = (a ^ vn1) != 0; + let eqb = (b ^ vn1) != 0; + if eqa || eqb { + break; + } + ptr = ptr.add(LOOP_SIZE); + } + forward_search(start_ptr, end_ptr, ptr, confirm) + } +} + +/// Return the last index not matching the byte `n1` in `haystack`. 
+pub fn inv_memrchr(n1: u8, haystack: &[u8]) -> Option<usize> { + let vn1 = repeat_byte(n1); + let confirm = |byte| byte != n1; + let loop_size = cmp::min(LOOP_SIZE, haystack.len()); + let align = USIZE_BYTES - 1; + let start_ptr = haystack.as_ptr(); + let end_ptr = haystack[haystack.len()..].as_ptr(); + let mut ptr = end_ptr; + + unsafe { + if haystack.len() < USIZE_BYTES { + return reverse_search(start_ptr, end_ptr, ptr, confirm); + } + + let chunk = read_unaligned_usize(ptr.sub(USIZE_BYTES)); + if (chunk ^ vn1) != 0 { + return reverse_search(start_ptr, end_ptr, ptr, confirm); + } + + ptr = (end_ptr as usize & !align) as *const u8; + debug_assert!(start_ptr <= ptr && ptr <= end_ptr); + while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) { + debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); + + let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize); + let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize); + let eqa = (a ^ vn1) != 0; + let eqb = (b ^ vn1) != 0; + if eqa || eqb { + break; + } + ptr = ptr.sub(loop_size); + } + reverse_search(start_ptr, end_ptr, ptr, confirm) + } +} + +#[inline(always)] +unsafe fn forward_search<F: Fn(u8) -> bool>( + start_ptr: *const u8, + end_ptr: *const u8, + mut ptr: *const u8, + confirm: F, +) -> Option<usize> { + debug_assert!(start_ptr <= ptr); + debug_assert!(ptr <= end_ptr); + + while ptr < end_ptr { + if confirm(*ptr) { + return Some(sub(ptr, start_ptr)); + } + ptr = ptr.offset(1); + } + None +} + +#[inline(always)] +unsafe fn reverse_search<F: Fn(u8) -> bool>( + start_ptr: *const u8, + end_ptr: *const u8, + mut ptr: *const u8, + confirm: F, +) -> Option<usize> { + debug_assert!(start_ptr <= ptr); + debug_assert!(ptr <= end_ptr); + + while ptr > start_ptr { + ptr = ptr.offset(-1); + if confirm(*ptr) { + return Some(sub(ptr, start_ptr)); + } + } + None +} + +unsafe fn read_unaligned_usize(ptr: *const u8) -> usize { + (ptr as *const usize).read_unaligned() +} + +/// Subtract `b` from `a` and return the difference. 
`a` should be greater than +/// or equal to `b`. +fn sub(a: *const u8, b: *const u8) -> usize { + debug_assert!(a >= b); + (a as usize) - (b as usize) +} + +/// Safe wrapper around `forward_search` +#[inline] +pub(crate) fn forward_search_bytes<F: Fn(u8) -> bool>( + s: &[u8], + confirm: F, +) -> Option<usize> { + unsafe { + let start = s.as_ptr(); + let end = start.add(s.len()); + forward_search(start, end, start, confirm) + } +} + +/// Safe wrapper around `reverse_search` +#[inline] +pub(crate) fn reverse_search_bytes<F: Fn(u8) -> bool>( + s: &[u8], + confirm: F, +) -> Option<usize> { + unsafe { + let start = s.as_ptr(); + let end = start.add(s.len()); + reverse_search(start, end, end, confirm) + } +} + +#[cfg(test)] +mod tests { + use super::{inv_memchr, inv_memrchr}; + // search string, search byte, inv_memchr result, inv_memrchr result. + // these are expanded into a much larger set of tests in build_tests + const TESTS: &[(&[u8], u8, usize, usize)] = &[ + (b"z", b'a', 0, 0), + (b"zz", b'a', 0, 1), + (b"aza", b'a', 1, 1), + (b"zaz", b'a', 0, 2), + (b"zza", b'a', 0, 1), + (b"zaa", b'a', 0, 0), + (b"zzz", b'a', 0, 2), + ]; + + type TestCase = (Vec<u8>, u8, Option<(usize, usize)>); + + fn build_tests() -> Vec<TestCase> { + let mut result = vec![]; + for &(search, byte, fwd_pos, rev_pos) in TESTS { + result.push((search.to_vec(), byte, Some((fwd_pos, rev_pos)))); + for i in 1..515 { + // add a bunch of copies of the search byte to the end. + let mut suffixed: Vec<u8> = search.into(); + suffixed.extend(std::iter::repeat(byte).take(i)); + result.push((suffixed, byte, Some((fwd_pos, rev_pos)))); + + // add a bunch of copies of the search byte to the start. + let mut prefixed: Vec<u8> = + std::iter::repeat(byte).take(i).collect(); + prefixed.extend(search); + result.push(( + prefixed, + byte, + Some((fwd_pos + i, rev_pos + i)), + )); + + // add a bunch of copies of the search byte to both ends. 
+ let mut surrounded: Vec<u8> = + std::iter::repeat(byte).take(i).collect(); + surrounded.extend(search); + surrounded.extend(std::iter::repeat(byte).take(i)); + result.push(( + surrounded, + byte, + Some((fwd_pos + i, rev_pos + i)), + )); + } + } + + // build non-matching tests for several sizes + for i in 0..515 { + result.push(( + std::iter::repeat(b'\0').take(i).collect(), + b'\0', + None, + )); + } + + result + } + + #[test] + fn test_inv_memchr() { + use {ByteSlice, B}; + for (search, byte, matching) in build_tests() { + assert_eq!( + inv_memchr(byte, &search), + matching.map(|m| m.0), + "inv_memchr when searching for {:?} in {:?}", + byte as char, + // better printing + B(&search).as_bstr(), + ); + assert_eq!( + inv_memrchr(byte, &search), + matching.map(|m| m.1), + "inv_memrchr when searching for {:?} in {:?}", + byte as char, + // better printing + B(&search).as_bstr(), + ); + // Test a rather large number of offsets for potential alignment issues + for offset in 1..130 { + if offset >= search.len() { + break; + } + // If this would cause us to shift the results off the end, skip + // it so that we don't have to recompute them. 
+ if let Some((f, r)) = matching { + if offset > f || offset > r { + break; + } + } + let realigned = &search[offset..]; + + let forward_pos = matching.map(|m| m.0 - offset); + let reverse_pos = matching.map(|m| m.1 - offset); + + assert_eq!( + inv_memchr(byte, &realigned), + forward_pos, + "inv_memchr when searching (realigned by {}) for {:?} in {:?}", + offset, + byte as char, + realigned.as_bstr(), + ); + assert_eq!( + inv_memrchr(byte, &realigned), + reverse_pos, + "inv_memrchr when searching (realigned by {}) for {:?} in {:?}", + offset, + byte as char, + realigned.as_bstr(), + ); + } + } + } +} diff --git a/src/cow.rs b/src/cow.rs new file mode 100644 index 0000000..1556353 --- /dev/null +++ b/src/cow.rs @@ -0,0 +1,84 @@ +use core::ops; +#[cfg(feature = "std")] +use std::borrow::Cow; + +/// A specialized copy-on-write byte string. +/// +/// The purpose of this type is to permit usage of a "borrowed or owned +/// byte string" in a way that keeps std/no-std compatibility. That is, in +/// no-std mode, this type devolves into a simple &[u8] with no owned variant +/// available. +#[derive(Clone, Debug)] +pub struct CowBytes<'a>(Imp<'a>); + +#[cfg(feature = "std")] +#[derive(Clone, Debug)] +struct Imp<'a>(Cow<'a, [u8]>); + +#[cfg(not(feature = "std"))] +#[derive(Clone, Debug)] +struct Imp<'a>(&'a [u8]); + +impl<'a> ops::Deref for CowBytes<'a> { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + self.as_slice() + } +} + +impl<'a> CowBytes<'a> { + /// Create a new borrowed CowBytes. + pub fn new<B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> CowBytes<'a> { + CowBytes(Imp::new(bytes.as_ref())) + } + + /// Create a new owned CowBytes. + #[cfg(feature = "std")] + pub fn new_owned(bytes: Vec<u8>) -> CowBytes<'static> { + CowBytes(Imp(Cow::Owned(bytes))) + } + + /// Return a borrowed byte string, regardless of whether this is an owned + /// or borrowed byte string internally. 
+ pub fn as_slice(&self) -> &[u8] { + self.0.as_slice() + } + + /// Return an owned version of this copy-on-write byte string. + /// + /// If this is already an owned byte string internally, then this is a + /// no-op. Otherwise, the internal byte string is copied. + #[cfg(feature = "std")] + pub fn into_owned(self) -> CowBytes<'static> { + match (self.0).0 { + Cow::Borrowed(b) => CowBytes::new_owned(b.to_vec()), + Cow::Owned(b) => CowBytes::new_owned(b), + } + } +} + +impl<'a> Imp<'a> { + #[cfg(feature = "std")] + pub fn new(bytes: &'a [u8]) -> Imp<'a> { + Imp(Cow::Borrowed(bytes)) + } + + #[cfg(not(feature = "std"))] + pub fn new(bytes: &'a [u8]) -> Imp<'a> { + Imp(bytes) + } + + #[cfg(feature = "std")] + pub fn as_slice(&self) -> &[u8] { + match self.0 { + Cow::Owned(ref x) => x, + Cow::Borrowed(x) => x, + } + } + + #[cfg(not(feature = "std"))] + pub fn as_slice(&self) -> &[u8] { + self.0 + } +} diff --git a/src/ext_slice.rs b/src/ext_slice.rs new file mode 100644 index 0000000..fa08190 --- /dev/null +++ b/src/ext_slice.rs @@ -0,0 +1,3695 @@ +#[cfg(feature = "std")] +use std::borrow::Cow; +#[cfg(feature = "std")] +use std::ffi::OsStr; +#[cfg(feature = "std")] +use std::path::Path; + +use core::{cmp, iter, ops, ptr, slice, str}; +use memchr::{memchr, memrchr}; + +use ascii; +use bstr::BStr; +use byteset; +#[cfg(feature = "std")] +use ext_vec::ByteVec; +use search::{PrefilterState, TwoWay}; +#[cfg(feature = "unicode")] +use unicode::{ + whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes, + SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices, + WordsWithBreaks, +}; +use utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error}; + +/// A short-hand constructor for building a `&[u8]`. +/// +/// This idiosyncratic constructor is useful for concisely building byte string +/// slices. Its primary utility is in conveniently writing byte string literals +/// in a uniform way. 
For example, consider this code that does not compile: +/// +/// ```ignore +/// let strs = vec![b"a", b"xy"]; +/// ``` +/// +/// The above code doesn't compile because the type of the byte string literal +/// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is +/// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored +/// in the same `Vec`. (This is dissimilar from normal Unicode string slices, +/// where both `"a"` and `"xy"` have the same type of `&'static str`.) +/// +/// One way of getting the above code to compile is to convert byte strings to +/// slices. You might try this: +/// +/// ```ignore +/// let strs = vec![&b"a", &b"xy"]; +/// ``` +/// +/// But this just creates values with type `& &'static [u8; 1]` and +/// `& &'static [u8; 2]`. Instead, you need to force the issue like so: +/// +/// ``` +/// let strs = vec![&b"a"[..], &b"xy"[..]]; +/// // or +/// let strs = vec![b"a".as_ref(), b"xy".as_ref()]; +/// ``` +/// +/// But neither of these are particularly convenient to type, especially when +/// it's something as common as a string literal. Thus, this constructor +/// permits writing the following instead: +/// +/// ``` +/// use bstr::B; +/// +/// let strs = vec![B("a"), B(b"xy")]; +/// ``` +/// +/// Notice that this also lets you mix and match both string literals and byte +/// string literals. This can be quite convenient! +#[allow(non_snake_case)] +#[inline] +pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] { + bytes.as_ref() +} + +impl ByteSlice for [u8] { + #[inline] + fn as_bytes(&self) -> &[u8] { + self + } + + #[inline] + fn as_bytes_mut(&mut self) -> &mut [u8] { + self + } +} + +/// Ensure that callers cannot implement `ByteSlice` by making an +/// unimplementable trait its super trait. +pub trait Sealed {} +impl Sealed for [u8] {} + +/// A trait that extends `&[u8]` with string oriented methods. +pub trait ByteSlice: Sealed { + /// A method for accessing the raw bytes of this type. 
This is always a + /// no-op and callers shouldn't care about it. This only exists for making + /// the extension trait work. + #[doc(hidden)] + fn as_bytes(&self) -> &[u8]; + + /// A method for accessing the raw bytes of this type, mutably. This is + /// always a no-op and callers shouldn't care about it. This only exists + /// for making the extension trait work. + #[doc(hidden)] + fn as_bytes_mut(&mut self) -> &mut [u8]; + + /// Return this byte slice as a `&BStr`. + /// + /// Using `&BStr` is useful because of its `fmt::Debug` representation + /// and various other trait implementations (such as `PartialEq` and + /// `PartialOrd`). In particular, the `Debug` implementation for `BStr` + /// shows its bytes as a normal string. For invalid UTF-8, hex escape + /// sequences are used. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// println!("{:?}", b"foo\xFFbar".as_bstr()); + /// ``` + #[inline] + fn as_bstr(&self) -> &BStr { + BStr::new(self.as_bytes()) + } + + /// Return this byte slice as a `&mut BStr`. + /// + /// Using `&mut BStr` is useful because of its `fmt::Debug` representation + /// and various other trait implementations (such as `PartialEq` and + /// `PartialOrd`). In particular, the `Debug` implementation for `BStr` + /// shows its bytes as a normal string. For invalid UTF-8, hex escape + /// sequences are used. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut bytes = *b"foo\xFFbar"; + /// println!("{:?}", &mut bytes.as_bstr_mut()); + /// ``` + #[inline] + fn as_bstr_mut(&mut self) -> &mut BStr { + BStr::new_mut(self.as_bytes_mut()) + } + + /// Create an immutable byte string from an OS string slice. + /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this returns `None` if the given OS string is not valid UTF-8. 
(For + /// example, on Windows, file paths are allowed to be a sequence of + /// arbitrary 16-bit integers. Not all such sequences can be transcoded to + /// valid UTF-8.) + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// use bstr::{B, ByteSlice}; + /// + /// let os_str = OsStr::new("foo"); + /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8"); + /// assert_eq!(bs, B("foo")); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn from_os_str(os_str: &OsStr) -> Option<&[u8]> { + #[cfg(unix)] + #[inline] + fn imp(os_str: &OsStr) -> Option<&[u8]> { + use std::os::unix::ffi::OsStrExt; + + Some(os_str.as_bytes()) + } + + #[cfg(not(unix))] + #[inline] + fn imp(os_str: &OsStr) -> Option<&[u8]> { + os_str.to_str().map(|s| s.as_bytes()) + } + + imp(os_str) + } + + /// Create an immutable byte string from a file path. + /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this returns `None` if the given path is not valid UTF-8. (For example, + /// on Windows, file paths are allowed to be a sequence of arbitrary 16-bit + /// integers. Not all such sequences can be transcoded to valid UTF-8.) + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::path::Path; + /// + /// use bstr::{B, ByteSlice}; + /// + /// let path = Path::new("foo"); + /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8"); + /// assert_eq!(bs, B("foo")); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn from_path(path: &Path) -> Option<&[u8]> { + Self::from_os_str(path.as_os_str()) + } + + /// Safely convert this byte string into a `&str` if it's valid UTF-8. + /// + /// If this byte string is not valid UTF-8, then an error is returned. The + /// error returned indicates the first invalid byte found and the length + /// of the error. 
+ /// + /// In cases where a lossy conversion to `&str` is acceptable, then use one + /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or + /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into) + /// methods. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice, ByteVec}; + /// + /// # fn example() -> Result<(), bstr::Utf8Error> { + /// let s = B("☃βツ").to_str()?; + /// assert_eq!("☃βツ", s); + /// + /// let mut bstring = <Vec<u8>>::from("☃βツ"); + /// bstring.push(b'\xFF'); + /// let err = bstring.to_str().unwrap_err(); + /// assert_eq!(8, err.valid_up_to()); + /// # Ok(()) }; example().unwrap() + /// ``` + #[inline] + fn to_str(&self) -> Result<&str, Utf8Error> { + utf8::validate(self.as_bytes()).map(|_| { + // SAFETY: This is safe because of the guarantees provided by + // utf8::validate. + unsafe { str::from_utf8_unchecked(self.as_bytes()) } + }) + } + + /// Unsafely convert this byte string into a `&str`, without checking for + /// valid UTF-8. + /// + /// # Safety + /// + /// Callers *must* ensure that this byte string is valid UTF-8 before + /// calling this method. Converting a byte string into a `&str` that is + /// not valid UTF-8 is considered undefined behavior. + /// + /// This routine is useful in performance sensitive contexts where the + /// UTF-8 validity of the byte string is already known and it is + /// undesirable to pay the cost of an additional UTF-8 validation check + /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// // SAFETY: This is safe because string literals are guaranteed to be + /// // valid UTF-8 by the Rust compiler. 
+ /// let s = unsafe { B("☃βツ").to_str_unchecked() }; + /// assert_eq!("☃βツ", s); + /// ``` + #[inline] + unsafe fn to_str_unchecked(&self) -> &str { + str::from_utf8_unchecked(self.as_bytes()) + } + + /// Convert this byte string to a valid UTF-8 string by replacing invalid + /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`). + /// + /// If the byte string is already valid UTF-8, then no copying or + /// allocation is performed and a borrowed string slice is returned. If + /// the byte string is not valid UTF-8, then an owned string buffer is + /// returned with invalid bytes replaced by the replacement codepoint. + /// + /// This method uses the "substitution of maximal subparts" (Unicode + /// Standard, Chapter 3, Section 9) strategy for inserting the replacement + /// codepoint. Specifically, a replacement codepoint is inserted whenever a + /// byte is found that cannot possibly lead to a valid code unit sequence. + /// If there were previous bytes that represented a prefix of a well-formed + /// code unit sequence, then all of those bytes are substituted with a + /// single replacement codepoint. The "substitution of maximal subparts" + /// strategy is the same strategy used by + /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/). + /// For a more precise description of the maximal subpart strategy, see + /// the Unicode Standard, Chapter 3, Section 9. See also + /// [Public Review Issue #121](http://www.unicode.org/review/pr-121.html). + /// + /// N.B. Rust's standard library also appears to use the same strategy, + /// but it does not appear to be an API guarantee. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::borrow::Cow; + /// + /// use bstr::ByteSlice; + /// + /// let mut bstring = <Vec<u8>>::from("☃βツ"); + /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy()); + /// + /// // Add a byte that makes the sequence invalid.
+ /// bstring.push(b'\xFF'); + /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy()); + /// ``` + /// + /// This demonstrates the "maximal subpart" substitution logic. + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// // \x61 is the ASCII codepoint for 'a'. + /// // \xF1\x80\x80 is a valid 3-byte code unit prefix. + /// // \xE1\x80 is a valid 2-byte code unit prefix. + /// // \xC2 is a valid 1-byte code unit prefix. + /// // \x62 is the ASCII codepoint for 'b'. + /// // + /// // In sum, each of the prefixes is replaced by a single replacement + /// // codepoint since none of the prefixes are properly completed. This + /// // is in contrast to other strategies that might insert a replacement + /// // codepoint for every single byte. + /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62"); + /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy()); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn to_str_lossy(&self) -> Cow<str> { + match utf8::validate(self.as_bytes()) { + Ok(()) => { + // SAFETY: This is safe because of the guarantees provided by + // utf8::validate. + unsafe { + Cow::Borrowed(str::from_utf8_unchecked(self.as_bytes())) + } + } + Err(err) => { + let mut lossy = String::with_capacity(self.as_bytes().len()); + let (valid, after) = + self.as_bytes().split_at(err.valid_up_to()); + // SAFETY: This is safe because utf8::validate guarantees + // that all of `valid` is valid UTF-8. + lossy.push_str(unsafe { str::from_utf8_unchecked(valid) }); + lossy.push_str("\u{FFFD}"); + if let Some(len) = err.error_len() { + after[len..].to_str_lossy_into(&mut lossy); + } + Cow::Owned(lossy) + } + } + } + + /// Copy the contents of this byte string into the given owned string + /// buffer, while replacing invalid UTF-8 code unit sequences with the + /// Unicode replacement codepoint (`U+FFFD`). 
+ /// + /// This method uses the same "substitution of maximal subparts" strategy + /// for inserting the replacement codepoint as the + /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method. + /// + /// This routine is useful for amortizing allocation. However, unlike + /// `to_str_lossy`, this routine will _always_ copy the contents of this + /// byte string into the destination buffer, even if this byte string is + /// valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::borrow::Cow; + /// + /// use bstr::ByteSlice; + /// + /// let mut bstring = <Vec<u8>>::from("☃βツ"); + /// // Add a byte that makes the sequence invalid. + /// bstring.push(b'\xFF'); + /// + /// let mut dest = String::new(); + /// bstring.to_str_lossy_into(&mut dest); + /// assert_eq!("☃βツ\u{FFFD}", dest); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn to_str_lossy_into(&self, dest: &mut String) { + let mut bytes = self.as_bytes(); + dest.reserve(bytes.len()); + loop { + match utf8::validate(bytes) { + Ok(()) => { + // SAFETY: This is safe because utf8::validate guarantees + // that all of `bytes` is valid UTF-8. + dest.push_str(unsafe { str::from_utf8_unchecked(bytes) }); + break; + } + Err(err) => { + let (valid, after) = bytes.split_at(err.valid_up_to()); + // SAFETY: This is safe because utf8::validate guarantees + // that all of `valid` is valid UTF-8. + dest.push_str(unsafe { str::from_utf8_unchecked(valid) }); + dest.push_str("\u{FFFD}"); + match err.error_len() { + None => break, + Some(len) => bytes = &after[len..], + } + } + } + } + } + + /// Create an OS string slice from this byte string. + /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this returns a UTF-8 decoding error if this byte string is not valid + /// UTF-8. (For example, on Windows, file paths are allowed to be a + /// sequence of arbitrary 16-bit integers. 
There is no obvious mapping from + /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of + /// 16-bit integers.) + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8"); + /// assert_eq!(os_str, "foo"); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn to_os_str(&self) -> Result<&OsStr, Utf8Error> { + #[cfg(unix)] + #[inline] + fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> { + use std::os::unix::ffi::OsStrExt; + + Ok(OsStr::from_bytes(bytes)) + } + + #[cfg(not(unix))] + #[inline] + fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> { + bytes.to_str().map(OsStr::new) + } + + imp(self.as_bytes()) + } + + /// Lossily create an OS string slice from this byte string. + /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this will perform a UTF-8 check and lossily convert this byte string + /// into valid UTF-8 using the Unicode replacement codepoint. + /// + /// Note that this can prevent the correct roundtripping of file paths on + /// non-Unix systems such as Windows, where file paths are an arbitrary + /// sequence of 16-bit integers. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let os_str = b"foo\xFFbar".to_os_str_lossy(); + /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar"); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn to_os_str_lossy(&self) -> Cow<OsStr> { + #[cfg(unix)] + #[inline] + fn imp(bytes: &[u8]) -> Cow<OsStr> { + use std::os::unix::ffi::OsStrExt; + + Cow::Borrowed(OsStr::from_bytes(bytes)) + } + + #[cfg(not(unix))] + #[inline] + fn imp(bytes: &[u8]) -> Cow<OsStr> { + use std::ffi::OsString; + + match bytes.to_str_lossy() { + Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)), + Cow::Owned(x) => Cow::Owned(OsString::from(x)), + } + } + + imp(self.as_bytes()) + } + + /// Create a path slice from this byte string. 
+ /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this returns a UTF-8 decoding error if this byte string is not valid + /// UTF-8. (For example, on Windows, file paths are allowed to be a + /// sequence of arbitrary 16-bit integers. There is no obvious mapping from + /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of + /// 16-bit integers.) + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let path = b"foo".to_path().expect("should be valid UTF-8"); + /// assert_eq!(path.as_os_str(), "foo"); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn to_path(&self) -> Result<&Path, Utf8Error> { + self.to_os_str().map(Path::new) + } + + /// Lossily create a path slice from this byte string. + /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this will perform a UTF-8 check and lossily convert this byte string + /// into valid UTF-8 using the Unicode replacement codepoint. + /// + /// Note that this can prevent the correct roundtripping of file paths on + /// non-Unix systems such as Windows, where file paths are an arbitrary + /// sequence of 16-bit integers. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = b"foo\xFFbar"; + /// let path = bs.to_path_lossy(); + /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar"); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn to_path_lossy(&self) -> Cow<Path> { + use std::path::PathBuf; + + match self.to_os_str_lossy() { + Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)), + Cow::Owned(x) => Cow::Owned(PathBuf::from(x)), + } + } + + /// Create a new byte string by repeating this byte string `n` times. + /// + /// # Panics + /// + /// This function panics if the capacity of the new byte string would + /// overflow. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo")); + /// assert_eq!(b"foo".repeatn(0), B("")); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn repeatn(&self, n: usize) -> Vec<u8> { + let bs = self.as_bytes(); + let mut dst = vec![0; bs.len() * n]; + for i in 0..n { + dst[i * bs.len()..(i + 1) * bs.len()].copy_from_slice(bs); + } + dst + } + + /// Returns true if and only if this byte string contains the given needle. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert!(b"foo bar".contains_str("foo")); + /// assert!(b"foo bar".contains_str("bar")); + /// assert!(!b"foo".contains_str("foobar")); + /// ``` + #[inline] + fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool { + self.find(needle).is_some() + } + + /// Returns true if and only if this byte string has the given prefix. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert!(b"foo bar".starts_with_str("foo")); + /// assert!(!b"foo bar".starts_with_str("bar")); + /// assert!(!b"foo".starts_with_str("foobar")); + /// ``` + #[inline] + fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool { + self.as_bytes().starts_with(prefix.as_ref()) + } + + /// Returns true if and only if this byte string has the given suffix. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert!(b"foo bar".ends_with_str("bar")); + /// assert!(!b"foo bar".ends_with_str("foo")); + /// assert!(!b"bar".ends_with_str("foobar")); + /// ``` + #[inline] + fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool { + self.as_bytes().ends_with(suffix.as_ref()) + } + + /// Returns the index of the first occurrence of the given needle. + /// + /// The needle may be any type that can be cheaply converted into a + /// `&[u8]`. 
This includes, but is not limited to, `&str` and `&[u8]`. + /// + /// Note that if you are searching for the same needle in many + /// different small haystacks, it may be faster to initialize a + /// [`Finder`](struct.Finder.html) once, and reuse it for each search. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foo bar baz"; + /// assert_eq!(Some(0), s.find("foo")); + /// assert_eq!(Some(4), s.find("bar")); + /// assert_eq!(None, s.find("quux")); + /// ``` + #[inline] + fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> { + Finder::new(needle.as_ref()).find(self.as_bytes()) + } + + /// Returns the index of the last occurrence of the given needle. + /// + /// The needle may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. + /// + /// Note that if you are searching for the same needle in many + /// different small haystacks, it may be faster to initialize a + /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for + /// each search. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity.
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foo bar baz"; + /// assert_eq!(Some(0), s.rfind("foo")); + /// assert_eq!(Some(4), s.rfind("bar")); + /// assert_eq!(Some(8), s.rfind("ba")); + /// assert_eq!(None, s.rfind("quux")); + /// ``` + #[inline] + fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> { + FinderReverse::new(needle.as_ref()).rfind(self.as_bytes()) + } + + /// Returns an iterator of the non-overlapping occurrences of the given + /// needle. The iterator yields byte offset positions indicating the start + /// of each match. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foo bar foo foo quux foo"; + /// let matches: Vec<usize> = s.find_iter("foo").collect(); + /// assert_eq!(matches, vec![0, 8, 12, 21]); + /// ``` + /// + /// An empty string matches at every position, including the position + /// immediately following the last byte: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let matches: Vec<usize> = b"foo".find_iter("").collect(); + /// assert_eq!(matches, vec![0, 1, 2, 3]); + /// + /// let matches: Vec<usize> = b"".find_iter("").collect(); + /// assert_eq!(matches, vec![0]); + /// ``` + #[inline] + fn find_iter<'a, B: ?Sized + AsRef<[u8]>>( + &'a self, + needle: &'a B, + ) -> Find<'a> { + Find::new(self.as_bytes(), needle.as_ref()) + } + + /// Returns an iterator of the non-overlapping occurrences of the given + /// needle in reverse. The iterator yields byte offset positions indicating + /// the start of each match. 
+ /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foo bar foo foo quux foo"; + /// let matches: Vec<usize> = s.rfind_iter("foo").collect(); + /// assert_eq!(matches, vec![21, 12, 8, 0]); + /// ``` + /// + /// An empty string matches at every position, including the position + /// immediately following the last byte: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let matches: Vec<usize> = b"foo".rfind_iter("").collect(); + /// assert_eq!(matches, vec![3, 2, 1, 0]); + /// + /// let matches: Vec<usize> = b"".rfind_iter("").collect(); + /// assert_eq!(matches, vec![0]); + /// ``` + #[inline] + fn rfind_iter<'a, B: ?Sized + AsRef<[u8]>>( + &'a self, + needle: &'a B, + ) -> FindReverse<'a> { + FindReverse::new(self.as_bytes(), needle.as_ref()) + } + + /// Returns the index of the first occurrence of the given byte. If the + /// byte does not occur in this byte string, then `None` is returned. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z')); + /// assert_eq!(None, b"foo bar baz".find_byte(b'y')); + /// ``` + #[inline] + fn find_byte(&self, byte: u8) -> Option<usize> { + memchr(byte, self.as_bytes()) + } + + /// Returns the index of the last occurrence of the given byte. If the + /// byte does not occur in this byte string, then `None` is returned. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z')); + /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y')); + /// ``` + #[inline] + fn rfind_byte(&self, byte: u8) -> Option<usize> { + memrchr(byte, self.as_bytes()) + } + + /// Returns the index of the first occurrence of the given codepoint. + /// If the codepoint does not occur in this byte string, then `None` is + /// returned. + /// + /// Note that if one searches for the replacement codepoint, `\u{FFFD}`, + /// then only explicit occurrences of that encoding will be found. Invalid + /// UTF-8 sequences will not be matched. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// assert_eq!(Some(10), b"foo bar baz".find_char('z')); + /// assert_eq!(Some(4), B("αβγγδ").find_char('γ')); + /// assert_eq!(None, b"foo bar baz".find_char('y')); + /// ``` + #[inline] + fn find_char(&self, ch: char) -> Option<usize> { + self.find(ch.encode_utf8(&mut [0; 4])) + } + + /// Returns the index of the last occurrence of the given codepoint. + /// If the codepoint does not occur in this byte string, then `None` is + /// returned. + /// + /// Note that if one searches for the replacement codepoint, `\u{FFFD}`, + /// then only explicit occurrences of that encoding will be found. Invalid + /// UTF-8 sequences will not be matched. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z')); + /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ')); + /// assert_eq!(None, b"foo bar baz".rfind_char('y')); + /// ``` + #[inline] + fn rfind_char(&self, ch: char) -> Option<usize> { + self.rfind(ch.encode_utf8(&mut [0; 4])) + } + + /// Returns the index of the first occurrence of any of the bytes in the + /// provided set. 
+ /// + /// The `byteset` may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but + /// note that passing a `&str` which contains multibyte characters may not + /// behave as you expect: each byte in the `&str` is treated as an + /// individual member of the byte set. + /// + /// Note that order is irrelevant for the `byteset` parameter, and + /// duplicate bytes present in its body are ignored. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the set of bytes and the haystack. That is, this + /// runs in `O(byteset.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6)); + /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4)); + /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n")); + /// ``` + #[inline] + fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { + byteset::find(self.as_bytes(), byteset.as_ref()) + } + + /// Returns the index of the first occurrence of a byte that is not a member + /// of the provided set. + /// + /// The `byteset` may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but + /// note that passing a `&str` which contains multibyte characters may not + /// behave as you expect: each byte in the `&str` is treated as an + /// individual member of the byte set. + /// + /// Note that order is irrelevant for the `byteset` parameter, and + /// duplicate bytes present in its body are ignored. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the set of bytes and the haystack. 
That is, this + /// runs in `O(byteset.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4)); + /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2)); + /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0)); + /// ``` + #[inline] + fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { + byteset::find_not(self.as_bytes(), byteset.as_ref()) + } + + /// Returns the index of the last occurrence of any of the bytes in the + /// provided set. + /// + /// The `byteset` may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but + /// note that passing a `&str` which contains multibyte characters may not + /// behave as you expect: each byte in the `&str` is treated as an + /// individual member of the byte set. + /// + /// Note that order is irrelevant for the `byteset` parameter, and duplicate + /// bytes present in its body are ignored. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the set of bytes and the haystack. That is, this + /// runs in `O(byteset.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9)); + /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10)); + /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None); + /// ``` + #[inline] + fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { + byteset::rfind(self.as_bytes(), byteset.as_ref()) + } + + /// Returns the index of the last occurrence of a byte that is not a member + /// of the provided set. + /// + /// The `byteset` may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but + /// note that passing a `&str` which contains multibyte characters may not + /// behave as you expect: each byte in the `&str` is treated as an + /// individual member of the byte set. + /// + /// Note that order is irrelevant for the `byteset` parameter, and + /// duplicate bytes present in its body are ignored. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the set of bytes and the haystack. That is, this + /// runs in `O(byteset.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10)); + /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2)); + /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz ")); + /// ``` + #[inline] + fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { + byteset::rfind_not(self.as_bytes(), byteset.as_ref()) + } + + /// Returns an iterator over the fields in a byte string, separated by + /// contiguous whitespace. 
+ /// + /// # Example + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B(" foo\tbar\t\u{2003}\nquux \n"); + /// let fields: Vec<&[u8]> = s.fields().collect(); + /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]); + /// ``` + /// + /// A byte string consisting of just whitespace yields no elements: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// assert_eq!(0, B(" \n\t\u{2003}\n \t").fields().count()); + /// ``` + #[inline] + fn fields(&self) -> Fields { + Fields::new(self.as_bytes()) + } + + /// Returns an iterator over the fields in a byte string, separated by + /// contiguous codepoints satisfying the given predicate. + /// + /// If this byte string is not valid UTF-8, then the given closure will + /// be called with a Unicode replacement codepoint when invalid UTF-8 + /// bytes are seen. + /// + /// # Example + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = b"123foo999999bar1quux123456"; + /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect(); + /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]); + /// ``` + /// + /// A byte string consisting of all codepoints satisfying the predicate + /// yields no elements: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count()); + /// ``` + #[inline] + fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<F> { + FieldsWith::new(self.as_bytes(), f) + } + + /// Returns an iterator over substrings of this byte string, separated + /// by the given byte string. Each element yielded is guaranteed not to + /// include the splitter substring. + /// + /// The splitter may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect(); + /// assert_eq!(x, vec![ + /// B("Mary"), B("had"), B("a"), B("little"), B("lamb"), + /// ]); + /// + /// let x: Vec<&[u8]> = b"".split_str("X").collect(); + /// assert_eq!(x, vec![b""]); + /// + /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect(); + /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]); + /// + /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect(); + /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]); + /// ``` + /// + /// If a string contains multiple contiguous separators, you will end up + /// with empty strings yielded by the iterator: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect(); + /// assert_eq!(x, vec![ + /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"), + /// ]); + /// + /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect(); + /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]); + /// ``` + /// + /// Separators at the start or end of a string are neighbored by empty + /// strings. + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<&[u8]> = b"010".split_str("0").collect(); + /// assert_eq!(x, vec![B(""), B("1"), B("")]); + /// ``` + /// + /// When the empty string is used as a separator, it splits every **byte** + /// in the byte string, along with the beginning and end of the byte + /// string. + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<&[u8]> = b"rust".split_str("").collect(); + /// assert_eq!(x, vec![ + /// B(""), B("r"), B("u"), B("s"), B("t"), B(""), + /// ]); + /// + /// // Splitting by an empty string is not UTF-8 aware. Elements yielded + /// // may not be valid UTF-8! 
+ /// let x: Vec<&[u8]> = B("☃").split_str("").collect(); + /// assert_eq!(x, vec![ + /// B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""), + /// ]); + /// ``` + /// + /// Contiguous separators, especially whitespace, can lead to possibly + /// surprising behavior. For example, this code is correct: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<&[u8]> = b" a b c".split_str(" ").collect(); + /// assert_eq!(x, vec![ + /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"), + /// ]); + /// ``` + /// + /// It does *not* give you `["a", "b", "c"]`. For that behavior, use + /// [`fields`](#method.fields) instead. + #[inline] + fn split_str<'a, B: ?Sized + AsRef<[u8]>>( + &'a self, + splitter: &'a B, + ) -> Split<'a> { + Split::new(self.as_bytes(), splitter.as_ref()) + } + + /// Returns an iterator over substrings of this byte string, separated by + /// the given byte string, in reverse. Each element yielded is guaranteed + /// not to include the splitter substring. + /// + /// The splitter may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<&[u8]> = + /// b"Mary had a little lamb".rsplit_str(" ").collect(); + /// assert_eq!(x, vec![ + /// B("lamb"), B("little"), B("a"), B("had"), B("Mary"), + /// ]); + /// + /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect(); + /// assert_eq!(x, vec![b""]); + /// + /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect(); + /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]); + /// + /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect(); + /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]); + /// ``` + /// + /// If a string contains multiple contiguous separators, you will end up + /// with empty strings yielded by the iterator: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect(); + /// assert_eq!(x, vec![ + /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""), + /// ]); + /// + /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect(); + /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]); + /// ``` + /// + /// Separators at the start or end of a string are neighbored by empty + /// strings. + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect(); + /// assert_eq!(x, vec![B(""), B("1"), B("")]); + /// ``` + /// + /// When the empty string is used as a separator, it splits every **byte** + /// in the byte string, along with the beginning and end of the byte + /// string. + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect(); + /// assert_eq!(x, vec![ + /// B(""), B("t"), B("s"), B("u"), B("r"), B(""), + /// ]); + /// + /// // Splitting by an empty string is not UTF-8 aware. Elements yielded + /// // may not be valid UTF-8! 
+ /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect(); + /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]); + /// ``` + /// + /// Contiguous separators, especially whitespace, can lead to possibly + /// surprising behavior. For example, this code is correct: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<&[u8]> = b" a b c".rsplit_str(" ").collect(); + /// assert_eq!(x, vec![ + /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""), + /// ]); + /// ``` + /// + /// It does *not* give you `["a", "b", "c"]`. + #[inline] + fn rsplit_str<'a, B: ?Sized + AsRef<[u8]>>( + &'a self, + splitter: &'a B, + ) -> SplitReverse<'a> { + SplitReverse::new(self.as_bytes(), splitter.as_ref()) + } + + /// Returns an iterator of at most `limit` substrings of this byte string, + /// separated by the given byte string. If `limit` substrings are yielded, + /// then the last substring will contain the remainder of this byte string. + /// + /// The splitter may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect(); + /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]); + /// + /// let x: Vec<_> = b"".splitn_str(3, "X").collect(); + /// assert_eq!(x, vec![b""]); + /// + /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect(); + /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]); + /// + /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect(); + /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]); + /// + /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect(); + /// assert_eq!(x, vec![B("abcXdef")]); + /// + /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect(); + /// assert_eq!(x, vec![B("abcdef")]); + /// + /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect(); + /// assert!(x.is_empty()); + /// ``` + #[inline] + fn splitn_str<'a, B: ?Sized + AsRef<[u8]>>( + &'a self, + limit: usize, + splitter: &'a B, + ) -> SplitN<'a> { + SplitN::new(self.as_bytes(), splitter.as_ref(), limit) + } + + /// Returns an iterator of at most `limit` substrings of this byte string, + /// separated by the given byte string, in reverse. If `limit` substrings + /// are yielded, then the last substring will contain the remainder of this + /// byte string. + /// + /// The splitter may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let x: Vec<_> = + /// b"Mary had a little lamb".rsplitn_str(3, " ").collect(); + /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]); + /// + /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect(); + /// assert_eq!(x, vec![b""]); + /// + /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect(); + /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]); + /// + /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect(); + /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]); + /// + /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect(); + /// assert_eq!(x, vec![B("abcXdef")]); + /// + /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect(); + /// assert_eq!(x, vec![B("abcdef")]); + /// + /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect(); + /// assert!(x.is_empty()); + /// ``` + #[inline] + fn rsplitn_str<'a, B: ?Sized + AsRef<[u8]>>( + &'a self, + limit: usize, + splitter: &'a B, + ) -> SplitNReverse<'a> { + SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit) + } + + /// Replace all matches of the given needle with the given replacement, and + /// return the result as a new `Vec<u8>`. + /// + /// This routine is useful as a convenience. If you need to reuse an + /// allocation, use [`replace_into`](#method.replace_into) instead. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"this is old".replace("old", "new"); + /// assert_eq!(s, "this is new".as_bytes()); + /// ``` + /// + /// When the pattern doesn't match: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"this is old".replace("nada nada", "limonada"); + /// assert_eq!(s, "this is old".as_bytes()); + /// ``` + /// + /// When the needle is an empty string: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foo".replace("", "Z"); + /// assert_eq!(s, "ZfZoZoZ".as_bytes()); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>( + &self, + needle: N, + replacement: R, + ) -> Vec<u8> { + let mut dest = Vec::with_capacity(self.as_bytes().len()); + self.replace_into(needle, replacement, &mut dest); + dest + } + + /// Replace up to `limit` matches of the given needle with the given + /// replacement, and return the result as a new `Vec<u8>`. + /// + /// This routine is useful as a convenience. If you need to reuse an + /// allocation, use [`replacen_into`](#method.replacen_into) instead. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foofoo".replacen("o", "z", 2); + /// assert_eq!(s, "fzzfoo".as_bytes()); + /// ``` + /// + /// When the pattern doesn't match: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foofoo".replacen("a", "z", 2); + /// assert_eq!(s, "foofoo".as_bytes()); + /// ``` + /// + /// When the needle is an empty string: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foo".replacen("", "Z", 2); + /// assert_eq!(s, "ZfZoo".as_bytes()); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>( + &self, + needle: N, + replacement: R, + limit: usize, + ) -> Vec<u8> { + let mut dest = Vec::with_capacity(self.as_bytes().len()); + self.replacen_into(needle, replacement, limit, &mut dest); + dest + } + + /// Replace all matches of the given needle with the given replacement, + /// and write the result into the provided `Vec<u8>`. + /// + /// This does **not** clear `dest` before writing to it. + /// + /// This routine is useful for reusing allocation. For a more convenient + /// API, use [`replace`](#method.replace) instead. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"this is old"; + /// + /// let mut dest = vec![]; + /// s.replace_into("old", "new", &mut dest); + /// assert_eq!(dest, "this is new".as_bytes()); + /// ``` + /// + /// When the pattern doesn't match: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"this is old"; + /// + /// let mut dest = vec![]; + /// s.replace_into("nada nada", "limonada", &mut dest); + /// assert_eq!(dest, "this is old".as_bytes()); + /// ``` + /// + /// When the needle is an empty string: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foo"; + /// + /// let mut dest = vec![]; + /// s.replace_into("", "Z", &mut dest); + /// assert_eq!(dest, "ZfZoZoZ".as_bytes()); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>( + &self, + needle: N, + replacement: R, + dest: &mut Vec<u8>, + ) { + let (needle, replacement) = (needle.as_ref(), replacement.as_ref()); + + let mut last = 0; + for start in self.find_iter(needle) { + dest.push_str(&self.as_bytes()[last..start]); + dest.push_str(replacement); + last = start + needle.len(); + } + dest.push_str(&self.as_bytes()[last..]); + } + + /// Replace up to `limit` matches of the given needle with the given + /// replacement, and write the result into the provided `Vec<u8>`. + /// + /// This does **not** clear `dest` before writing to it. + /// + /// This routine is useful for reusing allocation. For a more convenient + /// API, use [`replacen`](#method.replacen) instead. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foofoo"; + /// + /// let mut dest = vec![]; + /// s.replacen_into("o", "z", 2, &mut dest); + /// assert_eq!(dest, "fzzfoo".as_bytes()); + /// ``` + /// + /// When the pattern doesn't match: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foofoo"; + /// + /// let mut dest = vec![]; + /// s.replacen_into("a", "z", 2, &mut dest); + /// assert_eq!(dest, "foofoo".as_bytes()); + /// ``` + /// + /// When the needle is an empty string: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foo"; + /// + /// let mut dest = vec![]; + /// s.replacen_into("", "Z", 2, &mut dest); + /// assert_eq!(dest, "ZfZoo".as_bytes()); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>( + &self, + needle: N, + replacement: R, + limit: usize, + dest: &mut Vec<u8>, + ) { + let (needle, replacement) = (needle.as_ref(), replacement.as_ref()); + + let mut last = 0; + for start in self.find_iter(needle).take(limit) { + dest.push_str(&self.as_bytes()[last..start]); + dest.push_str(replacement); + last = start + needle.len(); + } + dest.push_str(&self.as_bytes()[last..]); + } + + /// Returns an iterator over the bytes in this byte string. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = b"foobar"; + /// let bytes: Vec<u8> = bs.bytes().collect(); + /// assert_eq!(bytes, bs); + /// ``` + #[inline] + fn bytes(&self) -> Bytes { + Bytes { it: self.as_bytes().iter() } + } + + /// Returns an iterator over the Unicode scalar values in this byte string. + /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint + /// is yielded instead. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61"; + /// let chars: Vec<char> = bs.chars().collect(); + /// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars); + /// ``` + /// + /// Codepoints can also be iterated over in reverse: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61"; + /// let chars: Vec<char> = bs.chars().rev().collect(); + /// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars); + /// ``` + #[inline] + fn chars(&self) -> Chars { + Chars::new(self.as_bytes()) + } + + /// Returns an iterator over the Unicode scalar values in this byte string + /// along with their starting and ending byte index positions. If invalid + /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded + /// instead. + /// + /// Note that this is slightly different from the `CharIndices` iterator + /// provided by the standard library. Aside from working on possibly + /// invalid UTF-8, this iterator provides both the corresponding starting + /// and ending byte indices of each codepoint yielded. The ending position + /// is necessary to slice the original byte string when invalid UTF-8 bytes + /// are converted into a Unicode replacement codepoint, since a single + /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes + /// (inclusive). 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61"; + /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect(); + /// assert_eq!(chars, vec![ + /// (0, 3, '☃'), + /// (3, 4, '\u{FFFD}'), + /// (4, 8, '𝞃'), + /// (8, 10, '\u{FFFD}'), + /// (10, 11, 'a'), + /// ]); + /// ``` + /// + /// Codepoints can also be iterated over in reverse: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61"; + /// let chars: Vec<(usize, usize, char)> = bs + /// .char_indices() + /// .rev() + /// .collect(); + /// assert_eq!(chars, vec![ + /// (10, 11, 'a'), + /// (8, 10, '\u{FFFD}'), + /// (4, 8, '𝞃'), + /// (3, 4, '\u{FFFD}'), + /// (0, 3, '☃'), + /// ]); + /// ``` + #[inline] + fn char_indices(&self) -> CharIndices { + CharIndices::new(self.as_bytes()) + } + + /// Iterate over chunks of valid UTF-8. + /// + /// The iterator returned yields chunks of valid UTF-8 separated by invalid + /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes, + /// which are determined via the "substitution of maximal subparts" + /// strategy described in the docs for the + /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) + /// method. 
+ /// + /// # Examples + /// + /// This example shows how to gather all valid and invalid chunks from a + /// byte slice: + /// + /// ``` + /// use bstr::{ByteSlice, Utf8Chunk}; + /// + /// let bytes = b"foo\xFD\xFEbar\xFF"; + /// + /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]); + /// for chunk in bytes.utf8_chunks() { + /// if !chunk.valid().is_empty() { + /// valid_chunks.push(chunk.valid()); + /// } + /// if !chunk.invalid().is_empty() { + /// invalid_chunks.push(chunk.invalid()); + /// } + /// } + /// + /// assert_eq!(valid_chunks, vec!["foo", "bar"]); + /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]); + /// ``` + #[inline] + fn utf8_chunks(&self) -> Utf8Chunks { + Utf8Chunks { bytes: self.as_bytes() } + } + + /// Returns an iterator over the grapheme clusters in this byte string. + /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint + /// is yielded instead. + /// + /// # Examples + /// + /// This example shows how multiple codepoints can combine to form a + /// single grapheme cluster: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes(); + /// let graphemes: Vec<&str> = bs.graphemes().collect(); + /// assert_eq!(vec!["à̖", "🇺🇸"], graphemes); + /// ``` + /// + /// This shows that graphemes can be iterated over in reverse: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes(); + /// let graphemes: Vec<&str> = bs.graphemes().rev().collect(); + /// assert_eq!(vec!["🇺🇸", "à̖"], graphemes); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn graphemes(&self) -> Graphemes { + Graphemes::new(self.as_bytes()) + } + + /// Returns an iterator over the grapheme clusters in this byte string + /// along with their starting and ending byte index positions. If invalid + /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded + /// instead. 
+ /// + /// # Examples + /// + /// This example shows how to get the byte offsets of each individual + /// grapheme cluster: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes(); + /// let graphemes: Vec<(usize, usize, &str)> = + /// bs.grapheme_indices().collect(); + /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "🇺🇸")], graphemes); + /// ``` + /// + /// This example shows what happens when invalid UTF-8 is encountered. Note + /// that the offsets are valid indices into the original string, and do + /// not necessarily correspond to the length of the `&str` returned! + /// + /// ``` + /// use bstr::{ByteSlice, ByteVec}; + /// + /// let mut bytes = vec![]; + /// bytes.push_str("a\u{0300}\u{0316}"); + /// bytes.push(b'\xFF'); + /// bytes.push_str("\u{1F1FA}\u{1F1F8}"); + /// + /// let graphemes: Vec<(usize, usize, &str)> = + /// bytes.grapheme_indices().collect(); + /// assert_eq!( + /// graphemes, + /// vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "🇺🇸")] + /// ); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn grapheme_indices(&self) -> GraphemeIndices { + GraphemeIndices::new(self.as_bytes()) + } + + /// Returns an iterator over the words in this byte string. If invalid + /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded + /// instead. + /// + /// This is similar to + /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks), + /// except it only returns elements that contain a "word" character. A word + /// character is defined by UTS #18 (Annex C) to be the combination of the + /// `Alphabetic` and `Join_Control` properties, along with the + /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general + /// categories. + /// + /// Since words are made up of one or more codepoints, this iterator + /// yields `&str` elements. When invalid UTF-8 is encountered, replacement + /// codepoints are [substituted](index.html#handling-of-invalid-utf-8). 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#; + /// let words: Vec<&str> = bs.words().collect(); + /// assert_eq!(words, vec![ + /// "The", "quick", "brown", "fox", "can't", + /// "jump", "32.3", "feet", "right", + /// ]); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn words(&self) -> Words { + Words::new(self.as_bytes()) + } + + /// Returns an iterator over the words in this byte string along with + /// their starting and ending byte index positions. + /// + /// This is similar to + /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices), + /// except it only returns elements that contain a "word" character. A word + /// character is defined by UTS #18 (Annex C) to be the combination of the + /// `Alphabetic` and `Join_Control` properties, along with the + /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general + /// categories. + /// + /// Since words are made up of one or more codepoints, this iterator + /// yields `&str` elements. When invalid UTF-8 is encountered, replacement + /// codepoints are [substituted](index.html#handling-of-invalid-utf-8). + /// + /// # Examples + /// + /// This example shows how to get the byte offsets of each individual + /// word: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = b"can't jump 32.3 feet"; + /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect(); + /// assert_eq!(words, vec![ + /// (0, 5, "can't"), + /// (6, 10, "jump"), + /// (11, 15, "32.3"), + /// (16, 20, "feet"), + /// ]); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn word_indices(&self) -> WordIndices { + WordIndices::new(self.as_bytes()) + } + + /// Returns an iterator over the words in this byte string, along with + /// all breaks between the words. 
Concatenating all elements yielded by + /// the iterator results in the original string (modulo Unicode replacement + /// codepoint substitutions if invalid UTF-8 is encountered). + /// + /// Since words are made up of one or more codepoints, this iterator + /// yields `&str` elements. When invalid UTF-8 is encountered, replacement + /// codepoints are [substituted](index.html#handling-of-invalid-utf-8). + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#; + /// let words: Vec<&str> = bs.words_with_breaks().collect(); + /// assert_eq!(words, vec![ + /// "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", + /// " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet", + /// ",", " ", "right", "?", + /// ]); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn words_with_breaks(&self) -> WordsWithBreaks { + WordsWithBreaks::new(self.as_bytes()) + } + + /// Returns an iterator over the words and their byte offsets in this + /// byte string, along with all breaks between the words. Concatenating + /// all elements yielded by the iterator results in the original string + /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is + /// encountered). + /// + /// Since words are made up of one or more codepoints, this iterator + /// yields `&str` elements. When invalid UTF-8 is encountered, replacement + /// codepoints are [substituted](index.html#handling-of-invalid-utf-8). 
+ /// + /// # Examples + /// + /// This example shows how to get the byte offsets of each individual + /// word: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = b"can't jump 32.3 feet"; + /// let words: Vec<(usize, usize, &str)> = + /// bs.words_with_break_indices().collect(); + /// assert_eq!(words, vec![ + /// (0, 5, "can't"), + /// (5, 6, " "), + /// (6, 10, "jump"), + /// (10, 11, " "), + /// (11, 15, "32.3"), + /// (15, 16, " "), + /// (16, 20, "feet"), + /// ]); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn words_with_break_indices(&self) -> WordsWithBreakIndices { + WordsWithBreakIndices::new(self.as_bytes()) + } + + /// Returns an iterator over the sentences in this byte string. + /// + /// Typically, a sentence will include its trailing punctuation and + /// whitespace. Concatenating all elements yielded by the iterator + /// results in the original string (modulo Unicode replacement codepoint + /// substitutions if invalid UTF-8 is encountered). + /// + /// Since sentences are made up of one or more codepoints, this iterator + /// yields `&str` elements. When invalid UTF-8 is encountered, replacement + /// codepoints are [substituted](index.html#handling-of-invalid-utf-8). + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = b"I want this. Not that. Right now."; + /// let sentences: Vec<&str> = bs.sentences().collect(); + /// assert_eq!(sentences, vec![ + /// "I want this. ", + /// "Not that. ", + /// "Right now.", + /// ]); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn sentences(&self) -> Sentences { + Sentences::new(self.as_bytes()) + } + + /// Returns an iterator over the sentences in this byte string along with + /// their starting and ending byte index positions. + /// + /// Typically, a sentence will include its trailing punctuation and + /// whitespace. 
Concatenating all elements yielded by the iterator + /// results in the original string (modulo Unicode replacement codepoint + /// substitutions if invalid UTF-8 is encountered). + /// + /// Since sentences are made up of one or more codepoints, this iterator + /// yields `&str` elements. When invalid UTF-8 is encountered, replacement + /// codepoints are [substituted](index.html#handling-of-invalid-utf-8). + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let bs = b"I want this. Not that. Right now."; + /// let sentences: Vec<(usize, usize, &str)> = + /// bs.sentence_indices().collect(); + /// assert_eq!(sentences, vec![ + /// (0, 13, "I want this. "), + /// (13, 23, "Not that. "), + /// (23, 33, "Right now."), + /// ]); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn sentence_indices(&self) -> SentenceIndices { + SentenceIndices::new(self.as_bytes()) + } + + /// An iterator over all lines in a byte string, without their + /// terminators. + /// + /// For this iterator, the only line terminators recognized are `\r\n` and + /// `\n`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = b"\ + /// foo + /// + /// bar\r + /// baz + /// + /// + /// quux"; + /// let lines: Vec<&[u8]> = s.lines().collect(); + /// assert_eq!(lines, vec![ + /// B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"), + /// ]); + /// ``` + #[inline] + fn lines(&self) -> Lines { + Lines::new(self.as_bytes()) + } + + /// An iterator over all lines in a byte string, including their + /// terminators. + /// + /// For this iterator, the only line terminator recognized is `\n`. (Since + /// line terminators are included, this also handles `\r\n` line endings.) + /// + /// Line terminators are only included if they are present in the original + /// byte string. For example, the last line in a byte string may not end + /// with a line terminator. 
+ /// + /// Concatenating all elements yielded by this iterator is guaranteed to + /// yield the original byte string. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = b"\ + /// foo + /// + /// bar\r + /// baz + /// + /// + /// quux"; + /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect(); + /// assert_eq!(lines, vec![ + /// B("foo\n"), + /// B("\n"), + /// B("bar\r\n"), + /// B("baz\n"), + /// B("\n"), + /// B("\n"), + /// B("quux"), + /// ]); + /// ``` + #[inline] + fn lines_with_terminator(&self) -> LinesWithTerminator { + LinesWithTerminator::new(self.as_bytes()) + } + + /// Return a byte string slice with leading and trailing whitespace + /// removed. + /// + /// Whitespace is defined according to the terms of the `White_Space` + /// Unicode property. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B(" foo\tbar\t\u{2003}\n"); + /// assert_eq!(s.trim(), B("foo\tbar")); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn trim(&self) -> &[u8] { + self.trim_start().trim_end() + } + + /// Return a byte string slice with leading whitespace removed. + /// + /// Whitespace is defined according to the terms of the `White_Space` + /// Unicode property. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B(" foo\tbar\t\u{2003}\n"); + /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n")); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn trim_start(&self) -> &[u8] { + let start = whitespace_len_fwd(self.as_bytes()); + &self.as_bytes()[start..] + } + + /// Return a byte string slice with trailing whitespace removed. + /// + /// Whitespace is defined according to the terms of the `White_Space` + /// Unicode property. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B(" foo\tbar\t\u{2003}\n"); + /// assert_eq!(s.trim_end(), B(" foo\tbar")); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn trim_end(&self) -> &[u8] { + let end = whitespace_len_rev(self.as_bytes()); + &self.as_bytes()[..end] + } + + /// Return a byte string slice with leading and trailing characters + /// satisfying the given predicate removed. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = b"123foo5bar789"; + /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar")); + /// ``` + #[inline] + fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] { + self.trim_start_with(&mut trim).trim_end_with(&mut trim) + } + + /// Return a byte string slice with leading characters satisfying the given + /// predicate removed. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = b"123foo5bar789"; + /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789")); + /// ``` + #[inline] + fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] { + for (s, _, ch) in self.char_indices() { + if !trim(ch) { + return &self.as_bytes()[s..]; + } + } + b"" + } + + /// Return a byte string slice with trailing characters satisfying the + /// given predicate removed. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = b"123foo5bar789"; + /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar")); + /// ``` + #[inline] + fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] { + for (_, e, ch) in self.char_indices().rev() { + if !trim(ch) { + return &self.as_bytes()[..e]; + } + } + b"" + } + + /// Returns a new `Vec<u8>` containing the lowercase equivalent of this + /// byte string. 
+ /// + /// In this case, lowercase is defined according to the `Lowercase` Unicode + /// property. + /// + /// If invalid UTF-8 is seen, or if a character has no lowercase variant, + /// then it is written to the given buffer unchanged. + /// + /// Note that some characters in this byte string may expand into multiple + /// characters when changing the case, so the number of bytes written to + /// the given byte string may not be equivalent to the number of bytes in + /// this byte string. + /// + /// If you'd like to reuse an allocation for performance reasons, then use + /// [`to_lowercase_into`](#method.to_lowercase_into) instead. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B("HELLO Β"); + /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes()); + /// ``` + /// + /// Scripts without case are not changed: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B("农历新年"); + /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes()); + /// ``` + /// + /// Invalid UTF-8 remains as is: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ"); + /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes()); + /// ``` + #[cfg(all(feature = "std", feature = "unicode"))] + #[inline] + fn to_lowercase(&self) -> Vec<u8> { + let mut buf = vec![]; + self.to_lowercase_into(&mut buf); + buf + } + + /// Writes the lowercase equivalent of this byte string into the given + /// buffer. The buffer is not cleared before written to. + /// + /// In this case, lowercase is defined according to the `Lowercase` + /// Unicode property. + /// + /// If invalid UTF-8 is seen, or if a character has no lowercase variant, + /// then it is written to the given buffer unchanged. 
+ /// + /// Note that some characters in this byte string may expand into multiple + /// characters when changing the case, so the number of bytes written to + /// the given byte string may not be equivalent to the number of bytes in + /// this byte string. + /// + /// If you don't need to amortize allocation and instead prefer + /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B("HELLO Β"); + /// + /// let mut buf = vec![]; + /// s.to_lowercase_into(&mut buf); + /// assert_eq!("hello β".as_bytes(), buf.as_bytes()); + /// ``` + /// + /// Scripts without case are not changed: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B("农历新年"); + /// + /// let mut buf = vec![]; + /// s.to_lowercase_into(&mut buf); + /// assert_eq!("农历新年".as_bytes(), buf.as_bytes()); + /// ``` + /// + /// Invalid UTF-8 remains as is: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ"); + /// + /// let mut buf = vec![]; + /// s.to_lowercase_into(&mut buf); + /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes()); + /// ``` + #[cfg(all(feature = "std", feature = "unicode"))] + #[inline] + fn to_lowercase_into(&self, buf: &mut Vec<u8>) { + // TODO: This is the best we can do given what std exposes I think. + // If we roll our own case handling, then we might be able to do this + // a bit faster. We shouldn't roll our own case handling unless we + // need to, e.g., for doing caseless matching or case folding. + + // TODO(BUG): This doesn't handle any special casing rules. 
+ + buf.reserve(self.as_bytes().len()); + for (s, e, ch) in self.char_indices() { + if ch == '\u{FFFD}' { + buf.push_str(&self.as_bytes()[s..e]); + } else if ch.is_ascii() { + buf.push_char(ch.to_ascii_lowercase()); + } else { + for upper in ch.to_lowercase() { + buf.push_char(upper); + } + } + } + } + + /// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of + /// this byte string. + /// + /// In this case, lowercase is only defined in ASCII letters. Namely, the + /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged. + /// In particular, the length of the byte string returned is always + /// equivalent to the length of this byte string. + /// + /// If you'd like to reuse an allocation for performance reasons, then use + /// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform + /// the conversion in place. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B("HELLO Β"); + /// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes()); + /// ``` + /// + /// Invalid UTF-8 remains as is: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ"); + /// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz")); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn to_ascii_lowercase(&self) -> Vec<u8> { + self.as_bytes().to_ascii_lowercase() + } + + /// Convert this byte string to its lowercase ASCII equivalent in place. + /// + /// In this case, lowercase is only defined in ASCII letters. Namely, the + /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged. + /// + /// If you don't need to do the conversion in + /// place and instead prefer convenience, then use + /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut s = <Vec<u8>>::from("HELLO Β"); + /// s.make_ascii_lowercase(); + /// assert_eq!(s, "hello Β".as_bytes()); + /// ``` + /// + /// Invalid UTF-8 remains as is: + /// + /// ``` + /// use bstr::{B, ByteSlice, ByteVec}; + /// + /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ"); + /// s.make_ascii_lowercase(); + /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz")); + /// ``` + #[inline] + fn make_ascii_lowercase(&mut self) { + self.as_bytes_mut().make_ascii_lowercase(); + } + + /// Returns a new `Vec<u8>` containing the uppercase equivalent of this + /// byte string. + /// + /// In this case, uppercase is defined according to the `Uppercase` + /// Unicode property. + /// + /// If invalid UTF-8 is seen, or if a character has no uppercase variant, + /// then it is written to the given buffer unchanged. + /// + /// Note that some characters in this byte string may expand into multiple + /// characters when changing the case, so the number of bytes written to + /// the given byte string may not be equivalent to the number of bytes in + /// this byte string. + /// + /// If you'd like to reuse an allocation for performance reasons, then use + /// [`to_uppercase_into`](#method.to_uppercase_into) instead. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B("hello β"); + /// assert_eq!(s.to_uppercase(), B("HELLO Β")); + /// ``` + /// + /// Scripts without case are not changed: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B("农历新年"); + /// assert_eq!(s.to_uppercase(), B("农历新年")); + /// ``` + /// + /// Invalid UTF-8 remains as is: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B(b"foo\xFFbar\xE2\x98baz"); + /// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ")); + /// ``` + #[cfg(all(feature = "std", feature = "unicode"))] + #[inline] + fn to_uppercase(&self) -> Vec<u8> { + let mut buf = vec![]; + self.to_uppercase_into(&mut buf); + buf + } + + /// Writes the uppercase equivalent of this byte string into the given + /// buffer. The buffer is not cleared before written to. + /// + /// In this case, uppercase is defined according to the `Uppercase` + /// Unicode property. + /// + /// If invalid UTF-8 is seen, or if a character has no uppercase variant, + /// then it is written to the given buffer unchanged. + /// + /// Note that some characters in this byte string may expand into multiple + /// characters when changing the case, so the number of bytes written to + /// the given byte string may not be equivalent to the number of bytes in + /// this byte string. + /// + /// If you don't need to amortize allocation and instead prefer + /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B("hello β"); + /// + /// let mut buf = vec![]; + /// s.to_uppercase_into(&mut buf); + /// assert_eq!(buf, B("HELLO Β")); + /// ``` + /// + /// Scripts without case are not changed: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B("农历新年"); + /// + /// let mut buf = vec![]; + /// s.to_uppercase_into(&mut buf); + /// assert_eq!(buf, B("农历新年")); + /// ``` + /// + /// Invalid UTF-8 remains as is: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B(b"foo\xFFbar\xE2\x98baz"); + /// + /// let mut buf = vec![]; + /// s.to_uppercase_into(&mut buf); + /// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ")); + /// ``` + #[cfg(all(feature = "std", feature = "unicode"))] + #[inline] + fn to_uppercase_into(&self, buf: &mut Vec<u8>) { + // TODO: This is the best we can do given what std exposes I think. + // If we roll our own case handling, then we might be able to do this + // a bit faster. We shouldn't roll our own case handling unless we + // need to, e.g., for doing caseless matching or case folding. + buf.reserve(self.as_bytes().len()); + for (s, e, ch) in self.char_indices() { + if ch == '\u{FFFD}' { + buf.push_str(&self.as_bytes()[s..e]); + } else if ch.is_ascii() { + buf.push_char(ch.to_ascii_uppercase()); + } else { + for upper in ch.to_uppercase() { + buf.push_char(upper); + } + } + } + } + + /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of + /// this byte string. + /// + /// In this case, uppercase is only defined in ASCII letters. Namely, the + /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged. + /// In particular, the length of the byte string returned is always + /// equivalent to the length of this byte string. 
+ /// + /// If you'd like to reuse an allocation for performance reasons, then use + /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform + /// the conversion in place. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B("hello β"); + /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β")); + /// ``` + /// + /// Invalid UTF-8 remains as is: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let s = B(b"foo\xFFbar\xE2\x98baz"); + /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ")); + /// ``` + #[cfg(feature = "std")] + #[inline] + fn to_ascii_uppercase(&self) -> Vec<u8> { + self.as_bytes().to_ascii_uppercase() + } + + /// Convert this byte string to its uppercase ASCII equivalent in place. + /// + /// In this case, uppercase is only defined in ASCII letters. Namely, the + /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged. + /// + /// If you don't need to do the conversion in + /// place and instead prefer convenience, then use + /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let mut s = <Vec<u8>>::from("hello β"); + /// s.make_ascii_uppercase(); + /// assert_eq!(s, B("HELLO β")); + /// ``` + /// + /// Invalid UTF-8 remains as is: + /// + /// ``` + /// use bstr::{B, ByteSlice, ByteVec}; + /// + /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz"); + /// s.make_ascii_uppercase(); + /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ")); + /// ``` + #[inline] + fn make_ascii_uppercase(&mut self) { + self.as_bytes_mut().make_ascii_uppercase(); + } + + /// Reverse the bytes in this string, in place. + /// + /// This is not necessarily a well formed operation! 
For example, if this + /// byte string contains valid UTF-8 that isn't ASCII, then reversing the + /// string will likely result in invalid UTF-8 and otherwise non-sensical + /// content. + /// + /// Note that this is equivalent to the generic `[u8]::reverse` method. + /// This method is provided to permit callers to explicitly differentiate + /// between reversing bytes, codepoints and graphemes. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut s = <Vec<u8>>::from("hello"); + /// s.reverse_bytes(); + /// assert_eq!(s, "olleh".as_bytes()); + /// ``` + #[inline] + fn reverse_bytes(&mut self) { + self.as_bytes_mut().reverse(); + } + + /// Reverse the codepoints in this string, in place. + /// + /// If this byte string is valid UTF-8, then its reversal by codepoint + /// is also guaranteed to be valid UTF-8. + /// + /// This operation is equivalent to the following, but without allocating: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut s = <Vec<u8>>::from("foo☃bar"); + /// + /// let mut chars: Vec<char> = s.chars().collect(); + /// chars.reverse(); + /// + /// let reversed: String = chars.into_iter().collect(); + /// assert_eq!(reversed, "rab☃oof"); + /// ``` + /// + /// Note that this is not necessarily a well formed operation. For example, + /// if this byte string contains grapheme clusters with more than one + /// codepoint, then those grapheme clusters will not necessarily be + /// preserved. If you'd like to preserve grapheme clusters, then use + /// [`reverse_graphemes`](#method.reverse_graphemes) instead. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut s = <Vec<u8>>::from("foo☃bar"); + /// s.reverse_chars(); + /// assert_eq!(s, "rab☃oof".as_bytes()); + /// ``` + /// + /// This example shows that not all reversals lead to a well formed string. 
+ /// For example, in this case, combining marks are used to put accents over + /// some letters, and those accent marks must appear after the codepoints + /// they modify. + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let mut s = <Vec<u8>>::from("résumé"); + /// s.reverse_chars(); + /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er")); + /// ``` + /// + /// A word of warning: the above example relies on the fact that + /// `résumé` is in decomposed normal form, which means there are separate + /// codepoints for the accents above `e`. If it is instead in composed + /// normal form, then the example works: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let mut s = <Vec<u8>>::from("résumé"); + /// s.reverse_chars(); + /// assert_eq!(s, B("émusér")); + /// ``` + /// + /// The point here is to be cautious and not assume that just because + /// `reverse_chars` works in one case, that it therefore works in all + /// cases. + #[inline] + fn reverse_chars(&mut self) { + let mut i = 0; + loop { + let (_, size) = utf8::decode(&self.as_bytes()[i..]); + if size == 0 { + break; + } + if size > 1 { + self.as_bytes_mut()[i..i + size].reverse_bytes(); + } + i += size; + } + self.reverse_bytes(); + } + + /// Reverse the graphemes in this string, in place. + /// + /// If this byte string is valid UTF-8, then its reversal by grapheme + /// is also guaranteed to be valid UTF-8. 
+ /// + /// This operation is equivalent to the following, but without allocating: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut s = <Vec<u8>>::from("foo☃bar"); + /// + /// let mut graphemes: Vec<&str> = s.graphemes().collect(); + /// graphemes.reverse(); + /// + /// let reversed = graphemes.concat(); + /// assert_eq!(reversed, "rab☃oof"); + /// ``` + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut s = <Vec<u8>>::from("foo☃bar"); + /// s.reverse_graphemes(); + /// assert_eq!(s, "rab☃oof".as_bytes()); + /// ``` + /// + /// This example shows how this correctly handles grapheme clusters, + /// unlike `reverse_chars`. + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut s = <Vec<u8>>::from("résumé"); + /// s.reverse_graphemes(); + /// assert_eq!(s, "émusér".as_bytes()); + /// ``` + #[cfg(feature = "unicode")] + #[inline] + fn reverse_graphemes(&mut self) { + use unicode::decode_grapheme; + + let mut i = 0; + loop { + let (_, size) = decode_grapheme(&self.as_bytes()[i..]); + if size == 0 { + break; + } + if size > 1 { + self.as_bytes_mut()[i..i + size].reverse_bytes(); + } + i += size; + } + self.reverse_bytes(); + } + + /// Returns true if and only if every byte in this byte string is ASCII. + /// + /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to + /// an ASCII codepoint if and only if it is in the inclusive range + /// `[0, 127]`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// assert!(B("abc").is_ascii()); + /// assert!(!B("☃βツ").is_ascii()); + /// assert!(!B(b"\xFF").is_ascii()); + /// ``` + #[inline] + fn is_ascii(&self) -> bool { + ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len() + } + + /// Returns true if and only if the entire byte string is valid UTF-8. 
+ /// + /// If you need location information about where a byte string's first + /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// assert!(B("abc").is_utf8()); + /// assert!(B("☃βツ").is_utf8()); + /// // invalid bytes + /// assert!(!B(b"abc\xFF").is_utf8()); + /// // surrogate encoding + /// assert!(!B(b"\xED\xA0\x80").is_utf8()); + /// // incomplete sequence + /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8()); + /// // overlong sequence + /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8()); + /// ``` + #[inline] + fn is_utf8(&self) -> bool { + utf8::validate(self.as_bytes()).is_ok() + } + + /// Returns the last byte in this byte string, if it's non-empty. If this + /// byte string is empty, this returns `None`. + /// + /// Note that this is like the generic `[u8]::last`, except this returns + /// the byte by value instead of a reference to the byte. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// assert_eq!(Some(b'z'), b"baz".last_byte()); + /// assert_eq!(None, b"".last_byte()); + /// ``` + #[inline] + fn last_byte(&self) -> Option<u8> { + let bytes = self.as_bytes(); + bytes.get(bytes.len().saturating_sub(1)).map(|&b| b) + } + + /// Returns the index of the first non-ASCII byte in this byte string (if + /// any such indices exist). Specifically, it returns the index of the + /// first byte with a value greater than or equal to `0x80`. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{ByteSlice, B}; + /// + /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte()); + /// assert_eq!(None, b"abcde".find_non_ascii_byte()); + /// assert_eq!(Some(0), B("😀").find_non_ascii_byte()); + /// ``` + #[inline] + fn find_non_ascii_byte(&self) -> Option<usize> { + let index = ascii::first_non_ascii_byte(self.as_bytes()); + if index == self.as_bytes().len() { + None + } else { + Some(index) + } + } + + /// Copies elements from one part of the slice to another part of itself, + /// where the parts may be overlapping. + /// + /// `src` is the range within this byte string to copy from, while `dest` + /// is the starting index of the range within this byte string to copy to. + /// The length indicated by `src` must be less than or equal to the number + /// of bytes from `dest` to the end of the byte string. + /// + /// # Panics + /// + /// Panics if either range is out of bounds, or if `src` is too big to fit + /// into `dest`, or if the end of `src` is before the start. + /// + /// # Examples + /// + /// Copying four bytes within a byte string: + /// + /// ``` + /// use bstr::{B, ByteSlice}; + /// + /// let mut buf = *b"Hello, World!"; + /// let s = &mut buf; + /// s.copy_within_str(1..5, 8); + /// assert_eq!(s, B("Hello, Wello!")); + /// ``` + #[inline] + fn copy_within_str<R>(&mut self, src: R, dest: usize) + where + R: ops::RangeBounds<usize>, + { + // TODO: Deprecate this once slice::copy_within stabilizes. 
+ let src_start = match src.start_bound() { + ops::Bound::Included(&n) => n, + ops::Bound::Excluded(&n) => { + n.checked_add(1).expect("attempted to index slice beyond max") + } + ops::Bound::Unbounded => 0, + }; + let src_end = match src.end_bound() { + ops::Bound::Included(&n) => { + n.checked_add(1).expect("attempted to index slice beyond max") + } + ops::Bound::Excluded(&n) => n, + ops::Bound::Unbounded => self.as_bytes().len(), + }; + assert!(src_start <= src_end, "src end is before src start"); + assert!(src_end <= self.as_bytes().len(), "src is out of bounds"); + let count = src_end - src_start; + assert!( + dest <= self.as_bytes().len() - count, + "dest is out of bounds", + ); + + // SAFETY: This is safe because we use ptr::copy to handle overlapping + // copies, and is also safe because we've checked all the bounds above. + // Finally, we are only dealing with u8 data, which is Copy, which + // means we can copy without worrying about ownership/destructors. + unsafe { + ptr::copy( + self.as_bytes().get_unchecked(src_start), + self.as_bytes_mut().get_unchecked_mut(dest), + count, + ); + } + } +} + +/// A single substring searcher fixed to a particular needle. +/// +/// The purpose of this type is to permit callers to construct a substring +/// searcher that can be used to search haystacks without the overhead of +/// constructing the searcher in the first place. This is a somewhat niche +/// concern when it's necessary to re-use the same needle to search multiple +/// different haystacks with as little overhead as possible. In general, using +/// [`ByteSlice::find`](trait.ByteSlice.html#method.find) +/// or +/// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter) +/// is good enough, but `Finder` is useful when you can meaningfully observe +/// searcher construction time in a profile. 
+/// +/// When the `std` feature is enabled, then this type has an `into_owned` +/// version which permits building a `Finder` that is not connected to the +/// lifetime of its needle. +#[derive(Clone, Debug)] +pub struct Finder<'a> { + searcher: TwoWay<'a>, +} + +impl<'a> Finder<'a> { + /// Create a new finder for the given needle. + #[inline] + pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> { + Finder { searcher: TwoWay::forward(needle.as_ref()) } + } + + /// Convert this finder into its owned variant, such that it no longer + /// borrows the needle. + /// + /// If this is already an owned finder, then this is a no-op. Otherwise, + /// this copies the needle. + /// + /// This is only available when the `std` feature is enabled. + #[cfg(feature = "std")] + #[inline] + pub fn into_owned(self) -> Finder<'static> { + Finder { searcher: self.searcher.into_owned() } + } + + /// Returns the needle that this finder searches for. + /// + /// Note that the lifetime of the needle returned is tied to the lifetime + /// of the finder, and may be shorter than the `'a` lifetime. Namely, a + /// finder's needle can be either borrowed or owned, so the lifetime of the + /// needle returned must necessarily be the shorter of the two. + #[inline] + pub fn needle(&self) -> &[u8] { + self.searcher.needle() + } + + /// Returns the index of the first occurrence of this needle in the given + /// haystack. + /// + /// The haystack may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::Finder; + /// + /// let haystack = "foo bar baz"; + /// assert_eq!(Some(0), Finder::new("foo").find(haystack)); + /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); + /// assert_eq!(None, Finder::new("quux").find(haystack)); + /// ``` + #[inline] + pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { + self.searcher.find(haystack.as_ref()) + } +} + +/// A single substring reverse searcher fixed to a particular needle. +/// +/// The purpose of this type is to permit callers to construct a substring +/// searcher that can be used to search haystacks without the overhead of +/// constructing the searcher in the first place. This is a somewhat niche +/// concern when it's necessary to re-use the same needle to search multiple +/// different haystacks with as little overhead as possible. In general, using +/// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind) +/// or +/// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter) +/// is good enough, but `FinderReverse` is useful when you can meaningfully +/// observe searcher construction time in a profile. +/// +/// When the `std` feature is enabled, then this type has an `into_owned` +/// version which permits building a `FinderReverse` that is not connected to +/// the lifetime of its needle. +#[derive(Clone, Debug)] +pub struct FinderReverse<'a> { + searcher: TwoWay<'a>, +} + +impl<'a> FinderReverse<'a> { + /// Create a new reverse finder for the given needle. + #[inline] + pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> { + FinderReverse { searcher: TwoWay::reverse(needle.as_ref()) } + } + + /// Convert this finder into its owned variant, such that it no longer + /// borrows the needle. + /// + /// If this is already an owned finder, then this is a no-op. Otherwise, + /// this copies the needle. + /// + /// This is only available when the `std` feature is enabled. 
+ #[cfg(feature = "std")] + #[inline] + pub fn into_owned(self) -> FinderReverse<'static> { + FinderReverse { searcher: self.searcher.into_owned() } + } + + /// Returns the needle that this finder searches for. + /// + /// Note that the lifetime of the needle returned is tied to the lifetime + /// of this finder, and may be shorter than the `'a` lifetime. Namely, + /// a finder's needle can be either borrowed or owned, so the lifetime of + /// the needle returned must necessarily be the shorter of the two. + #[inline] + pub fn needle(&self) -> &[u8] { + self.searcher.needle() + } + + /// Returns the index of the last occurrence of this needle in the given + /// haystack. + /// + /// The haystack may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::FinderReverse; + /// + /// let haystack = "foo bar baz"; + /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack)); + /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack)); + /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack)); + /// ``` + #[inline] + pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { + self.searcher.rfind(haystack.as_ref()) + } +} + +/// An iterator over non-overlapping substring matches. +/// +/// Matches are reported by the byte offset at which they begin. +/// +/// `'a` is the shorter of two lifetimes: the byte string being searched or the +/// byte string being looked for. 
+#[derive(Debug)] +pub struct Find<'a> { + haystack: &'a [u8], + prestate: PrefilterState, + searcher: TwoWay<'a>, + pos: usize, +} + +impl<'a> Find<'a> { + fn new(haystack: &'a [u8], needle: &'a [u8]) -> Find<'a> { + let searcher = TwoWay::forward(needle); + let prestate = searcher.prefilter_state(); + Find { haystack, prestate, searcher, pos: 0 } + } +} + +impl<'a> Iterator for Find<'a> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + if self.pos > self.haystack.len() { + return None; + } + let result = self + .searcher + .find_with(&mut self.prestate, &self.haystack[self.pos..]); + match result { + None => None, + Some(i) => { + let pos = self.pos + i; + self.pos = pos + cmp::max(1, self.searcher.needle().len()); + Some(pos) + } + } + } +} + +/// An iterator over non-overlapping substring matches in reverse. +/// +/// Matches are reported by the byte offset at which they begin. +/// +/// `'a` is the shorter of two lifetimes: the byte string being searched or the +/// byte string being looked for. +#[derive(Debug)] +pub struct FindReverse<'a> { + haystack: &'a [u8], + prestate: PrefilterState, + searcher: TwoWay<'a>, + /// When searching with an empty needle, this gets set to `None` after + /// we've yielded the last element at `0`. 
+ pos: Option<usize>, +} + +impl<'a> FindReverse<'a> { + fn new(haystack: &'a [u8], needle: &'a [u8]) -> FindReverse<'a> { + let searcher = TwoWay::reverse(needle); + let prestate = searcher.prefilter_state(); + let pos = Some(haystack.len()); + FindReverse { haystack, prestate, searcher, pos } + } + + fn haystack(&self) -> &'a [u8] { + self.haystack + } + + fn needle(&self) -> &[u8] { + self.searcher.needle() + } +} + +impl<'a> Iterator for FindReverse<'a> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + let pos = match self.pos { + None => return None, + Some(pos) => pos, + }; + let result = self + .searcher + .rfind_with(&mut self.prestate, &self.haystack[..pos]); + match result { + None => None, + Some(i) => { + if pos == i { + self.pos = pos.checked_sub(1); + } else { + self.pos = Some(i); + } + Some(i) + } + } + } +} + +/// An iterator over the bytes in a byte string. +/// +/// `'a` is the lifetime of the byte string being traversed. +#[derive(Clone, Debug)] +pub struct Bytes<'a> { + it: slice::Iter<'a, u8>, +} + +impl<'a> Bytes<'a> { + /// Views the remaining underlying data as a subslice of the original data. + /// This has the same lifetime as the original slice, + /// and so the iterator can continue to be used while this exists. + #[inline] + pub fn as_slice(&self) -> &'a [u8] { + self.it.as_slice() + } +} + +impl<'a> Iterator for Bytes<'a> { + type Item = u8; + + #[inline] + fn next(&mut self) -> Option<u8> { + self.it.next().map(|&b| b) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a> DoubleEndedIterator for Bytes<'a> { + #[inline] + fn next_back(&mut self) -> Option<u8> { + self.it.next_back().map(|&b| b) + } +} + +impl<'a> ExactSizeIterator for Bytes<'a> { + #[inline] + fn len(&self) -> usize { + self.it.len() + } +} + +impl<'a> iter::FusedIterator for Bytes<'a> {} + +/// An iterator over the fields in a byte string, separated by whitespace. 
+/// +/// This iterator splits on contiguous runs of whitespace, such that the fields +/// in `foo\t\t\n \nbar` are `foo` and `bar`. +/// +/// `'a` is the lifetime of the byte string being split. +#[derive(Debug)] +pub struct Fields<'a> { + it: FieldsWith<'a, fn(char) -> bool>, +} + +impl<'a> Fields<'a> { + fn new(bytes: &'a [u8]) -> Fields<'a> { + Fields { it: bytes.fields_with(|ch| ch.is_whitespace()) } + } +} + +impl<'a> Iterator for Fields<'a> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option<&'a [u8]> { + self.it.next() + } +} + +/// An iterator over fields in the byte string, separated by a predicate over +/// codepoints. +/// +/// This iterator splits a byte string based on its predicate function such +/// that the elements returned are separated by contiguous runs of codepoints +/// for which the predicate returns true. +/// +/// `'a` is the lifetime of the byte string being split, while `F` is the type +/// of the predicate, i.e., `FnMut(char) -> bool`. +#[derive(Debug)] +pub struct FieldsWith<'a, F> { + f: F, + bytes: &'a [u8], + chars: CharIndices<'a>, +} + +impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> { + fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> { + FieldsWith { f, bytes, chars: bytes.char_indices() } + } +} + +impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option<&'a [u8]> { + let (start, mut end); + loop { + match self.chars.next() { + None => return None, + Some((s, e, ch)) => { + if !(self.f)(ch) { + start = s; + end = e; + break; + } + } + } + } + while let Some((_, e, ch)) = self.chars.next() { + if (self.f)(ch) { + break; + } + end = e; + } + Some(&self.bytes[start..end]) + } +} + +/// An iterator over substrings in a byte string, split by a separator. +/// +/// `'a` is the lifetime of the byte string being split. 
+#[derive(Debug)] +pub struct Split<'a> { + finder: Find<'a>, + /// The end position of the previous match of our splitter. The element + /// we yield corresponds to the substring starting at `last` up to the + /// beginning of the next match of the splitter. + last: usize, + /// Only set when iteration is complete. A corner case here is when a + /// splitter is matched at the end of the haystack. At that point, we still + /// need to yield an empty string following it. + done: bool, +} + +impl<'a> Split<'a> { + fn new(haystack: &'a [u8], splitter: &'a [u8]) -> Split<'a> { + let finder = haystack.find_iter(splitter); + Split { finder, last: 0, done: false } + } +} + +impl<'a> Iterator for Split<'a> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option<&'a [u8]> { + let haystack = self.finder.haystack; + match self.finder.next() { + Some(start) => { + let next = &haystack[self.last..start]; + self.last = start + self.finder.searcher.needle().len(); + Some(next) + } + None => { + if self.last >= haystack.len() { + if !self.done { + self.done = true; + Some(b"") + } else { + None + } + } else { + let s = &haystack[self.last..]; + self.last = haystack.len(); + self.done = true; + Some(s) + } + } + } + } +} + +/// An iterator over substrings in a byte string, split by a separator, in +/// reverse. +/// +/// `'a` is the lifetime of the byte string being split, while `F` is the type +/// of the predicate, i.e., `FnMut(char) -> bool`. +#[derive(Debug)] +pub struct SplitReverse<'a> { + finder: FindReverse<'a>, + /// The end position of the previous match of our splitter. The element + /// we yield corresponds to the substring starting at `last` up to the + /// beginning of the next match of the splitter. + last: usize, + /// Only set when iteration is complete. A corner case here is when a + /// splitter is matched at the end of the haystack. At that point, we still + /// need to yield an empty string following it. 
+ done: bool, +} + +impl<'a> SplitReverse<'a> { + fn new(haystack: &'a [u8], splitter: &'a [u8]) -> SplitReverse<'a> { + let finder = haystack.rfind_iter(splitter); + SplitReverse { finder, last: haystack.len(), done: false } + } +} + +impl<'a> Iterator for SplitReverse<'a> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option<&'a [u8]> { + let haystack = self.finder.haystack(); + match self.finder.next() { + Some(start) => { + let nlen = self.finder.needle().len(); + let next = &haystack[start + nlen..self.last]; + self.last = start; + Some(next) + } + None => { + if self.last == 0 { + if !self.done { + self.done = true; + Some(b"") + } else { + None + } + } else { + let s = &haystack[..self.last]; + self.last = 0; + self.done = true; + Some(s) + } + } + } + } +} + +/// An iterator over at most `n` substrings in a byte string, split by a +/// separator. +/// +/// `'a` is the lifetime of the byte string being split, while `F` is the type +/// of the predicate, i.e., `FnMut(char) -> bool`. +#[derive(Debug)] +pub struct SplitN<'a> { + split: Split<'a>, + limit: usize, + count: usize, +} + +impl<'a> SplitN<'a> { + fn new( + haystack: &'a [u8], + splitter: &'a [u8], + limit: usize, + ) -> SplitN<'a> { + let split = haystack.split_str(splitter); + SplitN { split, limit, count: 0 } + } +} + +impl<'a> Iterator for SplitN<'a> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option<&'a [u8]> { + self.count += 1; + if self.count > self.limit || self.split.done { + None + } else if self.count == self.limit { + Some(&self.split.finder.haystack[self.split.last..]) + } else { + self.split.next() + } + } +} + +/// An iterator over at most `n` substrings in a byte string, split by a +/// separator, in reverse. +/// +/// `'a` is the lifetime of the byte string being split, while `F` is the type +/// of the predicate, i.e., `FnMut(char) -> bool`. 
+#[derive(Debug)] +pub struct SplitNReverse<'a> { + split: SplitReverse<'a>, + limit: usize, + count: usize, +} + +impl<'a> SplitNReverse<'a> { + fn new( + haystack: &'a [u8], + splitter: &'a [u8], + limit: usize, + ) -> SplitNReverse<'a> { + let split = haystack.rsplit_str(splitter); + SplitNReverse { split, limit, count: 0 } + } +} + +impl<'a> Iterator for SplitNReverse<'a> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option<&'a [u8]> { + self.count += 1; + if self.count > self.limit || self.split.done { + None + } else if self.count == self.limit { + Some(&self.split.finder.haystack()[..self.split.last]) + } else { + self.split.next() + } + } +} + +/// An iterator over all lines in a byte string, without their terminators. +/// +/// For this iterator, the only line terminators recognized are `\r\n` and +/// `\n`. +/// +/// `'a` is the lifetime of the byte string being iterated over. +pub struct Lines<'a> { + it: LinesWithTerminator<'a>, +} + +impl<'a> Lines<'a> { + fn new(bytes: &'a [u8]) -> Lines<'a> { + Lines { it: LinesWithTerminator::new(bytes) } + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option<&'a [u8]> { + let mut line = self.it.next()?; + if line.last_byte() == Some(b'\n') { + line = &line[..line.len() - 1]; + if line.last_byte() == Some(b'\r') { + line = &line[..line.len() - 1]; + } + } + Some(line) + } +} + +/// An iterator over all lines in a byte string, including their terminators. +/// +/// For this iterator, the only line terminator recognized is `\n`. (Since +/// line terminators are included, this also handles `\r\n` line endings.) +/// +/// Line terminators are only included if they are present in the original +/// byte string. For example, the last line in a byte string may not end with +/// a line terminator. +/// +/// Concatenating all elements yielded by this iterator is guaranteed to yield +/// the original byte string. 
+/// +/// `'a` is the lifetime of the byte string being iterated over. +pub struct LinesWithTerminator<'a> { + bytes: &'a [u8], +} + +impl<'a> LinesWithTerminator<'a> { + fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> { + LinesWithTerminator { bytes } + } +} + +impl<'a> Iterator for LinesWithTerminator<'a> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option<&'a [u8]> { + match self.bytes.find_byte(b'\n') { + None if self.bytes.is_empty() => None, + None => { + let line = self.bytes; + self.bytes = b""; + Some(line) + } + Some(end) => { + let line = &self.bytes[..end + 1]; + self.bytes = &self.bytes[end + 1..]; + Some(line) + } + } + } +} + +#[cfg(test)] +mod tests { + use ext_slice::{ByteSlice, B}; + use tests::LOSSY_TESTS; + + #[test] + fn to_str_lossy() { + for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() { + let got = B(input).to_str_lossy(); + assert_eq!( + expected.as_bytes(), + got.as_bytes(), + "to_str_lossy(ith: {:?}, given: {:?})", + i, + input, + ); + + let mut got = String::new(); + B(input).to_str_lossy_into(&mut got); + assert_eq!( + expected.as_bytes(), + got.as_bytes(), + "to_str_lossy_into", + ); + + let got = String::from_utf8_lossy(input); + assert_eq!(expected.as_bytes(), got.as_bytes(), "std"); + } + } + + #[test] + #[should_panic] + fn copy_within_fail1() { + let mut buf = *b"foobar"; + let s = &mut buf; + s.copy_within_str(0..2, 5); + } + + #[test] + #[should_panic] + fn copy_within_fail2() { + let mut buf = *b"foobar"; + let s = &mut buf; + s.copy_within_str(3..2, 0); + } + + #[test] + #[should_panic] + fn copy_within_fail3() { + let mut buf = *b"foobar"; + let s = &mut buf; + s.copy_within_str(5..7, 0); + } + + #[test] + #[should_panic] + fn copy_within_fail4() { + let mut buf = *b"foobar"; + let s = &mut buf; + s.copy_within_str(0..1, 6); + } +} diff --git a/src/ext_vec.rs b/src/ext_vec.rs new file mode 100644 index 0000000..6f6db56 --- /dev/null +++ b/src/ext_vec.rs @@ -0,0 +1,1108 @@ 
+#![allow(unused_imports)] + +use std::borrow::Cow; +use std::error; +use std::ffi::{OsStr, OsString}; +use std::fmt; +use std::iter; +use std::ops; +use std::path::{Path, PathBuf}; +use std::ptr; +use std::str; +use std::vec; + +use ext_slice::ByteSlice; +use utf8::{self, Utf8Error}; + +/// Concatenate the elements given by the iterator together into a single +/// `Vec<u8>`. +/// +/// The elements may be any type that can be cheaply converted into an `&[u8]`. +/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use bstr; +/// +/// let s = bstr::concat(&["foo", "bar", "baz"]); +/// assert_eq!(s, "foobarbaz".as_bytes()); +/// ``` +#[inline] +pub fn concat<T, I>(elements: I) -> Vec<u8> +where + T: AsRef<[u8]>, + I: IntoIterator<Item = T>, +{ + let mut dest = vec![]; + for element in elements { + dest.push_str(element); + } + dest +} + +/// Join the elements given by the iterator with the given separator into a +/// single `Vec<u8>`. +/// +/// Both the separator and the elements may be any type that can be cheaply +/// converted into an `&[u8]`. This includes, but is not limited to, +/// `&str`, `&BStr` and `&[u8]` itself. 
+/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use bstr; +/// +/// let s = bstr::join(",", &["foo", "bar", "baz"]); +/// assert_eq!(s, "foo,bar,baz".as_bytes()); +/// ``` +#[inline] +pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8> +where + B: AsRef<[u8]>, + T: AsRef<[u8]>, + I: IntoIterator<Item = T>, +{ + let mut it = elements.into_iter(); + let mut dest = vec![]; + match it.next() { + None => return dest, + Some(first) => { + dest.push_str(first); + } + } + for element in it { + dest.push_str(&separator); + dest.push_str(element); + } + dest +} + +impl ByteVec for Vec<u8> { + #[inline] + fn as_vec(&self) -> &Vec<u8> { + self + } + + #[inline] + fn as_vec_mut(&mut self) -> &mut Vec<u8> { + self + } + + #[inline] + fn into_vec(self) -> Vec<u8> { + self + } +} + +/// Ensure that callers cannot implement `ByteSlice` by making an +/// umplementable trait its super trait. +pub trait Sealed {} +impl Sealed for Vec<u8> {} + +/// A trait that extends `Vec<u8>` with string oriented methods. +/// +/// Note that when using the constructor methods, such as +/// `ByteVec::from_slice`, one should actually call them using the concrete +/// type. For example: +/// +/// ``` +/// use bstr::{B, ByteVec}; +/// +/// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...") +/// assert_eq!(s, B("abc")); +/// ``` +pub trait ByteVec: Sealed { + /// A method for accessing the raw vector bytes of this type. This is + /// always a no-op and callers shouldn't care about it. This only exists + /// for making the extension trait work. + #[doc(hidden)] + fn as_vec(&self) -> &Vec<u8>; + + /// A method for accessing the raw vector bytes of this type, mutably. This + /// is always a no-op and callers shouldn't care about it. This only exists + /// for making the extension trait work. + #[doc(hidden)] + fn as_vec_mut(&mut self) -> &mut Vec<u8>; + + /// A method for consuming ownership of this vector. 
This is always a no-op + /// and callers shouldn't care about it. This only exists for making the + /// extension trait work. + #[doc(hidden)] + fn into_vec(self) -> Vec<u8> + where + Self: Sized; + + /// Create a new owned byte string from the given byte slice. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteVec}; + /// + /// let s = Vec::from_slice(b"abc"); + /// assert_eq!(s, B("abc")); + /// ``` + #[inline] + fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> { + bytes.as_ref().to_vec() + } + + /// Create a new byte string from an owned OS string. + /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this returns the original OS string if it is not valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::ffi::OsString; + /// + /// use bstr::{B, ByteVec}; + /// + /// let os_str = OsString::from("foo"); + /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8"); + /// assert_eq!(bs, B("foo")); + /// ``` + #[inline] + fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> { + #[cfg(unix)] + #[inline] + fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> { + use std::os::unix::ffi::OsStringExt; + + Ok(Vec::from(os_str.into_vec())) + } + + #[cfg(not(unix))] + #[inline] + fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> { + os_str.into_string().map(Vec::from) + } + + imp(os_str) + } + + /// Lossily create a new byte string from an OS string slice. + /// + /// On Unix, this always succeeds, is zero cost and always returns a slice. + /// On non-Unix systems, this does a UTF-8 check. If the given OS string + /// slice is not valid UTF-8, then it is lossily decoded into valid UTF-8 + /// (with invalid bytes replaced by the Unicode replacement codepoint). 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// use bstr::{B, ByteVec}; + /// + /// let os_str = OsStr::new("foo"); + /// let bs = Vec::from_os_str_lossy(os_str); + /// assert_eq!(bs, B("foo")); + /// ``` + #[inline] + fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> { + #[cfg(unix)] + #[inline] + fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> { + use std::os::unix::ffi::OsStrExt; + + Cow::Borrowed(os_str.as_bytes()) + } + + #[cfg(not(unix))] + #[inline] + fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> { + match os_str.to_string_lossy() { + Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()), + Cow::Owned(x) => Cow::Owned(Vec::from(x)), + } + } + + imp(os_str) + } + + /// Create a new byte string from an owned file path. + /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this returns the original path if it is not valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::path::PathBuf; + /// + /// use bstr::{B, ByteVec}; + /// + /// let path = PathBuf::from("foo"); + /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8"); + /// assert_eq!(bs, B("foo")); + /// ``` + #[inline] + fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> { + Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from) + } + + /// Lossily create a new byte string from a file path. + /// + /// On Unix, this always succeeds, is zero cost and always returns a slice. + /// On non-Unix systems, this does a UTF-8 check. If the given path is not + /// valid UTF-8, then it is lossily decoded into valid UTF-8 (with invalid + /// bytes replaced by the Unicode replacement codepoint). 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::path::Path; + /// + /// use bstr::{B, ByteVec}; + /// + /// let path = Path::new("foo"); + /// let bs = Vec::from_path_lossy(path); + /// assert_eq!(bs, B("foo")); + /// ``` + #[inline] + fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> { + Vec::from_os_str_lossy(path.as_os_str()) + } + + /// Appends the given byte to the end of this byte string. + /// + /// Note that this is equivalent to the generic `Vec::push` method. This + /// method is provided to permit callers to explicitly differentiate + /// between pushing bytes, codepoints and strings. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = <Vec<u8>>::from("abc"); + /// s.push_byte(b'\xE2'); + /// s.push_byte(b'\x98'); + /// s.push_byte(b'\x83'); + /// assert_eq!(s, "abc☃".as_bytes()); + /// ``` + #[inline] + fn push_byte(&mut self, byte: u8) { + self.as_vec_mut().push(byte); + } + + /// Appends the given `char` to the end of this byte string. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = <Vec<u8>>::from("abc"); + /// s.push_char('1'); + /// s.push_char('2'); + /// s.push_char('3'); + /// assert_eq!(s, "abc123".as_bytes()); + /// ``` + #[inline] + fn push_char(&mut self, ch: char) { + if ch.len_utf8() == 1 { + self.push_byte(ch as u8); + return; + } + self.as_vec_mut() + .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()); + } + + /// Appends the given slice to the end of this byte string. This accepts + /// any type that be converted to a `&[u8]`. This includes, but is not + /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = <Vec<u8>>::from("abc"); + /// s.push_str(b"123"); + /// assert_eq!(s, "abc123".as_bytes()); + /// ``` + #[inline] + fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) { + self.as_vec_mut().extend_from_slice(bytes.as_ref()); + } + + /// Converts a `Vec<u8>` into a `String` if and only if this byte string is + /// valid UTF-8. + /// + /// If it is not valid UTF-8, then a + /// [`FromUtf8Error`](struct.FromUtf8Error.html) + /// is returned. (This error can be used to examine why UTF-8 validation + /// failed, or to regain the original byte string.) + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// # fn example() -> Result<(), Box<dyn std::error::Error>> { + /// let bytes = Vec::from("hello"); + /// let string = bytes.into_string()?; + /// + /// assert_eq!("hello", string); + /// # Ok(()) }; example().unwrap() + /// ``` + /// + /// If this byte string is not valid UTF-8, then an error will be returned. + /// That error can then be used to inspect the location at which invalid + /// UTF-8 was found, or to regain the original byte string: + /// + /// ``` + /// use bstr::{B, ByteVec}; + /// + /// let bytes = Vec::from_slice(b"foo\xFFbar"); + /// let err = bytes.into_string().unwrap_err(); + /// + /// assert_eq!(err.utf8_error().valid_up_to(), 3); + /// assert_eq!(err.utf8_error().error_len(), Some(1)); + /// + /// // At no point in this example is an allocation performed. + /// let bytes = Vec::from(err.into_vec()); + /// assert_eq!(bytes, B(b"foo\xFFbar")); + /// ``` + #[inline] + fn into_string(self) -> Result<String, FromUtf8Error> + where + Self: Sized, + { + match utf8::validate(self.as_vec()) { + Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }), + Ok(()) => { + // SAFETY: This is safe because of the guarantees provided by + // utf8::validate. 
+ unsafe { Ok(self.into_string_unchecked()) } + } + } + } + + /// Lossily converts a `Vec<u8>` into a `String`. If this byte string + /// contains invalid UTF-8, then the invalid bytes are replaced with the + /// Unicode replacement codepoint. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let bytes = Vec::from_slice(b"foo\xFFbar"); + /// let string = bytes.into_string_lossy(); + /// assert_eq!(string, "foo\u{FFFD}bar"); + /// ``` + #[inline] + fn into_string_lossy(self) -> String + where + Self: Sized, + { + match self.as_vec().to_str_lossy() { + Cow::Borrowed(_) => { + // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees + // the entire string is valid utf8. + unsafe { self.into_string_unchecked() } + } + Cow::Owned(s) => s, + } + } + + /// Unsafely convert this byte string into a `String`, without checking for + /// valid UTF-8. + /// + /// # Safety + /// + /// Callers *must* ensure that this byte string is valid UTF-8 before + /// calling this method. Converting a byte string into a `String` that is + /// not valid UTF-8 is considered undefined behavior. + /// + /// This routine is useful in performance sensitive contexts where the + /// UTF-8 validity of the byte string is already known and it is + /// undesirable to pay the cost of an additional UTF-8 validation check + /// that [`into_string`](#method.into_string) performs. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// // SAFETY: This is safe because string literals are guaranteed to be + /// // valid UTF-8 by the Rust compiler. + /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() }; + /// assert_eq!("☃βツ", s); + /// ``` + #[inline] + unsafe fn into_string_unchecked(self) -> String + where + Self: Sized, + { + String::from_utf8_unchecked(self.into_vec()) + } + + /// Converts this byte string into an OS string, in place. + /// + /// On Unix, this always succeeds and is zero cost. 
On non-Unix systems, + /// this returns the original byte string if it is not valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// use bstr::ByteVec; + /// + /// let bs = Vec::from("foo"); + /// let os_str = bs.into_os_string().expect("should be valid UTF-8"); + /// assert_eq!(os_str, OsStr::new("foo")); + /// ``` + #[inline] + fn into_os_string(self) -> Result<OsString, Vec<u8>> + where + Self: Sized, + { + #[cfg(unix)] + #[inline] + fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> { + use std::os::unix::ffi::OsStringExt; + + Ok(OsString::from_vec(v)) + } + + #[cfg(not(unix))] + #[inline] + fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> { + match v.into_string() { + Ok(s) => Ok(OsString::from(s)), + Err(err) => Err(err.into_vec()), + } + } + + imp(self.into_vec()) + } + + /// Lossily converts this byte string into an OS string, in place. + /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this will perform a UTF-8 check and lossily convert this byte string + /// into valid UTF-8 using the Unicode replacement codepoint. + /// + /// Note that this can prevent the correct roundtripping of file paths on + /// non-Unix systems such as Windows, where file paths are an arbitrary + /// sequence of 16-bit integers. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let bs = Vec::from_slice(b"foo\xFFbar"); + /// let os_str = bs.into_os_string_lossy(); + /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar"); + /// ``` + #[inline] + fn into_os_string_lossy(self) -> OsString + where + Self: Sized, + { + #[cfg(unix)] + #[inline] + fn imp(v: Vec<u8>) -> OsString { + use std::os::unix::ffi::OsStringExt; + + OsString::from_vec(v) + } + + #[cfg(not(unix))] + #[inline] + fn imp(v: Vec<u8>) -> OsString { + OsString::from(v.into_string_lossy()) + } + + imp(self.into_vec()) + } + + /// Converts this byte string into an owned file path, in place. + /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this returns the original byte string if it is not valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let bs = Vec::from("foo"); + /// let path = bs.into_path_buf().expect("should be valid UTF-8"); + /// assert_eq!(path.as_os_str(), "foo"); + /// ``` + #[inline] + fn into_path_buf(self) -> Result<PathBuf, Vec<u8>> + where + Self: Sized, + { + self.into_os_string().map(PathBuf::from) + } + + /// Lossily converts this byte string into an owned file path, in place. + /// + /// On Unix, this always succeeds and is zero cost. On non-Unix systems, + /// this will perform a UTF-8 check and lossily convert this byte string + /// into valid UTF-8 using the Unicode replacement codepoint. + /// + /// Note that this can prevent the correct roundtripping of file paths on + /// non-Unix systems such as Windows, where file paths are an arbitrary + /// sequence of 16-bit integers. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let bs = Vec::from_slice(b"foo\xFFbar"); + /// let path = bs.into_path_buf_lossy(); + /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar"); + /// ``` + #[inline] + fn into_path_buf_lossy(self) -> PathBuf + where + Self: Sized, + { + PathBuf::from(self.into_os_string_lossy()) + } + + /// Removes the last byte from this `Vec<u8>` and returns it. + /// + /// If this byte string is empty, then `None` is returned. + /// + /// If the last codepoint in this byte string is not ASCII, then removing + /// the last byte could make this byte string contain invalid UTF-8. + /// + /// Note that this is equivalent to the generic `Vec::pop` method. This + /// method is provided to permit callers to explicitly differentiate + /// between popping bytes and codepoints. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = Vec::from("foo"); + /// assert_eq!(s.pop_byte(), Some(b'o')); + /// assert_eq!(s.pop_byte(), Some(b'o')); + /// assert_eq!(s.pop_byte(), Some(b'f')); + /// assert_eq!(s.pop_byte(), None); + /// ``` + #[inline] + fn pop_byte(&mut self) -> Option<u8> { + self.as_vec_mut().pop() + } + + /// Removes the last codepoint from this `Vec<u8>` and returns it. + /// + /// If this byte string is empty, then `None` is returned. If the last + /// bytes of this byte string do not correspond to a valid UTF-8 code unit + /// sequence, then the Unicode replacement codepoint is yielded instead in + /// accordance with the + /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8). 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = Vec::from("foo"); + /// assert_eq!(s.pop_char(), Some('o')); + /// assert_eq!(s.pop_char(), Some('o')); + /// assert_eq!(s.pop_char(), Some('f')); + /// assert_eq!(s.pop_char(), None); + /// ``` + /// + /// This shows the replacement codepoint substitution policy. Note that + /// the first pop yields a replacement codepoint but actually removes two + /// bytes. This is in contrast with subsequent pops when encountering + /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8 + /// code unit sequence. + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98"); + /// assert_eq!(s.pop_char(), Some('\u{FFFD}')); + /// assert_eq!(s.pop_char(), Some('o')); + /// assert_eq!(s.pop_char(), Some('o')); + /// assert_eq!(s.pop_char(), Some('\u{FFFD}')); + /// assert_eq!(s.pop_char(), Some('\u{FFFD}')); + /// assert_eq!(s.pop_char(), Some('\u{FFFD}')); + /// assert_eq!(s.pop_char(), Some('f')); + /// assert_eq!(s.pop_char(), None); + /// ``` + #[inline] + fn pop_char(&mut self) -> Option<char> { + let (ch, size) = utf8::decode_last_lossy(self.as_vec()); + if size == 0 { + return None; + } + let new_len = self.as_vec().len() - size; + self.as_vec_mut().truncate(new_len); + Some(ch) + } + + /// Removes a `char` from this `Vec<u8>` at the given byte position and + /// returns it. + /// + /// If the bytes at the given position do not lead to a valid UTF-8 code + /// unit sequence, then a + /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8). + /// + /// # Panics + /// + /// Panics if `at` is larger than or equal to this byte string's length. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = Vec::from("foo☃bar"); + /// assert_eq!(s.remove_char(3), '☃'); + /// assert_eq!(s, b"foobar"); + /// ``` + /// + /// This example shows how the Unicode replacement codepoint policy is + /// used: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = Vec::from_slice(b"foo\xFFbar"); + /// assert_eq!(s.remove_char(3), '\u{FFFD}'); + /// assert_eq!(s, b"foobar"); + /// ``` + #[inline] + fn remove_char(&mut self, at: usize) -> char { + let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]); + assert!( + size > 0, + "expected {} to be less than {}", + at, + self.as_vec().len(), + ); + self.as_vec_mut().drain(at..at + size); + ch + } + + /// Inserts the given codepoint into this `Vec<u8>` at a particular byte + /// position. + /// + /// This is an `O(n)` operation as it may copy a number of elements in this + /// byte string proportional to its length. + /// + /// # Panics + /// + /// Panics if `at` is larger than the byte string's length. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = Vec::from("foobar"); + /// s.insert_char(3, '☃'); + /// assert_eq!(s, "foo☃bar".as_bytes()); + /// ``` + #[inline] + fn insert_char(&mut self, at: usize, ch: char) { + self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes()); + } + + /// Inserts the given byte string into this byte string at a particular + /// byte position. + /// + /// This is an `O(n)` operation as it may copy a number of elements in this + /// byte string proportional to its length. + /// + /// The given byte string may be any type that can be cheaply converted + /// into a `&[u8]`. This includes, but is not limited to, `&str` and + /// `&[u8]`. + /// + /// # Panics + /// + /// Panics if `at` is larger than the byte string's length. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = Vec::from("foobar"); + /// s.insert_str(3, "☃☃☃"); + /// assert_eq!(s, "foo☃☃☃bar".as_bytes()); + /// ``` + #[inline] + fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) { + let bytes = bytes.as_ref(); + let len = self.as_vec().len(); + assert!(at <= len, "expected {} to be <= {}", at, len); + + // SAFETY: We'd like to efficiently splice in the given bytes into + // this byte string. Since we are only working with `u8` elements here, + // we only need to consider whether our bounds are correct and whether + // our byte string has enough space. + self.as_vec_mut().reserve(bytes.len()); + unsafe { + // Shift bytes after `at` over by the length of `bytes` to make + // room for it. This requires referencing two regions of memory + // that may overlap, so we use ptr::copy. + ptr::copy( + self.as_vec().as_ptr().add(at), + self.as_vec_mut().as_mut_ptr().add(at + bytes.len()), + len - at, + ); + // Now copy the bytes given into the room we made above. In this + // case, we know that the given bytes cannot possibly overlap + // with this byte string since we have a mutable borrow of the + // latter. Thus, we can use a nonoverlapping copy. + ptr::copy_nonoverlapping( + bytes.as_ptr(), + self.as_vec_mut().as_mut_ptr().add(at), + bytes.len(), + ); + self.as_vec_mut().set_len(len + bytes.len()); + } + } + + /// Removes the specified range in this byte string and replaces it with + /// the given bytes. The given bytes do not need to have the same length + /// as the range provided. + /// + /// # Panics + /// + /// Panics if the given range is invalid. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = Vec::from("foobar"); + /// s.replace_range(2..4, "xxxxx"); + /// assert_eq!(s, "foxxxxxar".as_bytes()); + /// ``` + #[inline] + fn replace_range<R, B>(&mut self, range: R, replace_with: B) + where + R: ops::RangeBounds<usize>, + B: AsRef<[u8]>, + { + self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned()); + } + + /// Creates a draining iterator that removes the specified range in this + /// `Vec<u8>` and yields each of the removed bytes. + /// + /// Note that the elements specified by the given range are removed + /// regardless of whether the returned iterator is fully exhausted. + /// + /// Also note that is is unspecified how many bytes are removed from the + /// `Vec<u8>` if the `DrainBytes` iterator is leaked. + /// + /// # Panics + /// + /// Panics if the given range is not valid. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::ByteVec; + /// + /// let mut s = Vec::from("foobar"); + /// { + /// let mut drainer = s.drain_bytes(2..4); + /// assert_eq!(drainer.next(), Some(b'o')); + /// assert_eq!(drainer.next(), Some(b'b')); + /// assert_eq!(drainer.next(), None); + /// } + /// assert_eq!(s, "foar".as_bytes()); + /// ``` + #[inline] + fn drain_bytes<R>(&mut self, range: R) -> DrainBytes + where + R: ops::RangeBounds<usize>, + { + DrainBytes { it: self.as_vec_mut().drain(range) } + } +} + +/// A draining byte oriented iterator for `Vec<u8>`. +/// +/// This iterator is created by +/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes). 
+/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use bstr::ByteVec; +/// +/// let mut s = Vec::from("foobar"); +/// { +/// let mut drainer = s.drain_bytes(2..4); +/// assert_eq!(drainer.next(), Some(b'o')); +/// assert_eq!(drainer.next(), Some(b'b')); +/// assert_eq!(drainer.next(), None); +/// } +/// assert_eq!(s, "foar".as_bytes()); +/// ``` +#[derive(Debug)] +pub struct DrainBytes<'a> { + it: vec::Drain<'a, u8>, +} + +impl<'a> iter::FusedIterator for DrainBytes<'a> {} + +impl<'a> Iterator for DrainBytes<'a> { + type Item = u8; + + #[inline] + fn next(&mut self) -> Option<u8> { + self.it.next() + } +} + +impl<'a> DoubleEndedIterator for DrainBytes<'a> { + #[inline] + fn next_back(&mut self) -> Option<u8> { + self.it.next_back() + } +} + +impl<'a> ExactSizeIterator for DrainBytes<'a> { + #[inline] + fn len(&self) -> usize { + self.it.len() + } +} + +/// An error that may occur when converting a `Vec<u8>` to a `String`. +/// +/// This error includes the original `Vec<u8>` that failed to convert to a +/// `String`. This permits callers to recover the allocation used even if it +/// it not valid UTF-8. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use bstr::{B, ByteVec}; +/// +/// let bytes = Vec::from_slice(b"foo\xFFbar"); +/// let err = bytes.into_string().unwrap_err(); +/// +/// assert_eq!(err.utf8_error().valid_up_to(), 3); +/// assert_eq!(err.utf8_error().error_len(), Some(1)); +/// +/// // At no point in this example is an allocation performed. +/// let bytes = Vec::from(err.into_vec()); +/// assert_eq!(bytes, B(b"foo\xFFbar")); +/// ``` +#[derive(Debug, Eq, PartialEq)] +pub struct FromUtf8Error { + original: Vec<u8>, + err: Utf8Error, +} + +impl FromUtf8Error { + /// Return the original bytes as a slice that failed to convert to a + /// `String`. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteVec}; + /// + /// let bytes = Vec::from_slice(b"foo\xFFbar"); + /// let err = bytes.into_string().unwrap_err(); + /// + /// // At no point in this example is an allocation performed. + /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar")); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.original + } + + /// Consume this error and return the original byte string that failed to + /// convert to a `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteVec}; + /// + /// let bytes = Vec::from_slice(b"foo\xFFbar"); + /// let err = bytes.into_string().unwrap_err(); + /// let original = err.into_vec(); + /// + /// // At no point in this example is an allocation performed. + /// assert_eq!(original, B(b"foo\xFFbar")); + /// ``` + #[inline] + pub fn into_vec(self) -> Vec<u8> { + self.original + } + + /// Return the underlying UTF-8 error that occurred. This error provides + /// information on the nature and location of the invalid UTF-8 detected. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use bstr::{B, ByteVec}; + /// + /// let bytes = Vec::from_slice(b"foo\xFFbar"); + /// let err = bytes.into_string().unwrap_err(); + /// + /// assert_eq!(err.utf8_error().valid_up_to(), 3); + /// assert_eq!(err.utf8_error().error_len(), Some(1)); + /// ``` + #[inline] + pub fn utf8_error(&self) -> &Utf8Error { + &self.err + } +} + +impl error::Error for FromUtf8Error { + #[inline] + fn description(&self) -> &str { + "invalid UTF-8 vector" + } +} + +impl fmt::Display for FromUtf8Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.err) + } +} + +#[cfg(test)] +mod tests { + use ext_slice::B; + use ext_vec::ByteVec; + + #[test] + fn insert() { + let mut s = vec![]; + s.insert_str(0, "foo"); + assert_eq!(s, "foo".as_bytes()); + + let mut s = Vec::from("a"); + s.insert_str(0, "foo"); + assert_eq!(s, "fooa".as_bytes()); + + let mut s = Vec::from("a"); + s.insert_str(1, "foo"); + assert_eq!(s, "afoo".as_bytes()); + + let mut s = Vec::from("foobar"); + s.insert_str(3, "quux"); + assert_eq!(s, "fooquuxbar".as_bytes()); + + let mut s = Vec::from("foobar"); + s.insert_str(3, "x"); + assert_eq!(s, "fooxbar".as_bytes()); + + let mut s = Vec::from("foobar"); + s.insert_str(0, "x"); + assert_eq!(s, "xfoobar".as_bytes()); + + let mut s = Vec::from("foobar"); + s.insert_str(6, "x"); + assert_eq!(s, "foobarx".as_bytes()); + + let mut s = Vec::from("foobar"); + s.insert_str(3, "quuxbazquux"); + assert_eq!(s, "fooquuxbazquuxbar".as_bytes()); + } + + #[test] + #[should_panic] + fn insert_fail1() { + let mut s = vec![]; + s.insert_str(1, "foo"); + } + + #[test] + #[should_panic] + fn insert_fail2() { + let mut s = Vec::from("a"); + s.insert_str(2, "foo"); + } + + #[test] + #[should_panic] + fn insert_fail3() { + let mut s = Vec::from("foobar"); + s.insert_str(7, "foo"); + } +} diff --git a/src/impls.rs b/src/impls.rs new file mode 100644 index 0000000..c7d0be3 --- 
/dev/null +++ b/src/impls.rs @@ -0,0 +1,969 @@
// Generates `PartialEq` impls in both directions between `$lhs` and `$rhs`
// by comparing raw bytes. `$lhs` must provide `as_bytes()` and `$rhs` must
// implement `AsRef<[u8]>`.
macro_rules! impl_partial_eq {
    ($lhs:ty, $rhs:ty) => {
        impl<'a, 'b> PartialEq<$rhs> for $lhs {
            #[inline]
            fn eq(&self, other: &$rhs) -> bool {
                let other: &[u8] = other.as_ref();
                PartialEq::eq(self.as_bytes(), other)
            }
        }

        impl<'a, 'b> PartialEq<$lhs> for $rhs {
            #[inline]
            fn eq(&self, other: &$lhs) -> bool {
                let this: &[u8] = self.as_ref();
                PartialEq::eq(this, other.as_bytes())
            }
        }
    };
}

// Same as `impl_partial_eq`, but `$rhs` is dereferenced (`&**value`) before
// `as_ref()` is called on it, as needed for wrapper types such as `Cow`.
#[cfg(feature = "std")]
macro_rules! impl_partial_eq_cow {
    ($lhs:ty, $rhs:ty) => {
        impl<'a, 'b> PartialEq<$rhs> for $lhs {
            #[inline]
            fn eq(&self, other: &$rhs) -> bool {
                let other: &[u8] = (&**other).as_ref();
                PartialEq::eq(self.as_bytes(), other)
            }
        }

        impl<'a, 'b> PartialEq<$lhs> for $rhs {
            #[inline]
            fn eq(&self, other: &$lhs) -> bool {
                // The left operand's bytes must come from `self`. Reading
                // them from `other` would compare `other` with itself and
                // report every pair as equal.
                let this: &[u8] = (&**self).as_ref();
                PartialEq::eq(this, other.as_bytes())
            }
        }
    };
}

// Generates `PartialOrd` impls in both directions between `$lhs` and `$rhs`
// by comparing raw bytes. `$lhs` must provide `as_bytes()` and `$rhs` must
// implement `AsRef<[u8]>`.
macro_rules! impl_partial_ord {
    ($lhs:ty, $rhs:ty) => {
        impl<'a, 'b> PartialOrd<$rhs> for $lhs {
            #[inline]
            fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
                let other: &[u8] = other.as_ref();
                PartialOrd::partial_cmp(self.as_bytes(), other)
            }
        }

        impl<'a, 'b> PartialOrd<$lhs> for $rhs {
            #[inline]
            fn partial_cmp(&self, other: &$lhs) -> Option<Ordering> {
                let this: &[u8] = self.as_ref();
                PartialOrd::partial_cmp(this, other.as_bytes())
            }
        }
    };
}

#[cfg(feature = "std")]
mod bstring {
    use std::borrow::{Borrow, Cow, ToOwned};
    use std::cmp::Ordering;
    use std::fmt;
    use std::iter::FromIterator;
    use std::ops;

    use bstr::BStr;
    use bstring::BString;
    use ext_vec::ByteVec;

    // Formatting is delegated to the borrowed `BStr` view.
    impl fmt::Display for BString {
        #[inline]
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            fmt::Display::fmt(self.as_bstr(), f)
        }
    }

    impl fmt::Debug for BString {
        #[inline]
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            fmt::Debug::fmt(self.as_bstr(), f)
        }
    }

    // A `BString` dereferences to its underlying `Vec<u8>`.
    impl ops::Deref for BString {
        type Target = Vec<u8>;

        #[inline]
        fn deref(&self) -> &Vec<u8> {
            &self.bytes
        }
    }

    impl ops::DerefMut for BString {
        #[inline]
        fn deref_mut(&mut self) -> &mut Vec<u8> {
            &mut self.bytes
        }
    }

    impl AsRef<[u8]> for BString {
        #[inline]
        fn as_ref(&self) -> &[u8] {
            &self.bytes
        }
    }

    impl AsRef<BStr> for BString {
        #[inline]
        fn as_ref(&self) -> &BStr {
            self.as_bstr()
        }
    }

    impl AsMut<[u8]> for BString {
        #[inline]
        fn as_mut(&mut self) -> &mut [u8] {
            &mut self.bytes
        }
    }

    impl AsMut<BStr> for BString {
        #[inline]
        fn as_mut(&mut self) -> &mut BStr {
            self.as_mut_bstr()
        }
    }

    impl Borrow<BStr> for BString {
        #[inline]
        fn borrow(&self) -> &BStr {
            self.as_bstr()
        }
    }

    // Pairs `BStr` with `BString` as its owned form, which is what allows
    // `Cow<BStr>` to exist.
    impl ToOwned for BStr {
        type Owned = BString;

        #[inline]
        fn to_owned(&self) -> BString {
            BString::from(self)
        }
    }

    // The default `BString` is empty.
    impl Default for BString {
        fn default() -> BString {
            BString::from(vec![])
        }
    }

    // Copies the bytes of the slice into a new `BString`.
    impl<'a> From<&'a [u8]> for BString {
        #[inline]
        fn from(s: &'a [u8]) -> BString {
            BString::from(s.to_vec())
        }
    }

    // Takes ownership of the vector without copying.
    impl From<Vec<u8>> for BString {
        #[inline]
        fn from(s: Vec<u8>) -> BString {
            BString { bytes: s }
        }
    }

    // Returns the underlying vector without copying.
    impl From<BString> for Vec<u8> {
        #[inline]
        fn from(s: BString) -> Vec<u8> {
            s.bytes
        }
    }

    // Copies the UTF-8 bytes of the string slice into a new `BString`.
    impl<'a> From<&'a str> for BString {
        #[inline]
        fn from(s: &'a str) -> BString {
            BString::from(s.as_bytes().to_vec())
        }
    }

    // Reuses the `String`'s buffer via `String::into_bytes`.
    impl From<String> for BString {
        #[inline]
        fn from(s: String) -> BString {
            BString::from(s.into_bytes())
        }
    }

    // Copies the bytes of the borrowed byte string into a new `BString`.
    impl<'a> From<&'a BStr> for BString {
        #[inline]
        fn from(s: &'a BStr) -> BString {
            BString::from(s.bytes.to_vec())
        }
    }

    impl<'a> From<BString> for Cow<'a, BStr> {
        #[inline]
        fn from(s: BString) -> Cow<'a, BStr> {
            Cow::Owned(s)
        }
    }

    // Collects the chars into a `String` first, then converts that.
    impl FromIterator<char> for BString {
        #[inline]
        fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> BString {
            BString::from(iter.into_iter().collect::<String>())
        }
    }
+ + impl FromIterator<u8> for BString { + #[inline] + fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> BString { + BString::from(iter.into_iter().collect::<Vec<u8>>()) + } + } + + impl<'a> FromIterator<&'a str> for BString { + #[inline] + fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> BString { + let mut buf = vec![]; + for b in iter { + buf.push_str(b); + } + BString::from(buf) + } + } + + impl<'a> FromIterator<&'a [u8]> for BString { + #[inline] + fn from_iter<T: IntoIterator<Item = &'a [u8]>>(iter: T) -> BString { + let mut buf = vec![]; + for b in iter { + buf.push_str(b); + } + BString::from(buf) + } + } + + impl<'a> FromIterator<&'a BStr> for BString { + #[inline] + fn from_iter<T: IntoIterator<Item = &'a BStr>>(iter: T) -> BString { + let mut buf = vec![]; + for b in iter { + buf.push_str(b); + } + BString::from(buf) + } + } + + impl FromIterator<BString> for BString { + #[inline] + fn from_iter<T: IntoIterator<Item = BString>>(iter: T) -> BString { + let mut buf = vec![]; + for b in iter { + buf.push_str(b); + } + BString::from(buf) + } + } + + impl Eq for BString {} + + impl PartialEq for BString { + #[inline] + fn eq(&self, other: &BString) -> bool { + &self[..] == &other[..] 
+ } + } + + impl_partial_eq!(BString, Vec<u8>); + impl_partial_eq!(BString, [u8]); + impl_partial_eq!(BString, &'a [u8]); + impl_partial_eq!(BString, String); + impl_partial_eq!(BString, str); + impl_partial_eq!(BString, &'a str); + impl_partial_eq!(BString, BStr); + impl_partial_eq!(BString, &'a BStr); + + impl PartialOrd for BString { + #[inline] + fn partial_cmp(&self, other: &BString) -> Option<Ordering> { + PartialOrd::partial_cmp(&self.bytes, &other.bytes) + } + } + + impl Ord for BString { + #[inline] + fn cmp(&self, other: &BString) -> Ordering { + self.partial_cmp(other).unwrap() + } + } + + impl_partial_ord!(BString, Vec<u8>); + impl_partial_ord!(BString, [u8]); + impl_partial_ord!(BString, &'a [u8]); + impl_partial_ord!(BString, String); + impl_partial_ord!(BString, str); + impl_partial_ord!(BString, &'a str); + impl_partial_ord!(BString, BStr); + impl_partial_ord!(BString, &'a BStr); +} + +mod bstr { + #[cfg(feature = "std")] + use std::borrow::Cow; + + use core::cmp::Ordering; + use core::fmt; + use core::ops; + + use bstr::BStr; + use ext_slice::ByteSlice; + + impl fmt::Display for BStr { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + /// Write the given bstr (lossily) to the given formatter. + fn write_bstr( + f: &mut fmt::Formatter, + bstr: &BStr, + ) -> Result<(), fmt::Error> { + for chunk in bstr.utf8_chunks() { + f.write_str(chunk.valid())?; + if !chunk.invalid().is_empty() { + f.write_str("\u{FFFD}")?; + } + } + Ok(()) + } + + /// Write 'num' fill characters to the given formatter. 
+ fn write_pads(f: &mut fmt::Formatter, num: usize) -> fmt::Result { + let fill = f.fill(); + for _ in 0..num { + f.write_fmt(format_args!("{}", fill))?; + } + Ok(()) + } + + if let Some(align) = f.align() { + let width = f.width().unwrap_or(0); + let nchars = self.chars().count(); + let remaining_pads = width.saturating_sub(nchars); + match align { + fmt::Alignment::Left => { + write_bstr(f, self)?; + write_pads(f, remaining_pads)?; + } + fmt::Alignment::Right => { + write_pads(f, remaining_pads)?; + write_bstr(f, self)?; + } + fmt::Alignment::Center => { + let half = remaining_pads / 2; + let second_half = if remaining_pads % 2 == 0 { + half + } else { + half + 1 + }; + write_pads(f, half)?; + write_bstr(f, self)?; + write_pads(f, second_half)?; + } + } + Ok(()) + } else { + write_bstr(f, self)?; + Ok(()) + } + } + } + + impl fmt::Debug for BStr { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "\"")?; + for (s, e, ch) in self.char_indices() { + match ch { + '\0' => write!(f, "\\0")?, + '\u{FFFD}' => { + let bytes = self[s..e].as_bytes(); + if bytes == b"\xEF\xBF\xBD" { + write!(f, "{}", ch.escape_debug())?; + } else { + for &b in self[s..e].as_bytes() { + write!(f, r"\x{:02X}", b)?; + } + } + } + // ASCII control characters except \0, \n, \r, \t + '\x01'..='\x08' + | '\x0b' + | '\x0c' + | '\x0e'..='\x19' + | '\x7f' => { + write!(f, "\\x{:02x}", ch as u32)?; + } + '\n' | '\r' | '\t' | _ => { + write!(f, "{}", ch.escape_debug())?; + } + } + } + write!(f, "\"")?; + Ok(()) + } + } + + impl ops::Deref for BStr { + type Target = [u8]; + + #[inline] + fn deref(&self) -> &[u8] { + &self.bytes + } + } + + impl ops::DerefMut for BStr { + #[inline] + fn deref_mut(&mut self) -> &mut [u8] { + &mut self.bytes + } + } + + impl ops::Index<usize> for BStr { + type Output = u8; + + #[inline] + fn index(&self, idx: usize) -> &u8 { + &self.as_bytes()[idx] + } + } + + impl ops::Index<ops::RangeFull> for BStr { + type Output = BStr; + + #[inline] + fn 
index(&self, _: ops::RangeFull) -> &BStr { + self + } + } + + impl ops::Index<ops::Range<usize>> for BStr { + type Output = BStr; + + #[inline] + fn index(&self, r: ops::Range<usize>) -> &BStr { + BStr::new(&self.as_bytes()[r.start..r.end]) + } + } + + impl ops::Index<ops::RangeInclusive<usize>> for BStr { + type Output = BStr; + + #[inline] + fn index(&self, r: ops::RangeInclusive<usize>) -> &BStr { + BStr::new(&self.as_bytes()[*r.start()..=*r.end()]) + } + } + + impl ops::Index<ops::RangeFrom<usize>> for BStr { + type Output = BStr; + + #[inline] + fn index(&self, r: ops::RangeFrom<usize>) -> &BStr { + BStr::new(&self.as_bytes()[r.start..]) + } + } + + impl ops::Index<ops::RangeTo<usize>> for BStr { + type Output = BStr; + + #[inline] + fn index(&self, r: ops::RangeTo<usize>) -> &BStr { + BStr::new(&self.as_bytes()[..r.end]) + } + } + + impl ops::Index<ops::RangeToInclusive<usize>> for BStr { + type Output = BStr; + + #[inline] + fn index(&self, r: ops::RangeToInclusive<usize>) -> &BStr { + BStr::new(&self.as_bytes()[..=r.end]) + } + } + + impl ops::IndexMut<usize> for BStr { + #[inline] + fn index_mut(&mut self, idx: usize) -> &mut u8 { + &mut self.bytes[idx] + } + } + + impl ops::IndexMut<ops::RangeFull> for BStr { + #[inline] + fn index_mut(&mut self, _: ops::RangeFull) -> &mut BStr { + self + } + } + + impl ops::IndexMut<ops::Range<usize>> for BStr { + #[inline] + fn index_mut(&mut self, r: ops::Range<usize>) -> &mut BStr { + BStr::from_bytes_mut(&mut self.bytes[r.start..r.end]) + } + } + + impl ops::IndexMut<ops::RangeInclusive<usize>> for BStr { + #[inline] + fn index_mut(&mut self, r: ops::RangeInclusive<usize>) -> &mut BStr { + BStr::from_bytes_mut(&mut self.bytes[*r.start()..=*r.end()]) + } + } + + impl ops::IndexMut<ops::RangeFrom<usize>> for BStr { + #[inline] + fn index_mut(&mut self, r: ops::RangeFrom<usize>) -> &mut BStr { + BStr::from_bytes_mut(&mut self.bytes[r.start..]) + } + } + + impl ops::IndexMut<ops::RangeTo<usize>> for BStr { + #[inline] + 
fn index_mut(&mut self, r: ops::RangeTo<usize>) -> &mut BStr { + BStr::from_bytes_mut(&mut self.bytes[..r.end]) + } + } + + impl ops::IndexMut<ops::RangeToInclusive<usize>> for BStr { + #[inline] + fn index_mut(&mut self, r: ops::RangeToInclusive<usize>) -> &mut BStr { + BStr::from_bytes_mut(&mut self.bytes[..=r.end]) + } + } + + impl AsRef<[u8]> for BStr { + #[inline] + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } + } + + impl AsRef<BStr> for [u8] { + #[inline] + fn as_ref(&self) -> &BStr { + BStr::new(self) + } + } + + impl AsRef<BStr> for str { + #[inline] + fn as_ref(&self) -> &BStr { + BStr::new(self) + } + } + + impl AsMut<[u8]> for BStr { + #[inline] + fn as_mut(&mut self) -> &mut [u8] { + &mut self.bytes + } + } + + impl AsMut<BStr> for [u8] { + #[inline] + fn as_mut(&mut self) -> &mut BStr { + BStr::new_mut(self) + } + } + + impl<'a> Default for &'a BStr { + fn default() -> &'a BStr { + BStr::from_bytes(b"") + } + } + + impl<'a> Default for &'a mut BStr { + fn default() -> &'a mut BStr { + BStr::from_bytes_mut(&mut []) + } + } + + impl<'a> From<&'a [u8]> for &'a BStr { + #[inline] + fn from(s: &'a [u8]) -> &'a BStr { + BStr::from_bytes(s) + } + } + + impl<'a> From<&'a str> for &'a BStr { + #[inline] + fn from(s: &'a str) -> &'a BStr { + BStr::from_bytes(s.as_bytes()) + } + } + + #[cfg(feature = "std")] + impl<'a> From<&'a BStr> for Cow<'a, BStr> { + #[inline] + fn from(s: &'a BStr) -> Cow<'a, BStr> { + Cow::Borrowed(s) + } + } + + #[cfg(feature = "std")] + impl From<Box<[u8]>> for Box<BStr> { + #[inline] + fn from(s: Box<[u8]>) -> Box<BStr> { + BStr::from_boxed_bytes(s) + } + } + + #[cfg(feature = "std")] + impl From<Box<BStr>> for Box<[u8]> { + #[inline] + fn from(s: Box<BStr>) -> Box<[u8]> { + BStr::into_boxed_bytes(s) + } + } + + impl Eq for BStr {} + + impl PartialEq<BStr> for BStr { + #[inline] + fn eq(&self, other: &BStr) -> bool { + self.as_bytes() == other.as_bytes() + } + } + + impl_partial_eq!(BStr, [u8]); + impl_partial_eq!(BStr, &'a [u8]); 
+ impl_partial_eq!(BStr, str); + impl_partial_eq!(BStr, &'a str); + + #[cfg(feature = "std")] + impl_partial_eq!(BStr, Vec<u8>); + #[cfg(feature = "std")] + impl_partial_eq!(&'a BStr, Vec<u8>); + #[cfg(feature = "std")] + impl_partial_eq!(BStr, String); + #[cfg(feature = "std")] + impl_partial_eq!(&'a BStr, String); + #[cfg(feature = "std")] + impl_partial_eq_cow!(&'a BStr, Cow<'a, BStr>); + #[cfg(feature = "std")] + impl_partial_eq_cow!(&'a BStr, Cow<'a, str>); + #[cfg(feature = "std")] + impl_partial_eq_cow!(&'a BStr, Cow<'a, [u8]>); + + impl PartialOrd for BStr { + #[inline] + fn partial_cmp(&self, other: &BStr) -> Option<Ordering> { + PartialOrd::partial_cmp(self.as_bytes(), other.as_bytes()) + } + } + + impl Ord for BStr { + #[inline] + fn cmp(&self, other: &BStr) -> Ordering { + self.partial_cmp(other).unwrap() + } + } + + impl_partial_ord!(BStr, [u8]); + impl_partial_ord!(BStr, &'a [u8]); + impl_partial_ord!(BStr, str); + impl_partial_ord!(BStr, &'a str); + + #[cfg(feature = "std")] + impl_partial_ord!(BStr, Vec<u8>); + #[cfg(feature = "std")] + impl_partial_ord!(&'a BStr, Vec<u8>); + #[cfg(feature = "std")] + impl_partial_ord!(BStr, String); + #[cfg(feature = "std")] + impl_partial_ord!(&'a BStr, String); +} + +#[cfg(feature = "serde1-nostd")] +mod bstr_serde { + use core::fmt; + + use serde::{ + de::Error, de::Visitor, Deserialize, Deserializer, Serialize, + Serializer, + }; + + use bstr::BStr; + + impl Serialize for BStr { + #[inline] + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_bytes(self.as_bytes()) + } + } + + impl<'a, 'de: 'a> Deserialize<'de> for &'a BStr { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<&'a BStr, D::Error> + where + D: Deserializer<'de>, + { + struct BStrVisitor; + + impl<'de> Visitor<'de> for BStrVisitor { + type Value = &'de BStr; + + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("a borrowed byte string") + } + + 
#[inline] + fn visit_borrowed_bytes<E: Error>( + self, + value: &'de [u8], + ) -> Result<&'de BStr, E> { + Ok(BStr::new(value)) + } + + #[inline] + fn visit_borrowed_str<E: Error>( + self, + value: &'de str, + ) -> Result<&'de BStr, E> { + Ok(BStr::new(value)) + } + } + + deserializer.deserialize_bytes(BStrVisitor) + } + } +} + +#[cfg(feature = "serde1")] +mod bstring_serde { + use std::cmp; + use std::fmt; + + use serde::{ + de::Error, de::SeqAccess, de::Visitor, Deserialize, Deserializer, + Serialize, Serializer, + }; + + use bstring::BString; + + impl Serialize for BString { + #[inline] + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_bytes(self.as_bytes()) + } + } + + impl<'de> Deserialize<'de> for BString { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<BString, D::Error> + where + D: Deserializer<'de>, + { + struct BStringVisitor; + + impl<'de> Visitor<'de> for BStringVisitor { + type Value = BString; + + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("a byte string") + } + + #[inline] + fn visit_seq<V: SeqAccess<'de>>( + self, + mut visitor: V, + ) -> Result<BString, V::Error> { + let len = cmp::min(visitor.size_hint().unwrap_or(0), 256); + let mut bytes = Vec::with_capacity(len); + while let Some(v) = visitor.next_element()? 
{ + bytes.push(v); + } + Ok(BString::from(bytes)) + } + + #[inline] + fn visit_bytes<E: Error>( + self, + value: &[u8], + ) -> Result<BString, E> { + Ok(BString::from(value)) + } + + #[inline] + fn visit_byte_buf<E: Error>( + self, + value: Vec<u8>, + ) -> Result<BString, E> { + Ok(BString::from(value)) + } + + #[inline] + fn visit_str<E: Error>( + self, + value: &str, + ) -> Result<BString, E> { + Ok(BString::from(value)) + } + + #[inline] + fn visit_string<E: Error>( + self, + value: String, + ) -> Result<BString, E> { + Ok(BString::from(value)) + } + } + + deserializer.deserialize_byte_buf(BStringVisitor) + } + } +} + +#[cfg(test)] +mod display { + use crate::ByteSlice; + use bstring::BString; + + #[test] + fn clean() { + assert_eq!(&format!("{}", &b"abc".as_bstr()), "abc"); + assert_eq!(&format!("{}", &b"\xf0\x28\x8c\xbc".as_bstr()), "�(��"); + } + + #[test] + fn width_bigger_than_bstr() { + assert_eq!(&format!("{:<7}!", &b"abc".as_bstr()), "abc !"); + assert_eq!(&format!("{:>7}!", &b"abc".as_bstr()), " abc!"); + assert_eq!(&format!("{:^7}!", &b"abc".as_bstr()), " abc !"); + assert_eq!(&format!("{:^6}!", &b"abc".as_bstr()), " abc !"); + assert_eq!(&format!("{:-<7}!", &b"abc".as_bstr()), "abc----!"); + assert_eq!(&format!("{:->7}!", &b"abc".as_bstr()), "----abc!"); + assert_eq!(&format!("{:-^7}!", &b"abc".as_bstr()), "--abc--!"); + assert_eq!(&format!("{:-^6}!", &b"abc".as_bstr()), "-abc--!"); + + assert_eq!( + &format!("{:<7}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "�(�� !" + ); + assert_eq!( + &format!("{:>7}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + " �(��!" + ); + assert_eq!( + &format!("{:^7}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + " �(�� !" + ); + assert_eq!( + &format!("{:^6}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + " �(�� !" + ); + + assert_eq!( + &format!("{:-<7}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "�(��---!" + ); + assert_eq!( + &format!("{:->7}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "---�(��!" 
+ ); + assert_eq!( + &format!("{:-^7}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "-�(��--!" + ); + assert_eq!( + &format!("{:-^6}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "-�(��-!" + ); + } + + #[test] + fn width_lesser_than_bstr() { + assert_eq!(&format!("{:<2}!", &b"abc".as_bstr()), "abc!"); + assert_eq!(&format!("{:>2}!", &b"abc".as_bstr()), "abc!"); + assert_eq!(&format!("{:^2}!", &b"abc".as_bstr()), "abc!"); + assert_eq!(&format!("{:-<2}!", &b"abc".as_bstr()), "abc!"); + assert_eq!(&format!("{:->2}!", &b"abc".as_bstr()), "abc!"); + assert_eq!(&format!("{:-^2}!", &b"abc".as_bstr()), "abc!"); + + assert_eq!( + &format!("{:<3}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "�(��!" + ); + assert_eq!( + &format!("{:>3}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "�(��!" + ); + assert_eq!( + &format!("{:^3}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "�(��!" + ); + assert_eq!( + &format!("{:^2}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "�(��!" + ); + + assert_eq!( + &format!("{:-<3}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "�(��!" + ); + assert_eq!( + &format!("{:->3}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "�(��!" + ); + assert_eq!( + &format!("{:-^3}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "�(��!" + ); + assert_eq!( + &format!("{:-^2}!", &b"\xf0\x28\x8c\xbc".as_bstr()), + "�(��!" + ); + } + + quickcheck! 
{ + fn total_length(bstr: BString) -> bool { + let size = bstr.chars().count(); + format!("{:<1$}", bstr.as_bstr(), size).chars().count() >= size + } + } +} + +#[cfg(test)] +mod bstring_arbitrary { + use bstring::BString; + + use quickcheck::{Arbitrary, Gen}; + + impl Arbitrary for BString { + fn arbitrary<G: Gen>(g: &mut G) -> BString { + BString::from(Vec::<u8>::arbitrary(g)) + } + + fn shrink(&self) -> Box<dyn Iterator<Item = BString>> { + Box::new(self.bytes.shrink().map(BString::from)) + } + } +} + +#[test] +fn test_debug() { + use crate::{ByteSlice, B}; + + assert_eq!( + r#""\0\0\0 ftypisom\0\0\x02\0isomiso2avc1mp""#, + format!("{:?}", b"\0\0\0 ftypisom\0\0\x02\0isomiso2avc1mp".as_bstr()), + ); + + // Tests that if the underlying bytes contain the UTF-8 encoding of the + // replacement codepoint, then we emit the codepoint just like other + // non-printable Unicode characters. + assert_eq!( + b"\"\\xFF\xEF\xBF\xBD\\xFF\"".as_bstr(), + // Before fixing #72, the output here would be: + // \\xFF\\xEF\\xBF\\xBD\\xFF + B(&format!("{:?}", b"\xFF\xEF\xBF\xBD\xFF".as_bstr())).as_bstr(), + ); +} diff --git a/src/io.rs b/src/io.rs new file mode 100644 index 0000000..f2b4452 --- /dev/null +++ b/src/io.rs @@ -0,0 +1,514 @@ +/*! +Utilities for working with I/O using byte strings. + +This module currently only exports a single trait, `BufReadExt`, which provides +facilities for conveniently and efficiently working with lines as byte strings. + +More APIs may be added in the future. +*/ + +use std::io; + +use ext_slice::ByteSlice; +use ext_vec::ByteVec; + +/// An extension trait for +/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html) +/// which provides convenience APIs for dealing with byte strings. +pub trait BufReadExt: io::BufRead { + /// Returns an iterator over the lines of this reader, where each line + /// is represented as a byte string. 
+ /// + /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where + /// an error is yielded if there was a problem reading from the underlying + /// reader. + /// + /// On success, the next line in the iterator is returned. The line does + /// *not* contain a trailing `\n` or `\r\n`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::io; + /// + /// use bstr::io::BufReadExt; + /// + /// # fn example() -> Result<(), io::Error> { + /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor"); + /// + /// let mut lines = vec![]; + /// for result in cursor.byte_lines() { + /// let line = result?; + /// lines.push(line); + /// } + /// assert_eq!(lines.len(), 3); + /// assert_eq!(lines[0], "lorem".as_bytes()); + /// assert_eq!(lines[1], "ipsum".as_bytes()); + /// assert_eq!(lines[2], "dolor".as_bytes()); + /// # Ok(()) }; example().unwrap() + /// ``` + fn byte_lines(self) -> ByteLines<Self> + where + Self: Sized, + { + ByteLines { buf: self } + } + + /// Returns an iterator over byte-terminated records of this reader, where + /// each record is represented as a byte string. + /// + /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where + /// an error is yielded if there was a problem reading from the underlying + /// reader. + /// + /// On success, the next record in the iterator is returned. The record + /// does *not* contain its trailing terminator. + /// + /// Note that calling `byte_records(b'\n')` differs from `byte_lines()` in + /// that it has no special handling for `\r`. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::io; + /// + /// use bstr::io::BufReadExt; + /// + /// # fn example() -> Result<(), io::Error> { + /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor"); + /// + /// let mut records = vec![]; + /// for result in cursor.byte_records(b'\x00') { + /// let record = result?; + /// records.push(record); + /// } + /// assert_eq!(records.len(), 3); + /// assert_eq!(records[0], "lorem".as_bytes()); + /// assert_eq!(records[1], "ipsum".as_bytes()); + /// assert_eq!(records[2], "dolor".as_bytes()); + /// # Ok(()) }; example().unwrap() + /// ``` + fn byte_records(self, terminator: u8) -> ByteRecords<Self> + where + Self: Sized, + { + ByteRecords { terminator, buf: self } + } + + /// Executes the given closure on each line in the underlying reader. + /// + /// If the closure returns an error (or if the underlying reader returns an + /// error), then iteration is stopped and the error is returned. If false + /// is returned, then iteration is stopped and no error is returned. + /// + /// The closure given is called on exactly the same values as yielded by + /// the [`byte_lines`](trait.BufReadExt.html#method.byte_lines) + /// iterator. Namely, lines do _not_ contain trailing `\n` or `\r\n` bytes. + /// + /// This routine is useful for iterating over lines as quickly as + /// possible. Namely, a single allocation is reused for each line. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::io; + /// + /// use bstr::io::BufReadExt; + /// + /// # fn example() -> Result<(), io::Error> { + /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor"); + /// + /// let mut lines = vec![]; + /// cursor.for_byte_line(|line| { + /// lines.push(line.to_vec()); + /// Ok(true) + /// })?; + /// assert_eq!(lines.len(), 3); + /// assert_eq!(lines[0], "lorem".as_bytes()); + /// assert_eq!(lines[1], "ipsum".as_bytes()); + /// assert_eq!(lines[2], "dolor".as_bytes()); + /// # Ok(()) }; example().unwrap() + /// ``` + fn for_byte_line<F>(self, mut for_each_line: F) -> io::Result<()> + where + Self: Sized, + F: FnMut(&[u8]) -> io::Result<bool>, + { + self.for_byte_line_with_terminator(|line| { + for_each_line(&trim_line_slice(&line)) + }) + } + + /// Executes the given closure on each byte-terminated record in the + /// underlying reader. + /// + /// If the closure returns an error (or if the underlying reader returns an + /// error), then iteration is stopped and the error is returned. If false + /// is returned, then iteration is stopped and no error is returned. + /// + /// The closure given is called on exactly the same values as yielded by + /// the [`byte_records`](trait.BufReadExt.html#method.byte_records) + /// iterator. Namely, records do _not_ contain a trailing terminator byte. + /// + /// This routine is useful for iterating over records as quickly as + /// possible. Namely, a single allocation is reused for each record. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::io; + /// + /// use bstr::io::BufReadExt; + /// + /// # fn example() -> Result<(), io::Error> { + /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor"); + /// + /// let mut records = vec![]; + /// cursor.for_byte_record(b'\x00', |record| { + /// records.push(record.to_vec()); + /// Ok(true) + /// })?; + /// assert_eq!(records.len(), 3); + /// assert_eq!(records[0], "lorem".as_bytes()); + /// assert_eq!(records[1], "ipsum".as_bytes()); + /// assert_eq!(records[2], "dolor".as_bytes()); + /// # Ok(()) }; example().unwrap() + /// ``` + fn for_byte_record<F>( + self, + terminator: u8, + mut for_each_record: F, + ) -> io::Result<()> + where + Self: Sized, + F: FnMut(&[u8]) -> io::Result<bool>, + { + self.for_byte_record_with_terminator(terminator, |chunk| { + for_each_record(&trim_record_slice(&chunk, terminator)) + }) + } + + /// Executes the given closure on each line in the underlying reader. + /// + /// If the closure returns an error (or if the underlying reader returns an + /// error), then iteration is stopped and the error is returned. If false + /// is returned, then iteration is stopped and no error is returned. + /// + /// Unlike + /// [`for_byte_line`](trait.BufReadExt.html#method.for_byte_line), + /// the lines given to the closure *do* include the line terminator, if one + /// exists. + /// + /// This routine is useful for iterating over lines as quickly as + /// possible. Namely, a single allocation is reused for each line. + /// + /// This is identical to `for_byte_record_with_terminator` with a + /// terminator of `\n`. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::io; + /// + /// use bstr::io::BufReadExt; + /// + /// # fn example() -> Result<(), io::Error> { + /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor"); + /// + /// let mut lines = vec![]; + /// cursor.for_byte_line_with_terminator(|line| { + /// lines.push(line.to_vec()); + /// Ok(true) + /// })?; + /// assert_eq!(lines.len(), 3); + /// assert_eq!(lines[0], "lorem\n".as_bytes()); + /// assert_eq!(lines[1], "ipsum\r\n".as_bytes()); + /// assert_eq!(lines[2], "dolor".as_bytes()); + /// # Ok(()) }; example().unwrap() + /// ``` + fn for_byte_line_with_terminator<F>( + self, + for_each_line: F, + ) -> io::Result<()> + where + Self: Sized, + F: FnMut(&[u8]) -> io::Result<bool>, + { + self.for_byte_record_with_terminator(b'\n', for_each_line) + } + + /// Executes the given closure on each byte-terminated record in the + /// underlying reader. + /// + /// If the closure returns an error (or if the underlying reader returns an + /// error), then iteration is stopped and the error is returned. If false + /// is returned, then iteration is stopped and no error is returned. + /// + /// Unlike + /// [`for_byte_record`](trait.BufReadExt.html#method.for_byte_record), + /// the lines given to the closure *do* include the record terminator, if + /// one exists. + /// + /// This routine is useful for iterating over records as quickly as + /// possible. Namely, a single allocation is reused for each record. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::io; + /// + /// use bstr::B; + /// use bstr::io::BufReadExt; + /// + /// # fn example() -> Result<(), io::Error> { + /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor"); + /// + /// let mut records = vec![]; + /// cursor.for_byte_record_with_terminator(b'\x00', |record| { + /// records.push(record.to_vec()); + /// Ok(true) + /// })?; + /// assert_eq!(records.len(), 3); + /// assert_eq!(records[0], B(b"lorem\x00")); + /// assert_eq!(records[1], B("ipsum\x00")); + /// assert_eq!(records[2], B("dolor")); + /// # Ok(()) }; example().unwrap() + /// ``` + fn for_byte_record_with_terminator<F>( + mut self, + terminator: u8, + mut for_each_record: F, + ) -> io::Result<()> + where + Self: Sized, + F: FnMut(&[u8]) -> io::Result<bool>, + { + let mut bytes = vec![]; + let mut res = Ok(()); + let mut consumed = 0; + 'outer: loop { + // Lend out complete record slices from our buffer + { + let mut buf = self.fill_buf()?; + while let Some(index) = buf.find_byte(terminator) { + let (record, rest) = buf.split_at(index + 1); + buf = rest; + consumed += record.len(); + match for_each_record(&record) { + Ok(false) => break 'outer, + Err(err) => { + res = Err(err); + break 'outer; + } + _ => (), + } + } + + // Copy the final record fragment to our local buffer. This + // saves read_until() from re-scanning a buffer we know + // contains no remaining terminators. + bytes.extend_from_slice(&buf); + consumed += buf.len(); + } + + self.consume(consumed); + consumed = 0; + + // N.B. read_until uses a different version of memchr that may + // be slower than the memchr crate that bstr uses. However, this + // should only run for a fairly small number of records, assuming a + // decent buffer size. + self.read_until(terminator, &mut bytes)?; + if bytes.is_empty() || !for_each_record(&bytes)? 
{ + break; + } + bytes.clear(); + } + self.consume(consumed); + res + } +} + +impl<B: io::BufRead> BufReadExt for B {} + +/// An iterator over lines from an instance of +/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html). +/// +/// This iterator is generally created by calling the +/// [`byte_lines`](trait.BufReadExt.html#method.byte_lines) +/// method on the +/// [`BufReadExt`](trait.BufReadExt.html) +/// trait. +#[derive(Debug)] +pub struct ByteLines<B> { + buf: B, +} + +/// An iterator over records from an instance of +/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html). +/// +/// A byte record is any sequence of bytes terminated by a particular byte +/// chosen by the caller. For example, NUL separated byte strings are said to +/// be NUL-terminated byte records. +/// +/// This iterator is generally created by calling the +/// [`byte_records`](trait.BufReadExt.html#method.byte_records) +/// method on the +/// [`BufReadExt`](trait.BufReadExt.html) +/// trait. 
+#[derive(Debug)] +pub struct ByteRecords<B> { + buf: B, + terminator: u8, +} + +impl<B: io::BufRead> Iterator for ByteLines<B> { + type Item = io::Result<Vec<u8>>; + + fn next(&mut self) -> Option<io::Result<Vec<u8>>> { + let mut bytes = vec![]; + match self.buf.read_until(b'\n', &mut bytes) { + Err(e) => Some(Err(e)), + Ok(0) => None, + Ok(_) => { + trim_line(&mut bytes); + Some(Ok(bytes)) + } + } + } +} + +impl<B: io::BufRead> Iterator for ByteRecords<B> { + type Item = io::Result<Vec<u8>>; + + fn next(&mut self) -> Option<io::Result<Vec<u8>>> { + let mut bytes = vec![]; + match self.buf.read_until(self.terminator, &mut bytes) { + Err(e) => Some(Err(e)), + Ok(0) => None, + Ok(_) => { + trim_record(&mut bytes, self.terminator); + Some(Ok(bytes)) + } + } + } +} + +fn trim_line(line: &mut Vec<u8>) { + if line.last_byte() == Some(b'\n') { + line.pop_byte(); + if line.last_byte() == Some(b'\r') { + line.pop_byte(); + } + } +} + +fn trim_line_slice(mut line: &[u8]) -> &[u8] { + if line.last_byte() == Some(b'\n') { + line = &line[..line.len() - 1]; + if line.last_byte() == Some(b'\r') { + line = &line[..line.len() - 1]; + } + } + line +} + +fn trim_record(record: &mut Vec<u8>, terminator: u8) { + if record.last_byte() == Some(terminator) { + record.pop_byte(); + } +} + +fn trim_record_slice(mut record: &[u8], terminator: u8) -> &[u8] { + if record.last_byte() == Some(terminator) { + record = &record[..record.len() - 1]; + } + record +} + +#[cfg(test)] +mod tests { + use super::BufReadExt; + use bstring::BString; + + fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> { + let mut lines = vec![]; + slice + .as_ref() + .for_byte_line(|line| { + lines.push(BString::from(line.to_vec())); + Ok(true) + }) + .unwrap(); + lines + } + + fn collect_lines_term<B: AsRef<[u8]>>(slice: B) -> Vec<BString> { + let mut lines = vec![]; + slice + .as_ref() + .for_byte_line_with_terminator(|line| { + lines.push(BString::from(line.to_vec())); + Ok(true) + }) + .unwrap(); + lines + } 
+ + #[test] + fn lines_without_terminator() { + assert_eq!(collect_lines(""), Vec::<BString>::new()); + + assert_eq!(collect_lines("\n"), vec![""]); + assert_eq!(collect_lines("\n\n"), vec!["", ""]); + assert_eq!(collect_lines("a\nb\n"), vec!["a", "b"]); + assert_eq!(collect_lines("a\nb"), vec!["a", "b"]); + assert_eq!(collect_lines("abc\nxyz\n"), vec!["abc", "xyz"]); + assert_eq!(collect_lines("abc\nxyz"), vec!["abc", "xyz"]); + + assert_eq!(collect_lines("\r\n"), vec![""]); + assert_eq!(collect_lines("\r\n\r\n"), vec!["", ""]); + assert_eq!(collect_lines("a\r\nb\r\n"), vec!["a", "b"]); + assert_eq!(collect_lines("a\r\nb"), vec!["a", "b"]); + assert_eq!(collect_lines("abc\r\nxyz\r\n"), vec!["abc", "xyz"]); + assert_eq!(collect_lines("abc\r\nxyz"), vec!["abc", "xyz"]); + + assert_eq!(collect_lines("abc\rxyz"), vec!["abc\rxyz"]); + } + + #[test] + fn lines_with_terminator() { + assert_eq!(collect_lines_term(""), Vec::<BString>::new()); + + assert_eq!(collect_lines_term("\n"), vec!["\n"]); + assert_eq!(collect_lines_term("\n\n"), vec!["\n", "\n"]); + assert_eq!(collect_lines_term("a\nb\n"), vec!["a\n", "b\n"]); + assert_eq!(collect_lines_term("a\nb"), vec!["a\n", "b"]); + assert_eq!(collect_lines_term("abc\nxyz\n"), vec!["abc\n", "xyz\n"]); + assert_eq!(collect_lines_term("abc\nxyz"), vec!["abc\n", "xyz"]); + + assert_eq!(collect_lines_term("\r\n"), vec!["\r\n"]); + assert_eq!(collect_lines_term("\r\n\r\n"), vec!["\r\n", "\r\n"]); + assert_eq!(collect_lines_term("a\r\nb\r\n"), vec!["a\r\n", "b\r\n"]); + assert_eq!(collect_lines_term("a\r\nb"), vec!["a\r\n", "b"]); + assert_eq!( + collect_lines_term("abc\r\nxyz\r\n"), + vec!["abc\r\n", "xyz\r\n"] + ); + assert_eq!(collect_lines_term("abc\r\nxyz"), vec!["abc\r\n", "xyz"]); + + assert_eq!(collect_lines_term("abc\rxyz"), vec!["abc\rxyz"]); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c240cd1 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,456 @@ +/*! +An experimental byte string library. 
+ +Byte strings are just like standard Unicode strings with one very important +difference: byte strings are only *conventionally* UTF-8 while Rust's standard +Unicode strings are *guaranteed* to be valid UTF-8. The primary motivation for +byte strings is for handling arbitrary bytes that are mostly UTF-8. + +# Overview + +This crate provides two important traits that provide string oriented methods +on `&[u8]` and `Vec<u8>` types: + +* [`ByteSlice`](trait.ByteSlice.html) extends the `[u8]` type with additional + string oriented methods. +* [`ByteVec`](trait.ByteVec.html) extends the `Vec<u8>` type with additional + string oriented methods. + +Additionally, this crate provides two concrete byte string types that deref to +`[u8]` and `Vec<u8>`. These are useful for storing byte string types, and come +with convenient `std::fmt::Debug` implementations: + +* [`BStr`](struct.BStr.html) is a byte string slice, analogous to `str`. +* [`BString`](struct.BString.html) is an owned growable byte string buffer, + analogous to `String`. + +Additionally, the free function [`B`](fn.B.html) serves as a convenient short +hand for writing byte string literals. + +# Quick examples + +Byte strings build on the existing APIs for `Vec<u8>` and `&[u8]`, with +additional string oriented methods. Operations such as iterating over +graphemes, searching for substrings, replacing substrings, trimming and case +conversion are examples of things not provided on the standard library `&[u8]` +APIs but are provided by this crate. 
For example, this code iterates over all +occurrences of a substring: + +``` +use bstr::ByteSlice; + +let s = b"foo bar foo foo quux foo"; + +let mut matches = vec![]; +for start in s.find_iter("foo") { + matches.push(start); +} +assert_eq!(matches, [0, 8, 12, 21]); +``` + +Here's another example showing how to do a search and replace (and also showing +use of the `B` function): + +``` +use bstr::{B, ByteSlice}; + +let old = B("foo ☃☃☃ foo foo quux foo"); +let new = old.replace("foo", "hello"); +assert_eq!(new, B("hello ☃☃☃ hello hello quux hello")); +``` + +And here's an example that shows case conversion, even in the presence of +invalid UTF-8: + +``` +use bstr::{ByteSlice, ByteVec}; + +let mut lower = Vec::from("hello β"); +lower[0] = b'\xFF'; +// lowercase β is uppercased to Β +assert_eq!(lower.to_uppercase(), b"\xFFELLO \xCE\x92"); +``` + +# Convenient debug representation + +When working with byte strings, it is often useful to be able to print them +as if they were byte strings and not sequences of integers. While this crate +cannot affect the `std::fmt::Debug` implementations for `[u8]` and `Vec<u8>`, +this crate does provide the `BStr` and `BString` types which have convenient +`std::fmt::Debug` implementations. + +For example, this + +``` +use bstr::ByteSlice; + +let mut bytes = Vec::from("hello β"); +bytes[0] = b'\xFF'; + +println!("{:?}", bytes.as_bstr()); +``` + +will output `"\xFFello β"`. + +This example works because the +[`ByteSlice::as_bstr`](trait.ByteSlice.html#method.as_bstr) +method converts any `&[u8]` to a `&BStr`. + +# When should I use byte strings? + +This library is somewhat of an experiment that reflects my hypothesis that +UTF-8 by convention is a better trade off in some circumstances than guaranteed +UTF-8. It's possible, perhaps even likely, that this is a niche concern for +folks working closely with core text primitives. + +The first time this idea hit me was in the implementation of Rust's regex +engine.
In particular, very little of the internal implementation cares at all +about searching valid UTF-8 encoded strings. Indeed, internally, the +implementation converts `&str` from the API to `&[u8]` fairly quickly and +just deals with raw bytes. UTF-8 match boundaries are then guaranteed by the +finite state machine itself rather than any specific string type. This makes it +possible to not only run regexes on `&str` values, but also on `&[u8]` values. + +Why would you ever want to run a regex on a `&[u8]` though? Well, `&[u8]` is +the fundamental way at which one reads data from all sorts of streams, via the +standard library's [`Read`](https://doc.rust-lang.org/std/io/trait.Read.html) +trait. In particular, there is no platform independent way to determine whether +what you're reading from is some binary file or a human readable text file. +Therefore, if you're writing a program to search files, you probably need to +deal with `&[u8]` directly unless you're okay with first converting it to a +`&str` and dropping any bytes that aren't valid UTF-8. (Or otherwise determine +the encoding---which is often impractical---and perform a transcoding step.) +Often, the simplest and most robust way to approach this is to simply treat the +contents of a file as if it were mostly valid UTF-8 and pass through invalid +UTF-8 untouched. This may not be the most correct approach though! + +One case in particular exacerbates these issues, and that's memory mapping +a file. When you memory map a file, that file may be gigabytes big, but all +you get is a `&[u8]`. Converting that to a `&str` all in one go is generally +not a good idea because of the costs associated with doing so, and also +because it generally causes one to do two passes over the data instead of +one, which is quite undesirable. It is of course usually possible to do it an +incremental way by only parsing chunks at a time, but this is often complex to +do or impractical. 
For example, many regex engines only accept one contiguous +sequence of bytes at a time with no way to perform incremental matching. + +In summary, the conventional UTF-8 byte strings provided by this library are an +experiment. They are definitely useful in some limited circumstances, but how +useful they are more broadly isn't clear yet. + +# `bstr` in public APIs + +Since this library is still experimental, you should not use it in the public +API of your crates until it hits `1.0` (unless you're OK with tracking +breaking releases of `bstr`). + +In general, it should be possible to avoid putting anything in this crate into +your public APIs. Namely, you should never need to use the `ByteSlice` or +`ByteVec` traits as bounds on public APIs, since their only purpose is to +extend the methods on the concrete types `[u8]` and `Vec<u8>`, respectively. +Similarly, it should not be necessary to put either the `BStr` or `BString` +types into public APIs. If you want to use them internally, then they can +be converted to/from `[u8]`/`Vec<u8>` as needed. + +# Differences with standard strings + +The primary difference between `[u8]` and `str` is that the former is +conventionally UTF-8 while the latter is guaranteed to be UTF-8. The phrase +"conventionally UTF-8" means that a `[u8]` may contain bytes that do not form +a valid UTF-8 sequence, but operations defined on the type in this crate are +generally most useful on valid UTF-8 sequences. For example, iterating over +Unicode codepoints or grapheme clusters is an operation that is only defined +on valid UTF-8. Therefore, when invalid UTF-8 is encountered, the Unicode +replacement codepoint is substituted. Thus, a byte string that is not UTF-8 at +all is of limited utility when using this crate. + +However, not all operations on byte strings are specifically Unicode aware. For +example, substring search has no specific Unicode semantics ascribed to it.
It +works just as well for byte strings that are completely valid UTF-8 as for byte +strings that contain no valid UTF-8 at all. Similarly for replacements and +various other operations that do not need any Unicode specific tailoring. + +Aside from the difference in how UTF-8 is handled, the APIs between `[u8]` and +`str` (and `Vec<u8>` and `String`) are intentionally very similar, including +maintaining the same behavior for corner cases in things like substring +splitting. There are, however, some differences: + +* Substring search is not done with `matches`, but instead, `find_iter`. + In general, this crate does not define any generic + [`Pattern`](https://doc.rust-lang.org/std/str/pattern/trait.Pattern.html) + infrastructure, and instead prefers adding new methods for different + argument types. For example, `matches` can search by a `char` or a `&str`, + where as `find_iter` can only search by a byte string. `find_char` can be + used for searching by a `char`. +* Since `SliceConcatExt` in the standard library is unstable, it is not + possible to reuse that to implement `join` and `concat` methods. Instead, + [`join`](fn.join.html) and [`concat`](fn.concat.html) are provided as free + functions that perform a similar task. +* This library bundles in a few more Unicode operations, such as grapheme, + word and sentence iterators. More operations, such as normalization and + case folding, may be provided in the future. +* Some `String`/`str` APIs will panic if a particular index was not on a valid + UTF-8 code unit sequence boundary. Conversely, no such checking is performed + in this crate, as is consistent with treating byte strings as a sequence of + bytes. This means callers are responsible for maintaining a UTF-8 invariant + if that's important. +* Some routines provided by this crate, such as `starts_with_str`, have a + `_str` suffix to differentiate them from similar routines already defined + on the `[u8]` type. 
The difference is that `starts_with` requires its + parameter to be a `&[u8]`, whereas `starts_with_str` permits its parameter + to be anything that implements `AsRef<[u8]>`, which is more flexible. This + means you can write `bytes.starts_with_str("☃")` instead of + `bytes.starts_with("☃".as_bytes())`. + +Otherwise, you should find most of the APIs between this crate and the standard +library string APIs to be very similar, if not identical. + +# Handling of invalid UTF-8 + +Since byte strings are only *conventionally* UTF-8, there is no guarantee +that byte strings contain valid UTF-8. Indeed, it is perfectly legal for a +byte string to contain arbitrary bytes. However, since this library defines +a *string* type, it provides many operations specified by Unicode. These +operations are typically only defined over codepoints, and thus have no real +meaning on bytes that are invalid UTF-8 because they do not map to a particular +codepoint. + +For this reason, whenever operations defined only on codepoints are used, this +library will automatically convert invalid UTF-8 to the Unicode replacement +codepoint, `U+FFFD`, which looks like this: `�`. For example, an +[iterator over codepoints](struct.Chars.html) will yield a Unicode +replacement codepoint whenever it comes across bytes that are not valid UTF-8: + +``` +use bstr::ByteSlice; + +let bs = b"a\xFF\xFFz"; +let chars: Vec<char> = bs.chars().collect(); +assert_eq!(vec!['a', '\u{FFFD}', '\u{FFFD}', 'z'], chars); +``` + +There are a few ways in which invalid bytes can be substituted with a Unicode +replacement codepoint. One way, not used by this crate, is to replace every +individual invalid byte with a single replacement codepoint. In contrast, the +approach this crate uses is called the "substitution of maximal subparts," as +specified by the Unicode Standard (Chapter 3, Section 9). (This approach is +also used by [W3C's Encoding Standard](https://www.w3.org/TR/encoding/).)
In +this strategy, a replacement codepoint is inserted whenever a byte is found +that cannot possibly lead to a valid UTF-8 code unit sequence. If there were +previous bytes that represented a *prefix* of a well-formed UTF-8 code unit +sequence, then all of those bytes (up to 3) are substituted with a single +replacement codepoint. For example: + +``` +use bstr::ByteSlice; + +let bs = b"a\xF0\x9F\x87z"; +let chars: Vec<char> = bs.chars().collect(); +// The bytes \xF0\x9F\x87 could lead to a valid UTF-8 sequence, but 3 of them +// on their own are invalid. Only one replacement codepoint is substituted, +// which demonstrates the "substitution of maximal subparts" strategy. +assert_eq!(vec!['a', '\u{FFFD}', 'z'], chars); +``` + +If you do need to access the raw bytes for some reason in an iterator like +`Chars`, then you should use the iterator's "indices" variant, which gives +the byte offsets containing the invalid UTF-8 bytes that were substituted with +the replacement codepoint. For example: + +``` +use bstr::{B, ByteSlice}; + +let bs = b"a\xE2\x98z"; +let chars: Vec<(usize, usize, char)> = bs.char_indices().collect(); +// Even though the replacement codepoint is encoded as 3 bytes itself, the +// byte range given here is only two bytes, corresponding to the original +// raw bytes. +assert_eq!(vec![(0, 1, 'a'), (1, 3, '\u{FFFD}'), (3, 4, 'z')], chars); + +// Thus, getting the original raw bytes is as simple as slicing the original +// byte string: +let chars: Vec<&[u8]> = bs.char_indices().map(|(s, e, _)| &bs[s..e]).collect(); +assert_eq!(vec![B("a"), B(b"\xE2\x98"), B("z")], chars); +``` + +# File paths and OS strings + +One of the premiere features of Rust's standard library is how it handles file +paths. In particular, it makes it very hard to write incorrect code while +simultaneously providing a correct cross platform abstraction for manipulating +file paths. 
The key challenge that one faces with file paths across platforms +is derived from the following observations: + +* On most Unix-like systems, file paths are an arbitrary sequence of bytes. +* On Windows, file paths are an arbitrary sequence of 16-bit integers. + +(In both cases, certain sequences aren't allowed. For example a `NUL` byte is +not allowed in either case. But we can ignore this for the purposes of this +section.) + +Byte strings, like the ones provided in this crate, line up really well with +file paths on Unix like systems, which are themselves just arbitrary sequences +of bytes. It turns out that if you treat them as "mostly UTF-8," then things +work out pretty well. On the contrary, byte strings _don't_ really work +that well on Windows because it's not possible to correctly roundtrip file +paths between 16-bit integers and something that looks like UTF-8 _without_ +explicitly defining an encoding to do this for you, which is anathema to byte +strings, which are just bytes. + +Rust's standard library elegantly solves this problem by specifying an +internal encoding for file paths that's only used on Windows called +[WTF-8](https://simonsapin.github.io/wtf-8/). Its key properties are that they +permit losslessly roundtripping file paths on Windows by extending UTF-8 to +support an encoding of surrogate codepoints, while simultaneously supporting +zero-cost conversion from Rust's Unicode strings to file paths. (Since UTF-8 is +a proper subset of WTF-8.) + +The fundamental point at which the above strategy fails is when you want to +treat file paths as things that look like strings in a zero cost way. In most +cases, this is actually the wrong thing to do, but some cases call for it, +for example, glob or regex matching on file paths. This is because WTF-8 is +treated as an internal implementation detail, and there is no way to access +those bytes via a public API. Therefore, such consumers are limited in what +they can do: + +1. 
One could re-implement WTF-8 and re-encode file paths on Windows to WTF-8 + by accessing their underlying 16-bit integer representation. Unfortunately, + this isn't zero cost (it introduces a second WTF-8 decoding step) and it's + not clear this is a good thing to do, since WTF-8 should ideally remain an + internal implementation detail. +2. One could instead declare that they will not handle paths on Windows that + are not valid UTF-16, and return an error when one is encountered. +3. Like (2), but instead of returning an error, lossily decode the file path + on Windows that isn't valid UTF-16 into UTF-16 by replacing invalid bytes + with the Unicode replacement codepoint. + +While this library may provide facilities for (1) in the future, currently, +this library only provides facilities for (2) and (3). In particular, a suite +of conversion functions are provided that permit converting between byte +strings, OS strings and file paths. For owned byte strings, they are: + +* [`ByteVec::from_os_string`](trait.ByteVec.html#method.from_os_string) +* [`ByteVec::from_os_str_lossy`](trait.ByteVec.html#method.from_os_str_lossy) +* [`ByteVec::from_path_buf`](trait.ByteVec.html#method.from_path_buf) +* [`ByteVec::from_path_lossy`](trait.ByteVec.html#method.from_path_lossy) +* [`ByteVec::into_os_string`](trait.ByteVec.html#method.into_os_string) +* [`ByteVec::into_os_string_lossy`](trait.ByteVec.html#method.into_os_string_lossy) +* [`ByteVec::into_path_buf`](trait.ByteVec.html#method.into_path_buf) +* [`ByteVec::into_path_buf_lossy`](trait.ByteVec.html#method.into_path_buf_lossy) + +For byte string slices, they are: + +* [`ByteSlice::from_os_str`](trait.ByteSlice.html#method.from_os_str) +* [`ByteSlice::from_path`](trait.ByteSlice.html#method.from_path) +* [`ByteSlice::to_os_str`](trait.ByteSlice.html#method.to_os_str) +* [`ByteSlice::to_os_str_lossy`](trait.ByteSlice.html#method.to_os_str_lossy) +* [`ByteSlice::to_path`](trait.ByteSlice.html#method.to_path) +* 
[`ByteSlice::to_path_lossy`](trait.ByteSlice.html#method.to_path_lossy) + +On Unix, all of these conversions are rigorously zero cost, which gives one +a way to ergonomically deal with raw file paths exactly as they are using +normal string-related functions. On Windows, these conversion routines perform +a UTF-8 check and either return an error or lossily decode the file path +into valid UTF-8, depending on which function you use. This means that you +cannot roundtrip all file paths on Windows correctly using these conversion +routines. However, this may be an acceptable downside since such file paths +are exceptionally rare. Moreover, roundtripping isn't always necessary, for +example, if all you're doing is filtering based on file paths. + +The reason why using byte strings for this is potentially superior to the +standard library's approach is that a lot of Rust code is already lossily +converting file paths to Rust's Unicode strings, which are required to be valid +UTF-8, and thus contain latent bugs on Unix where paths with invalid UTF-8 are +not terribly uncommon. If you instead use byte strings, then you're guaranteed +to write correct code for Unix, at the cost of getting a corner case wrong on +Windows.
+*/ + +#![cfg_attr(not(feature = "std"), no_std)] +#![allow(dead_code)] + +#[cfg(feature = "std")] +extern crate core; + +#[cfg(feature = "unicode")] +#[macro_use] +extern crate lazy_static; +extern crate memchr; +#[cfg(test)] +#[macro_use] +extern crate quickcheck; +#[cfg(feature = "unicode")] +extern crate regex_automata; +#[cfg(feature = "serde1-nostd")] +extern crate serde; +#[cfg(test)] +extern crate ucd_parse; + +pub use bstr::BStr; +#[cfg(feature = "std")] +pub use bstring::BString; +pub use ext_slice::{ + ByteSlice, Bytes, Fields, FieldsWith, Find, FindReverse, Finder, + FinderReverse, Lines, LinesWithTerminator, Split, SplitN, SplitNReverse, + SplitReverse, B, +}; +#[cfg(feature = "std")] +pub use ext_vec::{concat, join, ByteVec, DrainBytes, FromUtf8Error}; +#[cfg(feature = "unicode")] +pub use unicode::{ + GraphemeIndices, Graphemes, SentenceIndices, Sentences, WordIndices, + Words, WordsWithBreakIndices, WordsWithBreaks, +}; +pub use utf8::{ + decode as decode_utf8, decode_last as decode_last_utf8, CharIndices, + Chars, Utf8Chunk, Utf8Chunks, Utf8Error, +}; + +mod ascii; +mod bstr; +#[cfg(feature = "std")] +mod bstring; +mod byteset; +mod cow; +mod ext_slice; +#[cfg(feature = "std")] +mod ext_vec; +mod impls; +#[cfg(feature = "std")] +pub mod io; +mod search; +#[cfg(test)] +mod tests; +#[cfg(feature = "unicode")] +mod unicode; +mod utf8; + +#[cfg(test)] +mod apitests { + use bstr::BStr; + use bstring::BString; + use ext_slice::{Finder, FinderReverse}; + + #[test] + fn oibits() { + use std::panic::{RefUnwindSafe, UnwindSafe}; + + fn assert_send<T: Send>() {} + fn assert_sync<T: Sync>() {} + fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {} + + assert_send::<&BStr>(); + assert_sync::<&BStr>(); + assert_unwind_safe::<&BStr>(); + assert_send::<BString>(); + assert_sync::<BString>(); + assert_unwind_safe::<BString>(); + + assert_send::<Finder>(); + assert_sync::<Finder>(); + assert_unwind_safe::<Finder>(); + assert_send::<FinderReverse>(); + 
assert_sync::<FinderReverse>(); + assert_unwind_safe::<FinderReverse>(); + } +} diff --git a/src/search/byte_frequencies.rs b/src/search/byte_frequencies.rs new file mode 100644 index 0000000..c313b62 --- /dev/null +++ b/src/search/byte_frequencies.rs @@ -0,0 +1,258 @@ +pub const BYTE_FREQUENCIES: [u8; 256] = [ + 55, // '\x00' + 52, // '\x01' + 51, // '\x02' + 50, // '\x03' + 49, // '\x04' + 48, // '\x05' + 47, // '\x06' + 46, // '\x07' + 45, // '\x08' + 103, // '\t' + 242, // '\n' + 66, // '\x0b' + 67, // '\x0c' + 229, // '\r' + 44, // '\x0e' + 43, // '\x0f' + 42, // '\x10' + 41, // '\x11' + 40, // '\x12' + 39, // '\x13' + 38, // '\x14' + 37, // '\x15' + 36, // '\x16' + 35, // '\x17' + 34, // '\x18' + 33, // '\x19' + 56, // '\x1a' + 32, // '\x1b' + 31, // '\x1c' + 30, // '\x1d' + 29, // '\x1e' + 28, // '\x1f' + 255, // ' ' + 148, // '!' + 164, // '"' + 149, // '#' + 136, // '$' + 160, // '%' + 155, // '&' + 173, // "'" + 221, // '(' + 222, // ')' + 134, // '*' + 122, // '+' + 232, // ',' + 202, // '-' + 215, // '.' + 224, // '/' + 208, // '0' + 220, // '1' + 204, // '2' + 187, // '3' + 183, // '4' + 179, // '5' + 177, // '6' + 168, // '7' + 178, // '8' + 200, // '9' + 226, // ':' + 195, // ';' + 154, // '<' + 184, // '=' + 174, // '>' + 126, // '?' 
+ 120, // '@' + 191, // 'A' + 157, // 'B' + 194, // 'C' + 170, // 'D' + 189, // 'E' + 162, // 'F' + 161, // 'G' + 150, // 'H' + 193, // 'I' + 142, // 'J' + 137, // 'K' + 171, // 'L' + 176, // 'M' + 185, // 'N' + 167, // 'O' + 186, // 'P' + 112, // 'Q' + 175, // 'R' + 192, // 'S' + 188, // 'T' + 156, // 'U' + 140, // 'V' + 143, // 'W' + 123, // 'X' + 133, // 'Y' + 128, // 'Z' + 147, // '[' + 138, // '\\' + 146, // ']' + 114, // '^' + 223, // '_' + 151, // '`' + 249, // 'a' + 216, // 'b' + 238, // 'c' + 236, // 'd' + 253, // 'e' + 227, // 'f' + 218, // 'g' + 230, // 'h' + 247, // 'i' + 135, // 'j' + 180, // 'k' + 241, // 'l' + 233, // 'm' + 246, // 'n' + 244, // 'o' + 231, // 'p' + 139, // 'q' + 245, // 'r' + 243, // 's' + 251, // 't' + 235, // 'u' + 201, // 'v' + 196, // 'w' + 240, // 'x' + 214, // 'y' + 152, // 'z' + 182, // '{' + 205, // '|' + 181, // '}' + 127, // '~' + 27, // '\x7f' + 212, // '\x80' + 211, // '\x81' + 210, // '\x82' + 213, // '\x83' + 228, // '\x84' + 197, // '\x85' + 169, // '\x86' + 159, // '\x87' + 131, // '\x88' + 172, // '\x89' + 105, // '\x8a' + 80, // '\x8b' + 98, // '\x8c' + 96, // '\x8d' + 97, // '\x8e' + 81, // '\x8f' + 207, // '\x90' + 145, // '\x91' + 116, // '\x92' + 115, // '\x93' + 144, // '\x94' + 130, // '\x95' + 153, // '\x96' + 121, // '\x97' + 107, // '\x98' + 132, // '\x99' + 109, // '\x9a' + 110, // '\x9b' + 124, // '\x9c' + 111, // '\x9d' + 82, // '\x9e' + 108, // '\x9f' + 118, // '\xa0' + 141, // '¡' + 113, // '¢' + 129, // '£' + 119, // '¤' + 125, // '¥' + 165, // '¦' + 117, // '§' + 92, // '¨' + 106, // '©' + 83, // 'ª' + 72, // '«' + 99, // '¬' + 93, // '\xad' + 65, // '®' + 79, // '¯' + 166, // '°' + 237, // '±' + 163, // '²' + 199, // '³' + 190, // '´' + 225, // 'µ' + 209, // '¶' + 203, // '·' + 198, // '¸' + 217, // '¹' + 219, // 'º' + 206, // '»' + 234, // '¼' + 248, // '½' + 158, // '¾' + 239, // '¿' + 255, // 'À' + 255, // 'Á' + 255, // 'Â' + 255, // 'Ã' + 255, // 'Ä' + 255, // 'Å' + 255, // 'Æ' + 255, // 'Ç' + 
255, // 'È' + 255, // 'É' + 255, // 'Ê' + 255, // 'Ë' + 255, // 'Ì' + 255, // 'Í' + 255, // 'Î' + 255, // 'Ï' + 255, // 'Ð' + 255, // 'Ñ' + 255, // 'Ò' + 255, // 'Ó' + 255, // 'Ô' + 255, // 'Õ' + 255, // 'Ö' + 255, // '×' + 255, // 'Ø' + 255, // 'Ù' + 255, // 'Ú' + 255, // 'Û' + 255, // 'Ü' + 255, // 'Ý' + 255, // 'Þ' + 255, // 'ß' + 255, // 'à' + 255, // 'á' + 255, // 'â' + 255, // 'ã' + 255, // 'ä' + 255, // 'å' + 255, // 'æ' + 255, // 'ç' + 255, // 'è' + 255, // 'é' + 255, // 'ê' + 255, // 'ë' + 255, // 'ì' + 255, // 'í' + 255, // 'î' + 255, // 'ï' + 255, // 'ð' + 255, // 'ñ' + 255, // 'ò' + 255, // 'ó' + 255, // 'ô' + 255, // 'õ' + 255, // 'ö' + 255, // '÷' + 255, // 'ø' + 255, // 'ù' + 255, // 'ú' + 255, // 'û' + 255, // 'ü' + 255, // 'ý' + 255, // 'þ' + 255, // 'ÿ' +]; diff --git a/src/search/mod.rs b/src/search/mod.rs new file mode 100644 index 0000000..a0d1b45 --- /dev/null +++ b/src/search/mod.rs @@ -0,0 +1,8 @@ +pub use self::prefilter::PrefilterState; +pub use self::twoway::TwoWay; + +mod byte_frequencies; +mod prefilter; +#[cfg(test)] +mod tests; +mod twoway; diff --git a/src/search/prefilter.rs b/src/search/prefilter.rs new file mode 100644 index 0000000..00e6acf --- /dev/null +++ b/src/search/prefilter.rs @@ -0,0 +1,424 @@ +use core::mem; + +use ext_slice::ByteSlice; +use search::byte_frequencies::BYTE_FREQUENCIES; + +/// PrefilterState tracks state associated with the effectiveness of a +/// prefilter. It is used to track how many bytes, on average, are skipped by +/// the prefilter. If this average dips below a certain threshold over time, +/// then the state renders the prefilter inert and stops using it. +/// +/// A prefilter state should be created for each search. (Where creating an +/// iterator via, e.g., `find_iter`, is treated as a single search.) +#[derive(Clone, Debug)] +pub struct PrefilterState { + /// The number of skips that has been executed. + skips: usize, + /// The total number of bytes that have been skipped. 
+ skipped: usize, + /// The maximum length of a match. This is used to help determine how many + /// bytes on average should be skipped in order for a prefilter to be + /// effective. + max_match_len: usize, + /// Once this heuristic has been deemed ineffective, it will be inert + /// throughout the rest of its lifetime. This serves as a cheap way to + /// check inertness. + inert: bool, +} + +impl PrefilterState { + /// The minimum number of skip attempts to try before considering whether + /// a prefilter is effective or not. + const MIN_SKIPS: usize = 50; + + /// The minimum amount of bytes that skipping must average. + /// + /// This value was chosen based on varying it and checking the bstr/find/ + /// microbenchmarks. In particular, this can impact the + /// pathological/repeated-{huge,small} benchmarks quite a bit if it's + /// set too low. + const MIN_SKIP_BYTES: usize = 8; + + /// Create a fresh prefilter state. + pub fn new(max_match_len: usize) -> PrefilterState { + if max_match_len == 0 { + return PrefilterState::inert(); + } + PrefilterState { skips: 0, skipped: 0, max_match_len, inert: false } + } + + /// Create a fresh prefilter state that is always inert. + fn inert() -> PrefilterState { + PrefilterState { skips: 0, skipped: 0, max_match_len: 0, inert: true } + } + + /// Update this state with the number of bytes skipped on the last + /// invocation of the prefilter. + #[inline] + pub fn update(&mut self, skipped: usize) { + self.skips += 1; + self.skipped += skipped; + } + + /// Return true if and only if this state indicates that a prefilter is + /// still effective. + #[inline] + pub fn is_effective(&mut self) -> bool { + if self.inert { + return false; + } + if self.skips < PrefilterState::MIN_SKIPS { + return true; + } + if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips { + return true; + } + + // We're inert. + self.inert = true; + false + } +} + +/// A heuristic frequency based prefilter for searching a single needle. 
+/// +/// This prefilter attempts to pick out the byte in a needle that is predicted +/// to occur least frequently, and search for that using fast vectorized +/// routines. If a rare enough byte could not be found, then this prefilter's +/// constructors will return `None`. +/// +/// This can be combined with `PrefilterState` to dynamically render this +/// prefilter inert if it proves to ineffective. +#[derive(Clone, Debug)] +pub struct Freqy { + /// Whether this prefilter should be used or not. + inert: bool, + /// The length of the needle we're searching for. + needle_len: usize, + /// The rarest byte in the needle, according to pre-computed frequency + /// analysis. + rare1: u8, + /// The leftmost offset of the rarest byte in the needle. + rare1i: usize, + /// The second rarest byte in the needle, according to pre-computed + /// frequency analysis. (This may be equivalent to the rarest byte.) + /// + /// The second rarest byte is used as a type of guard for quickly detecting + /// a mismatch after memchr locates an instance of the rarest byte. This + /// is a hedge against pathological cases where the pre-computed frequency + /// analysis may be off. (But of course, does not prevent *all* + /// pathological cases.) + rare2: u8, + /// The leftmost offset of the second rarest byte in the needle. + rare2i: usize, +} + +impl Freqy { + /// The maximum frequency rank permitted. If the rarest byte in the needle + /// has a frequency rank above this value, then Freqy is not used. + const MAX_RANK: usize = 200; + + /// Return a fresh prefilter state that can be used with this prefilter. A + /// prefilter state is used to track the effectiveness of a prefilter for + /// speeding up searches. Therefore, the prefilter state should generally + /// be reused on subsequent searches (such as in an iterator). For searches + /// on a different haystack, then a new prefilter state should be used. 
+ pub fn prefilter_state(&self) -> PrefilterState { + if self.inert { + PrefilterState::inert() + } else { + PrefilterState::new(self.needle_len) + } + } + + /// Returns a valid but inert prefilter. This is valid for both the forward + /// and reverse direction. + /// + /// It is never correct to use an inert prefilter. The results of finding + /// the next (or previous) candidate are unspecified. + fn inert() -> Freqy { + Freqy { + inert: true, + needle_len: 0, + rare1: 0, + rare1i: 0, + rare2: 0, + rare2i: 0, + } + } + + /// Return search info for the given needle in the forward direction. + pub fn forward(needle: &[u8]) -> Freqy { + if needle.is_empty() { + return Freqy::inert(); + } + + // Find the rarest two bytes. Try to make them distinct (but it's not + // required). + let (mut rare1, mut rare1i) = (needle[0], 0); + let (mut rare2, mut rare2i) = (needle[0], 0); + if needle.len() >= 2 { + rare2 = needle[1]; + rare2i = 1; + } + if Freqy::rank(rare2) < Freqy::rank(rare1) { + mem::swap(&mut rare1, &mut rare2); + mem::swap(&mut rare1i, &mut rare2i); + } + for (i, b) in needle.bytes().enumerate().skip(2) { + if Freqy::rank(b) < Freqy::rank(rare1) { + rare2 = rare1; + rare2i = rare1i; + rare1 = b; + rare1i = i; + } else if b != rare1 && Freqy::rank(b) < Freqy::rank(rare2) { + rare2 = b; + rare2i = i; + } + } + if Freqy::rank(rare1) > Freqy::MAX_RANK { + return Freqy::inert(); + } + let needle_len = needle.len(); + Freqy { inert: false, needle_len, rare1, rare1i, rare2, rare2i } + } + + /// Return search info for the given needle in the reverse direction. + pub fn reverse(needle: &[u8]) -> Freqy { + if needle.is_empty() { + return Freqy::inert(); + } + + // Find the rarest two bytes. Try to make them distinct (but it's not + // required). In reverse, the offsets correspond to the number of bytes + // from the end of the needle. So `0` is the last byte in the needle. 
+ let (mut rare1i, mut rare2i) = (0, 0); + if needle.len() >= 2 { + rare2i += 1; + } + let mut rare1 = needle[needle.len() - rare1i - 1]; + let mut rare2 = needle[needle.len() - rare2i - 1]; + if Freqy::rank(rare2) < Freqy::rank(rare1) { + mem::swap(&mut rare1, &mut rare2); + mem::swap(&mut rare1i, &mut rare2i); + } + for (i, b) in needle.bytes().rev().enumerate().skip(2) { + if Freqy::rank(b) < Freqy::rank(rare1) { + rare2 = rare1; + rare2i = rare1i; + rare1 = b; + rare1i = i; + } else if b != rare1 && Freqy::rank(b) < Freqy::rank(rare2) { + rare2 = b; + rare2i = i; + } + } + if Freqy::rank(rare1) > Freqy::MAX_RANK { + return Freqy::inert(); + } + let needle_len = needle.len(); + Freqy { inert: false, needle_len, rare1, rare1i, rare2, rare2i } + } + + /// Look for a possible occurrence of needle. The position returned + /// corresponds to the beginning of the occurrence, if one exists. + /// + /// Callers may assume that this never returns false negatives (i.e., it + /// never misses an actual occurrence), but must check that the returned + /// position corresponds to a match. That is, it can return false + /// positives. + /// + /// This should only be used when Freqy is constructed for forward + /// searching. + pub fn find_candidate( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + ) -> Option<usize> { + debug_assert!(!self.inert); + + let mut i = 0; + while prestate.is_effective() { + // Use a fast vectorized implementation to skip to the next + // occurrence of the rarest byte (heuristically chosen) in the + // needle. + i += match haystack[i..].find_byte(self.rare1) { + None => return None, + Some(found) => { + prestate.update(found); + found + } + }; + + // If we can't align our first match with the haystack, then a + // match is impossible. + if i < self.rare1i { + i += 1; + continue; + } + + // Align our rare2 byte with the haystack. A mismatch means that + // a match is impossible. 
+ let aligned_rare2i = i - self.rare1i + self.rare2i; + if haystack.get(aligned_rare2i) != Some(&self.rare2) { + i += 1; + continue; + } + + // We've done what we can. There might be a match here. + return Some(i - self.rare1i); + } + // The only way we get here is if we believe our skipping heuristic + // has become ineffective. We're allowed to return false positives, + // so return the position at which we advanced to, aligned to the + // haystack. + Some(i.saturating_sub(self.rare1i)) + } + + /// Look for a possible occurrence of needle, in reverse, starting from the + /// end of the given haystack. The position returned corresponds to the + /// position immediately after the end of the occurrence, if one exists. + /// + /// Callers may assume that this never returns false negatives (i.e., it + /// never misses an actual occurrence), but must check that the returned + /// position corresponds to a match. That is, it can return false + /// positives. + /// + /// This should only be used when Freqy is constructed for reverse + /// searching. + pub fn rfind_candidate( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + ) -> Option<usize> { + debug_assert!(!self.inert); + + let mut i = haystack.len(); + while prestate.is_effective() { + // Use a fast vectorized implementation to skip to the next + // occurrence of the rarest byte (heuristically chosen) in the + // needle. + i = match haystack[..i].rfind_byte(self.rare1) { + None => return None, + Some(found) => { + prestate.update(i - found); + found + } + }; + + // If we can't align our first match with the haystack, then a + // match is impossible. + if i + self.rare1i + 1 > haystack.len() { + continue; + } + + // Align our rare2 byte with the haystack. A mismatch means that + // a match is impossible. 
+ let aligned = match (i + self.rare1i).checked_sub(self.rare2i) { + None => continue, + Some(aligned) => aligned, + }; + if haystack.get(aligned) != Some(&self.rare2) { + continue; + } + + // We've done what we can. There might be a match here. + return Some(i + self.rare1i + 1); + } + // The only way we get here is if we believe our skipping heuristic + // has become ineffective. We're allowed to return false positives, + // so return the position at which we advanced to, aligned to the + // haystack. + Some(i + self.rare1i + 1) + } + + /// Return the heuristical frequency rank of the given byte. A lower rank + /// means the byte is believed to occur less frequently. + fn rank(b: u8) -> usize { + BYTE_FREQUENCIES[b as usize] as usize + } +} + +#[cfg(test)] +mod tests { + use super::*; + use ext_slice::B; + + #[test] + fn freqy_forward() { + // N.B. We sometimes use uppercase here since that mostly ensures freqy + // will be constructable. Lowercase letters may be too common for freqy + // to work. 
+ + let s = Freqy::forward(B("BAR")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(0), s.find_candidate(&mut pre, B("BARFOO"))); + + let s = Freqy::forward(B("BAR")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(3), s.find_candidate(&mut pre, B("FOOBAR"))); + + let s = Freqy::forward(B("zyzy")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(0), s.find_candidate(&mut pre, B("zyzz"))); + + let s = Freqy::forward(B("zyzy")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(2), s.find_candidate(&mut pre, B("zzzy"))); + + let s = Freqy::forward(B("zyzy")); + let mut pre = s.prefilter_state(); + assert_eq!(None, s.find_candidate(&mut pre, B("zazb"))); + + let s = Freqy::forward(B("yzyz")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(0), s.find_candidate(&mut pre, B("yzyy"))); + + let s = Freqy::forward(B("yzyz")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(2), s.find_candidate(&mut pre, B("yyyz"))); + + let s = Freqy::forward(B("yzyz")); + let mut pre = s.prefilter_state(); + assert_eq!(None, s.find_candidate(&mut pre, B("yayb"))); + } + + #[test] + fn freqy_reverse() { + // N.B. We sometimes use uppercase here since that mostly ensures freqy + // will be constructable. Lowercase letters may be too common for freqy + // to work. 
+ + let s = Freqy::reverse(B("BAR")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(3), s.rfind_candidate(&mut pre, B("BARFOO"))); + + let s = Freqy::reverse(B("BAR")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(6), s.rfind_candidate(&mut pre, B("FOOBAR"))); + + let s = Freqy::reverse(B("zyzy")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(2), s.rfind_candidate(&mut pre, B("zyzz"))); + + let s = Freqy::reverse(B("zyzy")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(4), s.rfind_candidate(&mut pre, B("zzzy"))); + + let s = Freqy::reverse(B("zyzy")); + let mut pre = s.prefilter_state(); + assert_eq!(None, s.rfind_candidate(&mut pre, B("zazb"))); + + let s = Freqy::reverse(B("yzyz")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(2), s.rfind_candidate(&mut pre, B("yzyy"))); + + let s = Freqy::reverse(B("yzyz")); + let mut pre = s.prefilter_state(); + assert_eq!(Some(4), s.rfind_candidate(&mut pre, B("yyyz"))); + + let s = Freqy::reverse(B("yzyz")); + let mut pre = s.prefilter_state(); + assert_eq!(None, s.rfind_candidate(&mut pre, B("yayb"))); + } +} diff --git a/src/search/tests.rs b/src/search/tests.rs new file mode 100644 index 0000000..827df92 --- /dev/null +++ b/src/search/tests.rs @@ -0,0 +1,225 @@ +use search::twoway::TwoWay; + +/// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple. 
+type SearchTest = (&'static str, &'static str, Option<usize>, Option<usize>); + +const SEARCH_TESTS: &'static [SearchTest] = &[ + ("", "", Some(0), Some(0)), + ("", "a", Some(0), Some(1)), + ("", "ab", Some(0), Some(2)), + ("", "abc", Some(0), Some(3)), + ("a", "", None, None), + ("a", "a", Some(0), Some(0)), + ("a", "aa", Some(0), Some(1)), + ("a", "ba", Some(1), Some(1)), + ("a", "bba", Some(2), Some(2)), + ("a", "bbba", Some(3), Some(3)), + ("a", "bbbab", Some(3), Some(3)), + ("a", "bbbabb", Some(3), Some(3)), + ("a", "bbbabbb", Some(3), Some(3)), + ("a", "bbbbbb", None, None), + ("ab", "", None, None), + ("ab", "a", None, None), + ("ab", "b", None, None), + ("ab", "ab", Some(0), Some(0)), + ("ab", "aab", Some(1), Some(1)), + ("ab", "aaab", Some(2), Some(2)), + ("ab", "abaab", Some(0), Some(3)), + ("ab", "baaab", Some(3), Some(3)), + ("ab", "acb", None, None), + ("ab", "abba", Some(0), Some(0)), + ("abc", "ab", None, None), + ("abc", "abc", Some(0), Some(0)), + ("abc", "abcz", Some(0), Some(0)), + ("abc", "abczz", Some(0), Some(0)), + ("abc", "zabc", Some(1), Some(1)), + ("abc", "zzabc", Some(2), Some(2)), + ("abc", "azbc", None, None), + ("abc", "abzc", None, None), + ("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)), + ("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)), + // Failures caught by quickcheck. + ("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)), + ("\u{0}\u{1e}", "\u{1e}\u{0}", None, None), +]; + +#[test] +fn unit_twoway_fwd() { + run_search_tests_fwd("TwoWay", |n, h| TwoWay::forward(n).find(h)); +} + +#[test] +fn unit_twoway_rev() { + run_search_tests_rev("TwoWay", |n, h| TwoWay::reverse(n).rfind(h)); +} + +/// Run the substring search tests. `name` should be the type of searcher used, +/// for diagnostics. `search` should be a closure that accepts a needle and a +/// haystack and returns the starting position of the first occurrence of +/// needle in the haystack, or `None` if one doesn't exist. 
+fn run_search_tests_fwd( + name: &str, + mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, +) { + for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS { + let (n, h) = (needle.as_bytes(), haystack.as_bytes()); + assert_eq!( + expected_fwd, + search(n, h), + "{}: needle: {:?}, haystack: {:?}, expected: {:?}", + name, + n, + h, + expected_fwd + ); + } +} + +/// Run the substring search tests. `name` should be the type of searcher used, +/// for diagnostics. `search` should be a closure that accepts a needle and a +/// haystack and returns the starting position of the last occurrence of +/// needle in the haystack, or `None` if one doesn't exist. +fn run_search_tests_rev( + name: &str, + mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, +) { + for &(needle, haystack, _, expected_rev) in SEARCH_TESTS { + let (n, h) = (needle.as_bytes(), haystack.as_bytes()); + assert_eq!( + expected_rev, + search(n, h), + "{}: needle: {:?}, haystack: {:?}, expected: {:?}", + name, + n, + h, + expected_rev + ); + } +} + +quickcheck! 
{ + fn qc_twoway_fwd_prefix_is_substring(bs: Vec<u8>) -> bool { + prop_prefix_is_substring(false, &bs, |n, h| TwoWay::forward(n).find(h)) + } + + fn qc_twoway_fwd_suffix_is_substring(bs: Vec<u8>) -> bool { + prop_suffix_is_substring(false, &bs, |n, h| TwoWay::forward(n).find(h)) + } + + fn qc_twoway_rev_prefix_is_substring(bs: Vec<u8>) -> bool { + prop_prefix_is_substring(true, &bs, |n, h| TwoWay::reverse(n).rfind(h)) + } + + fn qc_twoway_rev_suffix_is_substring(bs: Vec<u8>) -> bool { + prop_suffix_is_substring(true, &bs, |n, h| TwoWay::reverse(n).rfind(h)) + } + + fn qc_twoway_fwd_matches_naive( + needle: Vec<u8>, + haystack: Vec<u8> + ) -> bool { + prop_matches_naive( + false, + &needle, + &haystack, + |n, h| TwoWay::forward(n).find(h), + ) + } + + fn qc_twoway_rev_matches_naive( + needle: Vec<u8>, + haystack: Vec<u8> + ) -> bool { + prop_matches_naive( + true, + &needle, + &haystack, + |n, h| TwoWay::reverse(n).rfind(h), + ) + } +} + +/// Check that every prefix of the given byte string is a substring. +fn prop_prefix_is_substring( + reverse: bool, + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, +) -> bool { + if bs.is_empty() { + return true; + } + for i in 0..(bs.len() - 1) { + let prefix = &bs[..i]; + if reverse { + assert_eq!(naive_rfind(prefix, bs), search(prefix, bs)); + } else { + assert_eq!(naive_find(prefix, bs), search(prefix, bs)); + } + } + true +} + +/// Check that every suffix of the given byte string is a substring. +fn prop_suffix_is_substring( + reverse: bool, + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, +) -> bool { + if bs.is_empty() { + return true; + } + for i in 0..(bs.len() - 1) { + let suffix = &bs[i..]; + if reverse { + assert_eq!(naive_rfind(suffix, bs), search(suffix, bs)); + } else { + assert_eq!(naive_find(suffix, bs), search(suffix, bs)); + } + } + true +} + +/// Check that naive substring search matches the result of the given search +/// algorithm. 
+fn prop_matches_naive( + reverse: bool, + needle: &[u8], + haystack: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, +) -> bool { + if reverse { + naive_rfind(needle, haystack) == search(needle, haystack) + } else { + naive_find(needle, haystack) == search(needle, haystack) + } +} + +/// Naively search forwards for the given needle in the given haystack. +fn naive_find(needle: &[u8], haystack: &[u8]) -> Option<usize> { + if needle.is_empty() { + return Some(0); + } else if haystack.len() < needle.len() { + return None; + } + for i in 0..(haystack.len() - needle.len() + 1) { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None +} + +/// Naively search in reverse for the given needle in the given haystack. +fn naive_rfind(needle: &[u8], haystack: &[u8]) -> Option<usize> { + if needle.is_empty() { + return Some(haystack.len()); + } else if haystack.len() < needle.len() { + return None; + } + for i in (0..(haystack.len() - needle.len() + 1)).rev() { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None +} diff --git a/src/search/twoway.rs b/src/search/twoway.rs new file mode 100644 index 0000000..5f1e8cf --- /dev/null +++ b/src/search/twoway.rs @@ -0,0 +1,871 @@ +use core::cmp; + +use cow::CowBytes; +use ext_slice::ByteSlice; +use search::prefilter::{Freqy, PrefilterState}; + +/// An implementation of the TwoWay substring search algorithm, with heuristics +/// for accelerating search based on frequency analysis. +/// +/// This searcher supports forward and reverse search, although not +/// simultaneously. It runs in O(n + m) time and O(1) space, where +/// `n ~ len(needle)` and `m ~ len(haystack)`. +/// +/// The implementation here roughly matches that which was developed by +/// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The +/// only change in this implementation is the use of zero-based indices and +/// the addition of heuristics for a fast skip loop. 
That is, this will detect +/// bytes that are believed to be rare in the needle and use fast vectorized +/// instructions to find their occurrences quickly. The Two-Way algorithm is +/// then used to confirm whether a match at that location occurred. +/// +/// The heuristic for fast skipping is automatically shut off if it's +/// detected to be ineffective at search time. Generally, this only occurs in +/// pathological cases. But this is generally necessary in order to preserve +/// a `O(n + m)` time bound. +/// +/// The code below is fairly complex and not obviously correct at all. It's +/// likely necessary to read the Two-Way paper cited above in order to fully +/// grok this code. +#[derive(Clone, Debug)] +pub struct TwoWay<'b> { + /// The needle that we're looking for. + needle: CowBytes<'b>, + /// An implementation of a fast skip loop based on hard-coded frequency + /// data. This is only used when conditions are deemed favorable. + freqy: Freqy, + /// A critical position in needle. Specifically, this position corresponds + /// to beginning of either the minimal or maximal suffix in needle. (N.B. + /// See SuffixType below for why "minimal" isn't quite the correct word + /// here.) + /// + /// This is the position at which every search begins. Namely, search + /// starts by scanning text to the right of this position, and only if + /// there's a match does the text to the left of this position get scanned. + critical_pos: usize, + /// The amount we shift by in the Two-Way search algorithm. This + /// corresponds to the "small period" and "large period" cases. + shift: Shift, +} + +impl<'b> TwoWay<'b> { + /// Create a searcher that uses the Two-Way algorithm by searching forwards + /// through any haystack. 
+ pub fn forward(needle: &'b [u8]) -> TwoWay<'b> { + let freqy = Freqy::forward(needle); + if needle.is_empty() { + return TwoWay { + needle: CowBytes::new(needle), + freqy, + critical_pos: 0, + shift: Shift::Large { shift: 0 }, + }; + } + + let min_suffix = Suffix::forward(needle, SuffixKind::Minimal); + let max_suffix = Suffix::forward(needle, SuffixKind::Maximal); + let (period_lower_bound, critical_pos) = + if min_suffix.pos > max_suffix.pos { + (min_suffix.period, min_suffix.pos) + } else { + (max_suffix.period, max_suffix.pos) + }; + let shift = Shift::forward(needle, period_lower_bound, critical_pos); + let needle = CowBytes::new(needle); + TwoWay { needle, freqy, critical_pos, shift } + } + + /// Create a searcher that uses the Two-Way algorithm by searching in + /// reverse through any haystack. + pub fn reverse(needle: &'b [u8]) -> TwoWay<'b> { + let freqy = Freqy::reverse(needle); + if needle.is_empty() { + return TwoWay { + needle: CowBytes::new(needle), + freqy, + critical_pos: 0, + shift: Shift::Large { shift: 0 }, + }; + } + + let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal); + let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal); + let (period_lower_bound, critical_pos) = + if min_suffix.pos < max_suffix.pos { + (min_suffix.period, min_suffix.pos) + } else { + (max_suffix.period, max_suffix.pos) + }; + let shift = Shift::reverse(needle, period_lower_bound, critical_pos); + let needle = CowBytes::new(needle); + TwoWay { needle, freqy, critical_pos, shift } + } + + /// Return a fresh prefilter state that can be used with this searcher. + /// A prefilter state is used to track the effectiveness of a searcher's + /// prefilter for speeding up searches. Therefore, the prefilter state + /// should generally be reused on subsequent searches (such as in an + /// iterator). For searches on a different haystack, then a new prefilter + /// state should be used. 
+ /// + /// This always initializes a valid prefilter state even if this searcher + /// does not have a prefilter enabled. + pub fn prefilter_state(&self) -> PrefilterState { + self.freqy.prefilter_state() + } + + /// Return the needle used by this searcher. + pub fn needle(&self) -> &[u8] { + self.needle.as_slice() + } + + /// Convert this searched into an owned version, where the needle is + /// copied if it isn't already owned. + #[cfg(feature = "std")] + pub fn into_owned(self) -> TwoWay<'static> { + TwoWay { + needle: self.needle.into_owned(), + freqy: self.freqy, + critical_pos: self.critical_pos, + shift: self.shift, + } + } + + /// Find the position of the first occurrence of this searcher's needle in + /// the given haystack. If one does not exist, then return None. + /// + /// This will automatically initialize prefilter state. This should only + /// be used for one-off searches. + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + self.find_with(&mut self.prefilter_state(), haystack) + } + + /// Find the position of the last occurrence of this searcher's needle + /// in the given haystack. If one does not exist, then return None. + /// + /// This will automatically initialize prefilter state. This should only + /// be used for one-off searches. + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + self.rfind_with(&mut self.prefilter_state(), haystack) + } + + /// Find the position of the first occurrence of this searcher's needle in + /// the given haystack. If one does not exist, then return None. + /// + /// This accepts prefilter state that is useful when using the same + /// searcher multiple times, such as in an iterator. 
+ pub fn find_with( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + ) -> Option<usize> { + if self.needle.is_empty() { + return Some(0); + } else if haystack.len() < self.needle.len() { + return None; + } else if self.needle.len() == 1 { + return haystack.find_byte(self.needle[0]); + } + match self.shift { + Shift::Small { period } => { + self.find_small(prestate, haystack, period) + } + Shift::Large { shift } => { + self.find_large(prestate, haystack, shift) + } + } + } + + /// Find the position of the last occurrence of this searcher's needle + /// in the given haystack. If one does not exist, then return None. + /// + /// This accepts prefilter state that is useful when using the same + /// searcher multiple times, such as in an iterator. + pub fn rfind_with( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + ) -> Option<usize> { + if self.needle.is_empty() { + return Some(haystack.len()); + } else if haystack.len() < self.needle.len() { + return None; + } else if self.needle.len() == 1 { + return haystack.rfind_byte(self.needle[0]); + } + match self.shift { + Shift::Small { period } => { + self.rfind_small(prestate, haystack, period) + } + Shift::Large { shift } => { + self.rfind_large(prestate, haystack, shift) + } + } + } + + // Below is the actual implementation of TwoWay searching, including both + // forwards and backwards searching. Each forward and reverse search has + // two fairly similar implementations, each handling the small and large + // period cases, for a total 4 different search routines. + // + // On top of that, each search implementation can be accelerated by a + // Freqy prefilter, but it is not always enabled. To avoid its overhead + // when its disabled, we explicitly inline each search implementation based + // on whether Freqy will be used or not. This brings us up to a total of + // 8 monomorphized versions of the search code. 
+ + #[inline(never)] + fn find_small( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + period: usize, + ) -> Option<usize> { + if prestate.is_effective() { + self.find_small_imp(prestate, true, haystack, period) + } else { + self.find_small_imp(prestate, false, haystack, period) + } + } + + #[inline(always)] + fn find_small_imp( + &self, + prestate: &mut PrefilterState, + prefilter: bool, + haystack: &[u8], + period: usize, + ) -> Option<usize> { + let needle = self.needle.as_slice(); + let mut pos = 0; + let mut shift = 0; + while pos + needle.len() <= haystack.len() { + let mut i = cmp::max(self.critical_pos, shift); + if prefilter && prestate.is_effective() { + match self.freqy.find_candidate(prestate, &haystack[pos..]) { + None => return None, + Some(found) => { + shift = 0; + i = self.critical_pos; + pos += found; + if pos + needle.len() > haystack.len() { + return None; + } + } + } + } + while i < needle.len() && needle[i] == haystack[pos + i] { + i += 1; + } + if i < needle.len() { + pos += i - self.critical_pos + 1; + shift = 0; + } else { + let mut j = self.critical_pos; + while j > shift && needle[j] == haystack[pos + j] { + j -= 1; + } + if j <= shift && needle[shift] == haystack[pos + shift] { + return Some(pos); + } + pos += period; + shift = needle.len() - period; + } + } + None + } + + #[inline(never)] + fn find_large( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + shift: usize, + ) -> Option<usize> { + if prestate.is_effective() { + self.find_large_imp(prestate, true, haystack, shift) + } else { + self.find_large_imp(prestate, false, haystack, shift) + } + } + + #[inline(always)] + fn find_large_imp( + &self, + prestate: &mut PrefilterState, + prefilter: bool, + haystack: &[u8], + shift: usize, + ) -> Option<usize> { + let needle = self.needle.as_slice(); + let mut pos = 0; + while pos + needle.len() <= haystack.len() { + let mut i = self.critical_pos; + if prefilter && prestate.is_effective() { + match 
self.freqy.find_candidate(prestate, &haystack[pos..]) { + None => return None, + Some(found) => { + pos += found; + if pos + needle.len() > haystack.len() { + return None; + } + } + } + } + while i < needle.len() && needle[i] == haystack[pos + i] { + i += 1; + } + if i < needle.len() { + pos += i - self.critical_pos + 1; + } else { + let mut j = self.critical_pos; + while j > 0 && needle[j] == haystack[pos + j] { + j -= 1; + } + if j == 0 && needle[0] == haystack[pos] { + return Some(pos); + } + pos += shift; + } + } + None + } + + #[inline(never)] + fn rfind_small( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + period: usize, + ) -> Option<usize> { + if prestate.is_effective() { + self.rfind_small_imp(prestate, true, haystack, period) + } else { + self.rfind_small_imp(prestate, false, haystack, period) + } + } + + #[inline(always)] + fn rfind_small_imp( + &self, + prestate: &mut PrefilterState, + prefilter: bool, + haystack: &[u8], + period: usize, + ) -> Option<usize> { + let needle = &*self.needle; + let nlen = needle.len(); + let mut pos = haystack.len(); + let mut shift = nlen; + while pos >= nlen { + let mut i = cmp::min(self.critical_pos, shift); + if prefilter && prestate.is_effective() { + match self.freqy.rfind_candidate(prestate, &haystack[..pos]) { + None => return None, + Some(found) => { + shift = nlen; + i = self.critical_pos; + pos = found; + if pos < nlen { + return None; + } + } + } + } + while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { + i -= 1; + } + if i > 0 || needle[0] != haystack[pos - nlen] { + pos -= self.critical_pos - i + 1; + shift = nlen; + } else { + let mut j = self.critical_pos; + while j < shift && needle[j] == haystack[pos - nlen + j] { + j += 1; + } + if j == shift { + return Some(pos - nlen); + } + pos -= period; + shift = period; + } + } + None + } + + #[inline(never)] + fn rfind_large( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + shift: usize, + ) -> Option<usize> { + if 
prestate.is_effective() { + self.rfind_large_imp(prestate, true, haystack, shift) + } else { + self.rfind_large_imp(prestate, false, haystack, shift) + } + } + + #[inline(always)] + fn rfind_large_imp( + &self, + prestate: &mut PrefilterState, + prefilter: bool, + haystack: &[u8], + shift: usize, + ) -> Option<usize> { + let needle = &*self.needle; + let nlen = needle.len(); + let mut pos = haystack.len(); + while pos >= nlen { + if prefilter && prestate.is_effective() { + match self.freqy.rfind_candidate(prestate, &haystack[..pos]) { + None => return None, + Some(found) => { + pos = found; + if pos < nlen { + return None; + } + } + } + } + + let mut i = self.critical_pos; + while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { + i -= 1; + } + if i > 0 || needle[0] != haystack[pos - nlen] { + pos -= self.critical_pos - i + 1; + } else { + let mut j = self.critical_pos; + while j < nlen && needle[j] == haystack[pos - nlen + j] { + j += 1; + } + if j == nlen { + return Some(pos - nlen); + } + pos -= shift; + } + } + None + } +} + +/// A representation of the amount we're allowed to shift by during Two-Way +/// search. +/// +/// When computing a critical factorization of the needle, we find the position +/// of the critical factorization by finding the needle's maximal (or minimal) +/// suffix, along with the period of that suffix. It turns out that the period +/// of that suffix is a lower bound on the period of the needle itself. +/// +/// This lower bound is equivalent to the actual period of the needle in +/// some cases. To describe that case, we denote the needle as `x` where +/// `x = uv` and `v` is the lexicographic maximal suffix of `v`. The lower +/// bound given here is always the period of `v`, which is `<= period(x)`. The +/// case where `period(v) == period(x)` occurs when `len(u) < (len(x) / 2)` and +/// where `u` is a suffix of `v[0..period(v)]`. 
+/// +/// This case is important because the search algorithm for when the +/// periods are equivalent is slightly different than the search algorithm +/// for when the periods are not equivalent. In particular, when they aren't +/// equivalent, we know that the period of the needle is no less than half its +/// length. In this case, we shift by an amount less than or equal to the +/// period of the needle (determined by the maximum length of the components +/// of the critical factorization of `x`, i.e., `max(len(u), len(v))`).. +/// +/// The above two cases are represented by the variants below. Each entails +/// a different instantiation of the Two-Way search algorithm. +/// +/// N.B. If we could find a way to compute the exact period in all cases, +/// then we could collapse this case analysis and simplify the algorithm. The +/// Two-Way paper suggests this is possible, but more reading is required to +/// grok why the authors didn't pursue that path. +#[derive(Clone, Debug)] +enum Shift { + Small { period: usize }, + Large { shift: usize }, +} + +impl Shift { + /// Compute the shift for a given needle in the forward direction. + /// + /// This requires a lower bound on the period and a critical position. + /// These can be computed by extracting both the minimal and maximal + /// lexicographic suffixes, and choosing the right-most starting position. + /// The lower bound on the period is then the period of the chosen suffix. + fn forward( + needle: &[u8], + period_lower_bound: usize, + critical_pos: usize, + ) -> Shift { + let large = cmp::max(critical_pos, needle.len() - critical_pos); + if critical_pos * 2 >= needle.len() { + return Shift::Large { shift: large }; + } + + let (u, v) = needle.split_at(critical_pos); + if !v[..period_lower_bound].ends_with(u) { + return Shift::Large { shift: large }; + } + Shift::Small { period: period_lower_bound } + } + + /// Compute the shift for a given needle in the reverse direction. 
+ /// + /// This requires a lower bound on the period and a critical position. + /// These can be computed by extracting both the minimal and maximal + /// lexicographic suffixes, and choosing the left-most starting position. + /// The lower bound on the period is then the period of the chosen suffix. + fn reverse( + needle: &[u8], + period_lower_bound: usize, + critical_pos: usize, + ) -> Shift { + let large = cmp::max(critical_pos, needle.len() - critical_pos); + if (needle.len() - critical_pos) * 2 >= needle.len() { + return Shift::Large { shift: large }; + } + + let (v, u) = needle.split_at(critical_pos); + if !v[v.len() - period_lower_bound..].starts_with(u) { + return Shift::Large { shift: large }; + } + Shift::Small { period: period_lower_bound } + } +} + +/// A suffix extracted from a needle along with its period. +#[derive(Debug)] +struct Suffix { + /// The starting position of this suffix. + /// + /// If this is a forward suffix, then `&bytes[pos..]` can be used. If this + /// is a reverse suffix, then `&bytes[..pos]` can be used. That is, for + /// forward suffixes, this is an inclusive starting position, where as for + /// reverse suffixes, this is an exclusive ending position. + pos: usize, + /// The period of this suffix. + /// + /// Note that this is NOT necessarily the period of the string from which + /// this suffix comes from. (It is always less than or equal to the period + /// of the original string.) + period: usize, +} + +impl Suffix { + fn forward(needle: &[u8], kind: SuffixKind) -> Suffix { + debug_assert!(!needle.is_empty()); + + // suffix represents our maximal (or minimal) suffix, along with + // its period. + let mut suffix = Suffix { pos: 0, period: 1 }; + // The start of a suffix in `needle` that we are considering as a + // more maximal (or minimal) suffix than what's in `suffix`. + let mut candidate_start = 1; + // The current offset of our suffixes that we're comparing. 
+ // + // When the characters at this offset are the same, then we mush on + // to the next position since no decision is possible. When the + // candidate's character is greater (or lesser) than the corresponding + // character than our current maximal (or minimal) suffix, then the + // current suffix is changed over to the candidate and we restart our + // search. Otherwise, the candidate suffix is no good and we restart + // our search on the next candidate. + // + // The three cases above correspond to the three cases in the loop + // below. + let mut offset = 0; + + while candidate_start + offset < needle.len() { + let current = needle[suffix.pos + offset]; + let candidate = needle[candidate_start + offset]; + match kind.cmp(current, candidate) { + SuffixOrdering::Accept => { + suffix = Suffix { pos: candidate_start, period: 1 }; + candidate_start += 1; + offset = 0; + } + SuffixOrdering::Skip => { + candidate_start += offset + 1; + offset = 0; + suffix.period = candidate_start - suffix.pos; + } + SuffixOrdering::Push => { + if offset + 1 == suffix.period { + candidate_start += suffix.period; + offset = 0; + } else { + offset += 1; + } + } + } + } + suffix + } + + fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix { + debug_assert!(!needle.is_empty()); + + // See the comments in `forward` for how this works. 
+ let mut suffix = Suffix { pos: needle.len(), period: 1 }; + if needle.len() == 1 { + return suffix; + } + let mut candidate_start = needle.len() - 1; + let mut offset = 0; + + while offset < candidate_start { + let current = needle[suffix.pos - offset - 1]; + let candidate = needle[candidate_start - offset - 1]; + match kind.cmp(current, candidate) { + SuffixOrdering::Accept => { + suffix = Suffix { pos: candidate_start, period: 1 }; + candidate_start -= 1; + offset = 0; + } + SuffixOrdering::Skip => { + candidate_start -= offset + 1; + offset = 0; + suffix.period = suffix.pos - candidate_start; + } + SuffixOrdering::Push => { + if offset + 1 == suffix.period { + candidate_start -= suffix.period; + offset = 0; + } else { + offset += 1; + } + } + } + } + suffix + } +} + +/// The kind of suffix to extract. +#[derive(Clone, Copy, Debug)] +enum SuffixKind { + /// Extract the smallest lexicographic suffix from a string. + /// + /// Technically, this doesn't actually pick the smallest lexicographic + /// suffix. e.g., Given the choice between `a` and `aa`, this will choose + /// the latter over the former, even though `a < aa`. The reasoning for + /// this isn't clear from the paper, but it still smells like a minimal + /// suffix. + Minimal, + /// Extract the largest lexicographic suffix from a string. + /// + /// Unlike `Minimal`, this really does pick the maximum suffix. e.g., Given + /// the choice between `z` and `zz`, this will choose the latter over the + /// former. + Maximal, +} + +/// The result of comparing corresponding bytes between two suffixes. +#[derive(Clone, Copy, Debug)] +enum SuffixOrdering { + /// This occurs when the given candidate byte indicates that the candidate + /// suffix is better than the current maximal (or minimal) suffix. That is, + /// the current candidate suffix should supplant the current maximal (or + /// minimal) suffix. 
+ Accept, + /// This occurs when the given candidate byte excludes the candidate suffix + /// from being better than the current maximal (or minimal) suffix. That + /// is, the current candidate suffix should be dropped and the next one + /// should be considered. + Skip, + /// This occurs when no decision to accept or skip the candidate suffix + /// can be made, e.g., when corresponding bytes are equivalent. In this + /// case, the next corresponding bytes should be compared. + Push, +} + +impl SuffixKind { + /// Returns true if and only if the given candidate byte indicates that + /// it should replace the current suffix as the maximal (or minimal) + /// suffix. + fn cmp(self, current: u8, candidate: u8) -> SuffixOrdering { + use self::SuffixOrdering::*; + + match self { + SuffixKind::Minimal if candidate < current => Accept, + SuffixKind::Minimal if candidate > current => Skip, + SuffixKind::Minimal => Push, + SuffixKind::Maximal if candidate > current => Accept, + SuffixKind::Maximal if candidate < current => Skip, + SuffixKind::Maximal => Push, + } + } +} + +// N.B. There are more holistic tests in src/search/tests.rs. +#[cfg(test)] +mod tests { + use super::*; + use ext_slice::B; + + /// Convenience wrapper for computing the suffix as a byte string. + fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { + let s = Suffix::forward(needle, kind); + (&needle[s.pos..], s.period) + } + + /// Convenience wrapper for computing the reverse suffix as a byte string. + fn get_suffix_reverse(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { + let s = Suffix::reverse(needle, kind); + (&needle[..s.pos], s.period) + } + + /// Return all of the non-empty suffixes in the given byte string. + fn suffixes(bytes: &[u8]) -> Vec<&[u8]> { + (0..bytes.len()).map(|i| &bytes[i..]).collect() + } + + /// Return the lexicographically maximal suffix of the given byte string. 
+ fn naive_maximal_suffix_forward(needle: &[u8]) -> &[u8] { + let mut sufs = suffixes(needle); + sufs.sort(); + sufs.pop().unwrap() + } + + /// Return the lexicographically maximal suffix of the reverse of the given + /// byte string. + fn naive_maximal_suffix_reverse(needle: &[u8]) -> Vec<u8> { + let mut reversed = needle.to_vec(); + reversed.reverse(); + let mut got = naive_maximal_suffix_forward(&reversed).to_vec(); + got.reverse(); + got + } + + #[test] + fn suffix_forward() { + macro_rules! assert_suffix_min { + ($given:expr, $expected:expr, $period:expr) => { + let (got_suffix, got_period) = + get_suffix_forward($given.as_bytes(), SuffixKind::Minimal); + assert_eq!((B($expected), $period), (got_suffix, got_period)); + }; + } + + macro_rules! assert_suffix_max { + ($given:expr, $expected:expr, $period:expr) => { + let (got_suffix, got_period) = + get_suffix_forward($given.as_bytes(), SuffixKind::Maximal); + assert_eq!((B($expected), $period), (got_suffix, got_period)); + }; + } + + assert_suffix_min!("a", "a", 1); + assert_suffix_max!("a", "a", 1); + + assert_suffix_min!("ab", "ab", 2); + assert_suffix_max!("ab", "b", 1); + + assert_suffix_min!("ba", "a", 1); + assert_suffix_max!("ba", "ba", 2); + + assert_suffix_min!("abc", "abc", 3); + assert_suffix_max!("abc", "c", 1); + + assert_suffix_min!("acb", "acb", 3); + assert_suffix_max!("acb", "cb", 2); + + assert_suffix_min!("cba", "a", 1); + assert_suffix_max!("cba", "cba", 3); + + assert_suffix_min!("abcabc", "abcabc", 3); + assert_suffix_max!("abcabc", "cabc", 3); + + assert_suffix_min!("abcabcabc", "abcabcabc", 3); + assert_suffix_max!("abcabcabc", "cabcabc", 3); + + assert_suffix_min!("abczz", "abczz", 5); + assert_suffix_max!("abczz", "zz", 1); + + assert_suffix_min!("zzabc", "abc", 3); + assert_suffix_max!("zzabc", "zzabc", 5); + + assert_suffix_min!("aaa", "aaa", 1); + assert_suffix_max!("aaa", "aaa", 1); + + assert_suffix_min!("foobar", "ar", 2); + assert_suffix_max!("foobar", "r", 1); + } + + #[test] + 
fn suffix_reverse() { + macro_rules! assert_suffix_min { + ($given:expr, $expected:expr, $period:expr) => { + let (got_suffix, got_period) = + get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal); + assert_eq!((B($expected), $period), (got_suffix, got_period)); + }; + } + + macro_rules! assert_suffix_max { + ($given:expr, $expected:expr, $period:expr) => { + let (got_suffix, got_period) = + get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal); + assert_eq!((B($expected), $period), (got_suffix, got_period)); + }; + } + + assert_suffix_min!("a", "a", 1); + assert_suffix_max!("a", "a", 1); + + assert_suffix_min!("ab", "a", 1); + assert_suffix_max!("ab", "ab", 2); + + assert_suffix_min!("ba", "ba", 2); + assert_suffix_max!("ba", "b", 1); + + assert_suffix_min!("abc", "a", 1); + assert_suffix_max!("abc", "abc", 3); + + assert_suffix_min!("acb", "a", 1); + assert_suffix_max!("acb", "ac", 2); + + assert_suffix_min!("cba", "cba", 3); + assert_suffix_max!("cba", "c", 1); + + assert_suffix_min!("abcabc", "abca", 3); + assert_suffix_max!("abcabc", "abcabc", 3); + + assert_suffix_min!("abcabcabc", "abcabca", 3); + assert_suffix_max!("abcabcabc", "abcabcabc", 3); + + assert_suffix_min!("abczz", "a", 1); + assert_suffix_max!("abczz", "abczz", 5); + + assert_suffix_min!("zzabc", "zza", 3); + assert_suffix_max!("zzabc", "zz", 1); + + assert_suffix_min!("aaa", "aaa", 1); + assert_suffix_max!("aaa", "aaa", 1); + } + + quickcheck! 
{ + fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool { + if bytes.is_empty() { + return true; + } + + let (got, _) = get_suffix_forward(&bytes, SuffixKind::Maximal); + let expected = naive_maximal_suffix_forward(&bytes); + got == expected + } + + fn qc_suffix_reverse_maximal(bytes: Vec<u8>) -> bool { + if bytes.is_empty() { + return true; + } + + let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal); + let expected = naive_maximal_suffix_reverse(&bytes); + expected == got + } + } +} diff --git a/src/tests.rs b/src/tests.rs new file mode 100644 index 0000000..f4179fd --- /dev/null +++ b/src/tests.rs @@ -0,0 +1,32 @@ +/// A sequence of tests for checking whether lossy decoding uses the maximal +/// subpart strategy correctly. Namely, if a sequence of otherwise invalid +/// UTF-8 bytes is a valid prefix of a valid UTF-8 sequence, then the entire +/// prefix is replaced by a single replacement codepoint. In all other cases, +/// each invalid byte is replaced by a single replacement codepoint. +/// +/// The first element in each tuple is the expected result of lossy decoding, +/// while the second element is the input given. 
+pub const LOSSY_TESTS: &[(&str, &[u8])] = &[ + ("a", b"a"), + ("\u{FFFD}", b"\xFF"), + ("\u{FFFD}\u{FFFD}", b"\xFF\xFF"), + ("β\u{FFFD}", b"\xCE\xB2\xFF"), + ("☃\u{FFFD}", b"\xE2\x98\x83\xFF"), + ("𝝱\u{FFFD}", b"\xF0\x9D\x9D\xB1\xFF"), + ("\u{FFFD}\u{FFFD}", b"\xCE\xF0"), + ("\u{FFFD}\u{FFFD}", b"\xCE\xFF"), + ("\u{FFFD}\u{FFFD}", b"\xE2\x98\xF0"), + ("\u{FFFD}\u{FFFD}", b"\xE2\x98\xFF"), + ("\u{FFFD}", b"\xF0\x9D\x9D"), + ("\u{FFFD}\u{FFFD}", b"\xF0\x9D\x9D\xF0"), + ("\u{FFFD}\u{FFFD}", b"\xF0\x9D\x9D\xFF"), + ("\u{FFFD}", b"\xCE"), + ("a\u{FFFD}", b"a\xCE"), + ("\u{FFFD}", b"\xE2\x98"), + ("a\u{FFFD}", b"a\xE2\x98"), + ("\u{FFFD}", b"\xF0\x9D\x9C"), + ("a\u{FFFD}", b"a\xF0\x9D\x9C"), + ("a\u{FFFD}\u{FFFD}\u{FFFD}z", b"a\xED\xA0\x80z"), + ("☃βツ\u{FFFD}", b"\xe2\x98\x83\xce\xb2\xe3\x83\x84\xFF"), + ("a\u{FFFD}\u{FFFD}\u{FFFD}b", b"\x61\xF1\x80\x80\xE1\x80\xC2\x62"), +]; diff --git a/src/unicode/data/GraphemeBreakTest.txt b/src/unicode/data/GraphemeBreakTest.txt new file mode 100644 index 0000000..fb4fec9 --- /dev/null +++ b/src/unicode/data/GraphemeBreakTest.txt @@ -0,0 +1,630 @@ +# GraphemeBreakTest-12.1.0.txt +# Date: 2019-03-10, 10:53:12 GMT +# © 2019 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Default Grapheme_Cluster_Break Test +# +# Format: +# <string> (# <comment>)? +# <string> contains hex Unicode code points, with +# ÷ wherever there is a break opportunity, and +# × wherever there is not. 
+# <comment> the format can change, but currently it shows: +# - the sample character name +# - (x) the Grapheme_Cluster_Break property value for the sample character +# - [x] the rule that determines whether there is a break or not, +# as listed in the Rules section of GraphemeBreakTest.html +# +# These samples may be extended or changed in the future. +# +÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0020 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0020 × 0308 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0020 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0020 × 0308 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] 
DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER 
(ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 000D ÷ 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 000D ÷ 0308 × 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000D ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN 
(Prepend) ÷ [0.3] +÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × 
[9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 000A ÷ 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 000A ÷ 0308 × 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A 
(RI) ÷ [0.3] +÷ 000A ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING 
GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0001 ÷ 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0001 ÷ 0308 × 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] 
REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0001 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0001 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0001 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE 
GAG (LVT) ÷ [0.3] +÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 034F ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 034F × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 034F ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 034F × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 034F ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 034F × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 034F ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 034F × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] 
COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 034F × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 034F × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 034F ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 034F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 034F ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 034F × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 034F × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 034F × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 034F ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 034F × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 034F ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 034F × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 034F ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 034F × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL 
JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 034F ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 034F × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 034F ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 034F × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 034F ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 034F × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 034F × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 034F × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 034F × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 034F × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 034F ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 034F × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ 
[0.3] +÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 1F1E6 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1F1E6 × 0308 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1F1E6 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK 
(L) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1F1E6 × 0308 × 200D 
÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0600 × 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] SPACE (Other) ÷ [0.3] +÷ 0600 × 0308 ÷ 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0600 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0600 × 0308 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0600 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0600 × 0308 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0600 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0600 × 0308 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0600 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0600 × 0308 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0600 × 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0600 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0600 × 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN 
(Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0600 × 0308 ÷ 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0600 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0600 × 0308 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0600 × 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0600 × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0600 × 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0600 × 0308 ÷ 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0600 × 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0600 × 0308 ÷ 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0600 × AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0600 × 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3] +÷ 0600 × 0308 ÷ 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0600 × 0300 
÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3] +÷ 0600 × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0903 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0903 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI 
SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0903 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0903 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA 
(SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG 
KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 1100 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1100 × 0308 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1100 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1100 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG 
FILLER (V) ÷ [0.3] +÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1100 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE 
(Other) ÷ [0.3] +÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 1160 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1160 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1160 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1160 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER 
(V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1160 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1160 × 0308 ÷ 0378 
÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 11A8 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 11A8 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 11A8 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA 
(SpacingMark) ÷ [0.3] +÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 11A8 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 11A8 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL 
JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ AC00 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ AC00 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC00 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA 
(LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ AC00 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ AC00 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT 
(Extend_ExtCccZwj) ÷ [0.3] +÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ AC01 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ AC01 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL 
LETTER A (RI) ÷ [0.3] +÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC01 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ AC01 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC01 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE 
GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ AC01 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 231A × 0308 ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 231A × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 231A × 0308 × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] 
COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 231A ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 231A × 0308 ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 231A × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 231A × 0308 × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 231A ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 231A × 0308 ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 231A ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 231A × 0308 ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 231A ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 231A × 0308 ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 231A ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 231A × 0308 ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 231A ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 231A × 0308 ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 231A ÷ 231A ÷ # ÷ 
[0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 231A ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 231A × 0308 ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 
0300 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0300 × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0300 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0300 × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ 
[999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 200D ÷ 000D ÷ # ÷ 
[0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 200D × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 200D × 0308 × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 200D ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 200D × 0308 ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 
200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 200D × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 200D ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 200D × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 200D ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 200D ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH 
JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0378 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0378 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0378 × 0308 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0378 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0378 × 0308 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN 
(Prepend) ÷ [0.3] +÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0378 × 0308 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ 
[0.3] +÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3] +÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3] +÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3] +÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 
0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3] +÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3] +÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO 
WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [0.3] +÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 0061 × 200D ÷ 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3] +÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3] +# +# Lines: 602 +# +# EOF diff --git a/src/unicode/data/LICENSE-UNICODE b/src/unicode/data/LICENSE-UNICODE new file mode 100644 index 0000000..ad06935 --- /dev/null +++ b/src/unicode/data/LICENSE-UNICODE @@ -0,0 +1,45 @@ +UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE +See Terms of Use for definitions of Unicode Inc.'s +Data Files and Software. + +NOTICE TO USER: Carefully read the following legal agreement. +BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. +IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +THE DATA FILES OR SOFTWARE. + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 1991-2019 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in https://www.unicode.org/copyright.html. 
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. diff --git a/src/unicode/data/SentenceBreakTest.txt b/src/unicode/data/SentenceBreakTest.txt new file mode 100644 index 0000000..7c1c34a --- /dev/null +++ b/src/unicode/data/SentenceBreakTest.txt @@ -0,0 +1,530 @@ +# SentenceBreakTest-12.1.0.txt +# Date: 2019-03-10, 10:53:28 GMT +# © 2019 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. 
and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Default Sentence_Break Test +# +# Format: +# <string> (# <comment>)? +# <string> contains hex Unicode code points, with +# ÷ wherever there is a break opportunity, and +# × wherever there is not. +# <comment> the format can change, but currently it shows: +# - the sample character name +# - (x) the Sentence_Break property value for the sample character +# - [x] the rule that determines whether there is a break or not, +# as listed in the Rules section of SentenceBreakTest.html +# +# These samples may be extended or changed in the future. +# +÷ 0001 × 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0001 × 0308 × 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0001 × 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0001 × 0308 × 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0001 × 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0001 × 0308 × 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0001 × 0085 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0001 × 0308 × 0085 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0001 × 0009 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0001 × 0308 × 0009 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0001 × 0061 ÷ # ÷ [0.2] <START OF HEADING> 
(Other) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0001 × 0308 × 0061 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0001 × 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0001 × 0308 × 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0001 × 01BB ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0001 × 0308 × 01BB ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0001 × 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0001 × 0308 × 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0001 × 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0001 × 0308 × 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0001 × 0021 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0001 × 0308 × 0021 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0001 × 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0001 × 0308 × 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0001 × 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0001 × 0308 × 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0001 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0001 × 0308 
× 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0001 × 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 000D ÷ 0308 × 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000D ÷ 0308 × 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000D ÷ 0308 × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000D ÷ 0085 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 000D ÷ 0308 × 0085 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 000D ÷ 0009 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 000D ÷ 0308 × 0009 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 000D ÷ 0061 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 000D ÷ 0308 × 0061 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 000D ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 000D ÷ 0308 × 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN 
(CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 000D ÷ 01BB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 000D ÷ 0308 × 01BB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 000D ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 000D ÷ 0308 × 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 000D ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3] +÷ 000D ÷ 0308 × 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 000D ÷ 0021 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 000D ÷ 0308 × 0021 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 000D ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 000D ÷ 0308 × 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 000D ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMMA (SContinue) ÷ [0.3] +÷ 000D ÷ 0308 × 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 000D ÷ 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING 
GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 000A ÷ 0308 × 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000A ÷ 0308 × 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000A ÷ 0308 × 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000A ÷ 0085 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 000A ÷ 0308 × 0085 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 000A ÷ 0009 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 000A ÷ 0308 × 0009 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 000A ÷ 0061 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 000A ÷ 0308 × 0061 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 000A ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 000A ÷ 0308 × 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 000A ÷ 01BB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 000A ÷ 0308 × 01BB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 000A ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DIGIT ZERO (Numeric) 
÷ [0.3] +÷ 000A ÷ 0308 × 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 000A ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3] +÷ 000A ÷ 0308 × 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 000A ÷ 0021 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 000A ÷ 0308 × 0021 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 000A ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 000A ÷ 0308 × 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 000A ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMMA (SContinue) ÷ [0.3] +÷ 000A ÷ 0308 × 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 000A ÷ 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0085 ÷ 0001 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0085 ÷ 0308 × 0001 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0085 ÷ 000D ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0085 ÷ 0308 × 000D ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0085 ÷ 000A ÷ # ÷ 
[0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0085 ÷ 0308 × 000A ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0085 ÷ 0085 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0085 ÷ 0308 × 0085 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0085 ÷ 0009 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0085 ÷ 0308 × 0009 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0085 ÷ 0061 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0085 ÷ 0308 × 0061 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0085 ÷ 0041 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0085 ÷ 0308 × 0041 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0085 ÷ 01BB ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0085 ÷ 0308 × 01BB ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0085 ÷ 0030 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0085 ÷ 0308 × 0030 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0085 ÷ 002E ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0085 ÷ 0308 × 002E ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0085 ÷ 0021 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0085 ÷ 0308 × 0021 ÷ # ÷ [0.2] <NEXT 
LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0085 ÷ 0022 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0085 ÷ 0308 × 0022 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0085 ÷ 002C ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMMA (SContinue) ÷ [0.3] +÷ 0085 ÷ 0308 × 002C ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0085 ÷ 00AD ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0085 ÷ 0308 × 00AD ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0085 ÷ 0300 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0085 ÷ 0308 × 0300 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0009 × 0001 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0009 × 0308 × 0001 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0009 × 000D ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0009 × 0308 × 000D ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0009 × 000A ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0009 × 0308 × 000A ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0009 × 0085 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0009 × 0308 × 0085 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ 
[0.3] +÷ 0009 × 0009 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0009 × 0308 × 0009 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0009 × 0061 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0009 × 0308 × 0061 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0009 × 0041 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0009 × 0308 × 0041 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0009 × 01BB ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0009 × 0308 × 01BB ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0009 × 0030 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0009 × 0308 × 0030 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0009 × 002E ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0009 × 0308 × 002E ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0009 × 0021 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0009 × 0308 × 0021 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0009 × 0022 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0009 × 0308 × 0022 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0009 × 002C ÷ # ÷ 
[0.2] <CHARACTER TABULATION> (Sp) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0009 × 0308 × 002C ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0009 × 00AD ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0009 × 0308 × 00AD ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0009 × 0300 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0009 × 0308 × 0300 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 × 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 × 0308 × 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 × 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 × 0308 × 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 × 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 × 0308 × 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 × 0085 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0061 × 0308 × 0085 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0061 × 0009 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0061 × 0308 × 0009 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0061 × 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × 
[998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0061 × 0308 × 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0061 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0061 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0061 × 01BB ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0061 × 0308 × 01BB ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0061 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 × 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0061 × 0308 × 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0061 × 0021 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0061 × 0308 × 0021 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0061 × 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0061 × 0308 × 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0061 × 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0061 × 0308 × 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0061 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] SOFT HYPHEN (Format_FE) ÷ 
[0.3] +÷ 0061 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0041 × 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0041 × 0308 × 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0041 × 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0041 × 0308 × 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0041 × 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0041 × 0308 × 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0041 × 0085 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0041 × 0308 × 0085 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0041 × 0009 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0041 × 0308 × 0009 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0041 × 0061 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0041 × 0308 × 0061 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN 
CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0041 × 0308 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0041 × 01BB ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0041 × 0308 × 01BB ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0041 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0041 × 0308 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0041 × 0021 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0041 × 0308 × 0021 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0041 × 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0041 × 0308 × 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0041 × 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0041 × 0308 × 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE 
ACCENT (Extend_FE) ÷ [0.3] +÷ 0041 × 0308 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 01BB × 0001 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 01BB × 0308 × 0001 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 01BB × 000D ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 01BB × 0308 × 000D ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 01BB × 000A ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 01BB × 0308 × 000A ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 01BB × 0085 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 01BB × 0308 × 0085 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 01BB × 0009 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 01BB × 0308 × 0009 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 01BB × 0061 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 01BB × 0308 × 0061 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 01BB × 0041 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 01BB × 0308 × 0041 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING 
DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 01BB × 01BB ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 01BB × 0308 × 01BB ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 01BB × 0030 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 01BB × 0308 × 0030 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 01BB × 002E ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 01BB × 0308 × 002E ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 01BB × 0021 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 01BB × 0308 × 0021 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 01BB × 0022 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 01BB × 0308 × 0022 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 01BB × 002C ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 01BB × 0308 × 002C ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 01BB × 00AD ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 01BB × 0308 × 00AD ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 01BB × 0300 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING GRAVE 
ACCENT (Extend_FE) ÷ [0.3] +÷ 01BB × 0308 × 0300 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0030 × 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0030 × 0308 × 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0030 × 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0030 × 0308 × 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0030 × 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0030 × 0308 × 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0030 × 0085 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0030 × 0308 × 0085 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0030 × 0009 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0030 × 0308 × 0009 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0030 × 0061 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0030 × 0308 × 0061 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0030 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0030 × 0308 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0030 × 01BB ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0030 × 0308 × 01BB ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING 
DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0030 × 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0030 × 0308 × 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0030 × 0021 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0030 × 0308 × 0021 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0030 × 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0030 × 0308 × 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0030 × 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0030 × 0308 × 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0030 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 002E ÷ 0001 ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 002E × 0308 ÷ 0001 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 002E × 000D ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 002E × 0308 × 000D ÷ # ÷ [0.2] 
FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 002E × 000A ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 002E × 0308 × 000A ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 002E × 0085 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 002E × 0308 × 0085 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 002E × 0009 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 002E × 0308 × 0009 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 002E × 0061 ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 002E × 0308 × 0061 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 002E ÷ 0041 ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 002E × 0308 ÷ 0041 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 002E ÷ 01BB ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 002E × 0308 ÷ 01BB ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 002E × 0030 ÷ # ÷ [0.2] FULL STOP (ATerm) × [6.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 002E × 0308 × 0030 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [6.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 002E × 002E ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] FULL STOP (ATerm) ÷ [0.3] +÷ 002E × 0308 × 002E ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] FULL STOP (ATerm) ÷ [0.3] +÷ 002E × 0021 ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 002E × 0308 × 0021 ÷ # ÷ 
[0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 002E × 0022 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 002E × 0308 × 0022 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 002E × 002C ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] COMMA (SContinue) ÷ [0.3] +÷ 002E × 0308 × 002C ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] COMMA (SContinue) ÷ [0.3] +÷ 002E × 00AD ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 002E × 0308 × 00AD ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 002E × 0300 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 002E × 0308 × 0300 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0021 ÷ 0001 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0021 × 0308 ÷ 0001 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0021 × 000D ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0021 × 0308 × 000D ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0021 × 000A ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0021 × 0308 × 000A ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0021 × 0085 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0021 × 0308 × 0085 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0021 × 0009 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <CHARACTER TABULATION> 
(Sp) ÷ [0.3] +÷ 0021 × 0308 × 0009 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0021 ÷ 0061 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0021 × 0308 ÷ 0061 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0021 ÷ 0041 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0021 × 0308 ÷ 0041 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0021 ÷ 01BB ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0021 × 0308 ÷ 01BB ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0021 ÷ 0030 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0021 × 0308 ÷ 0030 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0021 × 002E ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] FULL STOP (ATerm) ÷ [0.3] +÷ 0021 × 0308 × 002E ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] FULL STOP (ATerm) ÷ [0.3] +÷ 0021 × 0021 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0021 × 0308 × 0021 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0021 × 0022 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0021 × 0308 × 0022 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0021 × 002C ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] COMMA (SContinue) ÷ [0.3] +÷ 0021 × 0308 × 002C ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS 
(Extend_FE) × [8.1] COMMA (SContinue) ÷ [0.3] +÷ 0021 × 00AD ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0021 × 0308 × 00AD ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0021 × 0300 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0021 × 0308 × 0300 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0022 × 0001 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0022 × 0308 × 0001 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0022 × 000D ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0022 × 0308 × 000D ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0022 × 000A ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0022 × 0308 × 000A ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0022 × 0085 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0022 × 0308 × 0085 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0022 × 0009 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0022 × 0308 × 0009 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0022 × 0061 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0022 × 0308 × 0061 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0022 × 0041 ÷ # ÷ [0.2] QUOTATION MARK (Close) × 
[998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0022 × 0308 × 0041 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0022 × 01BB ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0022 × 0308 × 01BB ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0022 × 0030 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0022 × 0308 × 0030 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0022 × 002E ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0022 × 0308 × 002E ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0022 × 0021 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0022 × 0308 × 0021 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0022 × 0022 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0022 × 0308 × 0022 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0022 × 002C ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0022 × 0308 × 002C ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0022 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0022 × 0308 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0022 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0022 × 0308 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS 
(Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 002C × 0001 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 002C × 0308 × 0001 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 002C × 000D ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 002C × 0308 × 000D ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 002C × 000A ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 002C × 0308 × 000A ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 002C × 0085 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 002C × 0308 × 0085 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 002C × 0009 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 002C × 0308 × 0009 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 002C × 0061 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 002C × 0308 × 0061 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 002C × 0041 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 002C × 0308 × 0041 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 002C × 01BB ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 002C × 0308 × 01BB ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 002C × 0030 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 002C × 
0308 × 0030 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 002C × 002E ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 002C × 0308 × 002E ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 002C × 0021 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 002C × 0308 × 0021 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 002C × 0022 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 002C × 0308 × 0022 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 002C × 002C ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 002C × 0308 × 002C ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 00AD × 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 00AD × 0308 × 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 00AD × 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 00AD × 0308 × 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 00AD × 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <LINE FEED (LF)> (LF) 
÷ [0.3] +÷ 00AD × 0308 × 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 00AD × 0085 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 00AD × 0308 × 0085 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 00AD × 0009 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 00AD × 0308 × 0009 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 00AD × 0061 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 00AD × 0308 × 0061 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 00AD × 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 00AD × 0308 × 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 00AD × 01BB ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 00AD × 0308 × 01BB ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 00AD × 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 00AD × 0308 × 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 00AD × 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 00AD × 0308 × 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 00AD × 0021 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 00AD × 0308 × 0021 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING 
DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 00AD × 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 00AD × 0308 × 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 00AD × 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 00AD × 0308 × 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 00AD × 0308 × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0300 × 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0300 × 0308 × 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0300 × 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0300 × 0308 × 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0300 × 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0300 × 0308 × 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0300 × 0085 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0300 × 0308 × 0085 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] 
<NEXT LINE (NEL)> (Sep) ÷ [0.3] +÷ 0300 × 0009 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0300 × 0308 × 0009 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3] +÷ 0300 × 0061 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0300 × 0308 × 0061 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3] +÷ 0300 × 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0300 × 0308 × 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3] +÷ 0300 × 01BB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0300 × 0308 × 01BB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3] +÷ 0300 × 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0300 × 0308 × 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0300 × 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0300 × 0308 × 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0300 × 0021 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0300 × 0308 × 0021 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3] +÷ 0300 × 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0300 × 0308 × 0022 ÷ # ÷ [0.2] 
COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3] +÷ 0300 × 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0300 × 0308 × 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3] +÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000D × 000A ÷ 0061 × 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) × [998.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3] +÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3] +÷ 0020 × 200D × 0646 ÷ # ÷ [0.2] SPACE (Sp) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [998.0] ARABIC LETTER NOON (OLetter) ÷ [0.3] +÷ 0646 × 200D × 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (OLetter) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [998.0] SPACE (Sp) ÷ [0.3] +÷ 0028 × 0022 × 0047 × 006F × 002E × 0022 × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [998.0] QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER G (Upper) × [998.0] LATIN SMALL LETTER O (Lower) × [998.0] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] 
LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3] +÷ 0028 × 201C × 0047 × 006F × 003F × 201D × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [998.0] LEFT DOUBLE QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER G (Upper) × [998.0] LATIN SMALL LETTER O (Lower) × [998.0] QUESTION MARK (STerm) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3] +÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E × 0020 × 0069 × 0073 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER S (Lower) ÷ [0.3] +÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 003F × 0020 ÷ 0048 × 0065 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUESTION MARK (STerm) × [9.0] SPACE (Sp) ÷ [11.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3] +÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] 
COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3] +÷ 0033 × 002E × 0034 ÷ # ÷ [0.2] DIGIT THREE (Numeric) × [998.0] FULL STOP (ATerm) × [6.0] DIGIT FOUR (Numeric) ÷ [0.3] +÷ 0063 × 002E × 0064 ÷ # ÷ [0.2] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER D (Lower) ÷ [0.3] +÷ 0043 × 002E × 0064 ÷ # ÷ [0.2] LATIN CAPITAL LETTER C (Upper) × [998.0] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER D (Lower) ÷ [0.3] +÷ 0063 × 002E × 0044 ÷ # ÷ [0.2] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER D (Upper) ÷ [0.3] +÷ 0043 × 002E × 0044 ÷ # ÷ [0.2] LATIN CAPITAL LETTER C (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER D (Upper) ÷ [0.3] +÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [8.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3] +÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3] +÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 2018 × 0028 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [8.0] LEFT SINGLE 
QUOTATION MARK (Close) × [998.0] LEFT PARENTHESIS (Close) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3] +÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 2018 × 0028 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [998.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3] +÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3] +÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3] +÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE 
QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3] +÷ 0065 × 0074 × 0063 × 002E × 0029 × 000A ÷ 0308 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3] +÷ 0074 × 0068 × 0065 × 0020 × 0072 × 0065 × 0073 × 0070 × 002E × 0020 × 006C × 0065 × 0061 × 0064 × 0065 × 0072 × 0073 × 0020 × 0061 × 0072 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER S (Lower) × [998.0] LATIN SMALL LETTER P (Lower) × [998.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER L (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER S (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3] +÷ 5B57 × 002E ÷ 5B57 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [998.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) ÷ [0.3] +÷ 0065 × 0074 × 0063 × 002E ÷ 5B83 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3] +÷ 0065 × 0074 × 0063 × 002E × 3002 ÷ # ÷ [0.2] LATIN SMALL 
LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.1] IDEOGRAPHIC FULL STOP (STerm) ÷ [0.3] +÷ 5B57 × 3002 ÷ 5B83 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [998.0] IDEOGRAPHIC FULL STOP (STerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3] +÷ 0021 × 0020 × 0020 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] SPACE (Sp) × [10.0] SPACE (Sp) ÷ [0.3] +÷ 2060 × 0028 × 2060 × 0022 × 2060 × 0047 × 2060 × 006F × 2060 × 002E × 2060 × 0022 × 2060 × 0029 × 2060 × 0020 × 2060 ÷ 0028 × 2060 × 0048 × 2060 × 0065 × 2060 × 0020 × 2060 × 0064 × 2060 × 0069 × 2060 × 0064 × 2060 × 002E × 2060 × 0029 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER G (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER O (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0028 × 2060 × 201C × 2060 × 0047 × 2060 × 006F × 2060 × 003F × 2060 × 201D × 2060 × 0029 × 2060 × 0020 
× 2060 ÷ 0028 × 2060 × 0048 × 2060 × 0065 × 2060 × 0020 × 2060 × 0064 × 2060 × 0069 × 2060 × 0064 × 2060 × 002E × 2060 × 0029 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT DOUBLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER G (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER O (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] QUESTION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 002E × 2060 × 0020 × 2060 × 0069 × 2060 × 0073 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE 
(Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 003F × 2060 × 0020 × 2060 ÷ 0048 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUESTION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 002E × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0033 × 2060 × 002E × 2060 × 0034 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] DIGIT THREE (Numeric) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [6.0] DIGIT FOUR (Numeric) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER 
(Format_FE) ÷ [0.3] +÷ 2060 × 0063 × 2060 × 002E × 2060 × 0064 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0043 × 2060 × 002E × 2060 × 0064 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER C (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0063 × 2060 × 002E × 2060 × 0044 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER D (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0043 × 2060 × 002E × 2060 × 0044 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER C (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER D (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × 
[5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 2018 × 2060 × 0028 × 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS 
(Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 2018 × 2060 × 0028 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × 
[8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × 
[5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 000A ÷ 2060 × 0308 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 0020 × 2060 × 0072 × 2060 × 0065 × 2060 × 0073 × 2060 × 0070 × 2060 × 002E × 2060 × 0020 × 2060 × 006C × 2060 × 0065 × 2060 × 0061 × 2060 × 0064 × 2060 × 0065 × 2060 × 0072 × 2060 × 0073 × 2060 × 0020 × 2060 × 0061 × 2060 × 0072 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER P (Lower) × 
[5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER L (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 5B57 × 2060 × 002E × 2060 ÷ 5B57 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 3002 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × 
[998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.1] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 5B57 × 2060 × 3002 × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [998.0] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 2060 × 0021 × 2060 × 0020 × 2060 × 0020 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] EXCLAMATION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [10.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3] +# +# Lines: 502 +# +# EOF diff --git a/src/unicode/data/WordBreakTest.txt b/src/unicode/data/WordBreakTest.txt new file mode 100644 index 0000000..facd892 --- /dev/null +++ b/src/unicode/data/WordBreakTest.txt @@ -0,0 +1,1851 @@ +# WordBreakTest-12.1.0.txt +# Date: 2019-03-10, 10:53:29 GMT +# © 2019 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Default Word_Break Test +# +# Format: +# <string> (# <comment>)? +# <string> contains hex Unicode code points, with +# ÷ wherever there is a break opportunity, and +# × wherever there is not. 
+# <comment> the format can change, but currently it shows: +# - the sample character name +# - (x) the Word_Break property value for the sample character +# - [x] the rule that determines whether there is a break or not, +# as listed in the Rules section of WordBreakTest.html +# +# These samples may be extended or changed in the future. +# +÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0001 × 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0001 × 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0001 × 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0001 ÷ 000B ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0001 × 0308 ÷ 000B ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0001 ÷ 3031 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0001 × 0308 ÷ 3031 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0001 ÷ 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0001 × 0308 ÷ 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0001 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0001 × 0308 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING 
DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0001 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0001 × 0308 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0001 ÷ 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0001 × 0308 ÷ 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0001 ÷ 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0001 × 0308 ÷ 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0001 ÷ 005F ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0001 × 0308 ÷ 005F ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0001 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0001 ÷ 05D0 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0001 × 0308 ÷ 05D0 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0001 ÷ 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0001 × 0308 ÷ 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0001 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0001 × 0308 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE 
(Single_Quote) ÷ [0.3] +÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0001 × 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0001 × 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0001 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0001 × 0308 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0001 × 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0001 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0001 × 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0001 ÷ 0061 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0001 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0001 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0001 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0001 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ 
[0.3] +÷ 0001 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0001 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0001 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0001 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0001 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0001 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0001 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0001 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0001 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0001 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0001 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0001 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER 
(Format_FE) ÷ [0.3] +÷ 0001 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000D ÷ 000B ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 000D ÷ 3031 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 000D ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 000D ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COLON (MidLetter) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000D ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN 
(CR)> (CR) ÷ [3.1] COMMA (MidNum) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000D ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 000D ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 000D ÷ 005F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000D ÷ 05D0 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 000D ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 000D ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] WATCH (ExtPict) ÷ [0.3] 
+÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 000D ÷ 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 000D ÷ 0061 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000D ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS 
(Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000D ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000D ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000D ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000D ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] 
COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000A ÷ 000B ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 000A ÷ 3031 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 000A ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 000A ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COLON (MidLetter) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000A ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMMA (MidNum) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000A ÷ 002E ÷ # ÷ 
[0.2] <LINE FEED (LF)> (LF) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 000A ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 000A ÷ 005F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000A ÷ 05D0 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 000A ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 000A ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] WATCH (ExtPict) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING 
DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 000A ÷ 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 000A ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS 
(Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000B ÷ 0001 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 000B ÷ 000D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] 
<CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000B ÷ 000A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000B ÷ 000B ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 000B ÷ 3031 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 000B ÷ 0041 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 000B ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COLON (MidLetter) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000B ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMMA (MidNum) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000B ÷ 002E ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 000B ÷ 0030 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <LINE 
TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 000B ÷ 005F ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 000B ÷ 1F1E6 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000B ÷ 05D0 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 000B ÷ 0022 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 000B ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000B ÷ 231A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] WATCH (ExtPict) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 000B ÷ 0020 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 000B ÷ 00AD ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 000B ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING 
DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 000B ÷ 0300 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000B ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 000B ÷ 200D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 000B ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 000B ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000B ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000B ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000B ÷ 0061 ÷ 002C ÷ # 
÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000B ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 000B ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 000B ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 000B ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000B ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 3031 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 3031 × 0308 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 3031 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 3031 × 0308 ÷ 000D ÷ # ÷ [0.2] 
VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 3031 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 3031 × 0308 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 3031 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 3031 × 0308 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 3031 × 0308 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 3031 ÷ 0041 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 3031 × 0308 ÷ 0041 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 3031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 3031 × 0308 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 3031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 3031 × 0308 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 3031 ÷ 002E ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 3031 × 0308 ÷ 002E ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 3031 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK 
(Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 3031 × 0308 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 3031 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 3031 × 0308 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 3031 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 3031 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 3031 ÷ 05D0 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 3031 × 0308 ÷ 05D0 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 3031 ÷ 0022 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 3031 × 0308 ÷ 0022 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 3031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 3031 × 0308 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 3031 ÷ 231A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 3031 × 0308 ÷ 231A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 3031 ÷ 0020 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 3031 × 0308 ÷ 0020 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING 
DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 3031 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 3031 × 0308 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 3031 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 3031 × 0308 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 3031 × 200D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 3031 × 0308 × 200D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 3031 ÷ 0061 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 3031 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 3031 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 3031 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 3031 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN 
SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 3031 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 3031 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 3031 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 3031 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 3031 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 3031 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 3031 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 3031 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 3031 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] 
FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0041 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0041 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0041 ÷ 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0041 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0041 ÷ 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0041 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0041 ÷ 000B ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0041 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0041 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0041 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0041 × 0308 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0041 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0041 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0041 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0041 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL 
LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0041 ÷ 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0041 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0041 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0041 × 0308 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0041 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0041 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0041 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0041 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0041 ÷ 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0041 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0041 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0041 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0041 ÷ 231A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 
0041 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0041 ÷ 0020 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0041 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0041 × 0308 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0041 × 200D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0041 × 0308 × 200D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0041 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0041 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0041 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0041 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0041 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0041 × 0308 × 0061 ÷ 0027 ÷ # 
÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0041 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0041 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0041 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0041 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0041 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0041 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0041 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0041 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0041 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0041 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0041 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD 
JOINER (Format_FE) ÷ [0.3] +÷ 0041 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 003A ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 003A ÷ 000D ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 003A ÷ 000A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 003A ÷ 000B ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 003A ÷ 3031 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 003A ÷ 0041 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 003A × 0308 ÷ 0041 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 003A ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 003A ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 003A ÷ 002E ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 003A ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 003A ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 003A ÷ 05D0 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 003A × 0308 ÷ 05D0 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 003A ÷ 0022 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 003A ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 003A ÷ 231A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 003A ÷ 0020 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] 
COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 003A × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 003A × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 003A × 200D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 003A × 0308 × 200D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 003A ÷ 0061 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 003A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 003A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 003A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A 
(ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 003A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 003A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002C ÷ 0001 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 002C ÷ 000D ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 002C ÷ 000A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 002C ÷ 000B ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 002C ÷ 3031 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 002C ÷ 0041 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 002C ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002C ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002C ÷ 002E ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 002C ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 002C ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 002C 
÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 002C ÷ 05D0 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 002C ÷ 0022 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 002C ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002C ÷ 231A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 002C ÷ 0020 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 002C × 200D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 002C × 0308 × 200D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 002C ÷ 
0061 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002C ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002C × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 
002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002C ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002C × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002C ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002C × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002E ÷ 0001 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 002E × 0308 ÷ 0001 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 002E ÷ 000D ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 002E × 0308 ÷ 000D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 002E ÷ 000A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 002E × 0308 ÷ 000A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 002E ÷ 000B ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 002E × 0308 ÷ 000B ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 002E ÷ 3031 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 002E × 0308 ÷ 3031 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 002E ÷ 0041 ÷ # ÷ 
[0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 002E × 0308 ÷ 0041 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 002E ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002E × 0308 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002E ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002E × 0308 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002E ÷ 002E ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 002E × 0308 ÷ 002E ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 002E ÷ 0030 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 002E × 0308 ÷ 0030 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 002E ÷ 005F ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 002E × 0308 ÷ 005F ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 002E ÷ 1F1E6 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 002E × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 002E ÷ 05D0 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 002E × 0308 ÷ 05D0 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 002E ÷ 0022 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 002E × 0308 ÷ 0022 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × 
[4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 002E ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002E × 0308 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002E ÷ 231A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 002E × 0308 ÷ 231A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 002E ÷ 0020 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 002E × 0308 ÷ 0020 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 002E × 00AD ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 002E × 0308 × 00AD ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 002E × 0300 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 002E × 0308 × 0300 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 002E × 200D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 002E × 0308 × 200D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 002E ÷ 0061 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002E × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002E ÷ 0061 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002E × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002E ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002E × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002E ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002E × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002E ÷ 0061 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002E × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002E ÷ 0031 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002E × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 002E ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002E × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 002E ÷ 0031 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 002E × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA 
(MidNum) ÷ [0.3] +÷ 002E ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 002E × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0030 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0030 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0030 ÷ 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0030 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0030 ÷ 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0030 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0030 ÷ 000B ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0030 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0030 ÷ 3031 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0030 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0030 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0030 × 0308 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0030 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0030 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON 
(MidLetter) ÷ [0.3] +÷ 0030 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0030 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0030 ÷ 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0030 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0030 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0030 × 0308 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0030 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0030 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0030 × 05D0 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0030 × 0308 × 05D0 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0030 ÷ 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0030 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0030 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0030 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0030 ÷ 231A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0030 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ZERO 
(Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0030 ÷ 0020 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0030 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0030 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0030 × 200D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0030 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0030 × 0061 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0030 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0030 × 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0030 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0030 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0030 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0030 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO 
(Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0030 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0030 × 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0030 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0030 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0030 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0030 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0030 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0030 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0030 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0030 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0030 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 005F ÷ 0001 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 005F × 
0308 ÷ 0001 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 005F ÷ 000D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 005F × 0308 ÷ 000D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 005F ÷ 000A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 005F × 0308 ÷ 000A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 005F ÷ 000B ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 005F × 0308 ÷ 000B ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 005F × 3031 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 005F × 0308 × 3031 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 005F × 0041 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 005F × 0308 × 0041 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 005F ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 005F × 0308 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 005F ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 005F × 0308 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 005F ÷ 002E ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 005F × 0308 ÷ 002E ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ 
[0.3] +÷ 005F × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 005F × 0308 × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 005F × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 005F × 0308 × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 005F ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 005F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 005F × 05D0 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 005F × 0308 × 05D0 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 005F ÷ 0022 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 005F × 0308 ÷ 0022 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 005F ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 005F × 0308 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 005F ÷ 231A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 005F × 0308 ÷ 231A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 005F ÷ 0020 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 005F × 0308 ÷ 0020 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 005F × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 
005F × 0308 × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 005F × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 005F × 0308 × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 005F × 200D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 005F × 0308 × 200D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 005F × 0061 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 005F × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 005F × 0061 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 005F × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 005F × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 005F × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 005F × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 005F × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] 
+÷ 005F × 0061 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 005F × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 005F × 0031 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 005F × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 005F × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 005F × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 005F × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 005F × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 005F × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 005F × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] 
REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1F1E6 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 1F1E6 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 1F1E6 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 1F1E6 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 1F1E6 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 1F1E6 ÷ 002E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 002E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] 
+÷ 1F1E6 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 1F1E6 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [15.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1F1E6 × 0308 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [15.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1F1E6 ÷ 05D0 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 05D0 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 1F1E6 ÷ 0022 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0022 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 1F1E6 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] 
SPACE (WSegSpace) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 1F1E6 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 1F1E6 × 0308 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 1F1E6 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 1F1E6 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 1F1E6 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] 
LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 1F1E6 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 1F1E6 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 1F1E6 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 1F1E6 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 1F1E6 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 1F1E6 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP 
(MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 05D0 ÷ 0001 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 05D0 × 0308 ÷ 0001 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 05D0 ÷ 000D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 05D0 × 0308 ÷ 000D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 05D0 ÷ 000A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 05D0 × 0308 ÷ 000A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 05D0 ÷ 000B ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 05D0 × 0308 ÷ 000B ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 05D0 ÷ 3031 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 05D0 × 0308 ÷ 3031 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 05D0 × 0041 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 05D0 × 0308 × 0041 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 05D0 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 05D0 × 0308 ÷ 003A ÷ # ÷ [0.2] HEBREW 
LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 05D0 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 05D0 × 0308 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 05D0 ÷ 002E ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 05D0 × 0308 ÷ 002E ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 05D0 × 0030 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 05D0 × 0308 × 0030 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 05D0 × 005F ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 05D0 × 0308 × 005F ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 05D0 ÷ 1F1E6 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 05D0 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 05D0 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 05D0 × 0308 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 05D0 ÷ 0022 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 05D0 × 0308 ÷ 0022 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 05D0 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.1] APOSTROPHE 
(Single_Quote) ÷ [0.3] +÷ 05D0 × 0308 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 05D0 ÷ 231A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 05D0 × 0308 ÷ 231A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 05D0 ÷ 0020 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 05D0 × 0308 ÷ 0020 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 05D0 × 00AD ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 05D0 × 0308 × 00AD ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 05D0 × 0300 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 05D0 × 0308 × 0300 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 05D0 × 200D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 05D0 × 0308 × 200D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 05D0 × 0061 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 05D0 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 05D0 × 0061 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 05D0 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) 
× [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 05D0 × 0061 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 05D0 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 05D0 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 05D0 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 05D0 × 0061 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 05D0 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 05D0 × 0031 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 05D0 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 05D0 × 0031 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 05D0 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 05D0 × 0031 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 
05D0 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 05D0 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 05D0 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0022 ÷ 0001 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0022 × 0308 ÷ 0001 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0022 ÷ 000D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0022 × 0308 ÷ 000D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0022 ÷ 000A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0022 × 0308 ÷ 000A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0022 ÷ 000B ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0022 × 0308 ÷ 000B ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0022 ÷ 3031 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0022 × 0308 ÷ 3031 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0022 ÷ 0041 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0022 × 0308 ÷ 0041 ÷ # ÷ [0.2] QUOTATION MARK 
(Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0022 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0022 × 0308 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0022 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0022 × 0308 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0022 ÷ 002E ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0022 × 0308 ÷ 002E ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0022 ÷ 0030 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0022 × 0308 ÷ 0030 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0022 ÷ 005F ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0022 × 0308 ÷ 005F ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0022 ÷ 1F1E6 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0022 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0022 ÷ 05D0 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0022 × 0308 ÷ 05D0 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0022 ÷ 0022 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0022 × 0308 ÷ 0022 ÷ # ÷ [0.2] QUOTATION MARK 
(Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0022 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0022 × 0308 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0022 ÷ 231A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0022 × 0308 ÷ 231A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0022 ÷ 0020 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0022 × 0308 ÷ 0020 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0022 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0022 × 0308 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0022 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0022 × 0308 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0022 × 200D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0022 × 0308 × 200D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0022 ÷ 0061 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0022 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0022 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A 
(ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0022 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0022 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0022 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0022 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0022 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0022 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0022 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0022 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0022 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0022 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0022 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0022 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] QUOTATION 
MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0022 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0022 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0022 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0027 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0027 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0027 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0027 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0027 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0027 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0027 × 0308 ÷ 0041 ÷ # ÷ 
[0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0027 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0027 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0027 ÷ 002E ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0027 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0027 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0027 ÷ 05D0 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0027 × 0308 ÷ 05D0 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0027 ÷ 0022 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) 
÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0027 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0027 ÷ 231A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0027 ÷ 0020 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0027 × 00AD ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0027 × 0300 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0027 × 200D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × 
[4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] 
COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 231A × 0308 ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 231A ÷ 000B ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 231A × 0308 ÷ 000B ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 231A ÷ 3031 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 231A × 0308 ÷ 3031 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 231A ÷ 0041 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 231A × 0308 ÷ 0041 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 231A ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 231A × 0308 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 231A ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 231A × 0308 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 231A ÷ 002E ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 231A × 0308 ÷ 002E ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 231A ÷ 0030 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 231A × 0308 ÷ 0030 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 231A ÷ 005F ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 231A × 0308 ÷ 005F ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 231A ÷ 05D0 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 231A × 0308 ÷ 05D0 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 231A ÷ 0022 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 231A × 0308 ÷ 0022 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 231A ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 231A × 0308 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ 
[999.0] WATCH (ExtPict) ÷ [0.3] +÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 231A × 00AD ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 231A × 0308 × 00AD ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 231A ÷ 0061 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 231A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 231A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 231A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 231A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 231A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 231A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ 
[0.3] +÷ 231A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 231A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 231A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 231A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 231A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 231A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 231A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 231A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 231A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 231A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 231A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE 
(WSegSpace) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0020 ÷ 000B ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0020 × 0308 ÷ 000B ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0020 ÷ 3031 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0020 × 0308 ÷ 3031 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0020 ÷ 0041 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0020 × 0308 ÷ 0041 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0020 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0020 × 0308 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0020 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0020 × 0308 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0020 ÷ 002E ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0020 × 0308 ÷ 002E ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0020 ÷ 0030 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0020 × 0308 ÷ 0030 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0020 ÷ 005F ÷ # ÷ [0.2] SPACE 
(WSegSpace) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0020 × 0308 ÷ 005F ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0020 ÷ 05D0 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0020 × 0308 ÷ 05D0 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0020 ÷ 0022 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0020 × 0308 ÷ 0022 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0020 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0020 × 0308 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0020 × 0020 ÷ # ÷ [0.2] SPACE (WSegSpace) × [3.4] SPACE (WSegSpace) ÷ [0.3] +÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0020 × 00AD ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0020 × 0308 × 00AD ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT 
(Extend_FE) ÷ [0.3] +÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0020 ÷ 0061 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0020 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0020 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0020 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0020 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0020 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0020 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0020 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0020 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0020 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0020 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] 
+÷ 0020 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0020 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0020 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0020 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0020 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0020 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0020 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 00AD ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 00AD × 0308 ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 00AD ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 00AD × 0308 ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 00AD ÷ 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 00AD × 0308 ÷ 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 00AD ÷ 000B ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 00AD × 0308 ÷ 000B ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 00AD ÷ 3031 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 00AD × 0308 ÷ 3031 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 00AD ÷ 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 00AD × 0308 ÷ 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 00AD ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 00AD × 0308 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 00AD ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 00AD × 0308 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 00AD ÷ 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 00AD × 0308 ÷ 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 00AD ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 00AD × 0308 ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 00AD ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 00AD × 0308 ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 00AD ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 00AD × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 00AD ÷ 05D0 ÷ # ÷ [0.2] SOFT HYPHEN 
(Format_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 00AD × 0308 ÷ 05D0 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 00AD ÷ 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 00AD × 0308 ÷ 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 00AD ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 00AD × 0308 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 00AD ÷ 231A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 00AD × 0308 ÷ 231A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 00AD ÷ 0020 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 00AD × 0308 ÷ 0020 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 00AD × 0308 × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 00AD × 200D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 00AD × 0308 × 200D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 00AD ÷ 0061 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 00AD × 
0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 00AD ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 00AD × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 00AD ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 00AD × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 00AD ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 00AD × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 00AD ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 00AD × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 00AD ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 00AD × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 00AD ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 00AD × 0308 ÷ 0031 ÷ 
0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 00AD ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 00AD × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 00AD ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 00AD × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0300 ÷ 000B ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0300 × 0308 ÷ 000B ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0300 ÷ 3031 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0300 × 0308 ÷ 3031 ÷ # ÷ 
[0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0300 ÷ 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0300 × 0308 ÷ 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0300 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0300 × 0308 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0300 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0300 × 0308 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0300 ÷ 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0300 × 0308 ÷ 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0300 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0300 × 0308 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0300 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0300 × 0308 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0300 ÷ 05D0 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ 
[999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0300 × 0308 ÷ 05D0 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0300 ÷ 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0300 × 0308 ÷ 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0300 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0300 × 0308 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ 
[0.3] +÷ 0300 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0300 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0300 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0300 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0300 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0300 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0300 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0300 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0300 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0300 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0300 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0300 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMBINING 
GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0300 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0300 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0300 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0300 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0300 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0300 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 200D ÷ 000B ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 
200D × 0308 ÷ 000B ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 200D ÷ 3031 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 200D × 0308 ÷ 3031 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 200D ÷ 0041 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 200D × 0308 ÷ 0041 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 200D ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 200D × 0308 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 200D ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 200D × 0308 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 200D ÷ 002E ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 200D × 0308 ÷ 002E ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 200D ÷ 0030 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 200D × 0308 ÷ 0030 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 200D ÷ 005F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 200D × 0308 ÷ 005F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × 
[4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 200D ÷ 05D0 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 200D × 0308 ÷ 05D0 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 200D ÷ 0022 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 200D × 0308 ÷ 0022 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 200D ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 200D × 0308 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 200D × 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] WATCH (ExtPict) ÷ [0.3] +÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 200D × 00AD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 200D × 0308 × 00AD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] 
ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 200D ÷ 0061 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 200D × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 200D ÷ 0061 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 200D × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 200D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 200D × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 200D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 200D × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 200D ÷ 0061 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 200D × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 200D ÷ 0031 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 200D × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ 
[999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 200D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 200D × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 200D ÷ 0031 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 200D × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 200D ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 200D × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0061 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0061 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 × 2060 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP 
(MidNumLet) ÷ [0.3] +÷ 0061 × 2060 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 × 2060 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0061 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0061 × 2060 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0061 × 2060 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0061 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] 
APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 2060 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0061 × 2060 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0061 × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0061 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 × 2060 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0061 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN 
SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 × 2060 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0031 ÷ 003A ÷ # ÷ 
[0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 2060 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 × 2060 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 2060 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING 
DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0061 × 003A × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 003A × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 002E ÷ # ÷ 
[0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0061 × 003A × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0061 × 003A × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] 
COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0061 ÷ 003A × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 ÷ 003A × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 ÷ 003A × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0061 × 003A × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 
0061 × 003A × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 003A × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 003A × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 003A × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 003A × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 003A × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 003A × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 003A × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 × 003A × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ 
[999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 
0061 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0061 × 0027 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 0027 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) 
÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0061 × 0027 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0061 × 0027 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ 
[999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0061 ÷ 0027 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × 
[4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0061 × 0027 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 0027 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 0027 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 0027 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 0027 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 0027 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 
0027 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 × 0027 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ 
[999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ 
[999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0030 ÷ # 
÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × 
[4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A 
(ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] 
LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER 
(Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 000B ÷ # ÷ 
[0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO 
(Numeric) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A 
(ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0061 ÷ 002C × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0061 ÷ 002C × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0061 ÷ 002C × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 
÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) 
÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS 
(Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] 
DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0031 ÷ 003A × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ 
[999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0031 ÷ 003A × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0031 ÷ 003A × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ 
[0.3] +÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE 
(Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 0041 ÷ # ÷ 
[0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0031 × 0027 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0031 × 0027 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ 
[0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0031 ÷ 0027 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] 
SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0031 ÷ 0027 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0031 ÷ 0027 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] 
LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 × 0027 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 × 0027 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 × 0027 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 × 0027 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 × 0027 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 × 0027 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 × 0027 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT 
ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 × 0027 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE 
(Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0031 × 002C × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0031 × 002C × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0031 ÷ 002C × 
0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0031 ÷ 002C × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002C × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0031 ÷ 002C × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] ZERO WIDTH 
JOINER (ZWJ_FE) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 002C 
÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 × 002C × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 × 002C × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 × 002C × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 × 002C × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 × 002C × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 × 002C × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 × 002C × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 × 002C × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ 
[999.0] <START OF HEADING> (Other) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0031 
÷ 002E × 2060 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3] +÷ 0031 × 002E × 2060 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0031 × 002E × 2060 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL 
INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 
× 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ 
[999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 × 002E × 2060 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 × 002E 
× 2060 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] +÷ 0031 × 002E × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 0031 × 002E × 2060 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] +÷ 0031 × 002E × 2060 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] +÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [0.3] +÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3] +÷ 0020 × 200D ÷ 0646 ÷ # ÷ 
[0.2] SPACE (WSegSpace) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] ARABIC LETTER NOON (ALetter) ÷ [0.3] +÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3] +÷ 0041 × 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0041 × 003A × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0041 ÷ 003A ÷ 003A ÷ 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 05D0 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3] +÷ 05D0 × 0022 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.2] QUOTATION MARK (Double_Quote) × [7.3] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3] +÷ 0041 × 0030 × 0030 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) × [8.0] DIGIT ZERO (Numeric) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] +÷ 0030 × 002C × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 0030 ÷ 002C ÷ 002C ÷ 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] +÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] +÷ 0041 × 005F × 0030 × 005F × 3031 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ZERO (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] +÷ 0041 × 005F × 005F × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN CAPITAL LETTER A 
(ALetter) ÷ [0.3] +÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [15.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 200D × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3] +÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [999.0] BABY (ExtPict) ÷ [0.3] +÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 0061 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [4.0] 
ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3] +÷ 0061 × 200D × 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3] +÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3] +÷ 1F6D1 × 1F3FF ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3] +÷ 200D × 1F6D1 × 1F3FF ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3] +÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 1F6D1 ÷ 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 0061 × 0308 × 200D × 0308 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3] +÷ 0061 ÷ 0020 × 0020 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] SPACE (WSegSpace) × [3.4] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL 
LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON 
(MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ 
[0.3] +÷ 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A 
(ALetter) ÷ [0.3] +÷ 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ 
[999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ 
[999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0031 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0031 
÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) 
÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] 
COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 
0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE 
(Numeric) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL 
STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3] +÷ 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +÷ 0061 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3] +# +# Lines: 1823 +# +# EOF diff --git a/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa b/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa Binary files differnew file mode 100644 index 0000000..0efaaf2 --- /dev/null +++ b/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa diff --git a/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa b/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa Binary files differnew file mode 100644 index 0000000..eb24025 --- /dev/null +++ b/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa diff --git a/src/unicode/fsm/grapheme_break_fwd.rs 
b/src/unicode/fsm/grapheme_break_fwd.rs new file mode 100644 index 0000000..317ba96 --- /dev/null +++ b/src/unicode/fsm/grapheme_break_fwd.rs @@ -0,0 +1,41 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate dfa --name GRAPHEME_BREAK_FWD --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)] +// +// ucd-generate 0.2.8 is available on crates.io. + +#[cfg(target_endian = "big")] +lazy_static! { + pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"), + }; + + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } + }; +} + +#[cfg(target_endian = "little")] +lazy_static! { + pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"), + }; + + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } + }; +} diff --git a/src/unicode/fsm/grapheme_break_rev.bigendian.dfa b/src/unicode/fsm/grapheme_break_rev.bigendian.dfa Binary files differnew file mode 100644 index 0000000..d42cd36 --- /dev/null +++ b/src/unicode/fsm/grapheme_break_rev.bigendian.dfa diff --git a/src/unicode/fsm/grapheme_break_rev.littleendian.dfa b/src/unicode/fsm/grapheme_break_rev.littleendian.dfa Binary files differnew file mode 100644 index 0000000..c75ea5f --- /dev/null +++ b/src/unicode/fsm/grapheme_break_rev.littleendian.dfa diff --git a/src/unicode/fsm/grapheme_break_rev.rs b/src/unicode/fsm/grapheme_break_rev.rs new file mode 100644 index 0000000..db6b6ee --- /dev/null +++ b/src/unicode/fsm/grapheme_break_rev.rs @@ -0,0 +1,41 @@ +// DO 
NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate dfa --name GRAPHEME_BREAK_REV --reverse --longest --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)] +// +// ucd-generate 0.2.8 is available on crates.io. + +#[cfg(target_endian = "big")] +lazy_static! { + pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"), + }; + + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } + }; +} + +#[cfg(target_endian = "little")] +lazy_static! { + pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"), + }; + + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } + }; +} diff --git a/src/unicode/fsm/mod.rs b/src/unicode/fsm/mod.rs new file mode 100644 index 0000000..ae6c499 --- /dev/null +++ b/src/unicode/fsm/mod.rs @@ -0,0 +1,8 @@ +pub mod grapheme_break_fwd; +pub mod grapheme_break_rev; +pub mod regional_indicator_rev; +pub mod sentence_break_fwd; +pub mod simple_word_fwd; +pub mod whitespace_anchored_fwd; +pub mod whitespace_anchored_rev; +pub mod word_break_fwd; diff --git a/src/unicode/fsm/regional_indicator_rev.bigendian.dfa b/src/unicode/fsm/regional_indicator_rev.bigendian.dfa Binary files differnew file mode 100644 index 0000000..1a3357f --- /dev/null +++ b/src/unicode/fsm/regional_indicator_rev.bigendian.dfa diff --git a/src/unicode/fsm/regional_indicator_rev.littleendian.dfa b/src/unicode/fsm/regional_indicator_rev.littleendian.dfa Binary files differnew file mode 100644 index 0000000..e437aae --- /dev/null 
+++ b/src/unicode/fsm/regional_indicator_rev.littleendian.dfa diff --git a/src/unicode/fsm/regional_indicator_rev.rs b/src/unicode/fsm/regional_indicator_rev.rs new file mode 100644 index 0000000..3b6beff --- /dev/null +++ b/src/unicode/fsm/regional_indicator_rev.rs @@ -0,0 +1,41 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate dfa --name REGIONAL_INDICATOR_REV --reverse --classes --minimize --anchored --premultiply --state-size 1 src/unicode/fsm/ \p{gcb=Regional_Indicator} +// +// ucd-generate 0.2.8 is available on crates.io. + +#[cfg(target_endian = "big")] +lazy_static! { + pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"), + }; + + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } + }; +} + +#[cfg(target_endian = "little")] +lazy_static! 
{ + pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"), + }; + + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } + }; +} diff --git a/src/unicode/fsm/sentence_break_fwd.bigendian.dfa b/src/unicode/fsm/sentence_break_fwd.bigendian.dfa Binary files differnew file mode 100644 index 0000000..a1813d7 --- /dev/null +++ b/src/unicode/fsm/sentence_break_fwd.bigendian.dfa diff --git a/src/unicode/fsm/sentence_break_fwd.littleendian.dfa b/src/unicode/fsm/sentence_break_fwd.littleendian.dfa Binary files differnew file mode 100644 index 0000000..2763583 --- /dev/null +++ b/src/unicode/fsm/sentence_break_fwd.littleendian.dfa diff --git a/src/unicode/fsm/sentence_break_fwd.rs b/src/unicode/fsm/sentence_break_fwd.rs new file mode 100644 index 0000000..46ecfcf --- /dev/null +++ b/src/unicode/fsm/sentence_break_fwd.rs @@ -0,0 +1,41 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate dfa --name SENTENCE_BREAK_FWD --minimize --sparse --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)] +// +// ucd-generate 0.2.8 is available on crates.io. + +#[cfg(target_endian = "big")] +lazy_static! { + pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"), + }; + + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } + }; +} + +#[cfg(target_endian = "little")] +lazy_static! 
{ + pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"), + }; + + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } + }; +} diff --git a/src/unicode/fsm/simple_word_fwd.bigendian.dfa b/src/unicode/fsm/simple_word_fwd.bigendian.dfa Binary files differnew file mode 100644 index 0000000..adc64c1 --- /dev/null +++ b/src/unicode/fsm/simple_word_fwd.bigendian.dfa diff --git a/src/unicode/fsm/simple_word_fwd.littleendian.dfa b/src/unicode/fsm/simple_word_fwd.littleendian.dfa Binary files differnew file mode 100644 index 0000000..dd48386 --- /dev/null +++ b/src/unicode/fsm/simple_word_fwd.littleendian.dfa diff --git a/src/unicode/fsm/simple_word_fwd.rs b/src/unicode/fsm/simple_word_fwd.rs new file mode 100644 index 0000000..c5fabe3 --- /dev/null +++ b/src/unicode/fsm/simple_word_fwd.rs @@ -0,0 +1,41 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate dfa --name SIMPLE_WORD_FWD --sparse --minimize --state-size 2 src/unicode/fsm/ \w +// +// ucd-generate 0.2.8 is available on crates.io. + +#[cfg(target_endian = "big")] +lazy_static! { + pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"), + }; + + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } + }; +} + +#[cfg(target_endian = "little")] +lazy_static! 
{ + pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"), + }; + + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } + }; +} diff --git a/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa b/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa Binary files differnew file mode 100644 index 0000000..bcfc4e9 --- /dev/null +++ b/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa diff --git a/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa b/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa Binary files differnew file mode 100644 index 0000000..d534a46 --- /dev/null +++ b/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa diff --git a/src/unicode/fsm/whitespace_anchored_fwd.rs b/src/unicode/fsm/whitespace_anchored_fwd.rs new file mode 100644 index 0000000..ea68582 --- /dev/null +++ b/src/unicode/fsm/whitespace_anchored_fwd.rs @@ -0,0 +1,41 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate dfa --name WHITESPACE_ANCHORED_FWD --anchored --classes --premultiply --minimize --state-size 1 src/unicode/fsm/ \s+ +// +// ucd-generate 0.2.8 is available on crates.io. + +#[cfg(target_endian = "big")] +lazy_static! { + pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"), + }; + + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } + }; +} + +#[cfg(target_endian = "little")] +lazy_static! 
{ + pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"), + }; + + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } + }; +} diff --git a/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa b/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa Binary files differnew file mode 100644 index 0000000..bb217f1 --- /dev/null +++ b/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa diff --git a/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa b/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa Binary files differnew file mode 100644 index 0000000..a7cb5a7 --- /dev/null +++ b/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa diff --git a/src/unicode/fsm/whitespace_anchored_rev.rs b/src/unicode/fsm/whitespace_anchored_rev.rs new file mode 100644 index 0000000..72b444e --- /dev/null +++ b/src/unicode/fsm/whitespace_anchored_rev.rs @@ -0,0 +1,41 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate dfa --name WHITESPACE_ANCHORED_REV --reverse --anchored --classes --minimize --state-size 1 src/unicode/fsm/ \s+ +// +// ucd-generate 0.2.8 is available on crates.io. + +#[cfg(target_endian = "big")] +lazy_static! { + pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"), + }; + + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } + }; +} + +#[cfg(target_endian = "little")] +lazy_static! 
{ + pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"), + }; + + unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } + }; +} diff --git a/src/unicode/fsm/word_break_fwd.bigendian.dfa b/src/unicode/fsm/word_break_fwd.bigendian.dfa Binary files differnew file mode 100644 index 0000000..1e75db6 --- /dev/null +++ b/src/unicode/fsm/word_break_fwd.bigendian.dfa diff --git a/src/unicode/fsm/word_break_fwd.littleendian.dfa b/src/unicode/fsm/word_break_fwd.littleendian.dfa Binary files differnew file mode 100644 index 0000000..e3093a3 --- /dev/null +++ b/src/unicode/fsm/word_break_fwd.littleendian.dfa diff --git a/src/unicode/fsm/word_break_fwd.rs b/src/unicode/fsm/word_break_fwd.rs new file mode 100644 index 0000000..52e6bc2 --- /dev/null +++ b/src/unicode/fsm/word_break_fwd.rs @@ -0,0 +1,41 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate dfa --name WORD_BREAK_FWD --sparse --minimize --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)] +// +// ucd-generate 0.2.8 is available on crates.io. + +#[cfg(target_endian = "big")] +lazy_static! { + pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("word_break_fwd.bigendian.dfa"), + }; + + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } + }; +} + +#[cfg(target_endian = "little")] +lazy_static! 
{ + pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = { + #[repr(C)] + struct Aligned<B: ?Sized> { + _align: [u8; 0], + bytes: B, + } + + static ALIGNED: &'static Aligned<[u8]> = &Aligned { + _align: [], + bytes: *include_bytes!("word_break_fwd.littleendian.dfa"), + }; + + unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } + }; +} diff --git a/src/unicode/grapheme.rs b/src/unicode/grapheme.rs new file mode 100644 index 0000000..e40a0de --- /dev/null +++ b/src/unicode/grapheme.rs @@ -0,0 +1,355 @@ +use regex_automata::DFA; + +use ext_slice::ByteSlice; +use unicode::fsm::grapheme_break_fwd::GRAPHEME_BREAK_FWD; +use unicode::fsm::grapheme_break_rev::GRAPHEME_BREAK_REV; +use unicode::fsm::regional_indicator_rev::REGIONAL_INDICATOR_REV; +use utf8; + +/// An iterator over grapheme clusters in a byte string. +/// +/// This iterator is typically constructed by +/// [`ByteSlice::graphemes`](trait.ByteSlice.html#method.graphemes). +/// +/// Unicode defines a grapheme cluster as an *approximation* to a single user +/// visible character. A grapheme cluster, or just "grapheme," is made up of +/// one or more codepoints. For end user oriented tasks, one should generally +/// prefer using graphemes instead of [`Chars`](struct.Chars.html), which +/// always yields one codepoint at a time. +/// +/// Since graphemes are made up of one or more codepoints, this iterator yields +/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints +/// are [substituted](index.html#handling-of-invalid-utf-8). +/// +/// This iterator can be used in reverse. When reversed, exactly the same +/// set of grapheme clusters are yielded, but in reverse order. +/// +/// This iterator only yields *extended* grapheme clusters, in accordance with +/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Grapheme_Cluster_Boundaries). 
+#[derive(Clone, Debug)] +pub struct Graphemes<'a> { + bs: &'a [u8], +} + +impl<'a> Graphemes<'a> { + pub(crate) fn new(bs: &'a [u8]) -> Graphemes<'a> { + Graphemes { bs } + } + + /// View the underlying data as a subslice of the original data. + /// + /// The slice returned has the same lifetime as the original slice, and so + /// the iterator can continue to be used while this exists. + /// + /// # Examples + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut it = b"abc".graphemes(); + /// + /// assert_eq!(b"abc", it.as_bytes()); + /// it.next(); + /// assert_eq!(b"bc", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b"", it.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.bs + } +} + +impl<'a> Iterator for Graphemes<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + let (grapheme, size) = decode_grapheme(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[size..]; + Some(grapheme) + } +} + +impl<'a> DoubleEndedIterator for Graphemes<'a> { + #[inline] + fn next_back(&mut self) -> Option<&'a str> { + let (grapheme, size) = decode_last_grapheme(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[..self.bs.len() - size]; + Some(grapheme) + } +} + +/// An iterator over grapheme clusters in a byte string and their byte index +/// positions. +/// +/// This iterator is typically constructed by +/// [`ByteSlice::grapheme_indices`](trait.ByteSlice.html#method.grapheme_indices). +/// +/// Unicode defines a grapheme cluster as an *approximation* to a single user +/// visible character. A grapheme cluster, or just "grapheme," is made up of +/// one or more codepoints. For end user oriented tasks, one should generally +/// prefer using graphemes instead of [`Chars`](struct.Chars.html), which +/// always yields one codepoint at a time. 
+/// +/// Since graphemes are made up of one or more codepoints, this iterator +/// yields `&str` elements (along with their start and end byte offsets). +/// When invalid UTF-8 is encountered, replacement codepoints are +/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the +/// indices yielded by this iterator may not correspond to the length of the +/// grapheme cluster yielded with those indices. For example, when this +/// iterator encounters `\xFF` in the byte string, then it will yield a pair +/// of indices ranging over a single byte, but will provide an `&str` +/// equivalent to `"\u{FFFD}"`, which is three bytes in length. However, when +/// given only valid UTF-8, then all indices are in exact correspondence with +/// their paired grapheme cluster. +/// +/// This iterator can be used in reverse. When reversed, exactly the same +/// set of grapheme clusters are yielded, but in reverse order. +/// +/// This iterator only yields *extended* grapheme clusters, in accordance with +/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Grapheme_Cluster_Boundaries). +#[derive(Clone, Debug)] +pub struct GraphemeIndices<'a> { + bs: &'a [u8], + forward_index: usize, + reverse_index: usize, +} + +impl<'a> GraphemeIndices<'a> { + pub(crate) fn new(bs: &'a [u8]) -> GraphemeIndices<'a> { + GraphemeIndices { bs: bs, forward_index: 0, reverse_index: bs.len() } + } + + /// View the underlying data as a subslice of the original data. + /// + /// The slice returned has the same lifetime as the original slice, and so + /// the iterator can continue to be used while this exists. 
+ /// + /// # Examples + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut it = b"abc".grapheme_indices(); + /// + /// assert_eq!(b"abc", it.as_bytes()); + /// it.next(); + /// assert_eq!(b"bc", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b"", it.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.bs + } +} + +impl<'a> Iterator for GraphemeIndices<'a> { + type Item = (usize, usize, &'a str); + + #[inline] + fn next(&mut self) -> Option<(usize, usize, &'a str)> { + let index = self.forward_index; + let (grapheme, size) = decode_grapheme(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[size..]; + self.forward_index += size; + Some((index, index + size, grapheme)) + } +} + +impl<'a> DoubleEndedIterator for GraphemeIndices<'a> { + #[inline] + fn next_back(&mut self) -> Option<(usize, usize, &'a str)> { + let (grapheme, size) = decode_last_grapheme(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[..self.bs.len() - size]; + self.reverse_index -= size; + Some((self.reverse_index, self.reverse_index + size, grapheme)) + } +} + +/// Decode a grapheme from the given byte string. +/// +/// This returns the resulting grapheme (which may be a Unicode replacement +/// codepoint if invalid UTF-8 was found), along with the number of bytes +/// decoded in the byte string. The number of bytes decoded may not be the +/// same as the length of grapheme in the case where invalid UTF-8 is found. +pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) { + if bs.is_empty() { + ("", 0) + } else if let Some(end) = GRAPHEME_BREAK_FWD.find(bs) { + // Safe because a match can only occur for valid UTF-8. + let grapheme = unsafe { bs[..end].to_str_unchecked() }; + (grapheme, grapheme.len()) + } else { + const INVALID: &'static str = "\u{FFFD}"; + // No match on non-empty bytes implies we found invalid UTF-8. 
+ let (_, size) = utf8::decode_lossy(bs); + (INVALID, size) + } +} + +fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) { + if bs.is_empty() { + ("", 0) + } else if let Some(mut start) = GRAPHEME_BREAK_REV.rfind(bs) { + start = adjust_rev_for_regional_indicator(bs, start); + // Safe because a match can only occur for valid UTF-8. + let grapheme = unsafe { bs[start..].to_str_unchecked() }; + (grapheme, grapheme.len()) + } else { + const INVALID: &'static str = "\u{FFFD}"; + // No match on non-empty bytes implies we found invalid UTF-8. + let (_, size) = utf8::decode_last_lossy(bs); + (INVALID, size) + } +} + +/// Return the correct offset for the next grapheme decoded at the end of the +/// given byte string, where `i` is the initial guess. In particular, +/// `&bs[i..]` represents the candidate grapheme. +/// +/// `i` is returned by this function in all cases except when `&bs[i..]` is +/// a pair of regional indicator codepoints. In that case, if an odd number of +/// additional regional indicator codepoints precedes `i`, then `i` is +/// adjusted such that it points to only a single regional indicator. +/// +/// This "fixing" is necessary to handle the requirement that a break cannot +/// occur between regional indicators where it would cause an odd number of +/// regional indicators to exist before the break from the *start* of the +/// string. A reverse regex cannot detect this case easily without look-around. +fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize { + // All regional indicators use a 4 byte encoding, and we only care about + // the case where we found a pair of regional indicators. + if bs.len() - i != 8 { + return i; + } + // Count all contiguous occurrences of regional indicators. If there's an + // even number of them, then we can accept the pair we found. Otherwise, + // we can only take one of them. + // + // FIXME: This is quadratic in the worst case, e.g., a string of just + // regional indicator codepoints. 
A fix probably requires refactoring this + // code a bit such that we don't rescan regional indicators. + let mut count = 0; + while let Some(start) = REGIONAL_INDICATOR_REV.rfind(bs) { + bs = &bs[..start]; + count += 1; + } + if count % 2 == 0 { + i + } else { + i + 4 + } +} + +#[cfg(test)] +mod tests { + use ucd_parse::GraphemeClusterBreakTest; + + use super::*; + use ext_slice::ByteSlice; + use tests::LOSSY_TESTS; + + #[test] + fn forward_ucd() { + for (i, test) in ucdtests().into_iter().enumerate() { + let given = test.grapheme_clusters.concat(); + let got: Vec<String> = Graphemes::new(given.as_bytes()) + .map(|cluster| cluster.to_string()) + .collect(); + assert_eq!( + test.grapheme_clusters, + got, + "\ngrapheme forward break test {} failed:\n\ + given: {:?}\n\ + expected: {:?}\n\ + got: {:?}\n", + i, + uniescape(&given), + uniescape_vec(&test.grapheme_clusters), + uniescape_vec(&got), + ); + } + } + + #[test] + fn reverse_ucd() { + for (i, test) in ucdtests().into_iter().enumerate() { + let given = test.grapheme_clusters.concat(); + let mut got: Vec<String> = Graphemes::new(given.as_bytes()) + .rev() + .map(|cluster| cluster.to_string()) + .collect(); + got.reverse(); + assert_eq!( + test.grapheme_clusters, + got, + "\n\ngrapheme reverse break test {} failed:\n\ + given: {:?}\n\ + expected: {:?}\n\ + got: {:?}\n", + i, + uniescape(&given), + uniescape_vec(&test.grapheme_clusters), + uniescape_vec(&got), + ); + } + } + + #[test] + fn forward_lossy() { + for &(expected, input) in LOSSY_TESTS { + let got = Graphemes::new(input.as_bytes()).collect::<String>(); + assert_eq!(expected, got); + } + } + + #[test] + fn reverse_lossy() { + for &(expected, input) in LOSSY_TESTS { + let expected: String = expected.chars().rev().collect(); + let got = + Graphemes::new(input.as_bytes()).rev().collect::<String>(); + assert_eq!(expected, got); + } + } + + fn uniescape(s: &str) -> String { + s.chars().flat_map(|c| c.escape_unicode()).collect::<String>() + } + + fn 
uniescape_vec(strs: &[String]) -> Vec<String> { + strs.iter().map(|s| uniescape(s)).collect() + } + + /// Return all of the UCD for grapheme breaks. + fn ucdtests() -> Vec<GraphemeClusterBreakTest> { + const TESTDATA: &'static str = + include_str!("data/GraphemeBreakTest.txt"); + + let mut tests = vec![]; + for mut line in TESTDATA.lines() { + line = line.trim(); + if line.starts_with("#") || line.contains("surrogate") { + continue; + } + tests.push(line.parse().unwrap()); + } + tests + } +} diff --git a/src/unicode/mod.rs b/src/unicode/mod.rs new file mode 100644 index 0000000..60318f4 --- /dev/null +++ b/src/unicode/mod.rs @@ -0,0 +1,12 @@ +pub use self::grapheme::{decode_grapheme, GraphemeIndices, Graphemes}; +pub use self::sentence::{SentenceIndices, Sentences}; +pub use self::whitespace::{whitespace_len_fwd, whitespace_len_rev}; +pub use self::word::{ + WordIndices, Words, WordsWithBreakIndices, WordsWithBreaks, +}; + +mod fsm; +mod grapheme; +mod sentence; +mod whitespace; +mod word; diff --git a/src/unicode/sentence.rs b/src/unicode/sentence.rs new file mode 100644 index 0000000..01f5473 --- /dev/null +++ b/src/unicode/sentence.rs @@ -0,0 +1,220 @@ +use regex_automata::DFA; + +use ext_slice::ByteSlice; +use unicode::fsm::sentence_break_fwd::SENTENCE_BREAK_FWD; +use utf8; + +/// An iterator over sentences in a byte string. +/// +/// This iterator is typically constructed by +/// [`ByteSlice::sentences`](trait.ByteSlice.html#method.sentences). +/// +/// Sentences typically include their trailing punctuation and whitespace. +/// +/// Since sentences are made up of one or more codepoints, this iterator yields +/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints +/// are [substituted](index.html#handling-of-invalid-utf-8). +/// +/// This iterator yields words in accordance with the default sentence boundary +/// rules specified in +/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Sentence_Boundaries). 
+#[derive(Clone, Debug)] +pub struct Sentences<'a> { + bs: &'a [u8], +} + +impl<'a> Sentences<'a> { + pub(crate) fn new(bs: &'a [u8]) -> Sentences<'a> { + Sentences { bs } + } + + /// View the underlying data as a subslice of the original data. + /// + /// The slice returned has the same lifetime as the original slice, and so + /// the iterator can continue to be used while this exists. + /// + /// # Examples + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut it = b"I want this. Not that. Right now.".sentences(); + /// + /// assert_eq!(&b"I want this. Not that. Right now."[..], it.as_bytes()); + /// it.next(); + /// assert_eq!(b"Not that. Right now.", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b"", it.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.bs + } +} + +impl<'a> Iterator for Sentences<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + let (sentence, size) = decode_sentence(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[size..]; + Some(sentence) + } +} + +/// An iterator over sentences in a byte string, along with their byte offsets. +/// +/// This iterator is typically constructed by +/// [`ByteSlice::sentence_indices`](trait.ByteSlice.html#method.sentence_indices). +/// +/// Sentences typically include their trailing punctuation and whitespace. +/// +/// Since sentences are made up of one or more codepoints, this iterator +/// yields `&str` elements (along with their start and end byte offsets). +/// When invalid UTF-8 is encountered, replacement codepoints are +/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the +/// indices yielded by this iterator may not correspond to the length of the +/// sentence yielded with those indices. 
For example, when this iterator +/// encounters `\xFF` in the byte string, then it will yield a pair of indices +/// ranging over a single byte, but will provide an `&str` equivalent to +/// `"\u{FFFD}"`, which is three bytes in length. However, when given only +/// valid UTF-8, then all indices are in exact correspondence with their paired +/// word. +/// +/// This iterator yields words in accordance with the default sentence boundary +/// rules specified in +/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Sentence_Boundaries). +#[derive(Clone, Debug)] +pub struct SentenceIndices<'a> { + bs: &'a [u8], + forward_index: usize, +} + +impl<'a> SentenceIndices<'a> { + pub(crate) fn new(bs: &'a [u8]) -> SentenceIndices<'a> { + SentenceIndices { bs: bs, forward_index: 0 } + } + + /// View the underlying data as a subslice of the original data. + /// + /// The slice returned has the same lifetime as the original slice, and so + /// the iterator can continue to be used while this exists. + /// + /// # Examples + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut it = b"I want this. Not that. Right now.".sentence_indices(); + /// + /// assert_eq!(&b"I want this. Not that. Right now."[..], it.as_bytes()); + /// it.next(); + /// assert_eq!(b"Not that. 
Right now.", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b"", it.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.bs + } +} + +impl<'a> Iterator for SentenceIndices<'a> { + type Item = (usize, usize, &'a str); + + #[inline] + fn next(&mut self) -> Option<(usize, usize, &'a str)> { + let index = self.forward_index; + let (word, size) = decode_sentence(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[size..]; + self.forward_index += size; + Some((index, index + size, word)) + } +} + +fn decode_sentence(bs: &[u8]) -> (&str, usize) { + if bs.is_empty() { + ("", 0) + } else if let Some(end) = SENTENCE_BREAK_FWD.find(bs) { + // Safe because a match can only occur for valid UTF-8. + let sentence = unsafe { bs[..end].to_str_unchecked() }; + (sentence, sentence.len()) + } else { + const INVALID: &'static str = "\u{FFFD}"; + // No match on non-empty bytes implies we found invalid UTF-8. + let (_, size) = utf8::decode_lossy(bs); + (INVALID, size) + } +} + +#[cfg(test)] +mod tests { + use ucd_parse::SentenceBreakTest; + + use ext_slice::ByteSlice; + + #[test] + fn forward_ucd() { + for (i, test) in ucdtests().into_iter().enumerate() { + let given = test.sentences.concat(); + let got = sentences(given.as_bytes()); + assert_eq!( + test.sentences, + got, + "\n\nsentence forward break test {} failed:\n\ + given: {:?}\n\ + expected: {:?}\n\ + got: {:?}\n", + i, + given, + strs_to_bstrs(&test.sentences), + strs_to_bstrs(&got), + ); + } + } + + // Some additional tests that don't seem to be covered by the UCD tests. + #[test] + fn forward_additional() { + assert_eq!(vec!["a.. ", "A"], sentences(b"a.. A")); + assert_eq!(vec!["a.. a"], sentences(b"a.. a")); + + assert_eq!(vec!["a... ", "A"], sentences(b"a... A")); + assert_eq!(vec!["a... a"], sentences(b"a... 
a")); + + assert_eq!(vec!["a...,..., a"], sentences(b"a...,..., a")); + } + + fn sentences(bytes: &[u8]) -> Vec<&str> { + bytes.sentences().collect() + } + + fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> { + strs.iter().map(|s| s.as_ref().as_bytes()).collect() + } + + /// Return all of the UCD for sentence breaks. + fn ucdtests() -> Vec<SentenceBreakTest> { + const TESTDATA: &'static str = + include_str!("data/SentenceBreakTest.txt"); + + let mut tests = vec![]; + for mut line in TESTDATA.lines() { + line = line.trim(); + if line.starts_with("#") || line.contains("surrogate") { + continue; + } + tests.push(line.parse().unwrap()); + } + tests + } +} diff --git a/src/unicode/whitespace.rs b/src/unicode/whitespace.rs new file mode 100644 index 0000000..a8da144 --- /dev/null +++ b/src/unicode/whitespace.rs @@ -0,0 +1,14 @@ +use regex_automata::DFA; + +use unicode::fsm::whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD; +use unicode::fsm::whitespace_anchored_rev::WHITESPACE_ANCHORED_REV; + +/// Return the first position of a non-whitespace character. +pub fn whitespace_len_fwd(slice: &[u8]) -> usize { + WHITESPACE_ANCHORED_FWD.find(slice).unwrap_or(0) +} + +/// Return the last position of a non-whitespace character. +pub fn whitespace_len_rev(slice: &[u8]) -> usize { + WHITESPACE_ANCHORED_REV.rfind(slice).unwrap_or(slice.len()) +} diff --git a/src/unicode/word.rs b/src/unicode/word.rs new file mode 100644 index 0000000..1260e52 --- /dev/null +++ b/src/unicode/word.rs @@ -0,0 +1,406 @@ +use regex_automata::DFA; + +use ext_slice::ByteSlice; +use unicode::fsm::simple_word_fwd::SIMPLE_WORD_FWD; +use unicode::fsm::word_break_fwd::WORD_BREAK_FWD; +use utf8; + +/// An iterator over words in a byte string. +/// +/// This iterator is typically constructed by +/// [`ByteSlice::words`](trait.ByteSlice.html#method.words). 
+/// +/// This is similar to the [`WordsWithBreaks`](struct.WordsWithBreaks.html) +/// iterator, except it only returns elements that contain a "word" character. +/// A word character is defined by UTS #18 (Annex C) to be the combination +/// of the `Alphabetic` and `Join_Control` properties, along with the +/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general categories. +/// +/// Since words are made up of one or more codepoints, this iterator yields +/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints +/// are [substituted](index.html#handling-of-invalid-utf-8). +/// +/// This iterator yields words in accordance with the default word boundary +/// rules specified in +/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries). +/// In particular, this may not be suitable for Japanese and Chinese scripts +/// that do not use spaces between words. +#[derive(Clone, Debug)] +pub struct Words<'a>(WordsWithBreaks<'a>); + +impl<'a> Words<'a> { + pub(crate) fn new(bs: &'a [u8]) -> Words<'a> { + Words(WordsWithBreaks::new(bs)) + } + + /// View the underlying data as a subslice of the original data. + /// + /// The slice returned has the same lifetime as the original slice, and so + /// the iterator can continue to be used while this exists. 
+ /// + /// # Examples + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut it = b"foo bar baz".words(); + /// + /// assert_eq!(b"foo bar baz", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b" baz", it.as_bytes()); + /// it.next(); + /// assert_eq!(b"", it.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.0.as_bytes() + } +} + +impl<'a> Iterator for Words<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + while let Some(word) = self.0.next() { + if SIMPLE_WORD_FWD.is_match(word.as_bytes()) { + return Some(word); + } + } + None + } +} + +/// An iterator over words in a byte string and their byte index positions. +/// +/// This iterator is typically constructed by +/// [`ByteSlice::word_indices`](trait.ByteSlice.html#method.word_indices). +/// +/// This is similar to the +/// [`WordsWithBreakIndices`](struct.WordsWithBreakIndices.html) iterator, +/// except it only returns elements that contain a "word" character. A +/// word character is defined by UTS #18 (Annex C) to be the combination +/// of the `Alphabetic` and `Join_Control` properties, along with the +/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general categories. +/// +/// Since words are made up of one or more codepoints, this iterator +/// yields `&str` elements (along with their start and end byte offsets). +/// When invalid UTF-8 is encountered, replacement codepoints are +/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the +/// indices yielded by this iterator may not correspond to the length of the +/// word yielded with those indices. For example, when this iterator encounters +/// `\xFF` in the byte string, then it will yield a pair of indices ranging +/// over a single byte, but will provide an `&str` equivalent to `"\u{FFFD}"`, +/// which is three bytes in length. 
However, when given only valid UTF-8, then +/// all indices are in exact correspondence with their paired word. +/// +/// This iterator yields words in accordance with the default word boundary +/// rules specified in +/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries). +/// In particular, this may not be suitable for Japanese and Chinese scripts +/// that do not use spaces between words. +#[derive(Clone, Debug)] +pub struct WordIndices<'a>(WordsWithBreakIndices<'a>); + +impl<'a> WordIndices<'a> { + pub(crate) fn new(bs: &'a [u8]) -> WordIndices<'a> { + WordIndices(WordsWithBreakIndices::new(bs)) + } + + /// View the underlying data as a subslice of the original data. + /// + /// The slice returned has the same lifetime as the original slice, and so + /// the iterator can continue to be used while this exists. + /// + /// # Examples + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut it = b"foo bar baz".word_indices(); + /// + /// assert_eq!(b"foo bar baz", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b" baz", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b"", it.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.0.as_bytes() + } +} + +impl<'a> Iterator for WordIndices<'a> { + type Item = (usize, usize, &'a str); + + #[inline] + fn next(&mut self) -> Option<(usize, usize, &'a str)> { + while let Some((start, end, word)) = self.0.next() { + if SIMPLE_WORD_FWD.is_match(word.as_bytes()) { + return Some((start, end, word)); + } + } + None + } +} + +/// An iterator over all word breaks in a byte string. +/// +/// This iterator is typically constructed by +/// [`ByteSlice::words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks). +/// +/// This iterator yields not only all words, but the content that comes between +/// words. 
In particular, if all elements yielded by this iterator are +/// concatenated, then the result is the original string (subject to Unicode +/// replacement codepoint substitutions). +/// +/// Since words are made up of one or more codepoints, this iterator yields +/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints +/// are [substituted](index.html#handling-of-invalid-utf-8). +/// +/// This iterator yields words in accordance with the default word boundary +/// rules specified in +/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries). +/// In particular, this may not be suitable for Japanese and Chinese scripts +/// that do not use spaces between words. +#[derive(Clone, Debug)] +pub struct WordsWithBreaks<'a> { + bs: &'a [u8], +} + +impl<'a> WordsWithBreaks<'a> { + pub(crate) fn new(bs: &'a [u8]) -> WordsWithBreaks<'a> { + WordsWithBreaks { bs } + } + + /// View the underlying data as a subslice of the original data. + /// + /// The slice returned has the same lifetime as the original slice, and so + /// the iterator can continue to be used while this exists. + /// + /// # Examples + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut it = b"foo bar baz".words_with_breaks(); + /// + /// assert_eq!(b"foo bar baz", it.as_bytes()); + /// it.next(); + /// assert_eq!(b" bar baz", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b" baz", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b"", it.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.bs + } +} + +impl<'a> Iterator for WordsWithBreaks<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + let (word, size) = decode_word(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[size..]; + Some(word) + } +} + +/// An iterator over all word breaks in a byte string, along with their byte +/// index positions. 
+/// +/// This iterator is typically constructed by +/// [`ByteSlice::words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices). +/// +/// This iterator yields not only all words, but the content that comes between +/// words. In particular, if all elements yielded by this iterator are +/// concatenated, then the result is the original string (subject to Unicode +/// replacement codepoint substitutions). +/// +/// Since words are made up of one or more codepoints, this iterator +/// yields `&str` elements (along with their start and end byte offsets). +/// When invalid UTF-8 is encountered, replacement codepoints are +/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the +/// indices yielded by this iterator may not correspond to the length of the +/// word yielded with those indices. For example, when this iterator encounters +/// `\xFF` in the byte string, then it will yield a pair of indices ranging +/// over a single byte, but will provide an `&str` equivalent to `"\u{FFFD}"`, +/// which is three bytes in length. However, when given only valid UTF-8, then +/// all indices are in exact correspondence with their paired word. +/// +/// This iterator yields words in accordance with the default word boundary +/// rules specified in +/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries). +/// In particular, this may not be suitable for Japanese and Chinese scripts +/// that do not use spaces between words. +#[derive(Clone, Debug)] +pub struct WordsWithBreakIndices<'a> { + bs: &'a [u8], + forward_index: usize, +} + +impl<'a> WordsWithBreakIndices<'a> { + pub(crate) fn new(bs: &'a [u8]) -> WordsWithBreakIndices<'a> { + WordsWithBreakIndices { bs: bs, forward_index: 0 } + } + + /// View the underlying data as a subslice of the original data. + /// + /// The slice returned has the same lifetime as the original slice, and so + /// the iterator can continue to be used while this exists. 
+ /// + /// # Examples + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut it = b"foo bar baz".words_with_break_indices(); + /// + /// assert_eq!(b"foo bar baz", it.as_bytes()); + /// it.next(); + /// assert_eq!(b" bar baz", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b" baz", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b"", it.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.bs + } +} + +impl<'a> Iterator for WordsWithBreakIndices<'a> { + type Item = (usize, usize, &'a str); + + #[inline] + fn next(&mut self) -> Option<(usize, usize, &'a str)> { + let index = self.forward_index; + let (word, size) = decode_word(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[size..]; + self.forward_index += size; + Some((index, index + size, word)) + } +} + +fn decode_word(bs: &[u8]) -> (&str, usize) { + if bs.is_empty() { + ("", 0) + } else if let Some(end) = WORD_BREAK_FWD.find(bs) { + // Safe because a match can only occur for valid UTF-8. + let word = unsafe { bs[..end].to_str_unchecked() }; + (word, word.len()) + } else { + const INVALID: &'static str = "\u{FFFD}"; + // No match on non-empty bytes implies we found invalid UTF-8. + let (_, size) = utf8::decode_lossy(bs); + (INVALID, size) + } +} + +#[cfg(test)] +mod tests { + use ucd_parse::WordBreakTest; + + use ext_slice::ByteSlice; + + #[test] + fn forward_ucd() { + for (i, test) in ucdtests().into_iter().enumerate() { + let given = test.words.concat(); + let got = words(given.as_bytes()); + assert_eq!( + test.words, + got, + "\n\nword forward break test {} failed:\n\ + given: {:?}\n\ + expected: {:?}\n\ + got: {:?}\n", + i, + given, + strs_to_bstrs(&test.words), + strs_to_bstrs(&got), + ); + } + } + + // Some additional tests that don't seem to be covered by the UCD tests. + // + // It's pretty amazing that the UCD tests miss these cases. 
I only found + // them by running this crate's segmenter and ICU's segmenter on the same + // text and comparing the output. + #[test] + fn forward_additional() { + assert_eq!(vec!["a", ".", " ", "Y"], words(b"a. Y")); + assert_eq!(vec!["r", ".", " ", "Yo"], words(b"r. Yo")); + assert_eq!( + vec!["whatsoever", ".", " ", "You", " ", "may"], + words(b"whatsoever. You may") + ); + assert_eq!( + vec!["21stcentury'syesterday"], + words(b"21stcentury'syesterday") + ); + + assert_eq!(vec!["Bonta_", "'", "s"], words(b"Bonta_'s")); + assert_eq!(vec!["_vhat's"], words(b"_vhat's")); + assert_eq!(vec!["__on'anima"], words(b"__on'anima")); + assert_eq!(vec!["123_", "'", "4"], words(b"123_'4")); + assert_eq!(vec!["_123'4"], words(b"_123'4")); + assert_eq!(vec!["__12'345"], words(b"__12'345")); + + assert_eq!( + vec!["tomorrowat4", ":", "00", ","], + words(b"tomorrowat4:00,") + ); + assert_eq!(vec!["RS1", "'", "s"], words(b"RS1's")); + assert_eq!(vec!["X38"], words(b"X38")); + + assert_eq!(vec!["4abc", ":", "00", ","], words(b"4abc:00,")); + assert_eq!(vec!["12S", "'", "1"], words(b"12S'1")); + assert_eq!(vec!["1XY"], words(b"1XY")); + + assert_eq!(vec!["\u{FEFF}", "Ты"], words("\u{FEFF}Ты".as_bytes())); + } + + fn words(bytes: &[u8]) -> Vec<&str> { + bytes.words_with_breaks().collect() + } + + fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> { + strs.iter().map(|s| s.as_ref().as_bytes()).collect() + } + + /// Return all of the UCD for word breaks. 
+ fn ucdtests() -> Vec<WordBreakTest> { + const TESTDATA: &'static str = include_str!("data/WordBreakTest.txt"); + + let mut tests = vec![]; + for mut line in TESTDATA.lines() { + line = line.trim(); + if line.starts_with("#") || line.contains("surrogate") { + continue; + } + tests.push(line.parse().unwrap()); + } + tests + } +} diff --git a/src/utf8.rs b/src/utf8.rs new file mode 100644 index 0000000..2d4035d --- /dev/null +++ b/src/utf8.rs @@ -0,0 +1,1370 @@ +use core::char; +use core::cmp; +use core::fmt; +use core::str; +#[cfg(feature = "std")] +use std::error; + +use ascii; +use bstr::BStr; +use ext_slice::ByteSlice; + +// The UTF-8 decoder provided here is based on the one presented here: +// https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ +// +// We *could* have done UTF-8 decoding by using a DFA generated by `\p{any}` +// using regex-automata that is roughly the same size. The real benefit of +// Hoehrmann's formulation is that the byte class mapping below is manually +// tailored such that each byte's class doubles as a shift to mask out the +// bits necessary for constructing the leading bits of each codepoint value +// from the initial byte. +// +// There are some minor differences between this implementation and Hoehrmann's +// formulation. +// +// Firstly, we make REJECT have state ID 0, since it makes the state table +// itself a little easier to read and is consistent with the notion that 0 +// means "false" or "bad." +// +// Secondly, when doing bulk decoding, we add a SIMD accelerated ASCII fast +// path. +// +// Thirdly, we pre-multiply the state IDs to avoid a multiplication instruction +// in the core decoding loop. (Which is what regex-automata would do by +// default.) +// +// Fourthly, we split the byte class mapping and transition table into two +// arrays because it's clearer. +// +// It is unlikely that this is the fastest way to do UTF-8 decoding, however, +// it is fairly simple. 
+ +const ACCEPT: usize = 12; +const REJECT: usize = 0; + +/// SAFETY: The decode function below relies on the correctness of these +/// equivalence classes. +#[cfg_attr(rustfmt, rustfmt::skip)] +const CLASSES: [u8; 256] = [ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, +]; + +/// SAFETY: The decode function below relies on the correctness of this state +/// machine. +#[cfg_attr(rustfmt, rustfmt::skip)] +const STATES_FORWARD: &'static [u8] = &[ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 12, 0, 24, 36, 60, 96, 84, 0, 0, 0, 48, 72, + 0, 12, 0, 0, 0, 0, 0, 12, 0, 12, 0, 0, + 0, 24, 0, 0, 0, 0, 0, 24, 0, 24, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, + 0, 24, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 36, 0, 36, 0, 0, + 0, 36, 0, 0, 0, 0, 0, 36, 0, 36, 0, 0, + 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +]; + +/// An iterator over Unicode scalar values in a byte string. +/// +/// When invalid UTF-8 byte sequences are found, they are substituted with the +/// Unicode replacement codepoint (`U+FFFD`) using the +/// ["maximal subpart" strategy](http://www.unicode.org/review/pr-121.html). +/// +/// This iterator is created by the +/// [`chars`](trait.ByteSlice.html#method.chars) method provided by the +/// [`ByteSlice`](trait.ByteSlice.html) extension trait for `&[u8]`. 
+#[derive(Clone, Debug)] +pub struct Chars<'a> { + bs: &'a [u8], +} + +impl<'a> Chars<'a> { + pub(crate) fn new(bs: &'a [u8]) -> Chars<'a> { + Chars { bs } + } + + /// View the underlying data as a subslice of the original data. + /// + /// The slice returned has the same lifetime as the original slice, and so + /// the iterator can continue to be used while this exists. + /// + /// # Examples + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut chars = b"abc".chars(); + /// + /// assert_eq!(b"abc", chars.as_bytes()); + /// chars.next(); + /// assert_eq!(b"bc", chars.as_bytes()); + /// chars.next(); + /// chars.next(); + /// assert_eq!(b"", chars.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.bs + } +} + +impl<'a> Iterator for Chars<'a> { + type Item = char; + + #[inline] + fn next(&mut self) -> Option<char> { + let (ch, size) = decode_lossy(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[size..]; + Some(ch) + } +} + +impl<'a> DoubleEndedIterator for Chars<'a> { + #[inline] + fn next_back(&mut self) -> Option<char> { + let (ch, size) = decode_last_lossy(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[..self.bs.len() - size]; + Some(ch) + } +} + +/// An iterator over Unicode scalar values in a byte string and their +/// byte index positions. +/// +/// When invalid UTF-8 byte sequences are found, they are substituted with the +/// Unicode replacement codepoint (`U+FFFD`) using the +/// ["maximal subpart" strategy](http://www.unicode.org/review/pr-121.html). +/// +/// Note that this is slightly different from the `CharIndices` iterator +/// provided by the standard library. Aside from working on possibly invalid +/// UTF-8, this iterator provides both the corresponding starting and ending +/// byte indices of each codepoint yielded. 
The ending position is necessary to +/// slice the original byte string when invalid UTF-8 bytes are converted into +/// a Unicode replacement codepoint, since a single replacement codepoint can +/// substitute anywhere from 1 to 3 invalid bytes (inclusive). +/// +/// This iterator is created by the +/// [`char_indices`](trait.ByteSlice.html#method.char_indices) method provided +/// by the [`ByteSlice`](trait.ByteSlice.html) extension trait for `&[u8]`. +#[derive(Clone, Debug)] +pub struct CharIndices<'a> { + bs: &'a [u8], + forward_index: usize, + reverse_index: usize, +} + +impl<'a> CharIndices<'a> { + pub(crate) fn new(bs: &'a [u8]) -> CharIndices<'a> { + CharIndices { bs: bs, forward_index: 0, reverse_index: bs.len() } + } + + /// View the underlying data as a subslice of the original data. + /// + /// The slice returned has the same lifetime as the original slice, and so + /// the iterator can continue to be used while this exists. + /// + /// # Examples + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let mut it = b"abc".char_indices(); + /// + /// assert_eq!(b"abc", it.as_bytes()); + /// it.next(); + /// assert_eq!(b"bc", it.as_bytes()); + /// it.next(); + /// it.next(); + /// assert_eq!(b"", it.as_bytes()); + /// ``` + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.bs + } +} + +impl<'a> Iterator for CharIndices<'a> { + type Item = (usize, usize, char); + + #[inline] + fn next(&mut self) -> Option<(usize, usize, char)> { + let index = self.forward_index; + let (ch, size) = decode_lossy(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[size..]; + self.forward_index += size; + Some((index, index + size, ch)) + } +} + +impl<'a> DoubleEndedIterator for CharIndices<'a> { + #[inline] + fn next_back(&mut self) -> Option<(usize, usize, char)> { + let (ch, size) = decode_last_lossy(self.bs); + if size == 0 { + return None; + } + self.bs = &self.bs[..self.bs.len() - size]; + self.reverse_index -= size; + Some((self.reverse_index, 
self.reverse_index + size, ch)) + } +} + +impl<'a> ::core::iter::FusedIterator for CharIndices<'a> {} + +/// An iterator over chunks of valid UTF-8 in a byte slice. +/// +/// See [`utf8_chunks`](trait.ByteSlice.html#method.utf8_chunks). +#[derive(Clone, Debug)] +pub struct Utf8Chunks<'a> { + pub(super) bytes: &'a [u8], +} + +/// A chunk of valid UTF-8, possibly followed by invalid UTF-8 bytes. +/// +/// This is yielded by the +/// [`Utf8Chunks`](struct.Utf8Chunks.html) +/// iterator, which can be created via the +/// [`ByteSlice::utf8_chunks`](trait.ByteSlice.html#method.utf8_chunks) +/// method. +/// +/// The `'a` lifetime parameter corresponds to the lifetime of the bytes that +/// are being iterated over. +#[cfg_attr(test, derive(Debug, PartialEq))] +pub struct Utf8Chunk<'a> { + /// A valid UTF-8 piece, at the start, end, or between invalid UTF-8 bytes. + /// + /// This is empty between adjacent invalid UTF-8 byte sequences. + valid: &'a str, + /// A sequence of invalid UTF-8 bytes. + /// + /// Can only be empty in the last chunk. + /// + /// Should be replaced by a single unicode replacement character, if not + /// empty. + invalid: &'a BStr, + /// Indicates whether the invalid sequence could've been valid if there + /// were more bytes. + /// + /// Can only be true in the last chunk. + incomplete: bool, +} + +impl<'a> Utf8Chunk<'a> { + /// Returns the (possibly empty) valid UTF-8 bytes in this chunk. + /// + /// This may be empty if there are consecutive sequences of invalid UTF-8 + /// bytes. + #[inline] + pub fn valid(&self) -> &'a str { + self.valid + } + + /// Returns the (possibly empty) invalid UTF-8 bytes in this chunk that + /// immediately follow the valid UTF-8 bytes in this chunk. + /// + /// This is only empty when this chunk corresponds to the last chunk in + /// the original bytes. + /// + /// The maximum length of this slice is 3. 
That is, invalid UTF-8 byte + /// sequences of length greater than 1 always correspond to a valid _prefix_ of + /// a valid UTF-8 encoded codepoint. This corresponds to the "substitution + /// of maximal subparts" strategy that is described in more detail in the + /// docs for the + /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) + /// method. + #[inline] + pub fn invalid(&self) -> &'a [u8] { + self.invalid.as_bytes() + } + + /// Returns whether the invalid sequence might still become valid if more + /// bytes are added. + /// + /// Returns true if the end of the input was reached unexpectedly, + /// without encountering an unexpected byte. + /// + /// This can only be the case for the last chunk. + #[inline] + pub fn incomplete(&self) -> bool { + self.incomplete + } +} + +impl<'a> Iterator for Utf8Chunks<'a> { + type Item = Utf8Chunk<'a>; + + #[inline] + fn next(&mut self) -> Option<Utf8Chunk<'a>> { + if self.bytes.is_empty() { + return None; + } + match validate(self.bytes) { + Ok(()) => { + let valid = self.bytes; + self.bytes = &[]; + Some(Utf8Chunk { + // SAFETY: This is safe because of the guarantees provided + // by utf8::validate. + valid: unsafe { str::from_utf8_unchecked(valid) }, + invalid: [].as_bstr(), + incomplete: false, + }) + } + Err(e) => { + let (valid, rest) = self.bytes.split_at(e.valid_up_to()); + // SAFETY: This is safe because of the guarantees provided by + // utf8::validate. 
+ let valid = unsafe { str::from_utf8_unchecked(valid) }; + let (invalid_len, incomplete) = match e.error_len() { + Some(n) => (n, false), + None => (rest.len(), true), + }; + let (invalid, rest) = rest.split_at(invalid_len); + self.bytes = rest; + Some(Utf8Chunk { + valid, + invalid: invalid.as_bstr(), + incomplete, + }) + } + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + if self.bytes.is_empty() { + (0, Some(0)) + } else { + (1, Some(self.bytes.len())) + } + } +} + +impl<'a> ::core::iter::FusedIterator for Utf8Chunks<'a> {} + +/// An error that occurs when UTF-8 decoding fails. +/// +/// This error occurs when attempting to convert a non-UTF-8 byte +/// string to a Rust string that must be valid UTF-8. For example, +/// [`to_str`](trait.ByteSlice.html#method.to_str) is one such method. +/// +/// # Example +/// +/// This example shows what happens when a given byte sequence is invalid, +/// but ends with a sequence that is a possible prefix of valid UTF-8. +/// +/// ``` +/// use bstr::{B, ByteSlice}; +/// +/// let s = B(b"foobar\xF1\x80\x80"); +/// let err = s.to_str().unwrap_err(); +/// assert_eq!(err.valid_up_to(), 6); +/// assert_eq!(err.error_len(), None); +/// ``` +/// +/// This example shows what happens when a given byte sequence contains +/// invalid UTF-8. +/// +/// ``` +/// use bstr::ByteSlice; +/// +/// let s = b"foobar\xF1\x80\x80quux"; +/// let err = s.to_str().unwrap_err(); +/// assert_eq!(err.valid_up_to(), 6); +/// // The error length reports the maximum number of bytes that correspond to +/// // a valid prefix of a UTF-8 encoded codepoint. +/// assert_eq!(err.error_len(), Some(3)); +/// +/// // In contrast to the above which contains a single invalid prefix, +/// // consider the case of multiple individual bytes that are never valid +/// // prefixes. Note how the value of error_len changes! 
+/// let s = b"foobar\xFF\xFFquux"; +/// let err = s.to_str().unwrap_err(); +/// assert_eq!(err.valid_up_to(), 6); +/// assert_eq!(err.error_len(), Some(1)); +/// +/// // The fact that it's an invalid prefix does not change error_len even +/// // when it immediately precedes the end of the string. +/// let s = b"foobar\xFF"; +/// let err = s.to_str().unwrap_err(); +/// assert_eq!(err.valid_up_to(), 6); +/// assert_eq!(err.error_len(), Some(1)); +/// ``` +#[derive(Debug, Eq, PartialEq)] +pub struct Utf8Error { + valid_up_to: usize, + error_len: Option<usize>, +} + +impl Utf8Error { + /// Returns the byte index of the position immediately following the last + /// valid UTF-8 byte. + /// + /// # Example + /// + /// This example shows how `valid_up_to` can be used to retrieve a + /// possibly empty prefix that is guaranteed to be valid UTF-8: + /// + /// ``` + /// use bstr::ByteSlice; + /// + /// let s = b"foobar\xF1\x80\x80quux"; + /// let err = s.to_str().unwrap_err(); + /// + /// // This is guaranteed to never panic. + /// let string = s[..err.valid_up_to()].to_str().unwrap(); + /// assert_eq!(string, "foobar"); + /// ``` + #[inline] + pub fn valid_up_to(&self) -> usize { + self.valid_up_to + } + + /// Returns the total number of invalid UTF-8 bytes immediately following + /// the position returned by `valid_up_to`. This value is always at least + /// `1`, but can be up to `3` if bytes form a valid prefix of some UTF-8 + /// encoded codepoint. + /// + /// If the end of the original input was found before a valid UTF-8 encoded + /// codepoint could be completed, then this returns `None`. This is useful + /// when processing streams, where a `None` value signals that more input + /// might be needed. 
+ #[inline] + pub fn error_len(&self) -> Option<usize> { + self.error_len + } +} + +#[cfg(feature = "std")] +impl error::Error for Utf8Error { + fn description(&self) -> &str { + "invalid UTF-8" + } +} + +impl fmt::Display for Utf8Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "invalid UTF-8 found at byte offset {}", self.valid_up_to) + } +} + +/// Returns OK if and only if the given slice is completely valid UTF-8. +/// +/// If the slice isn't valid UTF-8, then an error is returned that explains +/// the first location at which invalid UTF-8 was detected. +pub fn validate(slice: &[u8]) -> Result<(), Utf8Error> { + // The fast path for validating UTF-8. It steps through a UTF-8 automaton + // and uses a SIMD accelerated ASCII fast path on x86_64. If an error is + // detected, it backs up and runs the slower version of the UTF-8 automaton + // to determine correct error information. + fn fast(slice: &[u8]) -> Result<(), Utf8Error> { + let mut state = ACCEPT; + let mut i = 0; + + while i < slice.len() { + let b = slice[i]; + + // ASCII fast path. If we see two consecutive ASCII bytes, then try + // to validate as much ASCII as possible very quickly. + if state == ACCEPT + && b <= 0x7F + && slice.get(i + 1).map_or(false, |&b| b <= 0x7F) + { + i += ascii::first_non_ascii_byte(&slice[i..]); + continue; + } + + state = step(state, b); + if state == REJECT { + return Err(find_valid_up_to(slice, i)); + } + i += 1; + } + if state != ACCEPT { + Err(find_valid_up_to(slice, slice.len())) + } else { + Ok(()) + } + } + + // Given the first position at which a UTF-8 sequence was determined to be + // invalid, return an error that correctly reports the position at which + // the last complete UTF-8 sequence ends. 
+ #[inline(never)] + fn find_valid_up_to(slice: &[u8], rejected_at: usize) -> Utf8Error { + // In order to find the last valid byte, we need to back up an amount + // that guarantees every preceding byte is part of a valid UTF-8 + // code unit sequence. To do this, we simply locate the last leading + // byte that occurs before rejected_at. + let mut backup = rejected_at.saturating_sub(1); + while backup > 0 && !is_leading_or_invalid_utf8_byte(slice[backup]) { + backup -= 1; + } + let upto = cmp::min(slice.len(), rejected_at.saturating_add(1)); + let mut err = slow(&slice[backup..upto]).unwrap_err(); + err.valid_up_to += backup; + err + } + + // Like top-level UTF-8 decoding, except it correctly reports a UTF-8 error + // when an invalid sequence is found. This is split out from validate so + // that the fast path doesn't need to keep track of the position of the + // last valid UTF-8 byte. In particular, tracking this requires checking + // for an ACCEPT state on each byte, which degrades throughput pretty + // badly. + fn slow(slice: &[u8]) -> Result<(), Utf8Error> { + let mut state = ACCEPT; + let mut valid_up_to = 0; + for (i, &b) in slice.iter().enumerate() { + state = step(state, b); + if state == ACCEPT { + valid_up_to = i + 1; + } else if state == REJECT { + // Our error length must always be at least 1. + let error_len = Some(cmp::max(1, i - valid_up_to)); + return Err(Utf8Error { valid_up_to, error_len }); + } + } + if state != ACCEPT { + Err(Utf8Error { valid_up_to, error_len: None }) + } else { + Ok(()) + } + } + + // Advance to the next state given the current state and current byte. + fn step(state: usize, b: u8) -> usize { + let class = CLASSES[b as usize]; + // SAFETY: This is safe because 'class' is always <=11 and 'state' is + // always <=96. 
Therefore, the maximal index is 96+11 = 107, where + // STATES_FORWARD.len() = 108 such that every index is guaranteed to be + // valid by construction of the state machine and the byte equivalence + // classes. + unsafe { + *STATES_FORWARD.get_unchecked(state + class as usize) as usize + } + } + + fast(slice) +} + +/// UTF-8 decode a single Unicode scalar value from the beginning of a slice. +/// +/// When successful, the corresponding Unicode scalar value is returned along +/// with the number of bytes it was encoded with. The number of bytes consumed +/// for a successful decode is always between 1 and 4, inclusive. +/// +/// When unsuccessful, `None` is returned along with the number of bytes that +/// make up a maximal prefix of a valid UTF-8 code unit sequence. In this case, +/// the number of bytes consumed is always between 0 and 3, inclusive, where +/// 0 is only returned when `slice` is empty. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use bstr::decode_utf8; +/// +/// // Decoding a valid codepoint. +/// let (ch, size) = decode_utf8(b"\xE2\x98\x83"); +/// assert_eq!(Some('☃'), ch); +/// assert_eq!(3, size); +/// +/// // Decoding an incomplete codepoint. 
+/// let (ch, size) = decode_utf8(b"\xE2\x98"); +/// assert_eq!(None, ch); +/// assert_eq!(2, size); +/// ``` +/// +/// This example shows how to iterate over all codepoints in UTF-8 encoded +/// bytes, while replacing invalid UTF-8 sequences with the replacement +/// codepoint: +/// +/// ``` +/// use bstr::{B, decode_utf8}; +/// +/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61"); +/// let mut chars = vec![]; +/// while !bytes.is_empty() { +/// let (ch, size) = decode_utf8(bytes); +/// bytes = &bytes[size..]; +/// chars.push(ch.unwrap_or('\u{FFFD}')); +/// } +/// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars); +/// ``` +#[inline] +pub fn decode<B: AsRef<[u8]>>(slice: B) -> (Option<char>, usize) { + let slice = slice.as_ref(); + match slice.get(0) { + None => return (None, 0), + Some(&b) if b <= 0x7F => return (Some(b as char), 1), + _ => {} + } + + let (mut state, mut cp, mut i) = (ACCEPT, 0, 0); + while i < slice.len() { + decode_step(&mut state, &mut cp, slice[i]); + i += 1; + + if state == ACCEPT { + // SAFETY: This is safe because `decode_step` guarantees that + // `cp` is a valid Unicode scalar value in an ACCEPT state. + let ch = unsafe { char::from_u32_unchecked(cp) }; + return (Some(ch), i); + } else if state == REJECT { + // At this point, we always want to advance at least one byte. + return (None, cmp::max(1, i.saturating_sub(1))); + } + } + (None, i) +} + +/// Lossily UTF-8 decode a single Unicode scalar value from the beginning of a +/// slice. +/// +/// When successful, the corresponding Unicode scalar value is returned along +/// with the number of bytes it was encoded with. The number of bytes consumed +/// for a successful decode is always between 1 and 4, inclusive. +/// +/// When unsuccessful, the Unicode replacement codepoint (`U+FFFD`) is returned +/// along with the number of bytes that make up a maximal prefix of a valid +/// UTF-8 code unit sequence. 
In this case, the number of bytes consumed is +/// always between 0 and 3, inclusive, where 0 is only returned when `slice` is +/// empty. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ```ignore +/// use bstr::decode_utf8_lossy; +/// +/// // Decoding a valid codepoint. +/// let (ch, size) = decode_utf8_lossy(b"\xE2\x98\x83"); +/// assert_eq!('☃', ch); +/// assert_eq!(3, size); +/// +/// // Decoding an incomplete codepoint. +/// let (ch, size) = decode_utf8_lossy(b"\xE2\x98"); +/// assert_eq!('\u{FFFD}', ch); +/// assert_eq!(2, size); +/// ``` +/// +/// This example shows how to iterate over all codepoints in UTF-8 encoded +/// bytes, while replacing invalid UTF-8 sequences with the replacement +/// codepoint: +/// +/// ```ignore +/// use bstr::{B, decode_utf8_lossy}; +/// +/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61"); +/// let mut chars = vec![]; +/// while !bytes.is_empty() { +/// let (ch, size) = decode_utf8_lossy(bytes); +/// bytes = &bytes[size..]; +/// chars.push(ch); +/// } +/// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars); +/// ``` +#[inline] +pub fn decode_lossy<B: AsRef<[u8]>>(slice: B) -> (char, usize) { + match decode(slice) { + (Some(ch), size) => (ch, size), + (None, size) => ('\u{FFFD}', size), + } +} + +/// UTF-8 decode a single Unicode scalar value from the end of a slice. +/// +/// When successful, the corresponding Unicode scalar value is returned along +/// with the number of bytes it was encoded with. The number of bytes consumed +/// for a successful decode is always between 1 and 4, inclusive. +/// +/// When unsuccessful, `None` is returned along with the number of bytes that +/// make up a maximal prefix of a valid UTF-8 code unit sequence. In this case, +/// the number of bytes consumed is always between 0 and 3, inclusive, where +/// 0 is only returned when `slice` is empty. 
+/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use bstr::decode_last_utf8; +/// +/// // Decoding a valid codepoint. +/// let (ch, size) = decode_last_utf8(b"\xE2\x98\x83"); +/// assert_eq!(Some('☃'), ch); +/// assert_eq!(3, size); +/// +/// // Decoding an incomplete codepoint. +/// let (ch, size) = decode_last_utf8(b"\xE2\x98"); +/// assert_eq!(None, ch); +/// assert_eq!(2, size); +/// ``` +/// +/// This example shows how to iterate over all codepoints in UTF-8 encoded +/// bytes in reverse, while replacing invalid UTF-8 sequences with the +/// replacement codepoint: +/// +/// ``` +/// use bstr::{B, decode_last_utf8}; +/// +/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61"); +/// let mut chars = vec![]; +/// while !bytes.is_empty() { +/// let (ch, size) = decode_last_utf8(bytes); +/// bytes = &bytes[..bytes.len()-size]; +/// chars.push(ch.unwrap_or('\u{FFFD}')); +/// } +/// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars); +/// ``` +#[inline] +pub fn decode_last<B: AsRef<[u8]>>(slice: B) -> (Option<char>, usize) { + // TODO: We could implement this by reversing the UTF-8 automaton, but for + // now, we do it the slow way by using the forward automaton. + + let slice = slice.as_ref(); + if slice.is_empty() { + return (None, 0); + } + let mut start = slice.len() - 1; + let limit = slice.len().saturating_sub(4); + while start > limit && !is_leading_or_invalid_utf8_byte(slice[start]) { + start -= 1; + } + let (ch, size) = decode(&slice[start..]); + // If we didn't consume all of the bytes, then that means there's at least + // one stray byte that never occurs in a valid code unit prefix, so we can + // advance by one byte. + if start + size != slice.len() { + (None, 1) + } else { + (ch, size) + } +} + +/// Lossily UTF-8 decode a single Unicode scalar value from the end of a slice. +/// +/// When successful, the corresponding Unicode scalar value is returned along +/// with the number of bytes it was encoded with. 
The number of bytes consumed +/// for a successful decode is always between 1 and 4, inclusive. +/// +/// When unsuccessful, the Unicode replacement codepoint (`U+FFFD`) is returned +/// along with the number of bytes that make up a maximal prefix of a valid +/// UTF-8 code unit sequence. In this case, the number of bytes consumed is +/// always between 0 and 3, inclusive, where 0 is only returned when `slice` is +/// empty. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ```ignore +/// use bstr::decode_last_utf8_lossy; +/// +/// // Decoding a valid codepoint. +/// let (ch, size) = decode_last_utf8_lossy(b"\xE2\x98\x83"); +/// assert_eq!('☃', ch); +/// assert_eq!(3, size); +/// +/// // Decoding an incomplete codepoint. +/// let (ch, size) = decode_last_utf8_lossy(b"\xE2\x98"); +/// assert_eq!('\u{FFFD}', ch); +/// assert_eq!(2, size); +/// ``` +/// +/// This example shows how to iterate over all codepoints in UTF-8 encoded +/// bytes in reverse, while replacing invalid UTF-8 sequences with the +/// replacement codepoint: +/// +/// ```ignore +/// use bstr::{B, decode_last_utf8_lossy}; +/// +/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61"); +/// let mut chars = vec![]; +/// while !bytes.is_empty() { +/// let (ch, size) = decode_last_utf8_lossy(bytes); +/// bytes = &bytes[..bytes.len()-size]; +/// chars.push(ch); +/// } +/// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars); +/// ``` +#[inline] +pub fn decode_last_lossy<B: AsRef<[u8]>>(slice: B) -> (char, usize) { + match decode_last(slice) { + (Some(ch), size) => (ch, size), + (None, size) => ('\u{FFFD}', size), + } +} + +/// SAFETY: The decode function relies on state being equal to ACCEPT only if +/// cp is a valid Unicode scalar value. 
+#[inline] +pub fn decode_step(state: &mut usize, cp: &mut u32, b: u8) { + let class = CLASSES[b as usize]; + if *state == ACCEPT { + *cp = (0xFF >> class) & (b as u32); + } else { + *cp = (b as u32 & 0b111111) | (*cp << 6); + } + *state = STATES_FORWARD[*state + class as usize] as usize; +} + +/// Returns true if and only if the given byte is either a valid leading UTF-8 +/// byte, or is otherwise an invalid byte that can never appear anywhere in a +/// valid UTF-8 sequence. +fn is_leading_or_invalid_utf8_byte(b: u8) -> bool { + // In the ASCII case, the most significant bit is never set. The leading + // byte of a 2/3/4-byte sequence always has the top two most significant + // bits set. For bytes that can never appear anywhere in valid UTF-8, this + // also returns true, since every such byte has its two most significant + // bits set: + // + // \xC0 :: 11000000 + // \xC1 :: 11000001 + // \xF5 :: 11110101 + // \xF6 :: 11110110 + // \xF7 :: 11110111 + // \xF8 :: 11111000 + // \xF9 :: 11111001 + // \xFA :: 11111010 + // \xFB :: 11111011 + // \xFC :: 11111100 + // \xFD :: 11111101 + // \xFE :: 11111110 + // \xFF :: 11111111 + (b & 0b1100_0000) != 0b1000_0000 +} + +#[cfg(test)] +mod tests { + use std::char; + + use ext_slice::{ByteSlice, B}; + use tests::LOSSY_TESTS; + use utf8::{self, Utf8Error}; + + fn utf8e(valid_up_to: usize) -> Utf8Error { + Utf8Error { valid_up_to, error_len: None } + } + + fn utf8e2(valid_up_to: usize, error_len: usize) -> Utf8Error { + Utf8Error { valid_up_to, error_len: Some(error_len) } + } + + #[test] + fn validate_all_codepoints() { + for i in 0..(0x10FFFF + 1) { + let cp = match char::from_u32(i) { + None => continue, + Some(cp) => cp, + }; + let mut buf = [0; 4]; + let s = cp.encode_utf8(&mut buf); + assert_eq!(Ok(()), utf8::validate(s.as_bytes())); + } + } + + #[test] + fn validate_multiple_codepoints() { + assert_eq!(Ok(()), utf8::validate(b"abc")); + assert_eq!(Ok(()), utf8::validate(b"a\xE2\x98\x83a")); + assert_eq!(Ok(()), 
utf8::validate(b"a\xF0\x9D\x9C\xB7a")); + assert_eq!(Ok(()), utf8::validate(b"\xE2\x98\x83\xF0\x9D\x9C\xB7",)); + assert_eq!( + Ok(()), + utf8::validate(b"a\xE2\x98\x83a\xF0\x9D\x9C\xB7a",) + ); + assert_eq!( + Ok(()), + utf8::validate(b"\xEF\xBF\xBD\xE2\x98\x83\xEF\xBF\xBD",) + ); + } + + #[test] + fn validate_errors() { + // single invalid byte + assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xFF")); + // single invalid byte after ASCII + assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xFF")); + // single invalid byte after 2 byte sequence + assert_eq!(Err(utf8e2(2, 1)), utf8::validate(b"\xCE\xB2\xFF")); + // single invalid byte after 3 byte sequence + assert_eq!(Err(utf8e2(3, 1)), utf8::validate(b"\xE2\x98\x83\xFF")); + // single invalid byte after 4 byte sequence + assert_eq!(Err(utf8e2(4, 1)), utf8::validate(b"\xF0\x9D\x9D\xB1\xFF")); + + // An invalid 2-byte sequence with a valid 1-byte prefix. + assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xCE\xF0")); + // An invalid 3-byte sequence with a valid 2-byte prefix. + assert_eq!(Err(utf8e2(0, 2)), utf8::validate(b"\xE2\x98\xF0")); + // An invalid 4-byte sequence with a valid 3-byte prefix. + assert_eq!(Err(utf8e2(0, 3)), utf8::validate(b"\xF0\x9D\x9D\xF0")); + + // An overlong sequence. Should be \xE2\x82\xAC, but we encode the + // same codepoint value in 4 bytes. This not only tests that we reject + // overlong sequences, but that we get valid_up_to correct. + assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xF0\x82\x82\xAC")); + assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xF0\x82\x82\xAC")); + assert_eq!( + Err(utf8e2(3, 1)), + utf8::validate(b"\xE2\x98\x83\xF0\x82\x82\xAC",) + ); + + // Check that encoding a surrogate codepoint using the UTF-8 scheme + // fails validation. 
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xED\xA0\x80")); + assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xED\xA0\x80")); + assert_eq!( + Err(utf8e2(3, 1)), + utf8::validate(b"\xE2\x98\x83\xED\xA0\x80",) + ); + + // Check that an incomplete 2-byte sequence fails. + assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xCEa")); + assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xCEa")); + assert_eq!( + Err(utf8e2(3, 1)), + utf8::validate(b"\xE2\x98\x83\xCE\xE2\x98\x83",) + ); + // Check that an incomplete 3-byte sequence fails. + assert_eq!(Err(utf8e2(0, 2)), utf8::validate(b"\xE2\x98a")); + assert_eq!(Err(utf8e2(1, 2)), utf8::validate(b"a\xE2\x98a")); + assert_eq!( + Err(utf8e2(3, 2)), + utf8::validate(b"\xE2\x98\x83\xE2\x98\xE2\x98\x83",) + ); + // Check that an incomplete 4-byte sequence fails. + assert_eq!(Err(utf8e2(0, 3)), utf8::validate(b"\xF0\x9D\x9Ca")); + assert_eq!(Err(utf8e2(1, 3)), utf8::validate(b"a\xF0\x9D\x9Ca")); + assert_eq!( + Err(utf8e2(4, 3)), + utf8::validate(b"\xF0\x9D\x9C\xB1\xF0\x9D\x9C\xE2\x98\x83",) + ); + assert_eq!( + Err(utf8e2(6, 3)), + utf8::validate(b"foobar\xF1\x80\x80quux",) + ); + + // Check that an incomplete (EOF) 2-byte sequence fails. + assert_eq!(Err(utf8e(0)), utf8::validate(b"\xCE")); + assert_eq!(Err(utf8e(1)), utf8::validate(b"a\xCE")); + assert_eq!(Err(utf8e(3)), utf8::validate(b"\xE2\x98\x83\xCE")); + // Check that an incomplete (EOF) 3-byte sequence fails. + assert_eq!(Err(utf8e(0)), utf8::validate(b"\xE2\x98")); + assert_eq!(Err(utf8e(1)), utf8::validate(b"a\xE2\x98")); + assert_eq!(Err(utf8e(3)), utf8::validate(b"\xE2\x98\x83\xE2\x98")); + // Check that an incomplete (EOF) 4-byte sequence fails. + assert_eq!(Err(utf8e(0)), utf8::validate(b"\xF0\x9D\x9C")); + assert_eq!(Err(utf8e(1)), utf8::validate(b"a\xF0\x9D\x9C")); + assert_eq!( + Err(utf8e(4)), + utf8::validate(b"\xF0\x9D\x9C\xB1\xF0\x9D\x9C",) + ); + + // Test that we report errors correctly even after long valid sequences. 
This + // checks that our "backup" logic for detecting errors is correct. + assert_eq!( + Err(utf8e2(8, 1)), + utf8::validate(b"\xe2\x98\x83\xce\xb2\xe3\x83\x84\xFF",) + ); + } + + #[test] + fn decode_valid() { + fn d(mut s: &str) -> Vec<char> { + let mut chars = vec![]; + while !s.is_empty() { + let (ch, size) = utf8::decode(s.as_bytes()); + s = &s[size..]; + chars.push(ch.unwrap()); + } + chars + } + + assert_eq!(vec!['☃'], d("☃")); + assert_eq!(vec!['☃', '☃'], d("☃☃")); + assert_eq!(vec!['α', 'β', 'γ', 'δ', 'ε'], d("αβγδε")); + assert_eq!(vec!['☃', '⛄', '⛇'], d("☃⛄⛇")); + assert_eq!(vec!['𝗮', '𝗯', '𝗰', '𝗱', '𝗲'], d("𝗮𝗯𝗰𝗱𝗲")); + } + + #[test] + fn decode_invalid() { + let (ch, size) = utf8::decode(b""); + assert_eq!(None, ch); + assert_eq!(0, size); + + let (ch, size) = utf8::decode(b"\xFF"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode(b"\xCE\xF0"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode(b"\xE2\x98\xF0"); + assert_eq!(None, ch); + assert_eq!(2, size); + + let (ch, size) = utf8::decode(b"\xF0\x9D\x9D"); + assert_eq!(None, ch); + assert_eq!(3, size); + + let (ch, size) = utf8::decode(b"\xF0\x9D\x9D\xF0"); + assert_eq!(None, ch); + assert_eq!(3, size); + + let (ch, size) = utf8::decode(b"\xF0\x82\x82\xAC"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode(b"\xED\xA0\x80"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode(b"\xCEa"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode(b"\xE2\x98a"); + assert_eq!(None, ch); + assert_eq!(2, size); + + let (ch, size) = utf8::decode(b"\xF0\x9D\x9Ca"); + assert_eq!(None, ch); + assert_eq!(3, size); + } + + #[test] + fn decode_lossy() { + let (ch, size) = utf8::decode_lossy(b""); + assert_eq!('\u{FFFD}', ch); + assert_eq!(0, size); + + let (ch, size) = utf8::decode_lossy(b"\xFF"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = 
utf8::decode_lossy(b"\xCE\xF0"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_lossy(b"\xE2\x98\xF0"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(2, size); + + let (ch, size) = utf8::decode_lossy(b"\xF0\x9D\x9D\xF0"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(3, size); + + let (ch, size) = utf8::decode_lossy(b"\xF0\x82\x82\xAC"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_lossy(b"\xED\xA0\x80"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_lossy(b"\xCEa"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_lossy(b"\xE2\x98a"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(2, size); + + let (ch, size) = utf8::decode_lossy(b"\xF0\x9D\x9Ca"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(3, size); + } + + #[test] + fn decode_last_valid() { + fn d(mut s: &str) -> Vec<char> { + let mut chars = vec![]; + while !s.is_empty() { + let (ch, size) = utf8::decode_last(s.as_bytes()); + s = &s[..s.len() - size]; + chars.push(ch.unwrap()); + } + chars + } + + assert_eq!(vec!['☃'], d("☃")); + assert_eq!(vec!['☃', '☃'], d("☃☃")); + assert_eq!(vec!['ε', 'δ', 'γ', 'β', 'α'], d("αβγδε")); + assert_eq!(vec!['⛇', '⛄', '☃'], d("☃⛄⛇")); + assert_eq!(vec!['𝗲', '𝗱', '𝗰', '𝗯', '𝗮'], d("𝗮𝗯𝗰𝗱𝗲")); + } + + #[test] + fn decode_last_invalid() { + let (ch, size) = utf8::decode_last(b""); + assert_eq!(None, ch); + assert_eq!(0, size); + + let (ch, size) = utf8::decode_last(b"\xFF"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last(b"\xCE\xF0"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last(b"\xCE"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last(b"\xE2\x98\xF0"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last(b"\xE2\x98"); + assert_eq!(None, ch); + assert_eq!(2, size); + + let (ch, size) = 
utf8::decode_last(b"\xF0\x9D\x9D\xF0"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last(b"\xF0\x9D\x9D"); + assert_eq!(None, ch); + assert_eq!(3, size); + + let (ch, size) = utf8::decode_last(b"\xF0\x82\x82\xAC"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last(b"\xED\xA0\x80"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last(b"\xED\xA0"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last(b"\xED"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last(b"a\xCE"); + assert_eq!(None, ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last(b"a\xE2\x98"); + assert_eq!(None, ch); + assert_eq!(2, size); + + let (ch, size) = utf8::decode_last(b"a\xF0\x9D\x9C"); + assert_eq!(None, ch); + assert_eq!(3, size); + } + + #[test] + fn decode_last_lossy() { + let (ch, size) = utf8::decode_last_lossy(b""); + assert_eq!('\u{FFFD}', ch); + assert_eq!(0, size); + + let (ch, size) = utf8::decode_last_lossy(b"\xFF"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last_lossy(b"\xCE\xF0"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last_lossy(b"\xCE"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last_lossy(b"\xE2\x98\xF0"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last_lossy(b"\xE2\x98"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(2, size); + + let (ch, size) = utf8::decode_last_lossy(b"\xF0\x9D\x9D\xF0"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last_lossy(b"\xF0\x9D\x9D"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(3, size); + + let (ch, size) = utf8::decode_last_lossy(b"\xF0\x82\x82\xAC"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = 
utf8::decode_last_lossy(b"\xED\xA0\x80"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last_lossy(b"\xED\xA0"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last_lossy(b"\xED"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last_lossy(b"a\xCE"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(1, size); + + let (ch, size) = utf8::decode_last_lossy(b"a\xE2\x98"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(2, size); + + let (ch, size) = utf8::decode_last_lossy(b"a\xF0\x9D\x9C"); + assert_eq!('\u{FFFD}', ch); + assert_eq!(3, size); + } + + #[test] + fn chars() { + for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() { + let got: String = B(input).chars().collect(); + assert_eq!( + expected, got, + "chars(ith: {:?}, given: {:?})", + i, input, + ); + let got: String = + B(input).char_indices().map(|(_, _, ch)| ch).collect(); + assert_eq!( + expected, got, + "char_indices(ith: {:?}, given: {:?})", + i, input, + ); + + let expected: String = expected.chars().rev().collect(); + + let got: String = B(input).chars().rev().collect(); + assert_eq!( + expected, got, + "chars.rev(ith: {:?}, given: {:?})", + i, input, + ); + let got: String = + B(input).char_indices().rev().map(|(_, _, ch)| ch).collect(); + assert_eq!( + expected, got, + "char_indices.rev(ith: {:?}, given: {:?})", + i, input, + ); + } + } + + #[test] + fn utf8_chunks() { + let mut c = utf8::Utf8Chunks { bytes: b"123\xC0" }; + assert_eq!( + (c.next(), c.next()), + ( + Some(utf8::Utf8Chunk { + valid: "123", + invalid: b"\xC0".as_bstr(), + incomplete: false, + }), + None, + ) + ); + + let mut c = utf8::Utf8Chunks { bytes: b"123\xFF\xFF" }; + assert_eq!( + (c.next(), c.next(), c.next()), + ( + Some(utf8::Utf8Chunk { + valid: "123", + invalid: b"\xFF".as_bstr(), + incomplete: false, + }), + Some(utf8::Utf8Chunk { + valid: "", + invalid: b"\xFF".as_bstr(), + incomplete: false, + }), + None, + 
) + ); + + let mut c = utf8::Utf8Chunks { bytes: b"123\xD0" }; + assert_eq!( + (c.next(), c.next()), + ( + Some(utf8::Utf8Chunk { + valid: "123", + invalid: b"\xD0".as_bstr(), + incomplete: true, + }), + None, + ) + ); + + let mut c = utf8::Utf8Chunks { bytes: b"123\xD0456" }; + assert_eq!( + (c.next(), c.next(), c.next()), + ( + Some(utf8::Utf8Chunk { + valid: "123", + invalid: b"\xD0".as_bstr(), + incomplete: false, + }), + Some(utf8::Utf8Chunk { + valid: "456", + invalid: b"".as_bstr(), + incomplete: false, + }), + None, + ) + ); + + let mut c = utf8::Utf8Chunks { bytes: b"123\xE2\x98" }; + assert_eq!( + (c.next(), c.next()), + ( + Some(utf8::Utf8Chunk { + valid: "123", + invalid: b"\xE2\x98".as_bstr(), + incomplete: true, + }), + None, + ) + ); + + let mut c = utf8::Utf8Chunks { bytes: b"123\xF4\x8F\xBF" }; + assert_eq!( + (c.next(), c.next()), + ( + Some(utf8::Utf8Chunk { + valid: "123", + invalid: b"\xF4\x8F\xBF".as_bstr(), + incomplete: true, + }), + None, + ) + ); + } +} |