aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kotur <qtr@google.com>2021-03-16 20:22:31 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2021-03-16 20:22:31 +0000
commit2cb058fabfe04b3d486badfa02d764aeee4f0abc (patch)
treedb39b49c7217032e6b3ab6255f27b3d84122bad9
parent9a565aa594c047b0553c5f1dd0be3c7aee131747 (diff)
parent9bf6d11f8461e4faefa7b5b038470944a1b81ed6 (diff)
downloadbstr-2cb058fabfe04b3d486badfa02d764aeee4f0abc.tar.gz
Initial import of bstr-0.2.14. am: dd59cf911d am: 8606fb7c0b am: 9bf6d11f84
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/bstr/+/1620788 Change-Id: I84c0cba42fa71eba0c9ab48a77a7287bebe6bf52
-rw-r--r--.cargo_vcs_info.json5
-rw-r--r--.gitignore4
-rw-r--r--Cargo.toml68
-rw-r--r--Cargo.toml.orig41
-rw-r--r--README.md254
-rw-r--r--examples/graphemes-std.rs28
-rw-r--r--examples/graphemes.rs24
-rw-r--r--examples/lines-std.rs17
-rw-r--r--examples/lines.rs19
-rw-r--r--examples/uppercase-std.rs15
-rw-r--r--examples/uppercase.rs20
-rw-r--r--examples/words-std.rs20
-rw-r--r--examples/words.rs17
-rw-r--r--rustfmt.toml2
-rwxr-xr-xscripts/generate-unicode-data149
-rw-r--r--scripts/regex/grapheme.sh50
-rw-r--r--scripts/regex/sentence.sh176
-rw-r--r--scripts/regex/word.sh111
-rw-r--r--src/ascii.rs336
-rw-r--r--src/bstr.rs74
-rw-r--r--src/bstring.rs59
-rw-r--r--src/byteset/mod.rs115
-rw-r--r--src/byteset/scalar.rs295
-rw-r--r--src/cow.rs84
-rw-r--r--src/ext_slice.rs3695
-rw-r--r--src/ext_vec.rs1108
-rw-r--r--src/impls.rs969
-rw-r--r--src/io.rs514
-rw-r--r--src/lib.rs456
-rw-r--r--src/search/byte_frequencies.rs258
-rw-r--r--src/search/mod.rs8
-rw-r--r--src/search/prefilter.rs424
-rw-r--r--src/search/tests.rs225
-rw-r--r--src/search/twoway.rs871
-rw-r--r--src/tests.rs32
-rw-r--r--src/unicode/data/GraphemeBreakTest.txt630
-rw-r--r--src/unicode/data/LICENSE-UNICODE45
-rw-r--r--src/unicode/data/SentenceBreakTest.txt530
-rw-r--r--src/unicode/data/WordBreakTest.txt1851
-rw-r--r--src/unicode/fsm/grapheme_break_fwd.bigendian.dfabin0 -> 10589 bytes
-rw-r--r--src/unicode/fsm/grapheme_break_fwd.littleendian.dfabin0 -> 10589 bytes
-rw-r--r--src/unicode/fsm/grapheme_break_fwd.rs41
-rw-r--r--src/unicode/fsm/grapheme_break_rev.bigendian.dfabin0 -> 53905 bytes
-rw-r--r--src/unicode/fsm/grapheme_break_rev.littleendian.dfabin0 -> 53905 bytes
-rw-r--r--src/unicode/fsm/grapheme_break_rev.rs41
-rw-r--r--src/unicode/fsm/mod.rs8
-rw-r--r--src/unicode/fsm/regional_indicator_rev.bigendian.dfabin0 -> 366 bytes
-rw-r--r--src/unicode/fsm/regional_indicator_rev.littleendian.dfabin0 -> 366 bytes
-rw-r--r--src/unicode/fsm/regional_indicator_rev.rs41
-rw-r--r--src/unicode/fsm/sentence_break_fwd.bigendian.dfabin0 -> 149903 bytes
-rw-r--r--src/unicode/fsm/sentence_break_fwd.littleendian.dfabin0 -> 149903 bytes
-rw-r--r--src/unicode/fsm/sentence_break_fwd.rs41
-rw-r--r--src/unicode/fsm/simple_word_fwd.bigendian.dfabin0 -> 8975 bytes
-rw-r--r--src/unicode/fsm/simple_word_fwd.littleendian.dfabin0 -> 8975 bytes
-rw-r--r--src/unicode/fsm/simple_word_fwd.rs41
-rw-r--r--src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfabin0 -> 572 bytes
-rw-r--r--src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfabin0 -> 572 bytes
-rw-r--r--src/unicode/fsm/whitespace_anchored_fwd.rs41
-rw-r--r--src/unicode/fsm/whitespace_anchored_rev.bigendian.dfabin0 -> 598 bytes
-rw-r--r--src/unicode/fsm/whitespace_anchored_rev.littleendian.dfabin0 -> 598 bytes
-rw-r--r--src/unicode/fsm/whitespace_anchored_rev.rs41
-rw-r--r--src/unicode/fsm/word_break_fwd.bigendian.dfabin0 -> 229739 bytes
-rw-r--r--src/unicode/fsm/word_break_fwd.littleendian.dfabin0 -> 229739 bytes
-rw-r--r--src/unicode/fsm/word_break_fwd.rs41
-rw-r--r--src/unicode/grapheme.rs355
-rw-r--r--src/unicode/mod.rs12
-rw-r--r--src/unicode/sentence.rs220
-rw-r--r--src/unicode/whitespace.rs14
-rw-r--r--src/unicode/word.rs406
-rw-r--r--src/utf8.rs1370
70 files changed, 16312 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..2ecf829
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,5 @@
+{
+ "git": {
+ "sha1": "7f0ad15d9628c0abec8cf6b7585539cae63e6d5b"
+ }
+}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..42cacb3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.*.swp
+tags
+target
+/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..b4215b4
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,68 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "bstr"
+version = "0.2.14"
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+exclude = ["/.github"]
+description = "A string type that is not required to be valid UTF-8."
+homepage = "https://github.com/BurntSushi/bstr"
+documentation = "https://docs.rs/bstr"
+readme = "README.md"
+keywords = ["string", "str", "byte", "bytes", "text"]
+categories = ["text-processing", "encoding"]
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/BurntSushi/bstr"
+[profile.release]
+debug = true
+
+[lib]
+bench = false
+[dependencies.lazy_static]
+version = "1.2"
+optional = true
+
+[dependencies.memchr]
+version = "2.1.2"
+default-features = false
+
+[dependencies.regex-automata]
+version = "0.1.5"
+optional = true
+default-features = false
+
+[dependencies.serde]
+version = "1.0.85"
+optional = true
+default-features = false
+[dev-dependencies.quickcheck]
+version = "0.8.1"
+default-features = false
+
+[dev-dependencies.ucd-parse]
+version = "0.1.3"
+
+[dev-dependencies.unicode-segmentation]
+version = "1.2.1"
+
+[features]
+default = ["std", "unicode"]
+serde1 = ["std", "serde1-nostd", "serde/std"]
+serde1-nostd = ["serde"]
+std = ["memchr/use_std"]
+unicode = ["lazy_static", "regex-automata"]
+[badges.appveyor]
+repository = "BurntSushi/bstr"
+
+[badges.travis-ci]
+repository = "BurntSushi/bstr"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..63388bc
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,41 @@
+[package]
+name = "bstr"
+version = "0.2.14" #:version
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+description = "A string type that is not required to be valid UTF-8."
+documentation = "https://docs.rs/bstr"
+homepage = "https://github.com/BurntSushi/bstr"
+repository = "https://github.com/BurntSushi/bstr"
+readme = "README.md"
+keywords = ["string", "str", "byte", "bytes", "text"]
+license = "MIT OR Apache-2.0"
+categories = ["text-processing", "encoding"]
+exclude = ["/.github"]
+
+[badges]
+travis-ci = { repository = "BurntSushi/bstr" }
+appveyor = { repository = "BurntSushi/bstr" }
+
+[lib]
+bench = false
+
+[features]
+default = ["std", "unicode"]
+std = ["memchr/use_std"]
+unicode = ["lazy_static", "regex-automata"]
+serde1 = ["std", "serde1-nostd", "serde/std"]
+serde1-nostd = ["serde"]
+
+[dependencies]
+memchr = { version = "2.1.2", default-features = false }
+lazy_static = { version = "1.2", optional = true }
+regex-automata = { version = "0.1.5", default-features = false, optional = true }
+serde = { version = "1.0.85", default-features = false, optional = true }
+
+[dev-dependencies]
+quickcheck = { version = "0.8.1", default-features = false }
+ucd-parse = "0.1.3"
+unicode-segmentation = "1.2.1"
+
+[profile.release]
+debug = true
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3e4ef8b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,254 @@
+bstr
+====
+This crate provides extension traits for `&[u8]` and `Vec<u8>` that enable
+their use as byte strings, where byte strings are _conventionally_ UTF-8. This
+differs from the standard library's `String` and `str` types in that they are
+not required to be valid UTF-8, but may be fully or partially valid UTF-8.
+
+[![Build status](https://github.com/BurntSushi/bstr/workflows/ci/badge.svg)](https://github.com/BurntSushi/bstr/actions)
+[![](https://meritbadge.herokuapp.com/bstr)](https://crates.io/crates/bstr)
+
+
+### Documentation
+
+https://docs.rs/bstr
+
+
+### When should I use byte strings?
+
+See this part of the documentation for more details:
+https://docs.rs/bstr/0.2.*/bstr/#when-should-i-use-byte-strings.
+
+The short story is that byte strings are useful when it is inconvenient or
+incorrect to require valid UTF-8.
+
+
+### Usage
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+bstr = "0.2"
+```
+
+
+### Examples
+
+The following two examples exhibit both the API features of byte strings and
+the I/O convenience functions provided for reading line-by-line quickly.
+
+This first example simply shows how to efficiently iterate over lines in
+stdin, and print out lines containing a particular substring:
+
+```rust
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{ByteSlice, io::BufReadExt};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ if line.contains_str("Dimension") {
+ stdout.write_all(line)?;
+ }
+ Ok(true)
+ })?;
+ Ok(())
+}
+```
+
+This example shows how to count all of the words (Unicode-aware) in stdin,
+line-by-line:
+
+```rust
+use std::error::Error;
+use std::io;
+
+use bstr::{ByteSlice, io::BufReadExt};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut words = 0;
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ words += line.words().count();
+ Ok(true)
+ })?;
+ println!("{}", words);
+ Ok(())
+}
+```
+
+This example shows how to convert a stream on stdin to uppercase without
+performing UTF-8 validation _and_ amortizing allocation. On standard ASCII
+text, this is quite a bit faster than what you can (easily) do with standard
+library APIs. (N.B. Any invalid UTF-8 bytes are passed through unchanged.)
+
+```rust
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{ByteSlice, io::BufReadExt};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ let mut upper = vec![];
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ upper.clear();
+ line.to_uppercase_into(&mut upper);
+ stdout.write_all(&upper)?;
+ Ok(true)
+ })?;
+ Ok(())
+}
+```
+
+This example shows how to extract the first 10 visual characters (as grapheme
+clusters) from each line, where invalid UTF-8 sequences are generally treated
+as a single character and are passed through correctly:
+
+```rust
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{ByteSlice, io::BufReadExt};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ let end = line
+ .grapheme_indices()
+ .map(|(_, end, _)| end)
+ .take(10)
+ .last()
+ .unwrap_or(line.len());
+ stdout.write_all(line[..end].trim_end())?;
+ stdout.write_all(b"\n")?;
+ Ok(true)
+ })?;
+ Ok(())
+}
+```
+
+
+### Cargo features
+
+This crates comes with a few features that control standard library, serde
+and Unicode support.
+
+* `std` - **Enabled** by default. This provides APIs that require the standard
+ library, such as `Vec<u8>`.
+* `unicode` - **Enabled** by default. This provides APIs that require sizable
+ Unicode data compiled into the binary. This includes, but is not limited to,
+ grapheme/word/sentence segmenters. When this is disabled, basic support such
+ as UTF-8 decoding is still included.
+* `serde1` - **Disabled** by default. Enables implementations of serde traits
+ for the `BStr` and `BString` types.
+* `serde1-nostd` - **Disabled** by default. Enables implementations of serde
+ traits for the `BStr` type only, intended for use without the standard
+ library. Generally, you either want `serde1` or `serde1-nostd`, not both.
+
+
+### Minimum Rust version policy
+
+This crate's minimum supported `rustc` version (MSRV) is `1.28.0`.
+
+In general, this crate will be conservative with respect to the minimum
+supported version of Rust. MSRV may be bumped in minor version releases.
+
+
+### Future work
+
+Since this is meant to be a core crate, getting a `1.0` release is a priority.
+My hope is to move to `1.0` within the next year and commit to its API so that
+`bstr` can be used as a public dependency.
+
+A large part of the API surface area was taken from the standard library, so
+from an API design perspective, a good portion of this crate should be mature.
+The main differences from the standard library are in how the various substring
+search routines work. The standard library provides generic infrastructure for
+supporting different types of searches with a single method, where as this
+library prefers to define new methods for each type of search and drop the
+generic infrastructure.
+
+Some _probable_ future considerations for APIs include, but are not limited to:
+
+* A convenience layer on top of the `aho-corasick` crate.
+* Unicode normalization.
+* More sophisticated support for dealing with Unicode case, perhaps by
+ combining the use cases supported by [`caseless`](https://docs.rs/caseless)
+ and [`unicase`](https://docs.rs/unicase).
+* Add facilities for dealing with OS strings and file paths, probably via
+ simple conversion routines.
+
+Here are some examples that are _probably_ out of scope for this crate:
+
+* Regular expressions.
+* Unicode collation.
+
+The exact scope isn't quite clear, but I expect we can iterate on it.
+
+In general, as stated below, this crate is an experiment in bringing lots of
+related APIs together into a single crate while simultaneously attempting to
+keep the total number of dependencies low. Indeed, every dependency of `bstr`,
+except for `memchr`, is optional.
+
+
+### High level motivation
+
+Strictly speaking, the `bstr` crate provides very little that can't already be
+achieved with the standard library `Vec<u8>`/`&[u8]` APIs and the ecosystem of
+library crates. For example:
+
+* The standard library's
+ [`Utf8Error`](https://doc.rust-lang.org/std/str/struct.Utf8Error.html)
+ can be used for incremental lossy decoding of `&[u8]`.
+* The
+ [`unicode-segmentation`](https://unicode-rs.github.io/unicode-segmentation/unicode_segmentation/index.html)
+ crate can be used for iterating over graphemes (or words), but is only
+ implemented for `&str` types. One could use `Utf8Error` above to implement
+ grapheme iteration with the same semantics as what `bstr` provides (automatic
+ Unicode replacement codepoint substitution).
+* The [`twoway`](https://docs.rs/twoway) crate can be used for
+ fast substring searching on `&[u8]`.
+
+So why create `bstr`? Part of the point of the `bstr` crate is to provide a
+uniform API of coupled components instead of relying on users to piece together
+loosely coupled components from the crate ecosystem. For example, if you wanted
+to perform a search and replace in a `Vec<u8>`, then writing the code to do
+that with the `twoway` crate is not that difficult, but it's still additional
+glue code you have to write. This work adds up depending on what you're doing.
+Consider, for example, trimming and splitting, along with their different
+variants.
+
+In other words, `bstr` is partially a way of pushing back against the
+micro-crate ecosystem that appears to be evolving. It's not clear to me whether
+this experiment will be successful or not, but it is definitely a goal of
+`bstr` to keep its dependency list lightweight. For example, `serde` is an
+optional dependency because there is no feasible alternative, but `twoway` is
+not, where we instead prefer to implement our own substring search. In service
+of this philosophy, currently, the only required dependency of `bstr` is
+`memchr`.
+
+
+### License
+
+This project is licensed under either of
+
+ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+ https://www.apache.org/licenses/LICENSE-2.0)
+ * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+ https://opensource.org/licenses/MIT)
+
+at your option.
+
+The data in `src/unicode/data/` is licensed under the Unicode License Agreement
+([LICENSE-UNICODE](https://www.unicode.org/copyright.html#License)), although
+this data is only used in tests.
diff --git a/examples/graphemes-std.rs b/examples/graphemes-std.rs
new file mode 100644
index 0000000..3522736
--- /dev/null
+++ b/examples/graphemes-std.rs
@@ -0,0 +1,28 @@
+extern crate unicode_segmentation;
+
+use std::error::Error;
+use std::io::{self, BufRead, Write};
+
+use unicode_segmentation::UnicodeSegmentation;
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdin = stdin.lock();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ let mut line = String::new();
+ while stdin.read_line(&mut line)? > 0 {
+ let end = line
+ .grapheme_indices(true)
+ .map(|(start, g)| start + g.len())
+ .take(10)
+ .last()
+ .unwrap_or(line.len());
+ #[allow(deprecated)] // for Rust 1.28.0
+ stdout.write_all(line[..end].trim_right().as_bytes())?;
+ stdout.write_all(b"\n")?;
+
+ line.clear();
+ }
+ Ok(())
+}
diff --git a/examples/graphemes.rs b/examples/graphemes.rs
new file mode 100644
index 0000000..2372490
--- /dev/null
+++ b/examples/graphemes.rs
@@ -0,0 +1,24 @@
+extern crate bstr;
+
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{io::BufReadExt, ByteSlice};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ let end = line
+ .grapheme_indices()
+ .map(|(_, end, _)| end)
+ .take(10)
+ .last()
+ .unwrap_or(line.len());
+ stdout.write_all(line[..end].trim_end())?;
+ stdout.write_all(b"\n")?;
+ Ok(true)
+ })?;
+ Ok(())
+}
diff --git a/examples/lines-std.rs b/examples/lines-std.rs
new file mode 100644
index 0000000..69fc6a5
--- /dev/null
+++ b/examples/lines-std.rs
@@ -0,0 +1,17 @@
+use std::error::Error;
+use std::io::{self, BufRead, Write};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdin = stdin.lock();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ let mut line = String::new();
+ while stdin.read_line(&mut line)? > 0 {
+ if line.contains("Dimension") {
+ stdout.write_all(line.as_bytes())?;
+ }
+ line.clear();
+ }
+ Ok(())
+}
diff --git a/examples/lines.rs b/examples/lines.rs
new file mode 100644
index 0000000..4b1045f
--- /dev/null
+++ b/examples/lines.rs
@@ -0,0 +1,19 @@
+extern crate bstr;
+
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{io::BufReadExt, ByteSlice};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ if line.contains_str("Dimension") {
+ stdout.write_all(line)?;
+ }
+ Ok(true)
+ })?;
+ Ok(())
+}
diff --git a/examples/uppercase-std.rs b/examples/uppercase-std.rs
new file mode 100644
index 0000000..672bd71
--- /dev/null
+++ b/examples/uppercase-std.rs
@@ -0,0 +1,15 @@
+use std::error::Error;
+use std::io::{self, BufRead, Write};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdin = stdin.lock();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ let mut line = String::new();
+ while stdin.read_line(&mut line)? > 0 {
+ stdout.write_all(line.to_uppercase().as_bytes())?;
+ line.clear();
+ }
+ Ok(())
+}
diff --git a/examples/uppercase.rs b/examples/uppercase.rs
new file mode 100644
index 0000000..f9771e0
--- /dev/null
+++ b/examples/uppercase.rs
@@ -0,0 +1,20 @@
+extern crate bstr;
+
+use std::error::Error;
+use std::io::{self, Write};
+
+use bstr::{io::BufReadExt, ByteSlice};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdout = io::BufWriter::new(io::stdout());
+
+ let mut upper = vec![];
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ upper.clear();
+ line.to_uppercase_into(&mut upper);
+ stdout.write_all(&upper)?;
+ Ok(true)
+ })?;
+ Ok(())
+}
diff --git a/examples/words-std.rs b/examples/words-std.rs
new file mode 100644
index 0000000..7eae116
--- /dev/null
+++ b/examples/words-std.rs
@@ -0,0 +1,20 @@
+extern crate unicode_segmentation;
+
+use std::error::Error;
+use std::io::{self, BufRead};
+
+use unicode_segmentation::UnicodeSegmentation;
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut stdin = stdin.lock();
+
+ let mut words = 0;
+ let mut line = String::new();
+ while stdin.read_line(&mut line)? > 0 {
+ words += line.unicode_words().count();
+ line.clear();
+ }
+ println!("{}", words);
+ Ok(())
+}
diff --git a/examples/words.rs b/examples/words.rs
new file mode 100644
index 0000000..eb20c0d
--- /dev/null
+++ b/examples/words.rs
@@ -0,0 +1,17 @@
+extern crate bstr;
+
+use std::error::Error;
+use std::io;
+
+use bstr::{io::BufReadExt, ByteSlice};
+
+fn main() -> Result<(), Box<dyn Error>> {
+ let stdin = io::stdin();
+ let mut words = 0;
+ stdin.lock().for_byte_line_with_terminator(|line| {
+ words += line.words().count();
+ Ok(true)
+ })?;
+ println!("{}", words);
+ Ok(())
+}
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..aa37a21
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,2 @@
+max_width = 79
+use_small_heuristics = "max"
diff --git a/scripts/generate-unicode-data b/scripts/generate-unicode-data
new file mode 100755
index 0000000..6b59fae
--- /dev/null
+++ b/scripts/generate-unicode-data
@@ -0,0 +1,149 @@
+#!/bin/sh
+
+set -e
+D="$(dirname "$0")"
+
+# Convenience function for checking that a command exists.
+requires() {
+ cmd="$1"
+ if ! command -v "$cmd" > /dev/null 2>&1; then
+ echo "DEPENDENCY MISSING: $cmd must be installed" >&2
+ exit 1
+ fi
+}
+
+# Test if an array ($2) contains a particular element ($1).
+array_exists() {
+ needle="$1"
+ shift
+
+ for el in "$@"; do
+ if [ "$el" = "$needle" ]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+graphemes() {
+ regex="$(sh "$D/regex/grapheme.sh")"
+
+ echo "generating forward grapheme DFA"
+ ucd-generate dfa \
+ --name GRAPHEME_BREAK_FWD \
+ --sparse --minimize --anchored --state-size 2 \
+ src/unicode/fsm/ \
+ "$regex"
+
+ echo "generating reverse grapheme DFA"
+ ucd-generate dfa \
+ --name GRAPHEME_BREAK_REV \
+ --reverse --longest \
+ --sparse --minimize --anchored --state-size 2 \
+ src/unicode/fsm/ \
+ "$regex"
+}
+
+words() {
+ regex="$(sh "$D/regex/word.sh")"
+
+ echo "generating forward word DFA (this can take a while)"
+ ucd-generate dfa \
+ --name WORD_BREAK_FWD \
+ --sparse --minimize --anchored --state-size 4 \
+ src/unicode/fsm/ \
+ "$regex"
+}
+
+sentences() {
+ regex="$(sh "$D/regex/sentence.sh")"
+
+ echo "generating forward sentence DFA (this can take a while)"
+ ucd-generate dfa \
+ --name SENTENCE_BREAK_FWD \
+ --minimize \
+ --sparse --anchored --state-size 4 \
+ src/unicode/fsm/ \
+ "$regex"
+}
+
+regional_indicator() {
+ # For finding all occurrences of region indicators. This is used to handle
+ # regional indicators as a special case for the reverse grapheme iterator
+ # and the reverse word iterator.
+ echo "generating regional indicator DFA"
+ ucd-generate dfa \
+ --name REGIONAL_INDICATOR_REV \
+ --reverse \
+ --classes --minimize --anchored --premultiply --state-size 1 \
+ src/unicode/fsm/ \
+ "\p{gcb=Regional_Indicator}"
+}
+
+simple_word() {
+ echo "generating forward simple word DFA"
+ ucd-generate dfa \
+ --name SIMPLE_WORD_FWD \
+ --sparse --minimize --state-size 2 \
+ src/unicode/fsm/ \
+ "\w"
+}
+
+whitespace() {
+ echo "generating forward whitespace DFA"
+ ucd-generate dfa \
+ --name WHITESPACE_ANCHORED_FWD \
+ --anchored --classes --premultiply --minimize --state-size 1 \
+ src/unicode/fsm/ \
+ "\s+"
+
+ echo "generating reverse whitespace DFA"
+ ucd-generate dfa \
+ --name WHITESPACE_ANCHORED_REV \
+ --reverse \
+ --anchored --classes --premultiply --minimize --state-size 1 \
+ src/unicode/fsm/ \
+ "\s+"
+}
+
+main() {
+ if array_exists "-h" "$@" || array_exists "--help" "$@"; then
+ echo "Usage: $(basename "$0") [--list-commands] [<command>] ..." >&2
+ exit
+ fi
+
+ commands="
+ graphemes
+ sentences
+ words
+ regional-indicator
+ simple-word
+ whitespace
+ "
+ if array_exists "--list-commands" "$@"; then
+ for cmd in $commands; do
+ echo "$cmd"
+ done
+ exit
+ fi
+
+ # ucd-generate is used to compile regexes into DFAs.
+ requires ucd-generate
+
+ mkdir -p src/unicode/fsm/
+
+ cmds=$*
+ if [ $# -eq 0 ] || array_exists "all" "$@"; then
+ cmds=$commands
+ fi
+ for cmd in $cmds; do
+ if array_exists "$cmd" $commands; then
+ fun="$(echo "$cmd" | sed 's/-/_/g')"
+ eval "$fun"
+ else
+ echo "unrecognized command: $cmd" >&2
+ fi
+ done
+}
+
+main "$@"
diff --git a/scripts/regex/grapheme.sh b/scripts/regex/grapheme.sh
new file mode 100644
index 0000000..0b2b54d
--- /dev/null
+++ b/scripts/regex/grapheme.sh
@@ -0,0 +1,50 @@
+#!/bin/sh
+
+# vim: indentexpr= nosmartindent autoindent
+# vim: tabstop=2 shiftwidth=2 softtabstop=2
+
+# This regex was manually written, derived from the rules in UAX #29.
+# Particularly, from Table 1c, which lays out a regex for grapheme clusters.
+
+CR="\p{gcb=CR}"
+LF="\p{gcb=LF}"
+Control="\p{gcb=Control}"
+Prepend="\p{gcb=Prepend}"
+L="\p{gcb=L}"
+V="\p{gcb=V}"
+LV="\p{gcb=LV}"
+LVT="\p{gcb=LVT}"
+T="\p{gcb=T}"
+RI="\p{gcb=RI}"
+Extend="\p{gcb=Extend}"
+ZWJ="\p{gcb=ZWJ}"
+SpacingMark="\p{gcb=SpacingMark}"
+
+Any="\p{any}"
+ExtendPict="\p{Extended_Pictographic}"
+
+echo "(?x)
+$CR $LF
+|
+$Control
+|
+$Prepend*
+(
+ (
+ ($L* ($V+ | $LV $V* | $LVT) $T*)
+ |
+ $L+
+ |
+ $T+
+ )
+ |
+ $RI $RI
+ |
+ $ExtendPict ($Extend* $ZWJ $ExtendPict)*
+ |
+ [^$Control $CR $LF]
+)
+[$Extend $ZWJ $SpacingMark]*
+|
+$Any
+"
diff --git a/scripts/regex/sentence.sh b/scripts/regex/sentence.sh
new file mode 100644
index 0000000..689d184
--- /dev/null
+++ b/scripts/regex/sentence.sh
@@ -0,0 +1,176 @@
+#!/bin/sh
+
+# vim: indentexpr= nosmartindent autoindent
+# vim: tabstop=2 shiftwidth=2 softtabstop=2
+
+# This is a regex that I reverse engineered from the sentence boundary chain
+# rules in UAX #29. Unlike the grapheme regex, which is essentially provided
+# for us in UAX #29, no such sentence regex exists.
+#
+# I looked into how ICU achieves this, since UAX #29 hints that producing
+# finite state machines for grapheme/sentence/word/line breaking is possible,
+# but only easy to do for graphemes. ICU does this by implementing their own
+# DSL for describing the break algorithms in terms of the chaining rules
+# directly. You can see an example for sentences in
+# icu4c/source/data/brkitr/rules/sent.txt. ICU then builds a finite state
+# machine from those rules in a mostly standard way, but implements the
+# "chaining" aspect of the rules by connecting overlapping end and start
+# states. For example, given SB7:
+#
+# (Upper | Lower) ATerm x Upper
+#
+# Then the naive way to convert this into a regex would be something like
+#
+# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper}
+#
+# Unfortunately, this is incorrect. Why? Well, consider an example like so:
+#
+# U.S.A.
+#
+# A correct implementation of the sentence breaking algorithm should not insert
+# any breaks here, exactly in accordance with repeatedly applying rule SB7 as
+# given above. Our regex fails to do this because it will first match `U.S`
+# without breaking them---which is correct---but will then start looking for
+# its next rule beginning with a full stop (in ATerm) and followed by an
+# uppercase letter (A). This will wind up triggering rule SB11 (without
+# matching `A`), which inserts a break.
+#
+# The reason why this happens is because our initial application of rule SB7
+# "consumes" the next uppercase letter (S), which we want to reuse as a prefix
+# in the next rule application. A natural way to express this would be with
+# look-around, although it's not clear that works in every case since you
+# ultimately might want to consume that ending uppercase letter. In any case,
+# we can't use look-around in our truly regular regexes, so we must fix this.
+# The approach we take is to explicitly repeat rules when a suffix of a rule
+# is a prefix of another rule. In the case of SB7, the end of the rule, an
+# uppercase letter, also happens to match the beginning of the rule. This can
+# in turn be repeated indefinitely. Thus, our actual translation to a regex is:
+#
+# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper}(\p{sb=ATerm}\p{sb=Upper}*
+#
+# It turns out that this is exactly what ICU does, but in their case, they do
+# it automatically. In our case, we connect the chaining rules manually. It's
+# tedious. With that said, we do no implement Unicode line breaking with this
+# approach, which is a far scarier beast. In that case, it would probably be
+# worth writing the code to do what ICU does.
+#
+# In the case of sentence breaks, there aren't *too* many overlaps of this
+# nature. We list them out exhaustively to make this clear, because it's
+# essentially impossible to easily observe this in the regex. (It took me a
+# full day to figure all of this out.) Rules marked with N/A mean that they
+# specify a break, and this strategy only really applies to stringing together
+# non-breaks.
+#
+# SB1 - N/A
+# SB2 - N/A
+# SB3 - None
+# SB4 - N/A
+# SB5 - None
+# SB6 - None
+# SB7 - End overlaps with beginning of SB7
+# SB8 - End overlaps with beginning of SB7
+# SB8a - End overlaps with beginning of SB6, SB8, SB8a, SB9, SB10, SB11
+# SB9 - None
+# SB10 - None
+# SB11 - None
+# SB998 - N/A
+#
+# SB8a is in particular quite tricky to get right without look-ahead, since it
+# allows ping-ponging between match rules SB8a and SB9-11, where SB9-11
+# otherwise indicate that a break has been found. In the regex below, we tackle
+# this by only permitting part of SB8a to match inside our core non-breaking
+# repetition. In particular, we only allow the parts of SB8a to match that
+# permit the non-breaking components to continue. If a part of SB8a matches
+# that guarantees a pop out to SB9-11, (like `STerm STerm`), then we let it
+# happen. This still isn't correct because an SContinue might be seen which
+# would allow moving back into SB998 and thus the non-breaking repetition, so
+# we handle that case as well.
+#
+# Finally, the last complication here is the sprinkling of $Ex* everywhere.
+# This essentially corresponds to the implementation of SB5 by following
+# UAX #29's recommendation in S6.2. Essentially, we use it avoid ever breaking
+# in the middle of a grapheme cluster.
+
+CR="\p{sb=CR}"
+LF="\p{sb=LF}"
+Sep="\p{sb=Sep}"
+Close="\p{sb=Close}"
+Sp="\p{sb=Sp}"
+STerm="\p{sb=STerm}"
+ATerm="\p{sb=ATerm}"
+SContinue="\p{sb=SContinue}"
+Numeric="\p{sb=Numeric}"
+Upper="\p{sb=Upper}"
+Lower="\p{sb=Lower}"
+OLetter="\p{sb=OLetter}"
+
+Ex="[\p{sb=Extend}\p{sb=Format}]"
+ParaSep="[$Sep $CR $LF]"
+SATerm="[$STerm $ATerm]"
+
+LetterSepTerm="[$OLetter $Upper $Lower $ParaSep $SATerm]"
+
+echo "(?x)
+(
+ # SB6
+ $ATerm $Ex*
+ $Numeric
+ |
+ # SB7
+ [$Upper $Lower] $Ex* $ATerm $Ex*
+ $Upper $Ex*
+ # overlap with SB7
+ ($ATerm $Ex* $Upper $Ex*)*
+ |
+ # SB8
+ $ATerm $Ex* $Close* $Ex* $Sp* $Ex*
+ ([^$LetterSepTerm] $Ex*)* $Lower $Ex*
+ # overlap with SB7
+ ($ATerm $Ex* $Upper $Ex*)*
+ |
+ # SB8a
+ $SATerm $Ex* $Close* $Ex* $Sp* $Ex*
+ (
+ $SContinue
+ |
+ $ATerm $Ex*
+ # Permit repetition of SB8a
+ (($Close $Ex*)* ($Sp $Ex*)* $SATerm)*
+ # In order to continue non-breaking matching, we now must observe
+ # a match with a rule that keeps us in SB6-8a. Otherwise, we've entered
+ # one of SB9-11 and know that a break must follow.
+ (
+ # overlap with SB6
+ $Numeric
+ |
+ # overlap with SB8
+ ($Close $Ex*)* ($Sp $Ex*)*
+ ([^$LetterSepTerm] $Ex*)* $Lower $Ex*
+ # overlap with SB7
+ ($ATerm $Ex* $Upper $Ex*)*
+ |
+ # overlap with SB8a
+ ($Close $Ex*)* ($Sp $Ex*)* $SContinue
+ )
+ |
+ $STerm $Ex*
+ # Permit repetition of SB8a
+ (($Close $Ex*)* ($Sp $Ex*)* $SATerm)*
+ # As with ATerm above, in order to continue non-breaking matching, we
+ # must now observe a match with a rule that keeps us out of SB9-11.
+ # For STerm, the only such possibility is to see an SContinue. Anything
+ # else will result in a break.
+ ($Close $Ex*)* ($Sp $Ex*)* $SContinue
+ )
+ |
+ # SB998
+ # The logic behind this catch-all is that if we get to this point and
+ # see a Sep, CR, LF, STerm or ATerm, then it has to fall into one of
+ # SB9, SB10 or SB11. In the cases of SB9-11, we always find a break since
+ # SB11 acts as a catch-all to induce a break following a SATerm that isn't
+ # handled by rules SB6-SB8a.
+ [^$ParaSep $SATerm]
+)*
+# The following collapses rules SB3, SB4, part of SB8a, SB9, SB10 and SB11.
+($SATerm $Ex* ($Close $Ex*)* ($Sp $Ex*)*)* ($CR $LF | $ParaSep)?
+"
diff --git a/scripts/regex/word.sh b/scripts/regex/word.sh
new file mode 100644
index 0000000..78c7a05
--- /dev/null
+++ b/scripts/regex/word.sh
@@ -0,0 +1,111 @@
+#!/bin/sh
+
+# vim: indentexpr= nosmartindent autoindent
+# vim: tabstop=2 shiftwidth=2 softtabstop=2
+
+# See the comments in regex/sentence.sh for the general approach to how this
+# regex was written.
+#
+# Writing the regex for this was *hard*. It took me two days of hacking to get
+# this far, and that was after I had finished the sentence regex, so my brain
+# was fully cached on this. Unlike the sentence regex, the rules in the regex
+# below don't correspond as nicely to the rules in UAX #29. In particular, the
+# UAX #29 rules have a ton of overlap with each other, which requires crazy
+# stuff in the regex. I'm not even sure the regex below is 100% correct or even
+# minimal, however, I did compare this with the ICU word segmenter on a few
+# different corpora, and it produces identical results. (In addition to of
+# course passing the UCD tests.)
+#
+# In general, I consider this approach to be a failure. Firstly, this is
+# clearly a write-only regex. Secondly, building the minimized DFA for this is
+# incredibly slow. Thirdly, the DFA is itself very large (~240KB). Fourthly,
+# reversing this regex (for reverse word iteration) results in a >19MB DFA.
+# Yes. That's MB. Wat. And it took 5 minutes to build.
+#
+# I think we might consider changing our approach to this problem. The normal
+# path I've seen, I think, is to decode codepoints one at a time, and then
+# thread them through a state machine in the code itself. We could take this
+# approach, or possibly combine it with a DFA that tells us which Word_Break
+# value a codepoint has. I'd prefer the latter approach, but it requires adding
+# RegexSet support to regex-automata. Something that should definitely be done,
+# but is a fair amount of work.
+#
+# Gah.
+
+CR="\p{wb=CR}"
+LF="\p{wb=LF}"
+Newline="\p{wb=Newline}"
+ZWJ="\p{wb=ZWJ}"
+RI="\p{wb=Regional_Indicator}"
+Katakana="\p{wb=Katakana}"
+HebrewLet="\p{wb=HebrewLetter}"
+ALetter="\p{wb=ALetter}"
+SingleQuote="\p{wb=SingleQuote}"
+DoubleQuote="\p{wb=DoubleQuote}"
+MidNumLet="\p{wb=MidNumLet}"
+MidLetter="\p{wb=MidLetter}"
+MidNum="\p{wb=MidNum}"
+Numeric="\p{wb=Numeric}"
+ExtendNumLet="\p{wb=ExtendNumLet}"
+WSegSpace="\p{wb=WSegSpace}"
+
+Any="\p{any}"
+Ex="[\p{wb=Extend} \p{wb=Format} $ZWJ]"
+ExtendPict="\p{Extended_Pictographic}"
+AHLetter="[$ALetter $HebrewLet]"
+MidNumLetQ="[$MidNumLet $SingleQuote]"
+
+AHLetterRepeat="$AHLetter $Ex* ([$MidLetter $MidNumLetQ] $Ex* $AHLetter $Ex*)*"
+NumericRepeat="$Numeric $Ex* ([$MidNum $MidNumLetQ] $Ex* $Numeric $Ex*)*"
+
+echo "(?x)
+$CR $LF
+|
+[$Newline $CR $LF]
+|
+$WSegSpace $WSegSpace+
+|
+(
+ ([^$Newline $CR $LF]? $Ex* $ZWJ $ExtendPict $Ex*)+
+ |
+ ($ExtendNumLet $Ex*)* $AHLetter $Ex*
+ (
+ (
+ ($NumericRepeat | $ExtendNumLet $Ex*)*
+ |
+ [$MidLetter $MidNumLetQ] $Ex*
+ )
+ $AHLetter $Ex*
+ )+
+ ($NumericRepeat | $ExtendNumLet $Ex*)*
+ |
+ ($ExtendNumLet $Ex*)* $AHLetter $Ex* ($NumericRepeat | $ExtendNumLet $Ex*)+
+ |
+ ($ExtendNumLet $Ex*)* $Numeric $Ex*
+ (
+ (
+ ($AHLetterRepeat | $ExtendNumLet $Ex*)*
+ |
+ [$MidNum $MidNumLetQ] $Ex*
+ )
+ $Numeric $Ex*
+ )+
+ ($AHLetterRepeat | $ExtendNumLet $Ex*)*
+ |
+ ($ExtendNumLet $Ex*)* $Numeric $Ex* ($AHLetterRepeat | $ExtendNumLet $Ex*)+
+ |
+ $Katakana $Ex*
+ (($Katakana | $ExtendNumLet) $Ex*)+
+ |
+ $ExtendNumLet $Ex*
+ (($ExtendNumLet | $AHLetter | $Numeric | $Katakana) $Ex*)+
+)+
+|
+$HebrewLet $Ex* $SingleQuote $Ex*
+|
+($HebrewLet $Ex* $DoubleQuote $Ex*)+ $HebrewLet $Ex*
+|
+$RI $Ex* $RI $Ex*
+|
+$Any $Ex*
+"
diff --git a/src/ascii.rs b/src/ascii.rs
new file mode 100644
index 0000000..bb2b679
--- /dev/null
+++ b/src/ascii.rs
@@ -0,0 +1,336 @@
+use core::mem;
+
+// The following ~400 lines of code exists for exactly one purpose, which is
+// to optimize this code:
+//
+// byte_slice.iter().position(|&b| b > 0x7F).unwrap_or(byte_slice.len())
+//
+// Yes... Overengineered is a word that comes to mind, but this is effectively
+// a very similar problem to memchr, and virtually nobody has been able to
+// resist optimizing the crap out of that (except for perhaps the BSD and MUSL
+// folks). In particular, this routine makes a very common case (ASCII) very
+// fast, which seems worth it. We do stop short of adding AVX variants of the
+// code below in order to retain our sanity and also to avoid needing to deal
+// with runtime target feature detection. RESIST!
+//
+// In order to understand the SIMD version below, it would be good to read this
+// comment describing how my memchr routine works:
+// https://github.com/BurntSushi/rust-memchr/blob/b0a29f267f4a7fad8ffcc8fe8377a06498202883/src/x86/sse2.rs#L19-L106
+//
+// The primary difference with memchr is that for ASCII, we can do a bit less
+// work. In particular, we don't need to detect the presence of a specific
+// byte, but rather, whether any byte has its most significant bit set. That
+// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
+// _mm_movemask_epi8.
+
+#[cfg(any(test, not(target_arch = "x86_64")))]
+const USIZE_BYTES: usize = mem::size_of::<usize>();
+#[cfg(any(test, not(target_arch = "x86_64")))]
+const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;
+
+// This is a mask where the most significant bit of each byte in the usize
+// is set. We test this bit to determine whether a character is ASCII or not.
+// Namely, a single byte is regarded as an ASCII codepoint if and only if it's
+// most significant bit is not set.
+#[cfg(any(test, not(target_arch = "x86_64")))]
+const ASCII_MASK_U64: u64 = 0x8080808080808080;
+#[cfg(any(test, not(target_arch = "x86_64")))]
+const ASCII_MASK: usize = ASCII_MASK_U64 as usize;
+
+/// Returns the index of the first non ASCII byte in the given slice.
+///
+/// If slice only contains ASCII bytes, then the length of the slice is
+/// returned.
+pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
+ #[cfg(not(target_arch = "x86_64"))]
+ {
+ first_non_ascii_byte_fallback(slice)
+ }
+
+ #[cfg(target_arch = "x86_64")]
+ {
+ first_non_ascii_byte_sse2(slice)
+ }
+}
+
+#[cfg(any(test, not(target_arch = "x86_64")))]
+fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
+ let align = USIZE_BYTES - 1;
+ let start_ptr = slice.as_ptr();
+ let end_ptr = slice[slice.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if slice.len() < USIZE_BYTES {
+ return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
+ }
+
+ let chunk = read_unaligned_usize(ptr);
+ let mask = chunk & ASCII_MASK;
+ if mask != 0 {
+ return first_non_ascii_byte_mask(mask);
+ }
+
+ ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
+ if slice.len() >= FALLBACK_LOOP_SIZE {
+ while ptr <= ptr_sub(end_ptr, FALLBACK_LOOP_SIZE) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr as *const usize);
+ let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
+ if (a | b) & ASCII_MASK != 0 {
+ // What a kludge. We wrap the position finding code into
+ // a non-inlineable function, which makes the codegen in
+ // the tight loop above a bit better by avoiding a
+ // couple extra movs. We pay for it by two additional
+ // stores, but only in the case of finding a non-ASCII
+ // byte.
+ #[inline(never)]
+ unsafe fn findpos(
+ start_ptr: *const u8,
+ ptr: *const u8,
+ ) -> usize {
+ let a = *(ptr as *const usize);
+ let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
+
+ let mut at = sub(ptr, start_ptr);
+ let maska = a & ASCII_MASK;
+ if maska != 0 {
+ return at + first_non_ascii_byte_mask(maska);
+ }
+
+ at += USIZE_BYTES;
+ let maskb = b & ASCII_MASK;
+ debug_assert!(maskb != 0);
+ return at + first_non_ascii_byte_mask(maskb);
+ }
+ return findpos(start_ptr, ptr);
+ }
+ ptr = ptr_add(ptr, FALLBACK_LOOP_SIZE);
+ }
+ }
+ first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
+ }
+}
+
+#[cfg(target_arch = "x86_64")]
+fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
+ use core::arch::x86_64::*;
+
+ const VECTOR_SIZE: usize = mem::size_of::<__m128i>();
+ const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+ const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+
+ let start_ptr = slice.as_ptr();
+ let end_ptr = slice[slice.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if slice.len() < VECTOR_SIZE {
+ return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
+ }
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(chunk);
+ if mask != 0 {
+ return mask.trailing_zeros() as usize;
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ if slice.len() >= VECTOR_LOOP_SIZE {
+ while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let c =
+ _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
+ let d =
+ _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
+
+ let or1 = _mm_or_si128(a, b);
+ let or2 = _mm_or_si128(c, d);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask = _mm_movemask_epi8(a);
+ if mask != 0 {
+ return at + mask.trailing_zeros() as usize;
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(b);
+ if mask != 0 {
+ return at + mask.trailing_zeros() as usize;
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(c);
+ if mask != 0 {
+ return at + mask.trailing_zeros() as usize;
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(d);
+ debug_assert!(mask != 0);
+ return at + mask.trailing_zeros() as usize;
+ }
+ ptr = ptr_add(ptr, VECTOR_LOOP_SIZE);
+ }
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(chunk);
+ if mask != 0 {
+ return sub(ptr, start_ptr) + mask.trailing_zeros() as usize;
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
+ }
+}
+
+#[inline(always)]
+unsafe fn first_non_ascii_byte_slow(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+) -> usize {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr < end_ptr {
+ if *ptr > 0x7F {
+ return sub(ptr, start_ptr);
+ }
+ ptr = ptr.offset(1);
+ }
+ sub(end_ptr, start_ptr)
+}
+
+/// Compute the position of the first ASCII byte in the given mask.
+///
+/// The mask should be computed by `chunk & ASCII_MASK`, where `chunk` is
+/// 8 contiguous bytes of the slice being checked where *at least* one of those
+/// bytes is not an ASCII byte.
+///
+/// The position returned is always in the inclusive range [0, 7].
+#[cfg(any(test, not(target_arch = "x86_64")))]
+fn first_non_ascii_byte_mask(mask: usize) -> usize {
+ #[cfg(target_endian = "little")]
+ {
+ mask.trailing_zeros() as usize / 8
+ }
+ #[cfg(target_endian = "big")]
+ {
+ mask.leading_zeros() as usize / 8
+ }
+}
+
+/// Increment the given pointer by the given amount.
+unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
+ debug_assert!(amt < ::core::isize::MAX as usize);
+ ptr.offset(amt as isize)
+}
+
+/// Decrement the given pointer by the given amount.
+unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
+ debug_assert!(amt < ::core::isize::MAX as usize);
+ ptr.offset((amt as isize).wrapping_neg())
+}
+
+#[cfg(any(test, not(target_arch = "x86_64")))]
+unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
+ use core::ptr;
+
+ let mut n: usize = 0;
+ ptr::copy_nonoverlapping(ptr, &mut n as *mut _ as *mut u8, USIZE_BYTES);
+ n
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Our testing approach here is to try and exhaustively test every case.
+ // This includes the position at which a non-ASCII byte occurs in addition
+ // to the alignment of the slice that we're searching.
+
+ #[test]
+ fn positive_fallback_forward() {
+ for i in 0..517 {
+ let s = "a".repeat(i);
+ assert_eq!(
+ i,
+ first_non_ascii_byte_fallback(s.as_bytes()),
+ "i: {:?}, len: {:?}, s: {:?}",
+ i,
+ s.len(),
+ s
+ );
+ }
+ }
+
+ #[test]
+ #[cfg(target_arch = "x86_64")]
+ fn positive_sse2_forward() {
+ for i in 0..517 {
+ let b = "a".repeat(i).into_bytes();
+ assert_eq!(b.len(), first_non_ascii_byte_sse2(&b));
+ }
+ }
+
+ #[test]
+ fn negative_fallback_forward() {
+ for i in 0..517 {
+ for align in 0..65 {
+ let mut s = "a".repeat(i);
+ s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
+ let s = s.get(align..).unwrap_or("");
+ assert_eq!(
+ i.saturating_sub(align),
+ first_non_ascii_byte_fallback(s.as_bytes()),
+ "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
+ i,
+ align,
+ s.len(),
+ s
+ );
+ }
+ }
+ }
+
+ #[test]
+ #[cfg(target_arch = "x86_64")]
+ fn negative_sse2_forward() {
+ for i in 0..517 {
+ for align in 0..65 {
+ let mut s = "a".repeat(i);
+ s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
+ let s = s.get(align..).unwrap_or("");
+ assert_eq!(
+ i.saturating_sub(align),
+ first_non_ascii_byte_sse2(s.as_bytes()),
+ "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
+ i,
+ align,
+ s.len(),
+ s
+ );
+ }
+ }
+ }
+}
diff --git a/src/bstr.rs b/src/bstr.rs
new file mode 100644
index 0000000..1e3c91b
--- /dev/null
+++ b/src/bstr.rs
@@ -0,0 +1,74 @@
+use core::mem;
+
+/// A wrapper for `&[u8]` that provides convenient string oriented trait impls.
+///
+/// If you need ownership or a growable byte string buffer, then use
+/// [`BString`](struct.BString.html).
+///
+/// Using a `&BStr` is just like using a `&[u8]`, since `BStr`
+/// implements `Deref` to `[u8]`. So all methods available on `[u8]`
+/// are also available on `BStr`.
+///
+/// # Representation
+///
+/// A `&BStr` has the same representation as a `&str`. That is, a `&BStr` is
+/// a fat pointer which consists of a pointer to some bytes and a length.
+///
+/// # Trait implementations
+///
+/// The `BStr` type has a number of trait implementations, and in particular,
+/// defines equality and ordinal comparisons between `&BStr`, `&str` and
+/// `&[u8]` for convenience.
+///
+/// The `Debug` implementation for `BStr` shows its bytes as a normal string.
+/// For invalid UTF-8, hex escape sequences are used.
+///
+/// The `Display` implementation behaves as if `BStr` were first lossily
+/// converted to a `str`. Invalid UTF-8 bytes are substituted with the Unicode
+/// replacement codepoint, which looks like this: �.
+#[derive(Hash)]
+#[repr(transparent)]
+pub struct BStr {
+ pub(crate) bytes: [u8],
+}
+
+impl BStr {
+ #[inline]
+ pub(crate) fn new<B: ?Sized + AsRef<[u8]>>(bytes: &B) -> &BStr {
+ BStr::from_bytes(bytes.as_ref())
+ }
+
+ #[inline]
+ pub(crate) fn new_mut<B: ?Sized + AsMut<[u8]>>(
+ bytes: &mut B,
+ ) -> &mut BStr {
+ BStr::from_bytes_mut(bytes.as_mut())
+ }
+
+ #[inline]
+ pub(crate) fn from_bytes(slice: &[u8]) -> &BStr {
+ unsafe { mem::transmute(slice) }
+ }
+
+ #[inline]
+ pub(crate) fn from_bytes_mut(slice: &mut [u8]) -> &mut BStr {
+ unsafe { mem::transmute(slice) }
+ }
+
+ #[inline]
+ #[cfg(feature = "std")]
+ pub(crate) fn from_boxed_bytes(slice: Box<[u8]>) -> Box<BStr> {
+ unsafe { Box::from_raw(Box::into_raw(slice) as _) }
+ }
+
+ #[inline]
+ #[cfg(feature = "std")]
+ pub(crate) fn into_boxed_bytes(slice: Box<BStr>) -> Box<[u8]> {
+ unsafe { Box::from_raw(Box::into_raw(slice) as _) }
+ }
+
+ #[inline]
+ pub(crate) fn as_bytes(&self) -> &[u8] {
+ &self.bytes
+ }
+}
diff --git a/src/bstring.rs b/src/bstring.rs
new file mode 100644
index 0000000..f04c651
--- /dev/null
+++ b/src/bstring.rs
@@ -0,0 +1,59 @@
+use bstr::BStr;
+
+/// A wrapper for `Vec<u8>` that provides convenient string oriented trait
+/// impls.
+///
+/// A `BString` has ownership over its contents and corresponds to
+/// a growable or shrinkable buffer. Its borrowed counterpart is a
+/// [`BStr`](struct.BStr.html), called a byte string slice.
+///
+/// Using a `BString` is just like using a `Vec<u8>`, since `BString`
+/// implements `Deref` to `Vec<u8>`. So all methods available on `Vec<u8>`
+/// are also available on `BString`.
+///
+/// # Examples
+///
+/// You can create a new `BString` from a `Vec<u8>` via a `From` impl:
+///
+/// ```
+/// use bstr::BString;
+///
+/// let s = BString::from("Hello, world!");
+/// ```
+///
+/// # Deref
+///
+/// The `BString` type implements `Deref` and `DerefMut`, where the target
+/// types are `&Vec<u8>` and `&mut Vec<u8>`, respectively. `Deref` permits all of the
+/// methods defined on `Vec<u8>` to be implicitly callable on any `BString`.
+///
+/// For more information about how deref works, see the documentation for the
+/// [`std::ops::Deref`](https://doc.rust-lang.org/std/ops/trait.Deref.html)
+/// trait.
+///
+/// # Representation
+///
+/// A `BString` has the same representation as a `Vec<u8>` and a `String`.
+/// That is, it is made up of three word sized components: a pointer to a
+/// region of memory containing the bytes, a length and a capacity.
+#[derive(Clone, Hash)]
+pub struct BString {
+ pub(crate) bytes: Vec<u8>,
+}
+
+impl BString {
+ #[inline]
+ pub(crate) fn as_bytes(&self) -> &[u8] {
+ &self.bytes
+ }
+
+ #[inline]
+ pub(crate) fn as_bstr(&self) -> &BStr {
+ BStr::new(&self.bytes)
+ }
+
+ #[inline]
+ pub(crate) fn as_mut_bstr(&mut self) -> &mut BStr {
+ BStr::new_mut(&mut self.bytes)
+ }
+}
diff --git a/src/byteset/mod.rs b/src/byteset/mod.rs
new file mode 100644
index 0000000..969b0e3
--- /dev/null
+++ b/src/byteset/mod.rs
@@ -0,0 +1,115 @@
+use memchr::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+mod scalar;
+
+#[inline]
+fn build_table(byteset: &[u8]) -> [u8; 256] {
+ let mut table = [0u8; 256];
+ for &b in byteset {
+ table[b as usize] = 1;
+ }
+ table
+}
+
+#[inline]
+pub(crate) fn find(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ match byteset.len() {
+ 0 => return None,
+ 1 => memchr(byteset[0], haystack),
+ 2 => memchr2(byteset[0], byteset[1], haystack),
+ 3 => memchr3(byteset[0], byteset[1], byteset[2], haystack),
+ _ => {
+ let table = build_table(byteset);
+ scalar::forward_search_bytes(haystack, |b| table[b as usize] != 0)
+ }
+ }
+}
+
+#[inline]
+pub(crate) fn rfind(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ match byteset.len() {
+ 0 => return None,
+ 1 => memrchr(byteset[0], haystack),
+ 2 => memrchr2(byteset[0], byteset[1], haystack),
+ 3 => memrchr3(byteset[0], byteset[1], byteset[2], haystack),
+ _ => {
+ let table = build_table(byteset);
+ scalar::reverse_search_bytes(haystack, |b| table[b as usize] != 0)
+ }
+ }
+}
+
+#[inline]
+pub(crate) fn find_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ if haystack.is_empty() {
+ return None;
+ }
+ match byteset.len() {
+ 0 => return Some(0),
+ 1 => scalar::inv_memchr(byteset[0], haystack),
+ 2 => scalar::forward_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1]
+ }),
+ 3 => scalar::forward_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1] && b != byteset[2]
+ }),
+ _ => {
+ let table = build_table(byteset);
+ scalar::forward_search_bytes(haystack, |b| table[b as usize] == 0)
+ }
+ }
+}
+#[inline]
+pub(crate) fn rfind_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ if haystack.is_empty() {
+ return None;
+ }
+ match byteset.len() {
+ 0 => return Some(haystack.len() - 1),
+ 1 => scalar::inv_memrchr(byteset[0], haystack),
+ 2 => scalar::reverse_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1]
+ }),
+ 3 => scalar::reverse_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1] && b != byteset[2]
+ }),
+ _ => {
+ let table = build_table(byteset);
+ scalar::reverse_search_bytes(haystack, |b| table[b as usize] == 0)
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+
+ quickcheck! {
+ fn qc_byteset_forward_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::find(&haystack, &needles)
+ == haystack.iter().position(|b| needles.contains(b))
+ }
+ fn qc_byteset_backwards_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::rfind(&haystack, &needles)
+ == haystack.iter().rposition(|b| needles.contains(b))
+ }
+ fn qc_byteset_forward_not_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::find_not(&haystack, &needles)
+ == haystack.iter().position(|b| !needles.contains(b))
+ }
+ fn qc_byteset_backwards_not_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::rfind_not(&haystack, &needles)
+ == haystack.iter().rposition(|b| !needles.contains(b))
+ }
+ }
+}
diff --git a/src/byteset/scalar.rs b/src/byteset/scalar.rs
new file mode 100644
index 0000000..3fe1f53
--- /dev/null
+++ b/src/byteset/scalar.rs
@@ -0,0 +1,295 @@
+// This is adapted from `fallback.rs` from rust-memchr. It's modified to return
+// the 'inverse' query of memchr, e.g. finding the first byte not in the provided
+// set. This is simple for the 1-byte case.
+
+use core::cmp;
+use core::usize;
+
+#[cfg(target_pointer_width = "32")]
+const USIZE_BYTES: usize = 4;
+
+#[cfg(target_pointer_width = "64")]
+const USIZE_BYTES: usize = 8;
+
+// The number of bytes to loop at in one iteration of memchr/memrchr.
+const LOOP_SIZE: usize = 2 * USIZE_BYTES;
+
+/// Repeat the given byte into a word size number. That is, every 8 bits
+/// is equivalent to the given byte. For example, if `b` is `\x4E` or
+/// `01001110` in binary, then the returned value on a 32-bit system would be:
+/// `01001110_01001110_01001110_01001110`.
+#[inline(always)]
+fn repeat_byte(b: u8) -> usize {
+ (b as usize) * (usize::MAX / 255)
+}
+
+pub fn inv_memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte != n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = read_unaligned_usize(ptr);
+ if (chunk ^ vn1) != 0 {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr as *const usize);
+ let b = *(ptr.add(USIZE_BYTES) as *const usize);
+ let eqa = (a ^ vn1) != 0;
+ let eqb = (b ^ vn1) != 0;
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.add(LOOP_SIZE);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Return the last index not matching the byte `x` in `text`.
+pub fn inv_memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte != n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+ let end_ptr = haystack[haystack.len()..].as_ptr();
+ let mut ptr = end_ptr;
+
+ unsafe {
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = read_unaligned_usize(ptr.sub(USIZE_BYTES));
+ if (chunk ^ vn1) != 0 {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = (end_ptr as usize & !align) as *const u8;
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
+ let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
+ let eqa = (a ^ vn1) != 0;
+ let eqb = (b ^ vn1) != 0;
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.sub(loop_size);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+#[inline(always)]
+unsafe fn forward_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr < end_ptr {
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ None
+}
+
+#[inline(always)]
+unsafe fn reverse_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ None
+}
+
+unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
+ (ptr as *const usize).read_unaligned()
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
+
+/// Safe wrapper around `forward_search`
+#[inline]
+pub(crate) fn forward_search_bytes<F: Fn(u8) -> bool>(
+ s: &[u8],
+ confirm: F,
+) -> Option<usize> {
+ unsafe {
+ let start = s.as_ptr();
+ let end = start.add(s.len());
+ forward_search(start, end, start, confirm)
+ }
+}
+
+/// Safe wrapper around `reverse_search`
+#[inline]
+pub(crate) fn reverse_search_bytes<F: Fn(u8) -> bool>(
+ s: &[u8],
+ confirm: F,
+) -> Option<usize> {
+ unsafe {
+ let start = s.as_ptr();
+ let end = start.add(s.len());
+ reverse_search(start, end, end, confirm)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{inv_memchr, inv_memrchr};
+ // search string, search byte, inv_memchr result, inv_memrchr result.
+ // these are expanded into a much larger set of tests in build_tests
+ const TESTS: &[(&[u8], u8, usize, usize)] = &[
+ (b"z", b'a', 0, 0),
+ (b"zz", b'a', 0, 1),
+ (b"aza", b'a', 1, 1),
+ (b"zaz", b'a', 0, 2),
+ (b"zza", b'a', 0, 1),
+ (b"zaa", b'a', 0, 0),
+ (b"zzz", b'a', 0, 2),
+ ];
+
+ type TestCase = (Vec<u8>, u8, Option<(usize, usize)>);
+
+ fn build_tests() -> Vec<TestCase> {
+ let mut result = vec![];
+ for &(search, byte, fwd_pos, rev_pos) in TESTS {
+ result.push((search.to_vec(), byte, Some((fwd_pos, rev_pos))));
+ for i in 1..515 {
+ // add a bunch of copies of the search byte to the end.
+ let mut suffixed: Vec<u8> = search.into();
+ suffixed.extend(std::iter::repeat(byte).take(i));
+ result.push((suffixed, byte, Some((fwd_pos, rev_pos))));
+
+ // add a bunch of copies of the search byte to the start.
+ let mut prefixed: Vec<u8> =
+ std::iter::repeat(byte).take(i).collect();
+ prefixed.extend(search);
+ result.push((
+ prefixed,
+ byte,
+ Some((fwd_pos + i, rev_pos + i)),
+ ));
+
+ // add a bunch of copies of the search byte to both ends.
+ let mut surrounded: Vec<u8> =
+ std::iter::repeat(byte).take(i).collect();
+ surrounded.extend(search);
+ surrounded.extend(std::iter::repeat(byte).take(i));
+ result.push((
+ surrounded,
+ byte,
+ Some((fwd_pos + i, rev_pos + i)),
+ ));
+ }
+ }
+
+ // build non-matching tests for several sizes
+ for i in 0..515 {
+ result.push((
+ std::iter::repeat(b'\0').take(i).collect(),
+ b'\0',
+ None,
+ ));
+ }
+
+ result
+ }
+
+ #[test]
+ fn test_inv_memchr() {
+ use {ByteSlice, B};
+ for (search, byte, matching) in build_tests() {
+ assert_eq!(
+ inv_memchr(byte, &search),
+ matching.map(|m| m.0),
+ "inv_memchr when searching for {:?} in {:?}",
+ byte as char,
+ // better printing
+ B(&search).as_bstr(),
+ );
+ assert_eq!(
+ inv_memrchr(byte, &search),
+ matching.map(|m| m.1),
+ "inv_memrchr when searching for {:?} in {:?}",
+ byte as char,
+ // better printing
+ B(&search).as_bstr(),
+ );
+ // Test a rather large number off offsets for potential alignment issues
+ for offset in 1..130 {
+ if offset >= search.len() {
+ break;
+ }
+ // If this would cause us to shift the results off the end, skip
+ // it so that we don't have to recompute them.
+ if let Some((f, r)) = matching {
+ if offset > f || offset > r {
+ break;
+ }
+ }
+ let realigned = &search[offset..];
+
+ let forward_pos = matching.map(|m| m.0 - offset);
+ let reverse_pos = matching.map(|m| m.1 - offset);
+
+ assert_eq!(
+ inv_memchr(byte, &realigned),
+ forward_pos,
+ "inv_memchr when searching (realigned by {}) for {:?} in {:?}",
+ offset,
+ byte as char,
+ realigned.as_bstr(),
+ );
+ assert_eq!(
+ inv_memrchr(byte, &realigned),
+ reverse_pos,
+ "inv_memrchr when searching (realigned by {}) for {:?} in {:?}",
+ offset,
+ byte as char,
+ realigned.as_bstr(),
+ );
+ }
+ }
+ }
+}
diff --git a/src/cow.rs b/src/cow.rs
new file mode 100644
index 0000000..1556353
--- /dev/null
+++ b/src/cow.rs
@@ -0,0 +1,84 @@
+use core::ops;
+#[cfg(feature = "std")]
+use std::borrow::Cow;
+
+/// A specialized copy-on-write byte string.
+///
+/// The purpose of this type is to permit usage of a "borrowed or owned
+/// byte string" in a way that keeps std/no-std compatibility. That is, in
+/// no-std mode, this type devolves into a simple &[u8] with no owned variant
+/// availble.
+#[derive(Clone, Debug)]
+pub struct CowBytes<'a>(Imp<'a>);
+
+#[cfg(feature = "std")]
+#[derive(Clone, Debug)]
+struct Imp<'a>(Cow<'a, [u8]>);
+
+#[cfg(not(feature = "std"))]
+#[derive(Clone, Debug)]
+struct Imp<'a>(&'a [u8]);
+
+impl<'a> ops::Deref for CowBytes<'a> {
+ type Target = [u8];
+
+ fn deref(&self) -> &[u8] {
+ self.as_slice()
+ }
+}
+
+impl<'a> CowBytes<'a> {
+ /// Create a new borrowed CowBytes.
+ pub fn new<B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> CowBytes<'a> {
+ CowBytes(Imp::new(bytes.as_ref()))
+ }
+
+ /// Create a new owned CowBytes.
+ #[cfg(feature = "std")]
+ pub fn new_owned(bytes: Vec<u8>) -> CowBytes<'static> {
+ CowBytes(Imp(Cow::Owned(bytes)))
+ }
+
+ /// Return a borrowed byte string, regardless of whether this is an owned
+ /// or borrowed byte string internally.
+ pub fn as_slice(&self) -> &[u8] {
+ self.0.as_slice()
+ }
+
+ /// Return an owned version of this copy-on-write byte string.
+ ///
+ /// If this is already an owned byte string internally, then this is a
+ /// no-op. Otherwise, the internal byte string is copied.
+ #[cfg(feature = "std")]
+ pub fn into_owned(self) -> CowBytes<'static> {
+ match (self.0).0 {
+ Cow::Borrowed(b) => CowBytes::new_owned(b.to_vec()),
+ Cow::Owned(b) => CowBytes::new_owned(b),
+ }
+ }
+}
+
+impl<'a> Imp<'a> {
+ #[cfg(feature = "std")]
+ pub fn new(bytes: &'a [u8]) -> Imp<'a> {
+ Imp(Cow::Borrowed(bytes))
+ }
+
+ #[cfg(not(feature = "std"))]
+ pub fn new(bytes: &'a [u8]) -> Imp<'a> {
+ Imp(bytes)
+ }
+
+ #[cfg(feature = "std")]
+ pub fn as_slice(&self) -> &[u8] {
+ match self.0 {
+ Cow::Owned(ref x) => x,
+ Cow::Borrowed(x) => x,
+ }
+ }
+
+ #[cfg(not(feature = "std"))]
+ pub fn as_slice(&self) -> &[u8] {
+ self.0
+ }
+}
diff --git a/src/ext_slice.rs b/src/ext_slice.rs
new file mode 100644
index 0000000..fa08190
--- /dev/null
+++ b/src/ext_slice.rs
@@ -0,0 +1,3695 @@
+#[cfg(feature = "std")]
+use std::borrow::Cow;
+#[cfg(feature = "std")]
+use std::ffi::OsStr;
+#[cfg(feature = "std")]
+use std::path::Path;
+
+use core::{cmp, iter, ops, ptr, slice, str};
+use memchr::{memchr, memrchr};
+
+use ascii;
+use bstr::BStr;
+use byteset;
+#[cfg(feature = "std")]
+use ext_vec::ByteVec;
+use search::{PrefilterState, TwoWay};
+#[cfg(feature = "unicode")]
+use unicode::{
+ whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
+ SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
+ WordsWithBreaks,
+};
+use utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error};
+
+/// A short-hand constructor for building a `&[u8]`.
+///
+/// This idiosyncratic constructor is useful for concisely building byte string
+/// slices. Its primary utility is in conveniently writing byte string literals
+/// in a uniform way. For example, consider this code that does not compile:
+///
+/// ```ignore
+/// let strs = vec![b"a", b"xy"];
+/// ```
+///
+/// The above code doesn't compile because the type of the byte string literal
+/// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is
+/// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored
+/// in the same `Vec`. (This is dissimilar from normal Unicode string slices,
+/// where both `"a"` and `"xy"` have the same type of `&'static str`.)
+///
+/// One way of getting the above code to compile is to convert byte strings to
+/// slices. You might try this:
+///
+/// ```ignore
+/// let strs = vec![&b"a", &b"xy"];
+/// ```
+///
+/// But this just creates values with type `& &'static [u8; 1]` and
+/// `& &'static [u8; 2]`. Instead, you need to force the issue like so:
+///
+/// ```
+/// let strs = vec![&b"a"[..], &b"xy"[..]];
+/// // or
+/// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
+/// ```
+///
+/// But neither of these are particularly convenient to type, especially when
+/// it's something as common as a string literal. Thus, this constructor
+/// permits writing the following instead:
+///
+/// ```
+/// use bstr::B;
+///
+/// let strs = vec![B("a"), B(b"xy")];
+/// ```
+///
+/// Notice that this also lets you mix and match both string literals and byte
+/// string literals. This can be quite convenient!
+#[allow(non_snake_case)]
+#[inline]
+pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] {
+ bytes.as_ref()
+}
+
+impl ByteSlice for [u8] {
+ #[inline]
+ fn as_bytes(&self) -> &[u8] {
+ self
+ }
+
+ #[inline]
+ fn as_bytes_mut(&mut self) -> &mut [u8] {
+ self
+ }
+}
+
+/// Ensure that callers cannot implement `ByteSlice` by making an
+/// umplementable trait its super trait.
+pub trait Sealed {}
+impl Sealed for [u8] {}
+
+/// A trait that extends `&[u8]` with string oriented methods.
+pub trait ByteSlice: Sealed {
+ /// A method for accessing the raw bytes of this type. This is always a
+ /// no-op and callers shouldn't care about it. This only exists for making
+ /// the extension trait work.
+ #[doc(hidden)]
+ fn as_bytes(&self) -> &[u8];
+
+ /// A method for accessing the raw bytes of this type, mutably. This is
+ /// always a no-op and callers shouldn't care about it. This only exists
+ /// for making the extension trait work.
+ #[doc(hidden)]
+ fn as_bytes_mut(&mut self) -> &mut [u8];
+
+ /// Return this byte slice as a `&BStr`.
+ ///
+ /// Use `&BStr` is useful because of its `fmt::Debug` representation
+ /// and various other trait implementations (such as `PartialEq` and
+ /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
+ /// shows its bytes as a normal string. For invalid UTF-8, hex escape
+ /// sequences are used.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// println!("{:?}", b"foo\xFFbar".as_bstr());
+ /// ```
+ #[inline]
+ fn as_bstr(&self) -> &BStr {
+ BStr::new(self.as_bytes())
+ }
+
+ /// Return this byte slice as a `&mut BStr`.
+ ///
+ /// Use `&mut BStr` is useful because of its `fmt::Debug` representation
+ /// and various other trait implementations (such as `PartialEq` and
+ /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
+ /// shows its bytes as a normal string. For invalid UTF-8, hex escape
+ /// sequences are used.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut bytes = *b"foo\xFFbar";
+ /// println!("{:?}", &mut bytes.as_bstr_mut());
+ /// ```
+ #[inline]
+ fn as_bstr_mut(&mut self) -> &mut BStr {
+ BStr::new_mut(self.as_bytes_mut())
+ }
+
+ /// Create an immutable byte string from an OS string slice.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns `None` if the given OS string is not valid UTF-8. (For
+ /// example, on Windows, file paths are allowed to be a sequence of
+ /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
+ /// valid UTF-8.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let os_str = OsStr::new("foo");
+ /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(os_str: &OsStr) -> Option<&[u8]> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Some(os_str.as_bytes())
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(os_str: &OsStr) -> Option<&[u8]> {
+ os_str.to_str().map(|s| s.as_bytes())
+ }
+
+ imp(os_str)
+ }
+
+ /// Create an immutable byte string from a file path.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns `None` if the given path is not valid UTF-8. (For example,
+ /// on Windows, file paths are allowed to be a sequence of arbitrary 16-bit
+ /// integers. Not all such sequences can be transcoded to valid UTF-8.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::path::Path;
+ ///
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let path = Path::new("foo");
+ /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn from_path(path: &Path) -> Option<&[u8]> {
+ Self::from_os_str(path.as_os_str())
+ }
+
+ /// Safely convert this byte string into a `&str` if it's valid UTF-8.
+ ///
+ /// If this byte string is not valid UTF-8, then an error is returned. The
+ /// error returned indicates the first invalid byte found and the length
+ /// of the error.
+ ///
+ /// In cases where a lossy conversion to `&str` is acceptable, then use one
+ /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
+ /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
+ /// methods.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice, ByteVec};
+ ///
+ /// # fn example() -> Result<(), bstr::Utf8Error> {
+ /// let s = B("☃βツ").to_str()?;
+ /// assert_eq!("☃βツ", s);
+ ///
+ /// let mut bstring = <Vec<u8>>::from("☃βツ");
+ /// bstring.push(b'\xFF');
+ /// let err = bstring.to_str().unwrap_err();
+ /// assert_eq!(8, err.valid_up_to());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ #[inline]
+ fn to_str(&self) -> Result<&str, Utf8Error> {
+ utf8::validate(self.as_bytes()).map(|_| {
+ // SAFETY: This is safe because of the guarantees provided by
+ // utf8::validate.
+ unsafe { str::from_utf8_unchecked(self.as_bytes()) }
+ })
+ }
+
+ /// Unsafely convert this byte string into a `&str`, without checking for
+ /// valid UTF-8.
+ ///
+ /// # Safety
+ ///
+ /// Callers *must* ensure that this byte string is valid UTF-8 before
+ /// calling this method. Converting a byte string into a `&str` that is
+ /// not valid UTF-8 is considered undefined behavior.
+ ///
+ /// This routine is useful in performance sensitive contexts where the
+ /// UTF-8 validity of the byte string is already known and it is
+ /// undesirable to pay the cost of an additional UTF-8 validation check
+ /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// // SAFETY: This is safe because string literals are guaranteed to be
+ /// // valid UTF-8 by the Rust compiler.
+ /// let s = unsafe { B("☃βツ").to_str_unchecked() };
+ /// assert_eq!("☃βツ", s);
+ /// ```
+ #[inline]
+ unsafe fn to_str_unchecked(&self) -> &str {
+ str::from_utf8_unchecked(self.as_bytes())
+ }
+
+ /// Convert this byte string to a valid UTF-8 string by replacing invalid
+ /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
+ ///
+ /// If the byte string is already valid UTF-8, then no copying or
+ /// allocation is performed and a borrrowed string slice is returned. If
+ /// the byte string is not valid UTF-8, then an owned string buffer is
+ /// returned with invalid bytes replaced by the replacement codepoint.
+ ///
+ /// This method uses the "substitution of maximal subparts" (Unicode
+ /// Standard, Chapter 3, Section 9) strategy for inserting the replacement
+ /// codepoint. Specifically, a replacement codepoint is inserted whenever a
+ /// byte is found that cannot possibly lead to a valid code unit sequence.
+ /// If there were previous bytes that represented a prefix of a well-formed
+ /// code unit sequence, then all of those bytes are substituted with a
+ /// single replacement codepoint. The "substitution of maximal subparts"
+ /// strategy is the same strategy used by
+ /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
+ /// For a more precise description of the maximal subpart strategy, see
+ /// the Unicode Standard, Chapter 3, Section 9. See also
+ /// [Public Review Issue #121](http://www.unicode.org/review/pr-121.html).
+ ///
+ /// N.B. Rust's standard library also appears to use the same strategy,
+ /// but it does not appear to be an API guarantee.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ ///
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut bstring = <Vec<u8>>::from("☃βツ");
+ /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
+ ///
+ /// // Add a byte that makes the sequence invalid.
+ /// bstring.push(b'\xFF');
+ /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
+ /// ```
+ ///
+ /// This demonstrates the "maximal subpart" substitution logic.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// // \x61 is the ASCII codepoint for 'a'.
+ /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
+ /// // \xE1\x80 is a valid 2-byte code unit prefix.
+ /// // \xC2 is a valid 1-byte code unit prefix.
+ /// // \x62 is the ASCII codepoint for 'b'.
+ /// //
+ /// // In sum, each of the prefixes is replaced by a single replacement
+ /// // codepoint since none of the prefixes are properly completed. This
+ /// // is in contrast to other strategies that might insert a replacement
+ /// // codepoint for every single byte.
+ /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
+ /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_str_lossy(&self) -> Cow<str> {
+ match utf8::validate(self.as_bytes()) {
+ Ok(()) => {
+ // SAFETY: This is safe because of the guarantees provided by
+ // utf8::validate.
+ unsafe {
+ Cow::Borrowed(str::from_utf8_unchecked(self.as_bytes()))
+ }
+ }
+ Err(err) => {
+ let mut lossy = String::with_capacity(self.as_bytes().len());
+ let (valid, after) =
+ self.as_bytes().split_at(err.valid_up_to());
+ // SAFETY: This is safe because utf8::validate guarantees
+ // that all of `valid` is valid UTF-8.
+ lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
+ lossy.push_str("\u{FFFD}");
+ if let Some(len) = err.error_len() {
+ after[len..].to_str_lossy_into(&mut lossy);
+ }
+ Cow::Owned(lossy)
+ }
+ }
+ }
+
+ /// Copy the contents of this byte string into the given owned string
+ /// buffer, while replacing invalid UTF-8 code unit sequences with the
+ /// Unicode replacement codepoint (`U+FFFD`).
+ ///
+ /// This method uses the same "substitution of maximal subparts" strategy
+ /// for inserting the replacement codepoint as the
+ /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
+ ///
+ /// This routine is useful for amortizing allocation. However, unlike
+ /// `to_str_lossy`, this routine will _always_ copy the contents of this
+ /// byte string into the destination buffer, even if this byte string is
+ /// valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ ///
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut bstring = <Vec<u8>>::from("☃βツ");
+ /// // Add a byte that makes the sequence invalid.
+ /// bstring.push(b'\xFF');
+ ///
+ /// let mut dest = String::new();
+ /// bstring.to_str_lossy_into(&mut dest);
+ /// assert_eq!("☃βツ\u{FFFD}", dest);
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_str_lossy_into(&self, dest: &mut String) {
+ let mut bytes = self.as_bytes();
+ dest.reserve(bytes.len());
+ loop {
+ match utf8::validate(bytes) {
+ Ok(()) => {
+ // SAFETY: This is safe because utf8::validate guarantees
+ // that all of `bytes` is valid UTF-8.
+ dest.push_str(unsafe { str::from_utf8_unchecked(bytes) });
+ break;
+ }
+ Err(err) => {
+ let (valid, after) = bytes.split_at(err.valid_up_to());
+ // SAFETY: This is safe because utf8::validate guarantees
+ // that all of `valid` is valid UTF-8.
+ dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
+ dest.push_str("\u{FFFD}");
+ match err.error_len() {
+ None => break,
+ Some(len) => bytes = &after[len..],
+ }
+ }
+ }
+ }
+ }
+
+ /// Create an OS string slice from this byte string.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns a UTF-8 decoding error if this byte string is not valid
+ /// UTF-8. (For example, on Windows, file paths are allowed to be a
+ /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
+ /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
+ /// 16-bit integers.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
+ /// assert_eq!(os_str, "foo");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Ok(OsStr::from_bytes(bytes))
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
+ bytes.to_str().map(OsStr::new)
+ }
+
+ imp(self.as_bytes())
+ }
+
+ /// Lossily create an OS string slice from this byte string.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this will perform a UTF-8 check and lossily convert this byte string
+ /// into valid UTF-8 using the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths on
+ /// non-Unix systems such as Windows, where file paths are an arbitrary
+ /// sequence of 16-bit integers.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let os_str = b"foo\xFFbar".to_os_str_lossy();
+ /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_os_str_lossy(&self) -> Cow<OsStr> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Cow<OsStr> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Cow::Borrowed(OsStr::from_bytes(bytes))
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Cow<OsStr> {
+ use std::ffi::OsString;
+
+ match bytes.to_str_lossy() {
+ Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)),
+ Cow::Owned(x) => Cow::Owned(OsString::from(x)),
+ }
+ }
+
+ imp(self.as_bytes())
+ }
+
+ /// Create a path slice from this byte string.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns a UTF-8 decoding error if this byte string is not valid
+ /// UTF-8. (For example, on Windows, file paths are allowed to be a
+ /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
+ /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
+ /// 16-bit integers.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let path = b"foo".to_path().expect("should be valid UTF-8");
+ /// assert_eq!(path.as_os_str(), "foo");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_path(&self) -> Result<&Path, Utf8Error> {
+ self.to_os_str().map(Path::new)
+ }
+
+ /// Lossily create a path slice from this byte string.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this will perform a UTF-8 check and lossily convert this byte string
+ /// into valid UTF-8 using the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths on
+ /// non-Unix systems such as Windows, where file paths are an arbitrary
+ /// sequence of 16-bit integers.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"foo\xFFbar";
+ /// let path = bs.to_path_lossy();
+ /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_path_lossy(&self) -> Cow<Path> {
+ use std::path::PathBuf;
+
+ match self.to_os_str_lossy() {
+ Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
+ Cow::Owned(x) => Cow::Owned(PathBuf::from(x)),
+ }
+ }
+
+ /// Create a new byte string by repeating this byte string `n` times.
+ ///
+ /// # Panics
+ ///
+ /// This function panics if the capacity of the new byte string would
+ /// overflow.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
+ /// assert_eq!(b"foo".repeatn(0), B(""));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn repeatn(&self, n: usize) -> Vec<u8> {
+ let bs = self.as_bytes();
+ let mut dst = vec![0; bs.len() * n];
+ for i in 0..n {
+ dst[i * bs.len()..(i + 1) * bs.len()].copy_from_slice(bs);
+ }
+ dst
+ }
+
+ /// Returns true if and only if this byte string contains the given needle.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert!(b"foo bar".contains_str("foo"));
+ /// assert!(b"foo bar".contains_str("bar"));
+ /// assert!(!b"foo".contains_str("foobar"));
+ /// ```
+ #[inline]
+ fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
+ self.find(needle).is_some()
+ }
+
+ /// Returns true if and only if this byte string has the given prefix.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert!(b"foo bar".starts_with_str("foo"));
+ /// assert!(!b"foo bar".starts_with_str("bar"));
+ /// assert!(!b"foo".starts_with_str("foobar"));
+ /// ```
+ #[inline]
+ fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
+ self.as_bytes().starts_with(prefix.as_ref())
+ }
+
+ /// Returns true if and only if this byte string has the given suffix.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert!(b"foo bar".ends_with_str("bar"));
+ /// assert!(!b"foo bar".ends_with_str("foo"));
+ /// assert!(!b"bar".ends_with_str("foobar"));
+ /// ```
+ #[inline]
+ fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
+ self.as_bytes().ends_with(suffix.as_ref())
+ }
+
+ /// Returns the index of the first occurrence of the given needle.
+ ///
+ /// The needle may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// Note that if you're are searching for the same needle in many
+ /// different small haystacks, it may be faster to initialize a
+ /// [`Finder`](struct.Finder.html) once, and reuse it for each search.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar baz";
+ /// assert_eq!(Some(0), s.find("foo"));
+ /// assert_eq!(Some(4), s.find("bar"));
+ /// assert_eq!(None, s.find("quux"));
+ /// ```
+ #[inline]
+ fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
+ Finder::new(needle.as_ref()).find(self.as_bytes())
+ }
+
+ /// Returns the index of the last occurrence of the given needle.
+ ///
+ /// The needle may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// Note that if you're are searching for the same needle in many
+ /// different small haystacks, it may be faster to initialize a
+ /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
+ /// each search.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar baz";
+ /// assert_eq!(Some(0), s.rfind("foo"));
+ /// assert_eq!(Some(4), s.rfind("bar"));
+ /// assert_eq!(Some(8), s.rfind("ba"));
+ /// assert_eq!(None, s.rfind("quux"));
+ /// ```
+ #[inline]
+ fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
+ FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
+ }
+
+ /// Returns an iterator of the non-overlapping occurrences of the given
+ /// needle. The iterator yields byte offset positions indicating the start
+ /// of each match.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar foo foo quux foo";
+ /// let matches: Vec<usize> = s.find_iter("foo").collect();
+ /// assert_eq!(matches, vec![0, 8, 12, 21]);
+ /// ```
+ ///
+ /// An empty string matches at every position, including the position
+ /// immediately following the last byte:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let matches: Vec<usize> = b"foo".find_iter("").collect();
+ /// assert_eq!(matches, vec![0, 1, 2, 3]);
+ ///
+ /// let matches: Vec<usize> = b"".find_iter("").collect();
+ /// assert_eq!(matches, vec![0]);
+ /// ```
+ #[inline]
+ fn find_iter<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ needle: &'a B,
+ ) -> Find<'a> {
+ Find::new(self.as_bytes(), needle.as_ref())
+ }
+
+ /// Returns an iterator of the non-overlapping occurrences of the given
+ /// needle in reverse. The iterator yields byte offset positions indicating
+ /// the start of each match.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar foo foo quux foo";
+ /// let matches: Vec<usize> = s.rfind_iter("foo").collect();
+ /// assert_eq!(matches, vec![21, 12, 8, 0]);
+ /// ```
+ ///
+ /// An empty string matches at every position, including the position
+ /// immediately following the last byte:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
+ /// assert_eq!(matches, vec![3, 2, 1, 0]);
+ ///
+ /// let matches: Vec<usize> = b"".rfind_iter("").collect();
+ /// assert_eq!(matches, vec![0]);
+ /// ```
+ #[inline]
+ fn rfind_iter<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ needle: &'a B,
+ ) -> FindReverse<'a> {
+ FindReverse::new(self.as_bytes(), needle.as_ref())
+ }
+
+ /// Returns the index of the first occurrence of the given byte. If the
+ /// byte does not occur in this byte string, then `None` is returned.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
+ /// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
+ /// ```
+ #[inline]
+ fn find_byte(&self, byte: u8) -> Option<usize> {
+ memchr(byte, self.as_bytes())
+ }
+
+ /// Returns the index of the last occurrence of the given byte. If the
+ /// byte does not occur in this byte string, then `None` is returned.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
+ /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
+ /// ```
+ #[inline]
+ fn rfind_byte(&self, byte: u8) -> Option<usize> {
+ memrchr(byte, self.as_bytes())
+ }
+
+ /// Returns the index of the first occurrence of the given codepoint.
+ /// If the codepoint does not occur in this byte string, then `None` is
+ /// returned.
+ ///
+ /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
+ /// then only explicit occurrences of that encoding will be found. Invalid
+ /// UTF-8 sequences will not be matched.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
+ /// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
+ /// assert_eq!(None, b"foo bar baz".find_char('y'));
+ /// ```
+ #[inline]
+ fn find_char(&self, ch: char) -> Option<usize> {
+ self.find(ch.encode_utf8(&mut [0; 4]))
+ }
+
+ /// Returns the index of the last occurrence of the given codepoint.
+ /// If the codepoint does not occur in this byte string, then `None` is
+ /// returned.
+ ///
+ /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
+ /// then only explicit occurrences of that encoding will be found. Invalid
+ /// UTF-8 sequences will not be matched.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
+ /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
+ /// assert_eq!(None, b"foo bar baz".rfind_char('y'));
+ /// ```
+ #[inline]
+ fn rfind_char(&self, ch: char) -> Option<usize> {
+ self.rfind(ch.encode_utf8(&mut [0; 4]))
+ }
+
+ /// Returns the index of the first occurrence of any of the bytes in the
+ /// provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and
+ /// duplicate bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
+ /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
+ /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
+ /// ```
+ #[inline]
+ fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+ byteset::find(self.as_bytes(), byteset.as_ref())
+ }
+
+ /// Returns the index of the first occurrence of a byte that is not a member
+ /// of the provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and
+ /// duplicate bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
+ /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
+ /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
+ /// ```
+ #[inline]
+ fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+ byteset::find_not(self.as_bytes(), byteset.as_ref())
+ }
+
+ /// Returns the index of the last occurrence of any of the bytes in the
+ /// provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and duplicate
+ /// bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
+ /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
+ /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
+ /// ```
+ #[inline]
+ fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+ byteset::rfind(self.as_bytes(), byteset.as_ref())
+ }
+
+ /// Returns the index of the last occurrence of a byte that is not a member
+ /// of the provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and
+ /// duplicate bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
+ /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
+ /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
+ /// ```
+ #[inline]
+ fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+ byteset::rfind_not(self.as_bytes(), byteset.as_ref())
+ }
+
+ /// Returns an iterator over the fields in a byte string, separated by
+ /// contiguous whitespace.
+ ///
+ /// # Example
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\nquux \n");
+ /// let fields: Vec<&[u8]> = s.fields().collect();
+ /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
+ /// ```
+ ///
+ /// A byte string consisting of just whitespace yields no elements:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(0, B(" \n\t\u{2003}\n \t").fields().count());
+ /// ```
+ #[inline]
+ fn fields(&self) -> Fields {
+ Fields::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the fields in a byte string, separated by
+ /// contiguous codepoints satisfying the given predicate.
+ ///
+ /// If this byte string is not valid UTF-8, then the given closure will
+ /// be called with a Unicode replacement codepoint when invalid UTF-8
+ /// bytes are seen.
+ ///
+ /// # Example
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo999999bar1quux123456";
+ /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
+ /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
+ /// ```
+ ///
+ /// A byte string consisting of all codepoints satisfying the predicate
+ /// yields no elements:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
+ /// ```
+ #[inline]
+ fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<F> {
+ FieldsWith::new(self.as_bytes(), f)
+ }
+
+ /// Returns an iterator over substrings of this byte string, separated
+ /// by the given byte string. Each element yielded is guaranteed not to
+ /// include the splitter substring.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"".split_str("X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
+ /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
+ ///
+ /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
+ /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
+ /// ```
+ ///
+ /// If a string contains multiple contiguous separators, you will end up
+ /// with empty strings yielded by the iterator:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
+ /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
+ /// ```
+ ///
+ /// Separators at the start or end of a string are neighbored by empty
+ /// strings.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"010".split_str("0").collect();
+ /// assert_eq!(x, vec![B(""), B("1"), B("")]);
+ /// ```
+ ///
+ /// When the empty string is used as a separator, it splits every **byte**
+ /// in the byte string, along with the beginning and end of the byte
+ /// string.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"rust".split_str("").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B("r"), B("u"), B("s"), B("t"), B(""),
+ /// ]);
+ ///
+ /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
+ /// // may not be valid UTF-8!
+ /// let x: Vec<&[u8]> = B("☃").split_str("").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
+ /// ]);
+ /// ```
+ ///
+ /// Contiguous separators, especially whitespace, can lead to possibly
+ /// surprising behavior. For example, this code is correct:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b" a b c".split_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
+ /// ]);
+ /// ```
+ ///
+ /// It does *not* give you `["a", "b", "c"]`. For that behavior, use
+ /// [`fields`](#method.fields) instead.
+ #[inline]
+ fn split_str<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ splitter: &'a B,
+ ) -> Split<'a> {
+ Split::new(self.as_bytes(), splitter.as_ref())
+ }
+
+ /// Returns an iterator over substrings of this byte string, separated by
+ /// the given byte string, in reverse. Each element yielded is guaranteed
+ /// not to include the splitter substring.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> =
+ /// b"Mary had a little lamb".rsplit_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
+ ///
+ /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
+ /// ```
+ ///
+ /// If a string contains multiple contiguous separators, you will end up
+ /// with empty strings yielded by the iterator:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
+ /// assert_eq!(x, vec![
+ /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
+ /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
+ /// ```
+ ///
+ /// Separators at the start or end of a string are neighbored by empty
+ /// strings.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
+ /// assert_eq!(x, vec![B(""), B("1"), B("")]);
+ /// ```
+ ///
+ /// When the empty string is used as a separator, it splits every **byte**
+ /// in the byte string, along with the beginning and end of the byte
+ /// string.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B("t"), B("s"), B("u"), B("r"), B(""),
+ /// ]);
+ ///
+ /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
+ /// // may not be valid UTF-8!
+ /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
+ /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
+ /// ```
+ ///
+ /// Contiguous separators, especially whitespace, can lead to possibly
+ /// surprising behavior. For example, this code is correct:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b" a b c".rsplit_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
+ /// ]);
+ /// ```
+ ///
+ /// It does *not* give you `["a", "b", "c"]`.
+ #[inline]
+ fn rsplit_str<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ splitter: &'a B,
+ ) -> SplitReverse<'a> {
+ SplitReverse::new(self.as_bytes(), splitter.as_ref())
+ }
+
+ /// Returns an iterator of at most `limit` substrings of this byte string,
+ /// separated by the given byte string. If `limit` substrings are yielded,
+ /// then the last substring will contain the remainder of this byte string.
+ ///
+ /// The needle may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
+ /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
+ ///
+ /// let x: Vec<_> = b"".splitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
+ ///
+ /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
+ /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
+ /// assert_eq!(x, vec![B("abcXdef")]);
+ ///
+ /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
+ /// assert_eq!(x, vec![B("abcdef")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
+ /// assert!(x.is_empty());
+ /// ```
+ #[inline]
+ fn splitn_str<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ limit: usize,
+ splitter: &'a B,
+ ) -> SplitN<'a> {
+ SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
+ }
+
+ /// Returns an iterator of at most `limit` substrings of this byte string,
+ /// separated by the given byte string, in reverse. If `limit` substrings
+ /// are yielded, then the last substring will contain the remainder of this
+ /// byte string.
+ ///
+ /// The needle may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<_> =
+ /// b"Mary had a little lamb".rsplitn_str(3, " ").collect();
+ /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
+ ///
+ /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
+ ///
+ /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
+ /// assert_eq!(x, vec![B("abcXdef")]);
+ ///
+ /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
+ /// assert_eq!(x, vec![B("abcdef")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
+ /// assert!(x.is_empty());
+ /// ```
+ #[inline]
+ fn rsplitn_str<'a, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ limit: usize,
+ splitter: &'a B,
+ ) -> SplitNReverse<'a> {
+ SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
+ }
+
+ /// Replace all matches of the given needle with the given replacement, and
+ /// the result as a new `Vec<u8>`.
+ ///
+ /// This routine is useful as a convenience. If you need to reuse an
+ /// allocation, use [`replace_into`](#method.replace_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old".replace("old", "new");
+ /// assert_eq!(s, "this is new".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old".replace("nada nada", "limonada");
+ /// assert_eq!(s, "this is old".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo".replace("", "Z");
+ /// assert_eq!(s, "ZfZoZoZ".as_bytes());
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+ &self,
+ needle: N,
+ replacement: R,
+ ) -> Vec<u8> {
+ let mut dest = Vec::with_capacity(self.as_bytes().len());
+ self.replace_into(needle, replacement, &mut dest);
+ dest
+ }
+
+ /// Replace up to `limit` matches of the given needle with the given
+ /// replacement, and the result as a new `Vec<u8>`.
+ ///
+ /// This routine is useful as a convenience. If you need to reuse an
+ /// allocation, use [`replacen_into`](#method.replacen_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo".replacen("o", "z", 2);
+ /// assert_eq!(s, "fzzfoo".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo".replacen("a", "z", 2);
+ /// assert_eq!(s, "foofoo".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo".replacen("", "Z", 2);
+ /// assert_eq!(s, "ZfZoo".as_bytes());
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+ &self,
+ needle: N,
+ replacement: R,
+ limit: usize,
+ ) -> Vec<u8> {
+ let mut dest = Vec::with_capacity(self.as_bytes().len());
+ self.replacen_into(needle, replacement, limit, &mut dest);
+ dest
+ }
+
+ /// Replace all matches of the given needle with the given replacement,
+ /// and write the result into the provided `Vec<u8>`.
+ ///
+ /// This does **not** clear `dest` before writing to it.
+ ///
+ /// This routine is useful for reusing allocation. For a more convenient
+ /// API, use [`replace`](#method.replace) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old";
+ ///
+ /// let mut dest = vec![];
+ /// s.replace_into("old", "new", &mut dest);
+ /// assert_eq!(dest, "this is new".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old";
+ ///
+ /// let mut dest = vec![];
+ /// s.replace_into("nada nada", "limonada", &mut dest);
+ /// assert_eq!(dest, "this is old".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replace_into("", "Z", &mut dest);
+ /// assert_eq!(dest, "ZfZoZoZ".as_bytes());
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+ &self,
+ needle: N,
+ replacement: R,
+ dest: &mut Vec<u8>,
+ ) {
+ let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
+
+ let mut last = 0;
+ for start in self.find_iter(needle) {
+ dest.push_str(&self.as_bytes()[last..start]);
+ dest.push_str(replacement);
+ last = start + needle.len();
+ }
+ dest.push_str(&self.as_bytes()[last..]);
+ }
+
+ /// Replace up to `limit` matches of the given needle with the given
+ /// replacement, and write the result into the provided `Vec<u8>`.
+ ///
+ /// This does **not** clear `dest` before writing to it.
+ ///
+ /// This routine is useful for reusing allocation. For a more convenient
+ /// API, use [`replacen`](#method.replacen) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replacen_into("o", "z", 2, &mut dest);
+ /// assert_eq!(dest, "fzzfoo".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replacen_into("a", "z", 2, &mut dest);
+ /// assert_eq!(dest, "foofoo".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replacen_into("", "Z", 2, &mut dest);
+ /// assert_eq!(dest, "ZfZoo".as_bytes());
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+ &self,
+ needle: N,
+ replacement: R,
+ limit: usize,
+ dest: &mut Vec<u8>,
+ ) {
+ let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
+
+ let mut last = 0;
+ for start in self.find_iter(needle).take(limit) {
+ dest.push_str(&self.as_bytes()[last..start]);
+ dest.push_str(replacement);
+ last = start + needle.len();
+ }
+ dest.push_str(&self.as_bytes()[last..]);
+ }
+
+ /// Returns an iterator over the bytes in this byte string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"foobar";
+ /// let bytes: Vec<u8> = bs.bytes().collect();
+ /// assert_eq!(bytes, bs);
+ /// ```
+ #[inline]
+ fn bytes(&self) -> Bytes {
+ Bytes { it: self.as_bytes().iter() }
+ }
+
+ /// Returns an iterator over the Unicode scalar values in this byte string.
+ /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
+ /// is yielded instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<char> = bs.chars().collect();
+ /// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
+ /// ```
+ ///
+ /// Codepoints can also be iterated over in reverse:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<char> = bs.chars().rev().collect();
+ /// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
+ /// ```
+ #[inline]
+ fn chars(&self) -> Chars {
+ Chars::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the Unicode scalar values in this byte string
+ /// along with their starting and ending byte index positions. If invalid
+ /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
+ /// instead.
+ ///
+ /// Note that this is slightly different from the `CharIndices` iterator
+ /// provided by the standard library. Aside from working on possibly
+ /// invalid UTF-8, this iterator provides both the corresponding starting
+ /// and ending byte indices of each codepoint yielded. The ending position
+ /// is necessary to slice the original byte string when invalid UTF-8 bytes
+ /// are converted into a Unicode replacement codepoint, since a single
+ /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
+ /// (inclusive).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
+ /// assert_eq!(chars, vec![
+ /// (0, 3, '☃'),
+ /// (3, 4, '\u{FFFD}'),
+ /// (4, 8, '𝞃'),
+ /// (8, 10, '\u{FFFD}'),
+ /// (10, 11, 'a'),
+ /// ]);
+ /// ```
+ ///
+ /// Codepoints can also be iterated over in reverse:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<(usize, usize, char)> = bs
+ /// .char_indices()
+ /// .rev()
+ /// .collect();
+ /// assert_eq!(chars, vec![
+ /// (10, 11, 'a'),
+ /// (8, 10, '\u{FFFD}'),
+ /// (4, 8, '𝞃'),
+ /// (3, 4, '\u{FFFD}'),
+ /// (0, 3, '☃'),
+ /// ]);
+ /// ```
+ #[inline]
+ fn char_indices(&self) -> CharIndices {
+ CharIndices::new(self.as_bytes())
+ }
+
+ /// Iterate over chunks of valid UTF-8.
+ ///
+ /// The iterator returned yields chunks of valid UTF-8 separated by invalid
+ /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
+ /// which are determined via the "substitution of maximal subparts"
+ /// strategy described in the docs for the
+ /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
+ /// method.
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to gather all valid and invalid chunks from a
+ /// byte slice:
+ ///
+ /// ```
+ /// use bstr::{ByteSlice, Utf8Chunk};
+ ///
+ /// let bytes = b"foo\xFD\xFEbar\xFF";
+ ///
+ /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
+ /// for chunk in bytes.utf8_chunks() {
+ /// if !chunk.valid().is_empty() {
+ /// valid_chunks.push(chunk.valid());
+ /// }
+ /// if !chunk.invalid().is_empty() {
+ /// invalid_chunks.push(chunk.invalid());
+ /// }
+ /// }
+ ///
+ /// assert_eq!(valid_chunks, vec!["foo", "bar"]);
+ /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
+ /// ```
+ #[inline]
+ fn utf8_chunks(&self) -> Utf8Chunks {
+ Utf8Chunks { bytes: self.as_bytes() }
+ }
+
+ /// Returns an iterator over the grapheme clusters in this byte string.
+ /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
+ /// is yielded instead.
+ ///
+ /// # Examples
+ ///
+ /// This example shows how multiple codepoints can combine to form a
+ /// single grapheme cluster:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
+ /// let graphemes: Vec<&str> = bs.graphemes().collect();
+ /// assert_eq!(vec!["à̖", "🇺🇸"], graphemes);
+ /// ```
+ ///
+ /// This shows that graphemes can be iterated over in reverse:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
+ /// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
+ /// assert_eq!(vec!["🇺🇸", "à̖"], graphemes);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn graphemes(&self) -> Graphemes {
+ Graphemes::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the grapheme clusters in this byte string
+ /// along with their starting and ending byte index positions. If invalid
+ /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
+ /// instead.
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to get the byte offsets of each individual
+ /// grapheme cluster:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
+ /// let graphemes: Vec<(usize, usize, &str)> =
+ /// bs.grapheme_indices().collect();
+ /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "🇺🇸")], graphemes);
+ /// ```
+ ///
+ /// This example shows what happens when invalid UTF-8 is enountered. Note
+ /// that the offsets are valid indices into the original string, and do
+ /// not necessarily correspond to the length of the `&str` returned!
+ ///
+ /// ```
+ /// use bstr::{ByteSlice, ByteVec};
+ ///
+ /// let mut bytes = vec![];
+ /// bytes.push_str("a\u{0300}\u{0316}");
+ /// bytes.push(b'\xFF');
+ /// bytes.push_str("\u{1F1FA}\u{1F1F8}");
+ ///
+ /// let graphemes: Vec<(usize, usize, &str)> =
+ /// bytes.grapheme_indices().collect();
+ /// assert_eq!(
+ /// graphemes,
+ /// vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "🇺🇸")]
+ /// );
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn grapheme_indices(&self) -> GraphemeIndices {
+ GraphemeIndices::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the words in this byte string. If invalid
+ /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
+ /// instead.
+ ///
+ /// This is similar to
+ /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
+ /// except it only returns elements that contain a "word" character. A word
+ /// character is defined by UTS #18 (Annex C) to be the combination of the
+ /// `Alphabetic` and `Join_Control` properties, along with the
+ /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
+ /// categories.
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
+ /// let words: Vec<&str> = bs.words().collect();
+ /// assert_eq!(words, vec![
+ /// "The", "quick", "brown", "fox", "can't",
+ /// "jump", "32.3", "feet", "right",
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn words(&self) -> Words {
+ Words::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the words in this byte string along with
+ /// their starting and ending byte index positions.
+ ///
+ /// This is similar to
+ /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
+ /// except it only returns elements that contain a "word" character. A word
+ /// character is defined by UTS #18 (Annex C) to be the combination of the
+ /// `Alphabetic` and `Join_Control` properties, along with the
+ /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
+ /// categories.
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to get the byte offsets of each individual
+ /// word:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"can't jump 32.3 feet";
+ /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
+ /// assert_eq!(words, vec![
+ /// (0, 5, "can't"),
+ /// (6, 10, "jump"),
+ /// (11, 15, "32.3"),
+ /// (16, 20, "feet"),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn word_indices(&self) -> WordIndices {
+ WordIndices::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the words in this byte string, along with
+ /// all breaks between the words. Concatenating all elements yielded by
+ /// the iterator results in the original string (modulo Unicode replacement
+ /// codepoint substitutions if invalid UTF-8 is encountered).
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
+ /// let words: Vec<&str> = bs.words_with_breaks().collect();
+ /// assert_eq!(words, vec![
+ /// "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
+ /// " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
+ /// ",", " ", "right", "?",
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn words_with_breaks(&self) -> WordsWithBreaks {
+ WordsWithBreaks::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the words and their byte offsets in this
+ /// byte string, along with all breaks between the words. Concatenating
+ /// all elements yielded by the iterator results in the original string
+ /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
+ /// encountered).
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to get the byte offsets of each individual
+ /// word:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"can't jump 32.3 feet";
+ /// let words: Vec<(usize, usize, &str)> =
+ /// bs.words_with_break_indices().collect();
+ /// assert_eq!(words, vec![
+ /// (0, 5, "can't"),
+ /// (5, 6, " "),
+ /// (6, 10, "jump"),
+ /// (10, 11, " "),
+ /// (11, 15, "32.3"),
+ /// (15, 16, " "),
+ /// (16, 20, "feet"),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn words_with_break_indices(&self) -> WordsWithBreakIndices {
+ WordsWithBreakIndices::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the sentences in this byte string.
+ ///
+ /// Typically, a sentence will include its trailing punctuation and
+ /// whitespace. Concatenating all elements yielded by the iterator
+ /// results in the original string (modulo Unicode replacement codepoint
+ /// substitutions if invalid UTF-8 is encountered).
+ ///
+ /// Since sentences are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"I want this. Not that. Right now.";
+ /// let sentences: Vec<&str> = bs.sentences().collect();
+ /// assert_eq!(sentences, vec![
+ /// "I want this. ",
+ /// "Not that. ",
+ /// "Right now.",
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn sentences(&self) -> Sentences {
+ Sentences::new(self.as_bytes())
+ }
+
+ /// Returns an iterator over the sentences in this byte string along with
+ /// their starting and ending byte index positions.
+ ///
+ /// Typically, a sentence will include its trailing punctuation and
+ /// whitespace. Concatenating all elements yielded by the iterator
+ /// results in the original string (modulo Unicode replacement codepoint
+ /// substitutions if invalid UTF-8 is encountered).
+ ///
+ /// Since sentences are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"I want this. Not that. Right now.";
+ /// let sentences: Vec<(usize, usize, &str)> =
+ /// bs.sentence_indices().collect();
+ /// assert_eq!(sentences, vec![
+ /// (0, 13, "I want this. "),
+ /// (13, 23, "Not that. "),
+ /// (23, 33, "Right now."),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn sentence_indices(&self) -> SentenceIndices {
+ SentenceIndices::new(self.as_bytes())
+ }
+
+ /// An iterator over all lines in a byte string, without their
+ /// terminators.
+ ///
+ /// For this iterator, the only line terminators recognized are `\r\n` and
+ /// `\n`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"\
+ /// foo
+ ///
+ /// bar\r
+ /// baz
+ ///
+ ///
+ /// quux";
+ /// let lines: Vec<&[u8]> = s.lines().collect();
+ /// assert_eq!(lines, vec![
+ /// B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
+ /// ]);
+ /// ```
+ #[inline]
+ fn lines(&self) -> Lines {
+ Lines::new(self.as_bytes())
+ }
+
+ /// An iterator over all lines in a byte string, including their
+ /// terminators.
+ ///
+ /// For this iterator, the only line terminator recognized is `\n`. (Since
+ /// line terminators are included, this also handles `\r\n` line endings.)
+ ///
+ /// Line terminators are only included if they are present in the original
+ /// byte string. For example, the last line in a byte string may not end
+ /// with a line terminator.
+ ///
+ /// Concatenating all elements yielded by this iterator is guaranteed to
+ /// yield the original byte string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"\
+ /// foo
+ ///
+ /// bar\r
+ /// baz
+ ///
+ ///
+ /// quux";
+ /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
+ /// assert_eq!(lines, vec![
+ /// B("foo\n"),
+ /// B("\n"),
+ /// B("bar\r\n"),
+ /// B("baz\n"),
+ /// B("\n"),
+ /// B("\n"),
+ /// B("quux"),
+ /// ]);
+ /// ```
+ #[inline]
+ fn lines_with_terminator(&self) -> LinesWithTerminator {
+ LinesWithTerminator::new(self.as_bytes())
+ }
+
+ /// Return a byte string slice with leading and trailing whitespace
+ /// removed.
+ ///
+ /// Whitespace is defined according to the terms of the `White_Space`
+ /// Unicode property.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\n");
+ /// assert_eq!(s.trim(), B("foo\tbar"));
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn trim(&self) -> &[u8] {
+ self.trim_start().trim_end()
+ }
+
+ /// Return a byte string slice with leading whitespace removed.
+ ///
+ /// Whitespace is defined according to the terms of the `White_Space`
+ /// Unicode property.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\n");
+ /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn trim_start(&self) -> &[u8] {
+ let start = whitespace_len_fwd(self.as_bytes());
+ &self.as_bytes()[start..]
+ }
+
+ /// Return a byte string slice with trailing whitespace removed.
+ ///
+ /// Whitespace is defined according to the terms of the `White_Space`
+ /// Unicode property.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\n");
+ /// assert_eq!(s.trim_end(), B(" foo\tbar"));
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn trim_end(&self) -> &[u8] {
+ let end = whitespace_len_rev(self.as_bytes());
+ &self.as_bytes()[..end]
+ }
+
+ /// Return a byte string slice with leading and trailing characters
+ /// satisfying the given predicate removed.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo5bar789";
+ /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
+ /// ```
+ #[inline]
+ fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
+ self.trim_start_with(&mut trim).trim_end_with(&mut trim)
+ }
+
+ /// Return a byte string slice with leading characters satisfying the given
+ /// predicate removed.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo5bar789";
+ /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
+ /// ```
+ #[inline]
+ fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
+ for (s, _, ch) in self.char_indices() {
+ if !trim(ch) {
+ return &self.as_bytes()[s..];
+ }
+ }
+ b""
+ }
+
+ /// Return a byte string slice with trailing characters satisfying the
+ /// given predicate removed.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo5bar789";
+ /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
+ /// ```
+ #[inline]
+ fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
+ for (_, e, ch) in self.char_indices().rev() {
+ if !trim(ch) {
+ return &self.as_bytes()[..e];
+ }
+ }
+ b""
+ }
+
+ /// Returns a new `Vec<u8>` containing the lowercase equivalent of this
+ /// byte string.
+ ///
+ /// In this case, lowercase is defined according to the `Lowercase` Unicode
+ /// property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
+ /// then it is written to the given buffer unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes written to
+ /// the given byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`to_lowercase_into`](#method.to_lowercase_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("HELLO Β");
+ /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
+ /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
+ /// ```
+ #[cfg(all(feature = "std", feature = "unicode"))]
+ #[inline]
+ fn to_lowercase(&self) -> Vec<u8> {
+ let mut buf = vec![];
+ self.to_lowercase_into(&mut buf);
+ buf
+ }
+
+ /// Writes the lowercase equivalent of this byte string into the given
+ /// buffer. The buffer is not cleared before written to.
+ ///
+ /// In this case, lowercase is defined according to the `Lowercase`
+ /// Unicode property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
+ /// then it is written to the given buffer unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes written to
+ /// the given byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you don't need to amortize allocation and instead prefer
+ /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("HELLO Β");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_lowercase_into(&mut buf);
+ /// assert_eq!("hello β".as_bytes(), buf.as_bytes());
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_lowercase_into(&mut buf);
+ /// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_lowercase_into(&mut buf);
+ /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
+ /// ```
+ #[cfg(all(feature = "std", feature = "unicode"))]
+ #[inline]
+ fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
+ // TODO: This is the best we can do given what std exposes I think.
+ // If we roll our own case handling, then we might be able to do this
+ // a bit faster. We shouldn't roll our own case handling unless we
+ // need to, e.g., for doing caseless matching or case folding.
+
+ // TODO(BUG): This doesn't handle any special casing rules.
+
+ buf.reserve(self.as_bytes().len());
+ for (s, e, ch) in self.char_indices() {
+ if ch == '\u{FFFD}' {
+ buf.push_str(&self.as_bytes()[s..e]);
+ } else if ch.is_ascii() {
+ buf.push_char(ch.to_ascii_lowercase());
+ } else {
+ for upper in ch.to_lowercase() {
+ buf.push_char(upper);
+ }
+ }
+ }
+ }
+
+ /// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of
+ /// this byte string.
+ ///
+ /// In this case, lowercase is only defined in ASCII letters. Namely, the
+ /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
+ /// In particular, the length of the byte string returned is always
+ /// equivalent to the length of this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
+ /// the conversion in place.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("HELLO Β");
+ /// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
+ /// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_ascii_lowercase(&self) -> Vec<u8> {
+ self.as_bytes().to_ascii_lowercase()
+ }
+
+ /// Convert this byte string to its lowercase ASCII equivalent in place.
+ ///
+ /// In this case, lowercase is only defined in ASCII letters. Namely, the
+ /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
+ ///
+ /// If you don't need to do the conversion in
+ /// place and instead prefer convenience, then use
+ /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("HELLO Β");
+ /// s.make_ascii_lowercase();
+ /// assert_eq!(s, "hello Β".as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice, ByteVec};
+ ///
+ /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
+ /// s.make_ascii_lowercase();
+ /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
+ /// ```
+ #[inline]
+ fn make_ascii_lowercase(&mut self) {
+ self.as_bytes_mut().make_ascii_lowercase();
+ }
+
+ /// Returns a new `Vec<u8>` containing the uppercase equivalent of this
+ /// byte string.
+ ///
+ /// In this case, uppercase is defined according to the `Uppercase`
+ /// Unicode property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
+ /// then it is written to the given buffer unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes written to
+ /// the given byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`to_uppercase_into`](#method.to_uppercase_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("hello β");
+ /// assert_eq!(s.to_uppercase(), B("HELLO Β"));
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ /// assert_eq!(s.to_uppercase(), B("农历新年"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"foo\xFFbar\xE2\x98baz");
+ /// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[cfg(all(feature = "std", feature = "unicode"))]
+ #[inline]
+ fn to_uppercase(&self) -> Vec<u8> {
+ let mut buf = vec![];
+ self.to_uppercase_into(&mut buf);
+ buf
+ }
+
+ /// Writes the uppercase equivalent of this byte string into the given
+ /// buffer. The buffer is not cleared before written to.
+ ///
+ /// In this case, uppercase is defined according to the `Uppercase`
+ /// Unicode property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
+ /// then it is written to the given buffer unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes written to
+ /// the given byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you don't need to amortize allocation and instead prefer
+ /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("hello β");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_uppercase_into(&mut buf);
+ /// assert_eq!(buf, B("HELLO Β"));
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_uppercase_into(&mut buf);
+ /// assert_eq!(buf, B("农历新年"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"foo\xFFbar\xE2\x98baz");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_uppercase_into(&mut buf);
+ /// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[cfg(all(feature = "std", feature = "unicode"))]
+ #[inline]
+ fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
+ // TODO: This is the best we can do given what std exposes I think.
+ // If we roll our own case handling, then we might be able to do this
+ // a bit faster. We shouldn't roll our own case handling unless we
+ // need to, e.g., for doing caseless matching or case folding.
+ buf.reserve(self.as_bytes().len());
+ for (s, e, ch) in self.char_indices() {
+ if ch == '\u{FFFD}' {
+ buf.push_str(&self.as_bytes()[s..e]);
+ } else if ch.is_ascii() {
+ buf.push_char(ch.to_ascii_uppercase());
+ } else {
+ for upper in ch.to_uppercase() {
+ buf.push_char(upper);
+ }
+ }
+ }
+ }
+
+ /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of
+ /// this byte string.
+ ///
+ /// In this case, uppercase is only defined in ASCII letters. Namely, the
+ /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
+ /// In particular, the length of the byte string returned is always
+ /// equivalent to the length of this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform
+ /// the conversion in place.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("hello β");
+ /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"foo\xFFbar\xE2\x98baz");
+ /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_ascii_uppercase(&self) -> Vec<u8> {
+ self.as_bytes().to_ascii_uppercase()
+ }
+
+ /// Convert this byte string to its uppercase ASCII equivalent in place.
+ ///
+ /// In this case, uppercase is only defined in ASCII letters. Namely, the
+ /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
+ ///
+ /// If you don't need to do the conversion in
+ /// place and instead prefer convenience, then use
+ /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut s = <Vec<u8>>::from("hello β");
+ /// s.make_ascii_uppercase();
+ /// assert_eq!(s, B("HELLO β"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice, ByteVec};
+ ///
+ /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
+ /// s.make_ascii_uppercase();
+ /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[inline]
+ fn make_ascii_uppercase(&mut self) {
+ self.as_bytes_mut().make_ascii_uppercase();
+ }
+
+ /// Reverse the bytes in this string, in place.
+ ///
+ /// This is not necessarily a well formed operation! For example, if this
+ /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
+ /// string will likely result in invalid UTF-8 and otherwise non-sensical
+ /// content.
+ ///
+ /// Note that this is equivalent to the generic `[u8]::reverse` method.
+ /// This method is provided to permit callers to explicitly differentiate
+ /// between reversing bytes, codepoints and graphemes.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("hello");
+ /// s.reverse_bytes();
+ /// assert_eq!(s, "olleh".as_bytes());
+ /// ```
+ #[inline]
+ fn reverse_bytes(&mut self) {
+ self.as_bytes_mut().reverse();
+ }
+
+ /// Reverse the codepoints in this string, in place.
+ ///
+ /// If this byte string is valid UTF-8, then its reversal by codepoint
+ /// is also guaranteed to be valid UTF-8.
+ ///
+ /// This operation is equivalent to the following, but without allocating:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ ///
+ /// let mut chars: Vec<char> = s.chars().collect();
+ /// chars.reverse();
+ ///
+ /// let reversed: String = chars.into_iter().collect();
+ /// assert_eq!(reversed, "rab☃oof");
+ /// ```
+ ///
+ /// Note that this is not necessarily a well formed operation. For example,
+ /// if this byte string contains grapheme clusters with more than one
+ /// codepoint, then those grapheme clusters will not necessarily be
+ /// preserved. If you'd like to preserve grapheme clusters, then use
+ /// [`reverse_graphemes`](#method.reverse_graphemes) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ /// s.reverse_chars();
+ /// assert_eq!(s, "rab☃oof".as_bytes());
+ /// ```
+ ///
+ /// This example shows that not all reversals lead to a well formed string.
+ /// For example, in this case, combining marks are used to put accents over
+ /// some letters, and those accent marks must appear after the codepoints
+ /// they modify.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut s = <Vec<u8>>::from("résumé");
+ /// s.reverse_chars();
+ /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
+ /// ```
+ ///
+ /// A word of warning: the above example relies on the fact that
+ /// `résumé` is in decomposed normal form, which means there are separate
+ /// codepoints for the accents above `e`. If it is instead in composed
+ /// normal form, then the example works:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut s = <Vec<u8>>::from("résumé");
+ /// s.reverse_chars();
+ /// assert_eq!(s, B("émusér"));
+ /// ```
+ ///
+ /// The point here is to be cautious and not assume that just because
+ /// `reverse_chars` works in one case, that it therefore works in all
+ /// cases.
+ #[inline]
+ fn reverse_chars(&mut self) {
+ let mut i = 0;
+ loop {
+ let (_, size) = utf8::decode(&self.as_bytes()[i..]);
+ if size == 0 {
+ break;
+ }
+ if size > 1 {
+ self.as_bytes_mut()[i..i + size].reverse_bytes();
+ }
+ i += size;
+ }
+ self.reverse_bytes();
+ }
+
+ /// Reverse the graphemes in this string, in place.
+ ///
+ /// If this byte string is valid UTF-8, then its reversal by grapheme
+ /// is also guaranteed to be valid UTF-8.
+ ///
+ /// This operation is equivalent to the following, but without allocating:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ ///
+ /// let mut graphemes: Vec<&str> = s.graphemes().collect();
+ /// graphemes.reverse();
+ ///
+ /// let reversed = graphemes.concat();
+ /// assert_eq!(reversed, "rab☃oof");
+ /// ```
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ /// s.reverse_graphemes();
+ /// assert_eq!(s, "rab☃oof".as_bytes());
+ /// ```
+ ///
+ /// This example shows how this correctly handles grapheme clusters,
+ /// unlike `reverse_chars`.
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("résumé");
+ /// s.reverse_graphemes();
+ /// assert_eq!(s, "émusér".as_bytes());
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn reverse_graphemes(&mut self) {
+ use unicode::decode_grapheme;
+
+ let mut i = 0;
+ loop {
+ let (_, size) = decode_grapheme(&self.as_bytes()[i..]);
+ if size == 0 {
+ break;
+ }
+ if size > 1 {
+ self.as_bytes_mut()[i..i + size].reverse_bytes();
+ }
+ i += size;
+ }
+ self.reverse_bytes();
+ }
+
+ /// Returns true if and only if every byte in this byte string is ASCII.
+ ///
+ /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
+ /// an ASCII codepoint if and only if it is in the inclusive range
+ /// `[0, 127]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert!(B("abc").is_ascii());
+ /// assert!(!B("☃βツ").is_ascii());
+ /// assert!(!B(b"\xFF").is_ascii());
+ /// ```
+ #[inline]
+ fn is_ascii(&self) -> bool {
+ ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
+ }
+
+ /// Returns true if and only if the entire byte string is valid UTF-8.
+ ///
+ /// If you need location information about where a byte string's first
+ /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert!(B("abc").is_utf8());
+ /// assert!(B("☃βツ").is_utf8());
+ /// // invalid bytes
+ /// assert!(!B(b"abc\xFF").is_utf8());
+ /// // surrogate encoding
+ /// assert!(!B(b"\xED\xA0\x80").is_utf8());
+ /// // incomplete sequence
+ /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
+ /// // overlong sequence
+ /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
+ /// ```
+ #[inline]
+ fn is_utf8(&self) -> bool {
+ utf8::validate(self.as_bytes()).is_ok()
+ }
+
+ /// Returns the last byte in this byte string, if it's non-empty. If this
+ /// byte string is empty, this returns `None`.
+ ///
+ /// Note that this is like the generic `[u8]::last`, except this returns
+ /// the byte by value instead of a reference to the byte.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(Some(b'z'), b"baz".last_byte());
+ /// assert_eq!(None, b"".last_byte());
+ /// ```
+ #[inline]
+ fn last_byte(&self) -> Option<u8> {
+ let bytes = self.as_bytes();
+ bytes.get(bytes.len().saturating_sub(1)).map(|&b| b)
+ }
+
+ /// Returns the index of the first non-ASCII byte in this byte string (if
+ /// any such indices exist). Specifically, it returns the index of the
+ /// first byte with a value greater than or equal to `0x80`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{ByteSlice, B};
+ ///
+ /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
+ /// assert_eq!(None, b"abcde".find_non_ascii_byte());
+ /// assert_eq!(Some(0), B("😀").find_non_ascii_byte());
+ /// ```
+ #[inline]
+ fn find_non_ascii_byte(&self) -> Option<usize> {
+ let index = ascii::first_non_ascii_byte(self.as_bytes());
+ if index == self.as_bytes().len() {
+ None
+ } else {
+ Some(index)
+ }
+ }
+
+ /// Copies elements from one part of the slice to another part of itself,
+ /// where the parts may be overlapping.
+ ///
+ /// `src` is the range within this byte string to copy from, while `dest`
+ /// is the starting index of the range within this byte string to copy to.
+ /// The length indicated by `src` must be less than or equal to the number
+ /// of bytes from `dest` to the end of the byte string.
+ ///
+ /// # Panics
+ ///
+ /// Panics if either range is out of bounds, or if `src` is too big to fit
+ /// into `dest`, or if the end of `src` is before the start.
+ ///
+ /// # Examples
+ ///
+ /// Copying four bytes within a byte string:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut buf = *b"Hello, World!";
+ /// let s = &mut buf;
+ /// s.copy_within_str(1..5, 8);
+ /// assert_eq!(s, B("Hello, Wello!"));
+ /// ```
+ #[inline]
+ fn copy_within_str<R>(&mut self, src: R, dest: usize)
+ where
+ R: ops::RangeBounds<usize>,
+ {
+ // TODO: Deprecate this once slice::copy_within stabilizes.
+ let src_start = match src.start_bound() {
+ ops::Bound::Included(&n) => n,
+ ops::Bound::Excluded(&n) => {
+ n.checked_add(1).expect("attempted to index slice beyond max")
+ }
+ ops::Bound::Unbounded => 0,
+ };
+ let src_end = match src.end_bound() {
+ ops::Bound::Included(&n) => {
+ n.checked_add(1).expect("attempted to index slice beyond max")
+ }
+ ops::Bound::Excluded(&n) => n,
+ ops::Bound::Unbounded => self.as_bytes().len(),
+ };
+ assert!(src_start <= src_end, "src end is before src start");
+ assert!(src_end <= self.as_bytes().len(), "src is out of bounds");
+ let count = src_end - src_start;
+ assert!(
+ dest <= self.as_bytes().len() - count,
+ "dest is out of bounds",
+ );
+
+ // SAFETY: This is safe because we use ptr::copy to handle overlapping
+ // copies, and is also safe because we've checked all the bounds above.
+ // Finally, we are only dealing with u8 data, which is Copy, which
+ // means we can copy without worrying about ownership/destructors.
+ unsafe {
+ ptr::copy(
+ self.as_bytes().get_unchecked(src_start),
+ self.as_bytes_mut().get_unchecked_mut(dest),
+ count,
+ );
+ }
+ }
+}
+
+/// A single substring searcher fixed to a particular needle.
+///
+/// The purpose of this type is to permit callers to construct a substring
+/// searcher that can be used to search haystacks without the overhead of
+/// constructing the searcher in the first place. This is a somewhat niche
+/// concern when it's necessary to re-use the same needle to search multiple
+/// different haystacks with as little overhead as possible. In general, using
+/// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
+/// or
+/// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
+/// is good enough, but `Finder` is useful when you can meaningfully observe
+/// searcher construction time in a profile.
+///
+/// When the `std` feature is enabled, then this type has an `into_owned`
+/// version which permits building a `Finder` that is not connected to the
+/// lifetime of its needle.
+#[derive(Clone, Debug)]
+pub struct Finder<'a> {
+ searcher: TwoWay<'a>,
+}
+
+impl<'a> Finder<'a> {
+ /// Create a new finder for the given needle.
+ #[inline]
+ pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
+ Finder { searcher: TwoWay::forward(needle.as_ref()) }
+ }
+
+ /// Convert this finder into its owned variant, such that it no longer
+ /// borrows the needle.
+ ///
+ /// If this is already an owned finder, then this is a no-op. Otherwise,
+ /// this copies the needle.
+ ///
+ /// This is only available when the `std` feature is enabled.
+ #[cfg(feature = "std")]
+ #[inline]
+ pub fn into_owned(self) -> Finder<'static> {
+ Finder { searcher: self.searcher.into_owned() }
+ }
+
+ /// Returns the needle that this finder searches for.
+ ///
+ /// Note that the lifetime of the needle returned is tied to the lifetime
+ /// of the finder, and may be shorter than the `'a` lifetime. Namely, a
+ /// finder's needle can be either borrowed or owned, so the lifetime of the
+ /// needle returned must necessarily be the shorter of the two.
+ #[inline]
+ pub fn needle(&self) -> &[u8] {
+ self.searcher.needle()
+ }
+
+ /// Returns the index of the first occurrence of this needle in the given
+ /// haystack.
+ ///
+ /// The haystack may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::Finder;
+ ///
+ /// let haystack = "foo bar baz";
+ /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
+ /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
+ /// assert_eq!(None, Finder::new("quux").find(haystack));
+ /// ```
+ #[inline]
+ pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
+ self.searcher.find(haystack.as_ref())
+ }
+}
+
+/// A single substring reverse searcher fixed to a particular needle.
+///
+/// The purpose of this type is to permit callers to construct a substring
+/// searcher that can be used to search haystacks without the overhead of
+/// constructing the searcher in the first place. This is a somewhat niche
+/// concern when it's necessary to re-use the same needle to search multiple
+/// different haystacks with as little overhead as possible. In general, using
+/// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
+/// or
+/// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
+/// is good enough, but `FinderReverse` is useful when you can meaningfully
+/// observe searcher construction time in a profile.
+///
+/// When the `std` feature is enabled, then this type has an `into_owned`
+/// version which permits building a `FinderReverse` that is not connected to
+/// the lifetime of its needle.
+#[derive(Clone, Debug)]
+pub struct FinderReverse<'a> {
+ searcher: TwoWay<'a>,
+}
+
+impl<'a> FinderReverse<'a> {
+ /// Create a new reverse finder for the given needle.
+ #[inline]
+ pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
+ FinderReverse { searcher: TwoWay::reverse(needle.as_ref()) }
+ }
+
+ /// Convert this finder into its owned variant, such that it no longer
+ /// borrows the needle.
+ ///
+ /// If this is already an owned finder, then this is a no-op. Otherwise,
+ /// this copies the needle.
+ ///
+ /// This is only available when the `std` feature is enabled.
+ #[cfg(feature = "std")]
+ #[inline]
+ pub fn into_owned(self) -> FinderReverse<'static> {
+ FinderReverse { searcher: self.searcher.into_owned() }
+ }
+
+ /// Returns the needle that this finder searches for.
+ ///
+ /// Note that the lifetime of the needle returned is tied to the lifetime
+ /// of this finder, and may be shorter than the `'a` lifetime. Namely,
+ /// a finder's needle can be either borrowed or owned, so the lifetime of
+ /// the needle returned must necessarily be the shorter of the two.
+ #[inline]
+ pub fn needle(&self) -> &[u8] {
+ self.searcher.needle()
+ }
+
+ /// Returns the index of the last occurrence of this needle in the given
+ /// haystack.
+ ///
+ /// The haystack may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::FinderReverse;
+ ///
+ /// let haystack = "foo bar baz";
+ /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack));
+ /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack));
+ /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
+ /// ```
+ #[inline]
+ pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
+ self.searcher.rfind(haystack.as_ref())
+ }
+}
+
+/// An iterator over non-overlapping substring matches.
+///
+/// Matches are reported by the byte offset at which they begin.
+///
+/// `'a` is the shorter of two lifetimes: the byte string being searched or the
+/// byte string being looked for.
+#[derive(Debug)]
+pub struct Find<'a> {
+ haystack: &'a [u8],
+ prestate: PrefilterState,
+ searcher: TwoWay<'a>,
+ pos: usize,
+}
+
+impl<'a> Find<'a> {
+ fn new(haystack: &'a [u8], needle: &'a [u8]) -> Find<'a> {
+ let searcher = TwoWay::forward(needle);
+ let prestate = searcher.prefilter_state();
+ Find { haystack, prestate, searcher, pos: 0 }
+ }
+}
+
+impl<'a> Iterator for Find<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ if self.pos > self.haystack.len() {
+ return None;
+ }
+ let result = self
+ .searcher
+ .find_with(&mut self.prestate, &self.haystack[self.pos..]);
+ match result {
+ None => None,
+ Some(i) => {
+ let pos = self.pos + i;
+ self.pos = pos + cmp::max(1, self.searcher.needle().len());
+ Some(pos)
+ }
+ }
+ }
+}
+
+/// An iterator over non-overlapping substring matches in reverse.
+///
+/// Matches are reported by the byte offset at which they begin.
+///
+/// `'a` is the shorter of two lifetimes: the byte string being searched or the
+/// byte string being looked for.
+#[derive(Debug)]
+pub struct FindReverse<'a> {
+ haystack: &'a [u8],
+ prestate: PrefilterState,
+ searcher: TwoWay<'a>,
+ /// When searching with an empty needle, this gets set to `None` after
+ /// we've yielded the last element at `0`.
+ pos: Option<usize>,
+}
+
+impl<'a> FindReverse<'a> {
+ fn new(haystack: &'a [u8], needle: &'a [u8]) -> FindReverse<'a> {
+ let searcher = TwoWay::reverse(needle);
+ let prestate = searcher.prefilter_state();
+ let pos = Some(haystack.len());
+ FindReverse { haystack, prestate, searcher, pos }
+ }
+
+ fn haystack(&self) -> &'a [u8] {
+ self.haystack
+ }
+
+ fn needle(&self) -> &[u8] {
+ self.searcher.needle()
+ }
+}
+
+impl<'a> Iterator for FindReverse<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ let pos = match self.pos {
+ None => return None,
+ Some(pos) => pos,
+ };
+ let result = self
+ .searcher
+ .rfind_with(&mut self.prestate, &self.haystack[..pos]);
+ match result {
+ None => None,
+ Some(i) => {
+ if pos == i {
+ self.pos = pos.checked_sub(1);
+ } else {
+ self.pos = Some(i);
+ }
+ Some(i)
+ }
+ }
+ }
+}
+
+/// An iterator over the bytes in a byte string.
+///
+/// `'a` is the lifetime of the byte string being traversed.
+#[derive(Clone, Debug)]
+pub struct Bytes<'a> {
+ it: slice::Iter<'a, u8>,
+}
+
+impl<'a> Bytes<'a> {
+ /// Views the remaining underlying data as a subslice of the original data.
+ /// This has the same lifetime as the original slice,
+ /// and so the iterator can continue to be used while this exists.
+ #[inline]
+ pub fn as_slice(&self) -> &'a [u8] {
+ self.it.as_slice()
+ }
+}
+
+impl<'a> Iterator for Bytes<'a> {
+ type Item = u8;
+
+ #[inline]
+ fn next(&mut self) -> Option<u8> {
+ self.it.next().map(|&b| b)
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.it.size_hint()
+ }
+}
+
+impl<'a> DoubleEndedIterator for Bytes<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<u8> {
+ self.it.next_back().map(|&b| b)
+ }
+}
+
+impl<'a> ExactSizeIterator for Bytes<'a> {
+ #[inline]
+ fn len(&self) -> usize {
+ self.it.len()
+ }
+}
+
+impl<'a> iter::FusedIterator for Bytes<'a> {}
+
+/// An iterator over the fields in a byte string, separated by whitespace.
+///
+/// This iterator splits on contiguous runs of whitespace, such that the fields
+/// in `foo\t\t\n \nbar` are `foo` and `bar`.
+///
+/// `'a` is the lifetime of the byte string being split.
+#[derive(Debug)]
+pub struct Fields<'a> {
+ it: FieldsWith<'a, fn(char) -> bool>,
+}
+
+impl<'a> Fields<'a> {
+ fn new(bytes: &'a [u8]) -> Fields<'a> {
+ Fields { it: bytes.fields_with(|ch| ch.is_whitespace()) }
+ }
+}
+
+impl<'a> Iterator for Fields<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ self.it.next()
+ }
+}
+
+/// An iterator over fields in the byte string, separated by a predicate over
+/// codepoints.
+///
+/// This iterator splits a byte string based on its predicate function such
+/// that the elements returned are separated by contiguous runs of codepoints
+/// for which the predicate returns true.
+///
+/// `'a` is the lifetime of the byte string being split, while `F` is the type
+/// of the predicate, i.e., `FnMut(char) -> bool`.
+#[derive(Debug)]
+pub struct FieldsWith<'a, F> {
+ f: F,
+ bytes: &'a [u8],
+ chars: CharIndices<'a>,
+}
+
+impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
+ fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
+ FieldsWith { f, bytes, chars: bytes.char_indices() }
+ }
+}
+
+impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ let (start, mut end);
+ loop {
+ match self.chars.next() {
+ None => return None,
+ Some((s, e, ch)) => {
+ if !(self.f)(ch) {
+ start = s;
+ end = e;
+ break;
+ }
+ }
+ }
+ }
+ while let Some((_, e, ch)) = self.chars.next() {
+ if (self.f)(ch) {
+ break;
+ }
+ end = e;
+ }
+ Some(&self.bytes[start..end])
+ }
+}
+
+/// An iterator over substrings in a byte string, split by a separator.
+///
+/// `'a` is the lifetime of the byte string being split.
+#[derive(Debug)]
+pub struct Split<'a> {
+ finder: Find<'a>,
+ /// The end position of the previous match of our splitter. The element
+ /// we yield corresponds to the substring starting at `last` up to the
+ /// beginning of the next match of the splitter.
+ last: usize,
+ /// Only set when iteration is complete. A corner case here is when a
+ /// splitter is matched at the end of the haystack. At that point, we still
+ /// need to yield an empty string following it.
+ done: bool,
+}
+
+impl<'a> Split<'a> {
+ fn new(haystack: &'a [u8], splitter: &'a [u8]) -> Split<'a> {
+ let finder = haystack.find_iter(splitter);
+ Split { finder, last: 0, done: false }
+ }
+}
+
+impl<'a> Iterator for Split<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ let haystack = self.finder.haystack;
+ match self.finder.next() {
+ Some(start) => {
+ let next = &haystack[self.last..start];
+ self.last = start + self.finder.searcher.needle().len();
+ Some(next)
+ }
+ None => {
+ if self.last >= haystack.len() {
+ if !self.done {
+ self.done = true;
+ Some(b"")
+ } else {
+ None
+ }
+ } else {
+ let s = &haystack[self.last..];
+ self.last = haystack.len();
+ self.done = true;
+ Some(s)
+ }
+ }
+ }
+ }
+}
+
+/// An iterator over substrings in a byte string, split by a separator, in
+/// reverse.
+///
+/// `'a` is the lifetime of the byte string being split, while `F` is the type
+/// of the predicate, i.e., `FnMut(char) -> bool`.
+#[derive(Debug)]
+pub struct SplitReverse<'a> {
+ finder: FindReverse<'a>,
+ /// The end position of the previous match of our splitter. The element
+ /// we yield corresponds to the substring starting at `last` up to the
+ /// beginning of the next match of the splitter.
+ last: usize,
+ /// Only set when iteration is complete. A corner case here is when a
+ /// splitter is matched at the end of the haystack. At that point, we still
+ /// need to yield an empty string following it.
+ done: bool,
+}
+
+impl<'a> SplitReverse<'a> {
+ fn new(haystack: &'a [u8], splitter: &'a [u8]) -> SplitReverse<'a> {
+ let finder = haystack.rfind_iter(splitter);
+ SplitReverse { finder, last: haystack.len(), done: false }
+ }
+}
+
+impl<'a> Iterator for SplitReverse<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ let haystack = self.finder.haystack();
+ match self.finder.next() {
+ Some(start) => {
+ let nlen = self.finder.needle().len();
+ let next = &haystack[start + nlen..self.last];
+ self.last = start;
+ Some(next)
+ }
+ None => {
+ if self.last == 0 {
+ if !self.done {
+ self.done = true;
+ Some(b"")
+ } else {
+ None
+ }
+ } else {
+ let s = &haystack[..self.last];
+ self.last = 0;
+ self.done = true;
+ Some(s)
+ }
+ }
+ }
+ }
+}
+
+/// An iterator over at most `n` substrings in a byte string, split by a
+/// separator.
+///
+/// `'a` is the lifetime of the byte string being split, while `F` is the type
+/// of the predicate, i.e., `FnMut(char) -> bool`.
+#[derive(Debug)]
+pub struct SplitN<'a> {
+ split: Split<'a>,
+ limit: usize,
+ count: usize,
+}
+
+impl<'a> SplitN<'a> {
+ fn new(
+ haystack: &'a [u8],
+ splitter: &'a [u8],
+ limit: usize,
+ ) -> SplitN<'a> {
+ let split = haystack.split_str(splitter);
+ SplitN { split, limit, count: 0 }
+ }
+}
+
+impl<'a> Iterator for SplitN<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ self.count += 1;
+ if self.count > self.limit || self.split.done {
+ None
+ } else if self.count == self.limit {
+ Some(&self.split.finder.haystack[self.split.last..])
+ } else {
+ self.split.next()
+ }
+ }
+}
+
+/// An iterator over at most `n` substrings in a byte string, split by a
+/// separator, in reverse.
+///
+/// `'a` is the lifetime of the byte string being split, while `F` is the type
+/// of the predicate, i.e., `FnMut(char) -> bool`.
+#[derive(Debug)]
+pub struct SplitNReverse<'a> {
+ split: SplitReverse<'a>,
+ limit: usize,
+ count: usize,
+}
+
+impl<'a> SplitNReverse<'a> {
+ fn new(
+ haystack: &'a [u8],
+ splitter: &'a [u8],
+ limit: usize,
+ ) -> SplitNReverse<'a> {
+ let split = haystack.rsplit_str(splitter);
+ SplitNReverse { split, limit, count: 0 }
+ }
+}
+
+impl<'a> Iterator for SplitNReverse<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ self.count += 1;
+ if self.count > self.limit || self.split.done {
+ None
+ } else if self.count == self.limit {
+ Some(&self.split.finder.haystack()[..self.split.last])
+ } else {
+ self.split.next()
+ }
+ }
+}
+
+/// An iterator over all lines in a byte string, without their terminators.
+///
+/// For this iterator, the only line terminators recognized are `\r\n` and
+/// `\n`.
+///
+/// `'a` is the lifetime of the byte string being iterated over.
+pub struct Lines<'a> {
+ it: LinesWithTerminator<'a>,
+}
+
+impl<'a> Lines<'a> {
+ fn new(bytes: &'a [u8]) -> Lines<'a> {
+ Lines { it: LinesWithTerminator::new(bytes) }
+ }
+}
+
+impl<'a> Iterator for Lines<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ let mut line = self.it.next()?;
+ if line.last_byte() == Some(b'\n') {
+ line = &line[..line.len() - 1];
+ if line.last_byte() == Some(b'\r') {
+ line = &line[..line.len() - 1];
+ }
+ }
+ Some(line)
+ }
+}
+
+/// An iterator over all lines in a byte string, including their terminators.
+///
+/// For this iterator, the only line terminator recognized is `\n`. (Since
+/// line terminators are included, this also handles `\r\n` line endings.)
+///
+/// Line terminators are only included if they are present in the original
+/// byte string. For example, the last line in a byte string may not end with
+/// a line terminator.
+///
+/// Concatenating all elements yielded by this iterator is guaranteed to yield
+/// the original byte string.
+///
+/// `'a` is the lifetime of the byte string being iterated over.
+pub struct LinesWithTerminator<'a> {
+ bytes: &'a [u8],
+}
+
+impl<'a> LinesWithTerminator<'a> {
+ fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
+ LinesWithTerminator { bytes }
+ }
+}
+
+impl<'a> Iterator for LinesWithTerminator<'a> {
+ type Item = &'a [u8];
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a [u8]> {
+ match self.bytes.find_byte(b'\n') {
+ None if self.bytes.is_empty() => None,
+ None => {
+ let line = self.bytes;
+ self.bytes = b"";
+ Some(line)
+ }
+ Some(end) => {
+ let line = &self.bytes[..end + 1];
+ self.bytes = &self.bytes[end + 1..];
+ Some(line)
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use ext_slice::{ByteSlice, B};
+ use tests::LOSSY_TESTS;
+
+ #[test]
+ fn to_str_lossy() {
+ for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
+ let got = B(input).to_str_lossy();
+ assert_eq!(
+ expected.as_bytes(),
+ got.as_bytes(),
+ "to_str_lossy(ith: {:?}, given: {:?})",
+ i,
+ input,
+ );
+
+ let mut got = String::new();
+ B(input).to_str_lossy_into(&mut got);
+ assert_eq!(
+ expected.as_bytes(),
+ got.as_bytes(),
+ "to_str_lossy_into",
+ );
+
+ let got = String::from_utf8_lossy(input);
+ assert_eq!(expected.as_bytes(), got.as_bytes(), "std");
+ }
+ }
+
+ #[test]
+ #[should_panic]
+ fn copy_within_fail1() {
+ let mut buf = *b"foobar";
+ let s = &mut buf;
+ s.copy_within_str(0..2, 5);
+ }
+
+ #[test]
+ #[should_panic]
+ fn copy_within_fail2() {
+ let mut buf = *b"foobar";
+ let s = &mut buf;
+ s.copy_within_str(3..2, 0);
+ }
+
+ #[test]
+ #[should_panic]
+ fn copy_within_fail3() {
+ let mut buf = *b"foobar";
+ let s = &mut buf;
+ s.copy_within_str(5..7, 0);
+ }
+
+ #[test]
+ #[should_panic]
+ fn copy_within_fail4() {
+ let mut buf = *b"foobar";
+ let s = &mut buf;
+ s.copy_within_str(0..1, 6);
+ }
+}
diff --git a/src/ext_vec.rs b/src/ext_vec.rs
new file mode 100644
index 0000000..6f6db56
--- /dev/null
+++ b/src/ext_vec.rs
@@ -0,0 +1,1108 @@
+#![allow(unused_imports)]
+
+use std::borrow::Cow;
+use std::error;
+use std::ffi::{OsStr, OsString};
+use std::fmt;
+use std::iter;
+use std::ops;
+use std::path::{Path, PathBuf};
+use std::ptr;
+use std::str;
+use std::vec;
+
+use ext_slice::ByteSlice;
+use utf8::{self, Utf8Error};
+
+/// Concatenate the elements given by the iterator together into a single
+/// `Vec<u8>`.
+///
+/// The elements may be any type that can be cheaply converted into an `&[u8]`.
+/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr;
+///
+/// let s = bstr::concat(&["foo", "bar", "baz"]);
+/// assert_eq!(s, "foobarbaz".as_bytes());
+/// ```
+#[inline]
+pub fn concat<T, I>(elements: I) -> Vec<u8>
+where
+ T: AsRef<[u8]>,
+ I: IntoIterator<Item = T>,
+{
+ let mut dest = vec![];
+ for element in elements {
+ dest.push_str(element);
+ }
+ dest
+}
+
+/// Join the elements given by the iterator with the given separator into a
+/// single `Vec<u8>`.
+///
+/// Both the separator and the elements may be any type that can be cheaply
+/// converted into an `&[u8]`. This includes, but is not limited to,
+/// `&str`, `&BStr` and `&[u8]` itself.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr;
+///
+/// let s = bstr::join(",", &["foo", "bar", "baz"]);
+/// assert_eq!(s, "foo,bar,baz".as_bytes());
+/// ```
+#[inline]
+pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
+where
+ B: AsRef<[u8]>,
+ T: AsRef<[u8]>,
+ I: IntoIterator<Item = T>,
+{
+ let mut it = elements.into_iter();
+ let mut dest = vec![];
+ match it.next() {
+ None => return dest,
+ Some(first) => {
+ dest.push_str(first);
+ }
+ }
+ for element in it {
+ dest.push_str(&separator);
+ dest.push_str(element);
+ }
+ dest
+}
+
+impl ByteVec for Vec<u8> {
+ #[inline]
+ fn as_vec(&self) -> &Vec<u8> {
+ self
+ }
+
+ #[inline]
+ fn as_vec_mut(&mut self) -> &mut Vec<u8> {
+ self
+ }
+
+ #[inline]
+ fn into_vec(self) -> Vec<u8> {
+ self
+ }
+}
+
+/// Ensure that callers cannot implement `ByteSlice` by making an
+/// umplementable trait its super trait.
+pub trait Sealed {}
+impl Sealed for Vec<u8> {}
+
+/// A trait that extends `Vec<u8>` with string oriented methods.
+///
+/// Note that when using the constructor methods, such as
+/// `ByteVec::from_slice`, one should actually call them using the concrete
+/// type. For example:
+///
+/// ```
+/// use bstr::{B, ByteVec};
+///
+/// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
+/// assert_eq!(s, B("abc"));
+/// ```
+pub trait ByteVec: Sealed {
+ /// A method for accessing the raw vector bytes of this type. This is
+ /// always a no-op and callers shouldn't care about it. This only exists
+ /// for making the extension trait work.
+ #[doc(hidden)]
+ fn as_vec(&self) -> &Vec<u8>;
+
+ /// A method for accessing the raw vector bytes of this type, mutably. This
+ /// is always a no-op and callers shouldn't care about it. This only exists
+ /// for making the extension trait work.
+ #[doc(hidden)]
+ fn as_vec_mut(&mut self) -> &mut Vec<u8>;
+
+ /// A method for consuming ownership of this vector. This is always a no-op
+ /// and callers shouldn't care about it. This only exists for making the
+ /// extension trait work.
+ #[doc(hidden)]
+ fn into_vec(self) -> Vec<u8>
+ where
+ Self: Sized;
+
+ /// Create a new owned byte string from the given byte slice.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let s = Vec::from_slice(b"abc");
+ /// assert_eq!(s, B("abc"));
+ /// ```
+ #[inline]
+ fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
+ bytes.as_ref().to_vec()
+ }
+
+ /// Create a new byte string from an owned OS string.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns the original OS string if it is not valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let os_str = OsString::from("foo");
+ /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[inline]
+ fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
+ use std::os::unix::ffi::OsStringExt;
+
+ Ok(Vec::from(os_str.into_vec()))
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
+ os_str.into_string().map(Vec::from)
+ }
+
+ imp(os_str)
+ }
+
+ /// Lossily create a new byte string from an OS string slice.
+ ///
+ /// On Unix, this always succeeds, is zero cost and always returns a slice.
+ /// On non-Unix systems, this does a UTF-8 check. If the given OS string
+ /// slice is not valid UTF-8, then it is lossily decoded into valid UTF-8
+ /// (with invalid bytes replaced by the Unicode replacement codepoint).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let os_str = OsStr::new("foo");
+ /// let bs = Vec::from_os_str_lossy(os_str);
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[inline]
+ fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Cow::Borrowed(os_str.as_bytes())
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
+ match os_str.to_string_lossy() {
+ Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()),
+ Cow::Owned(x) => Cow::Owned(Vec::from(x)),
+ }
+ }
+
+ imp(os_str)
+ }
+
+ /// Create a new byte string from an owned file path.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns the original path if it is not valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::path::PathBuf;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let path = PathBuf::from("foo");
+ /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[inline]
+ fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
+ Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
+ }
+
+ /// Lossily create a new byte string from a file path.
+ ///
+ /// On Unix, this always succeeds, is zero cost and always returns a slice.
+ /// On non-Unix systems, this does a UTF-8 check. If the given path is not
+ /// valid UTF-8, then it is lossily decoded into valid UTF-8 (with invalid
+ /// bytes replaced by the Unicode replacement codepoint).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::path::Path;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let path = Path::new("foo");
+ /// let bs = Vec::from_path_lossy(path);
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[inline]
+ fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
+ Vec::from_os_str_lossy(path.as_os_str())
+ }
+
+ /// Appends the given byte to the end of this byte string.
+ ///
+ /// Note that this is equivalent to the generic `Vec::push` method. This
+ /// method is provided to permit callers to explicitly differentiate
+ /// between pushing bytes, codepoints and strings.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = <Vec<u8>>::from("abc");
+ /// s.push_byte(b'\xE2');
+ /// s.push_byte(b'\x98');
+ /// s.push_byte(b'\x83');
+ /// assert_eq!(s, "abc☃".as_bytes());
+ /// ```
+ #[inline]
+ fn push_byte(&mut self, byte: u8) {
+ self.as_vec_mut().push(byte);
+ }
+
+ /// Appends the given `char` to the end of this byte string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = <Vec<u8>>::from("abc");
+ /// s.push_char('1');
+ /// s.push_char('2');
+ /// s.push_char('3');
+ /// assert_eq!(s, "abc123".as_bytes());
+ /// ```
+ #[inline]
+ fn push_char(&mut self, ch: char) {
+ if ch.len_utf8() == 1 {
+ self.push_byte(ch as u8);
+ return;
+ }
+ self.as_vec_mut()
+ .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
+ }
+
+ /// Appends the given slice to the end of this byte string. This accepts
+ /// any type that be converted to a `&[u8]`. This includes, but is not
+ /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = <Vec<u8>>::from("abc");
+ /// s.push_str(b"123");
+ /// assert_eq!(s, "abc123".as_bytes());
+ /// ```
+ #[inline]
+ fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
+ self.as_vec_mut().extend_from_slice(bytes.as_ref());
+ }
+
+ /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
+ /// valid UTF-8.
+ ///
+ /// If it is not valid UTF-8, then a
+ /// [`FromUtf8Error`](struct.FromUtf8Error.html)
+ /// is returned. (This error can be used to examine why UTF-8 validation
+ /// failed, or to regain the original byte string.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
+ /// let bytes = Vec::from("hello");
+ /// let string = bytes.into_string()?;
+ ///
+ /// assert_eq!("hello", string);
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ ///
+ /// If this byte string is not valid UTF-8, then an error will be returned.
+ /// That error can then be used to inspect the location at which invalid
+ /// UTF-8 was found, or to regain the original byte string:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let err = bytes.into_string().unwrap_err();
+ ///
+ /// assert_eq!(err.utf8_error().valid_up_to(), 3);
+ /// assert_eq!(err.utf8_error().error_len(), Some(1));
+ ///
+ /// // At no point in this example is an allocation performed.
+ /// let bytes = Vec::from(err.into_vec());
+ /// assert_eq!(bytes, B(b"foo\xFFbar"));
+ /// ```
+ #[inline]
+ fn into_string(self) -> Result<String, FromUtf8Error>
+ where
+ Self: Sized,
+ {
+ match utf8::validate(self.as_vec()) {
+ Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
+ Ok(()) => {
+ // SAFETY: This is safe because of the guarantees provided by
+ // utf8::validate.
+ unsafe { Ok(self.into_string_unchecked()) }
+ }
+ }
+ }
+
+ /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
+ /// contains invalid UTF-8, then the invalid bytes are replaced with the
+ /// Unicode replacement codepoint.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let string = bytes.into_string_lossy();
+ /// assert_eq!(string, "foo\u{FFFD}bar");
+ /// ```
+ #[inline]
+ fn into_string_lossy(self) -> String
+ where
+ Self: Sized,
+ {
+ match self.as_vec().to_str_lossy() {
+ Cow::Borrowed(_) => {
+ // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
+ // the entire string is valid utf8.
+ unsafe { self.into_string_unchecked() }
+ }
+ Cow::Owned(s) => s,
+ }
+ }
+
+ /// Unsafely convert this byte string into a `String`, without checking for
+ /// valid UTF-8.
+ ///
+ /// # Safety
+ ///
+ /// Callers *must* ensure that this byte string is valid UTF-8 before
+ /// calling this method. Converting a byte string into a `String` that is
+ /// not valid UTF-8 is considered undefined behavior.
+ ///
+ /// This routine is useful in performance sensitive contexts where the
+ /// UTF-8 validity of the byte string is already known and it is
+ /// undesirable to pay the cost of an additional UTF-8 validation check
+ /// that [`into_string`](#method.into_string) performs.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// // SAFETY: This is safe because string literals are guaranteed to be
+ /// // valid UTF-8 by the Rust compiler.
+ /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
+ /// assert_eq!("☃βツ", s);
+ /// ```
+ #[inline]
+ unsafe fn into_string_unchecked(self) -> String
+ where
+ Self: Sized,
+ {
+ String::from_utf8_unchecked(self.into_vec())
+ }
+
+ /// Converts this byte string into an OS string, in place.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns the original byte string if it is not valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from("foo");
+ /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
+ /// assert_eq!(os_str, OsStr::new("foo"));
+ /// ```
+ #[inline]
+ fn into_os_string(self) -> Result<OsString, Vec<u8>>
+ where
+ Self: Sized,
+ {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
+ use std::os::unix::ffi::OsStringExt;
+
+ Ok(OsString::from_vec(v))
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
+ match v.into_string() {
+ Ok(s) => Ok(OsString::from(s)),
+ Err(err) => Err(err.into_vec()),
+ }
+ }
+
+ imp(self.into_vec())
+ }
+
+ /// Lossily converts this byte string into an OS string, in place.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this will perform a UTF-8 check and lossily convert this byte string
+ /// into valid UTF-8 using the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths on
+ /// non-Unix systems such as Windows, where file paths are an arbitrary
+ /// sequence of 16-bit integers.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from_slice(b"foo\xFFbar");
+ /// let os_str = bs.into_os_string_lossy();
+ /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[inline]
+ fn into_os_string_lossy(self) -> OsString
+ where
+ Self: Sized,
+ {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(v: Vec<u8>) -> OsString {
+ use std::os::unix::ffi::OsStringExt;
+
+ OsString::from_vec(v)
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(v: Vec<u8>) -> OsString {
+ OsString::from(v.into_string_lossy())
+ }
+
+ imp(self.into_vec())
+ }
+
+ /// Converts this byte string into an owned file path, in place.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this returns the original byte string if it is not valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from("foo");
+ /// let path = bs.into_path_buf().expect("should be valid UTF-8");
+ /// assert_eq!(path.as_os_str(), "foo");
+ /// ```
+ #[inline]
+ fn into_path_buf(self) -> Result<PathBuf, Vec<u8>>
+ where
+ Self: Sized,
+ {
+ self.into_os_string().map(PathBuf::from)
+ }
+
+ /// Lossily converts this byte string into an owned file path, in place.
+ ///
+ /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
+ /// this will perform a UTF-8 check and lossily convert this byte string
+ /// into valid UTF-8 using the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths on
+ /// non-Unix systems such as Windows, where file paths are an arbitrary
+ /// sequence of 16-bit integers.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from_slice(b"foo\xFFbar");
+ /// let path = bs.into_path_buf_lossy();
+ /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[inline]
+ fn into_path_buf_lossy(self) -> PathBuf
+ where
+ Self: Sized,
+ {
+ PathBuf::from(self.into_os_string_lossy())
+ }
+
+ /// Removes the last byte from this `Vec<u8>` and returns it.
+ ///
+ /// If this byte string is empty, then `None` is returned.
+ ///
+ /// If the last codepoint in this byte string is not ASCII, then removing
+ /// the last byte could make this byte string contain invalid UTF-8.
+ ///
+ /// Note that this is equivalent to the generic `Vec::pop` method. This
+ /// method is provided to permit callers to explicitly differentiate
+ /// between popping bytes and codepoints.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foo");
+ /// assert_eq!(s.pop_byte(), Some(b'o'));
+ /// assert_eq!(s.pop_byte(), Some(b'o'));
+ /// assert_eq!(s.pop_byte(), Some(b'f'));
+ /// assert_eq!(s.pop_byte(), None);
+ /// ```
+ #[inline]
+ fn pop_byte(&mut self) -> Option<u8> {
+ self.as_vec_mut().pop()
+ }
+
+ /// Removes the last codepoint from this `Vec<u8>` and returns it.
+ ///
+ /// If this byte string is empty, then `None` is returned. If the last
+ /// bytes of this byte string do not correspond to a valid UTF-8 code unit
+ /// sequence, then the Unicode replacement codepoint is yielded instead in
+ /// accordance with the
+ /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foo");
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('f'));
+ /// assert_eq!(s.pop_char(), None);
+ /// ```
+ ///
+ /// This shows the replacement codepoint substitution policy. Note that
+ /// the first pop yields a replacement codepoint but actually removes two
+ /// bytes. This is in contrast with subsequent pops when encountering
+ /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
+ /// code unit sequence.
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('f'));
+ /// assert_eq!(s.pop_char(), None);
+ /// ```
+ #[inline]
+ fn pop_char(&mut self) -> Option<char> {
+ let (ch, size) = utf8::decode_last_lossy(self.as_vec());
+ if size == 0 {
+ return None;
+ }
+ let new_len = self.as_vec().len() - size;
+ self.as_vec_mut().truncate(new_len);
+ Some(ch)
+ }
+
+ /// Removes a `char` from this `Vec<u8>` at the given byte position and
+ /// returns it.
+ ///
+ /// If the bytes at the given position do not lead to a valid UTF-8 code
+ /// unit sequence, then a
+ /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
+ ///
+ /// # Panics
+ ///
+ /// Panics if `at` is larger than or equal to this byte string's length.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foo☃bar");
+ /// assert_eq!(s.remove_char(3), '☃');
+ /// assert_eq!(s, b"foobar");
+ /// ```
+ ///
+ /// This example shows how the Unicode replacement codepoint policy is
+ /// used:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from_slice(b"foo\xFFbar");
+ /// assert_eq!(s.remove_char(3), '\u{FFFD}');
+ /// assert_eq!(s, b"foobar");
+ /// ```
+ #[inline]
+ fn remove_char(&mut self, at: usize) -> char {
+ let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
+ assert!(
+ size > 0,
+ "expected {} to be less than {}",
+ at,
+ self.as_vec().len(),
+ );
+ self.as_vec_mut().drain(at..at + size);
+ ch
+ }
+
+ /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
+ /// position.
+ ///
+ /// This is an `O(n)` operation as it may copy a number of elements in this
+ /// byte string proportional to its length.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `at` is larger than the byte string's length.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// s.insert_char(3, '☃');
+ /// assert_eq!(s, "foo☃bar".as_bytes());
+ /// ```
+ #[inline]
+ fn insert_char(&mut self, at: usize, ch: char) {
+ self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
+ }
+
+ /// Inserts the given byte string into this byte string at a particular
+ /// byte position.
+ ///
+ /// This is an `O(n)` operation as it may copy a number of elements in this
+ /// byte string proportional to its length.
+ ///
+ /// The given byte string may be any type that can be cheaply converted
+ /// into a `&[u8]`. This includes, but is not limited to, `&str` and
+ /// `&[u8]`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `at` is larger than the byte string's length.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// s.insert_str(3, "☃☃☃");
+ /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
+ /// ```
+ #[inline]
+ fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
+ let bytes = bytes.as_ref();
+ let len = self.as_vec().len();
+ assert!(at <= len, "expected {} to be <= {}", at, len);
+
+ // SAFETY: We'd like to efficiently splice in the given bytes into
+ // this byte string. Since we are only working with `u8` elements here,
+ // we only need to consider whether our bounds are correct and whether
+ // our byte string has enough space.
+ self.as_vec_mut().reserve(bytes.len());
+ unsafe {
+ // Shift bytes after `at` over by the length of `bytes` to make
+ // room for it. This requires referencing two regions of memory
+ // that may overlap, so we use ptr::copy.
+ ptr::copy(
+ self.as_vec().as_ptr().add(at),
+ self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
+ len - at,
+ );
+ // Now copy the bytes given into the room we made above. In this
+ // case, we know that the given bytes cannot possibly overlap
+ // with this byte string since we have a mutable borrow of the
+ // latter. Thus, we can use a nonoverlapping copy.
+ ptr::copy_nonoverlapping(
+ bytes.as_ptr(),
+ self.as_vec_mut().as_mut_ptr().add(at),
+ bytes.len(),
+ );
+ self.as_vec_mut().set_len(len + bytes.len());
+ }
+ }
+
+ /// Removes the specified range in this byte string and replaces it with
+ /// the given bytes. The given bytes do not need to have the same length
+ /// as the range provided.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the given range is invalid.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// s.replace_range(2..4, "xxxxx");
+ /// assert_eq!(s, "foxxxxxar".as_bytes());
+ /// ```
+ #[inline]
+ fn replace_range<R, B>(&mut self, range: R, replace_with: B)
+ where
+ R: ops::RangeBounds<usize>,
+ B: AsRef<[u8]>,
+ {
+ self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned());
+ }
+
+ /// Creates a draining iterator that removes the specified range in this
+ /// `Vec<u8>` and yields each of the removed bytes.
+ ///
+ /// Note that the elements specified by the given range are removed
+ /// regardless of whether the returned iterator is fully exhausted.
+ ///
+ /// Also note that is is unspecified how many bytes are removed from the
+ /// `Vec<u8>` if the `DrainBytes` iterator is leaked.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the given range is not valid.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// {
+ /// let mut drainer = s.drain_bytes(2..4);
+ /// assert_eq!(drainer.next(), Some(b'o'));
+ /// assert_eq!(drainer.next(), Some(b'b'));
+ /// assert_eq!(drainer.next(), None);
+ /// }
+ /// assert_eq!(s, "foar".as_bytes());
+ /// ```
+ #[inline]
+ fn drain_bytes<R>(&mut self, range: R) -> DrainBytes
+ where
+ R: ops::RangeBounds<usize>,
+ {
+ DrainBytes { it: self.as_vec_mut().drain(range) }
+ }
+}
+
+/// A draining byte oriented iterator for `Vec<u8>`.
+///
+/// This iterator is created by
+/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::ByteVec;
+///
+/// let mut s = Vec::from("foobar");
+/// {
+/// let mut drainer = s.drain_bytes(2..4);
+/// assert_eq!(drainer.next(), Some(b'o'));
+/// assert_eq!(drainer.next(), Some(b'b'));
+/// assert_eq!(drainer.next(), None);
+/// }
+/// assert_eq!(s, "foar".as_bytes());
+/// ```
+#[derive(Debug)]
+pub struct DrainBytes<'a> {
+ it: vec::Drain<'a, u8>,
+}
+
+impl<'a> iter::FusedIterator for DrainBytes<'a> {}
+
+impl<'a> Iterator for DrainBytes<'a> {
+ type Item = u8;
+
+ #[inline]
+ fn next(&mut self) -> Option<u8> {
+ self.it.next()
+ }
+}
+
+impl<'a> DoubleEndedIterator for DrainBytes<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<u8> {
+ self.it.next_back()
+ }
+}
+
+impl<'a> ExactSizeIterator for DrainBytes<'a> {
+ #[inline]
+ fn len(&self) -> usize {
+ self.it.len()
+ }
+}
+
+/// An error that may occur when converting a `Vec<u8>` to a `String`.
+///
+/// This error includes the original `Vec<u8>` that failed to convert to a
+/// `String`. This permits callers to recover the allocation used even if it
+/// it not valid UTF-8.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::{B, ByteVec};
+///
+/// let bytes = Vec::from_slice(b"foo\xFFbar");
+/// let err = bytes.into_string().unwrap_err();
+///
+/// assert_eq!(err.utf8_error().valid_up_to(), 3);
+/// assert_eq!(err.utf8_error().error_len(), Some(1));
+///
+/// // At no point in this example is an allocation performed.
+/// let bytes = Vec::from(err.into_vec());
+/// assert_eq!(bytes, B(b"foo\xFFbar"));
+/// ```
+#[derive(Debug, Eq, PartialEq)]
+pub struct FromUtf8Error {
+ original: Vec<u8>,
+ err: Utf8Error,
+}
+
+impl FromUtf8Error {
+ /// Return the original bytes as a slice that failed to convert to a
+ /// `String`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let err = bytes.into_string().unwrap_err();
+ ///
+ /// // At no point in this example is an allocation performed.
+ /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &[u8] {
+ &self.original
+ }
+
+ /// Consume this error and return the original byte string that failed to
+ /// convert to a `String`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let err = bytes.into_string().unwrap_err();
+ /// let original = err.into_vec();
+ ///
+ /// // At no point in this example is an allocation performed.
+ /// assert_eq!(original, B(b"foo\xFFbar"));
+ /// ```
+ #[inline]
+ pub fn into_vec(self) -> Vec<u8> {
+ self.original
+ }
+
+ /// Return the underlying UTF-8 error that occurred. This error provides
+ /// information on the nature and location of the invalid UTF-8 detected.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let err = bytes.into_string().unwrap_err();
+ ///
+ /// assert_eq!(err.utf8_error().valid_up_to(), 3);
+ /// assert_eq!(err.utf8_error().error_len(), Some(1));
+ /// ```
+ #[inline]
+ pub fn utf8_error(&self) -> &Utf8Error {
+ &self.err
+ }
+}
+
+impl error::Error for FromUtf8Error {
+ #[inline]
+ fn description(&self) -> &str {
+ "invalid UTF-8 vector"
+ }
+}
+
+impl fmt::Display for FromUtf8Error {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}", self.err)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use ext_slice::B;
+ use ext_vec::ByteVec;
+
+ #[test]
+ fn insert() {
+ let mut s = vec![];
+ s.insert_str(0, "foo");
+ assert_eq!(s, "foo".as_bytes());
+
+ let mut s = Vec::from("a");
+ s.insert_str(0, "foo");
+ assert_eq!(s, "fooa".as_bytes());
+
+ let mut s = Vec::from("a");
+ s.insert_str(1, "foo");
+ assert_eq!(s, "afoo".as_bytes());
+
+ let mut s = Vec::from("foobar");
+ s.insert_str(3, "quux");
+ assert_eq!(s, "fooquuxbar".as_bytes());
+
+ let mut s = Vec::from("foobar");
+ s.insert_str(3, "x");
+ assert_eq!(s, "fooxbar".as_bytes());
+
+ let mut s = Vec::from("foobar");
+ s.insert_str(0, "x");
+ assert_eq!(s, "xfoobar".as_bytes());
+
+ let mut s = Vec::from("foobar");
+ s.insert_str(6, "x");
+ assert_eq!(s, "foobarx".as_bytes());
+
+ let mut s = Vec::from("foobar");
+ s.insert_str(3, "quuxbazquux");
+ assert_eq!(s, "fooquuxbazquuxbar".as_bytes());
+ }
+
+ #[test]
+ #[should_panic]
+ fn insert_fail1() {
+ let mut s = vec![];
+ s.insert_str(1, "foo");
+ }
+
+ #[test]
+ #[should_panic]
+ fn insert_fail2() {
+ let mut s = Vec::from("a");
+ s.insert_str(2, "foo");
+ }
+
+ #[test]
+ #[should_panic]
+ fn insert_fail3() {
+ let mut s = Vec::from("foobar");
+ s.insert_str(7, "foo");
+ }
+}
diff --git a/src/impls.rs b/src/impls.rs
new file mode 100644
index 0000000..c7d0be3
--- /dev/null
+++ b/src/impls.rs
@@ -0,0 +1,969 @@
+macro_rules! impl_partial_eq {
+ ($lhs:ty, $rhs:ty) => {
+ impl<'a, 'b> PartialEq<$rhs> for $lhs {
+ #[inline]
+ fn eq(&self, other: &$rhs) -> bool {
+ let other: &[u8] = other.as_ref();
+ PartialEq::eq(self.as_bytes(), other)
+ }
+ }
+
+ impl<'a, 'b> PartialEq<$lhs> for $rhs {
+ #[inline]
+ fn eq(&self, other: &$lhs) -> bool {
+ let this: &[u8] = self.as_ref();
+ PartialEq::eq(this, other.as_bytes())
+ }
+ }
+ };
+}
+
+#[cfg(feature = "std")]
+macro_rules! impl_partial_eq_cow {
+ ($lhs:ty, $rhs:ty) => {
+ impl<'a, 'b> PartialEq<$rhs> for $lhs {
+ #[inline]
+ fn eq(&self, other: &$rhs) -> bool {
+ let other: &[u8] = (&**other).as_ref();
+ PartialEq::eq(self.as_bytes(), other)
+ }
+ }
+
+ impl<'a, 'b> PartialEq<$lhs> for $rhs {
+ #[inline]
+ fn eq(&self, other: &$lhs) -> bool {
+ let this: &[u8] = (&**other).as_ref();
+ PartialEq::eq(this, other.as_bytes())
+ }
+ }
+ };
+}
+
+macro_rules! impl_partial_ord {
+ ($lhs:ty, $rhs:ty) => {
+ impl<'a, 'b> PartialOrd<$rhs> for $lhs {
+ #[inline]
+ fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
+ let other: &[u8] = other.as_ref();
+ PartialOrd::partial_cmp(self.as_bytes(), other)
+ }
+ }
+
+ impl<'a, 'b> PartialOrd<$lhs> for $rhs {
+ #[inline]
+ fn partial_cmp(&self, other: &$lhs) -> Option<Ordering> {
+ let this: &[u8] = self.as_ref();
+ PartialOrd::partial_cmp(this, other.as_bytes())
+ }
+ }
+ };
+}
+
+#[cfg(feature = "std")]
+mod bstring {
+ use std::borrow::{Borrow, Cow, ToOwned};
+ use std::cmp::Ordering;
+ use std::fmt;
+ use std::iter::FromIterator;
+ use std::ops;
+
+ use bstr::BStr;
+ use bstring::BString;
+ use ext_vec::ByteVec;
+
+ impl fmt::Display for BString {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Display::fmt(self.as_bstr(), f)
+ }
+ }
+
+ impl fmt::Debug for BString {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self.as_bstr(), f)
+ }
+ }
+
+ impl ops::Deref for BString {
+ type Target = Vec<u8>;
+
+ #[inline]
+ fn deref(&self) -> &Vec<u8> {
+ &self.bytes
+ }
+ }
+
+ impl ops::DerefMut for BString {
+ #[inline]
+ fn deref_mut(&mut self) -> &mut Vec<u8> {
+ &mut self.bytes
+ }
+ }
+
+ impl AsRef<[u8]> for BString {
+ #[inline]
+ fn as_ref(&self) -> &[u8] {
+ &self.bytes
+ }
+ }
+
+ impl AsRef<BStr> for BString {
+ #[inline]
+ fn as_ref(&self) -> &BStr {
+ self.as_bstr()
+ }
+ }
+
+ impl AsMut<[u8]> for BString {
+ #[inline]
+ fn as_mut(&mut self) -> &mut [u8] {
+ &mut self.bytes
+ }
+ }
+
+ impl AsMut<BStr> for BString {
+ #[inline]
+ fn as_mut(&mut self) -> &mut BStr {
+ self.as_mut_bstr()
+ }
+ }
+
+ impl Borrow<BStr> for BString {
+ #[inline]
+ fn borrow(&self) -> &BStr {
+ self.as_bstr()
+ }
+ }
+
+ impl ToOwned for BStr {
+ type Owned = BString;
+
+ #[inline]
+ fn to_owned(&self) -> BString {
+ BString::from(self)
+ }
+ }
+
+ impl Default for BString {
+ fn default() -> BString {
+ BString::from(vec![])
+ }
+ }
+
+ impl<'a> From<&'a [u8]> for BString {
+ #[inline]
+ fn from(s: &'a [u8]) -> BString {
+ BString::from(s.to_vec())
+ }
+ }
+
+ impl From<Vec<u8>> for BString {
+ #[inline]
+ fn from(s: Vec<u8>) -> BString {
+ BString { bytes: s }
+ }
+ }
+
+ impl From<BString> for Vec<u8> {
+ #[inline]
+ fn from(s: BString) -> Vec<u8> {
+ s.bytes
+ }
+ }
+
+ impl<'a> From<&'a str> for BString {
+ #[inline]
+ fn from(s: &'a str) -> BString {
+ BString::from(s.as_bytes().to_vec())
+ }
+ }
+
+ impl From<String> for BString {
+ #[inline]
+ fn from(s: String) -> BString {
+ BString::from(s.into_bytes())
+ }
+ }
+
+ impl<'a> From<&'a BStr> for BString {
+ #[inline]
+ fn from(s: &'a BStr) -> BString {
+ BString::from(s.bytes.to_vec())
+ }
+ }
+
+ impl<'a> From<BString> for Cow<'a, BStr> {
+ #[inline]
+ fn from(s: BString) -> Cow<'a, BStr> {
+ Cow::Owned(s)
+ }
+ }
+
+ impl FromIterator<char> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> BString {
+ BString::from(iter.into_iter().collect::<String>())
+ }
+ }
+
+ impl FromIterator<u8> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> BString {
+ BString::from(iter.into_iter().collect::<Vec<u8>>())
+ }
+ }
+
+ impl<'a> FromIterator<&'a str> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> BString {
+ let mut buf = vec![];
+ for b in iter {
+ buf.push_str(b);
+ }
+ BString::from(buf)
+ }
+ }
+
+ impl<'a> FromIterator<&'a [u8]> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = &'a [u8]>>(iter: T) -> BString {
+ let mut buf = vec![];
+ for b in iter {
+ buf.push_str(b);
+ }
+ BString::from(buf)
+ }
+ }
+
+ impl<'a> FromIterator<&'a BStr> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = &'a BStr>>(iter: T) -> BString {
+ let mut buf = vec![];
+ for b in iter {
+ buf.push_str(b);
+ }
+ BString::from(buf)
+ }
+ }
+
+ impl FromIterator<BString> for BString {
+ #[inline]
+ fn from_iter<T: IntoIterator<Item = BString>>(iter: T) -> BString {
+ let mut buf = vec![];
+ for b in iter {
+ buf.push_str(b);
+ }
+ BString::from(buf)
+ }
+ }
+
+ impl Eq for BString {}
+
+ impl PartialEq for BString {
+ #[inline]
+ fn eq(&self, other: &BString) -> bool {
+ &self[..] == &other[..]
+ }
+ }
+
+ impl_partial_eq!(BString, Vec<u8>);
+ impl_partial_eq!(BString, [u8]);
+ impl_partial_eq!(BString, &'a [u8]);
+ impl_partial_eq!(BString, String);
+ impl_partial_eq!(BString, str);
+ impl_partial_eq!(BString, &'a str);
+ impl_partial_eq!(BString, BStr);
+ impl_partial_eq!(BString, &'a BStr);
+
+ impl PartialOrd for BString {
+ #[inline]
+ fn partial_cmp(&self, other: &BString) -> Option<Ordering> {
+ PartialOrd::partial_cmp(&self.bytes, &other.bytes)
+ }
+ }
+
+ impl Ord for BString {
+ #[inline]
+ fn cmp(&self, other: &BString) -> Ordering {
+ self.partial_cmp(other).unwrap()
+ }
+ }
+
+ impl_partial_ord!(BString, Vec<u8>);
+ impl_partial_ord!(BString, [u8]);
+ impl_partial_ord!(BString, &'a [u8]);
+ impl_partial_ord!(BString, String);
+ impl_partial_ord!(BString, str);
+ impl_partial_ord!(BString, &'a str);
+ impl_partial_ord!(BString, BStr);
+ impl_partial_ord!(BString, &'a BStr);
+}
+
+mod bstr {
+ #[cfg(feature = "std")]
+ use std::borrow::Cow;
+
+ use core::cmp::Ordering;
+ use core::fmt;
+ use core::ops;
+
+ use bstr::BStr;
+ use ext_slice::ByteSlice;
+
+ impl fmt::Display for BStr {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ /// Write the given bstr (lossily) to the given formatter.
+ fn write_bstr(
+ f: &mut fmt::Formatter,
+ bstr: &BStr,
+ ) -> Result<(), fmt::Error> {
+ for chunk in bstr.utf8_chunks() {
+ f.write_str(chunk.valid())?;
+ if !chunk.invalid().is_empty() {
+ f.write_str("\u{FFFD}")?;
+ }
+ }
+ Ok(())
+ }
+
+ /// Write 'num' fill characters to the given formatter.
+ fn write_pads(f: &mut fmt::Formatter, num: usize) -> fmt::Result {
+ let fill = f.fill();
+ for _ in 0..num {
+ f.write_fmt(format_args!("{}", fill))?;
+ }
+ Ok(())
+ }
+
+ if let Some(align) = f.align() {
+ let width = f.width().unwrap_or(0);
+ let nchars = self.chars().count();
+ let remaining_pads = width.saturating_sub(nchars);
+ match align {
+ fmt::Alignment::Left => {
+ write_bstr(f, self)?;
+ write_pads(f, remaining_pads)?;
+ }
+ fmt::Alignment::Right => {
+ write_pads(f, remaining_pads)?;
+ write_bstr(f, self)?;
+ }
+ fmt::Alignment::Center => {
+ let half = remaining_pads / 2;
+ let second_half = if remaining_pads % 2 == 0 {
+ half
+ } else {
+ half + 1
+ };
+ write_pads(f, half)?;
+ write_bstr(f, self)?;
+ write_pads(f, second_half)?;
+ }
+ }
+ Ok(())
+ } else {
+ write_bstr(f, self)?;
+ Ok(())
+ }
+ }
+ }
+
+ impl fmt::Debug for BStr {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "\"")?;
+ for (s, e, ch) in self.char_indices() {
+ match ch {
+ '\0' => write!(f, "\\0")?,
+ '\u{FFFD}' => {
+ let bytes = self[s..e].as_bytes();
+ if bytes == b"\xEF\xBF\xBD" {
+ write!(f, "{}", ch.escape_debug())?;
+ } else {
+ for &b in self[s..e].as_bytes() {
+ write!(f, r"\x{:02X}", b)?;
+ }
+ }
+ }
+ // ASCII control characters except \0, \n, \r, \t
+ '\x01'..='\x08'
+ | '\x0b'
+ | '\x0c'
+ | '\x0e'..='\x19'
+ | '\x7f' => {
+ write!(f, "\\x{:02x}", ch as u32)?;
+ }
+ '\n' | '\r' | '\t' | _ => {
+ write!(f, "{}", ch.escape_debug())?;
+ }
+ }
+ }
+ write!(f, "\"")?;
+ Ok(())
+ }
+ }
+
+ impl ops::Deref for BStr {
+ type Target = [u8];
+
+ #[inline]
+ fn deref(&self) -> &[u8] {
+ &self.bytes
+ }
+ }
+
+ impl ops::DerefMut for BStr {
+ #[inline]
+ fn deref_mut(&mut self) -> &mut [u8] {
+ &mut self.bytes
+ }
+ }
+
+ impl ops::Index<usize> for BStr {
+ type Output = u8;
+
+ #[inline]
+ fn index(&self, idx: usize) -> &u8 {
+ &self.as_bytes()[idx]
+ }
+ }
+
+ impl ops::Index<ops::RangeFull> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, _: ops::RangeFull) -> &BStr {
+ self
+ }
+ }
+
+ impl ops::Index<ops::Range<usize>> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, r: ops::Range<usize>) -> &BStr {
+ BStr::new(&self.as_bytes()[r.start..r.end])
+ }
+ }
+
+ impl ops::Index<ops::RangeInclusive<usize>> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, r: ops::RangeInclusive<usize>) -> &BStr {
+ BStr::new(&self.as_bytes()[*r.start()..=*r.end()])
+ }
+ }
+
+ impl ops::Index<ops::RangeFrom<usize>> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, r: ops::RangeFrom<usize>) -> &BStr {
+ BStr::new(&self.as_bytes()[r.start..])
+ }
+ }
+
+ impl ops::Index<ops::RangeTo<usize>> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, r: ops::RangeTo<usize>) -> &BStr {
+ BStr::new(&self.as_bytes()[..r.end])
+ }
+ }
+
+ impl ops::Index<ops::RangeToInclusive<usize>> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, r: ops::RangeToInclusive<usize>) -> &BStr {
+ BStr::new(&self.as_bytes()[..=r.end])
+ }
+ }
+
+ impl ops::IndexMut<usize> for BStr {
+ #[inline]
+ fn index_mut(&mut self, idx: usize) -> &mut u8 {
+ &mut self.bytes[idx]
+ }
+ }
+
+ impl ops::IndexMut<ops::RangeFull> for BStr {
+ #[inline]
+ fn index_mut(&mut self, _: ops::RangeFull) -> &mut BStr {
+ self
+ }
+ }
+
+ impl ops::IndexMut<ops::Range<usize>> for BStr {
+ #[inline]
+ fn index_mut(&mut self, r: ops::Range<usize>) -> &mut BStr {
+ BStr::from_bytes_mut(&mut self.bytes[r.start..r.end])
+ }
+ }
+
+ impl ops::IndexMut<ops::RangeInclusive<usize>> for BStr {
+ #[inline]
+ fn index_mut(&mut self, r: ops::RangeInclusive<usize>) -> &mut BStr {
+ BStr::from_bytes_mut(&mut self.bytes[*r.start()..=*r.end()])
+ }
+ }
+
+ impl ops::IndexMut<ops::RangeFrom<usize>> for BStr {
+ #[inline]
+ fn index_mut(&mut self, r: ops::RangeFrom<usize>) -> &mut BStr {
+ BStr::from_bytes_mut(&mut self.bytes[r.start..])
+ }
+ }
+
+ impl ops::IndexMut<ops::RangeTo<usize>> for BStr {
+ #[inline]
+ fn index_mut(&mut self, r: ops::RangeTo<usize>) -> &mut BStr {
+ BStr::from_bytes_mut(&mut self.bytes[..r.end])
+ }
+ }
+
+ impl ops::IndexMut<ops::RangeToInclusive<usize>> for BStr {
+ #[inline]
+ fn index_mut(&mut self, r: ops::RangeToInclusive<usize>) -> &mut BStr {
+ BStr::from_bytes_mut(&mut self.bytes[..=r.end])
+ }
+ }
+
+ impl AsRef<[u8]> for BStr {
+ #[inline]
+ fn as_ref(&self) -> &[u8] {
+ self.as_bytes()
+ }
+ }
+
+ impl AsRef<BStr> for [u8] {
+ #[inline]
+ fn as_ref(&self) -> &BStr {
+ BStr::new(self)
+ }
+ }
+
+ impl AsRef<BStr> for str {
+ #[inline]
+ fn as_ref(&self) -> &BStr {
+ BStr::new(self)
+ }
+ }
+
+ impl AsMut<[u8]> for BStr {
+ #[inline]
+ fn as_mut(&mut self) -> &mut [u8] {
+ &mut self.bytes
+ }
+ }
+
+ impl AsMut<BStr> for [u8] {
+ #[inline]
+ fn as_mut(&mut self) -> &mut BStr {
+ BStr::new_mut(self)
+ }
+ }
+
+ impl<'a> Default for &'a BStr {
+ fn default() -> &'a BStr {
+ BStr::from_bytes(b"")
+ }
+ }
+
+ impl<'a> Default for &'a mut BStr {
+ fn default() -> &'a mut BStr {
+ BStr::from_bytes_mut(&mut [])
+ }
+ }
+
+ impl<'a> From<&'a [u8]> for &'a BStr {
+ #[inline]
+ fn from(s: &'a [u8]) -> &'a BStr {
+ BStr::from_bytes(s)
+ }
+ }
+
+ impl<'a> From<&'a str> for &'a BStr {
+ #[inline]
+ fn from(s: &'a str) -> &'a BStr {
+ BStr::from_bytes(s.as_bytes())
+ }
+ }
+
+ #[cfg(feature = "std")]
+ impl<'a> From<&'a BStr> for Cow<'a, BStr> {
+ #[inline]
+ fn from(s: &'a BStr) -> Cow<'a, BStr> {
+ Cow::Borrowed(s)
+ }
+ }
+
+ #[cfg(feature = "std")]
+ impl From<Box<[u8]>> for Box<BStr> {
+ #[inline]
+ fn from(s: Box<[u8]>) -> Box<BStr> {
+ BStr::from_boxed_bytes(s)
+ }
+ }
+
+ #[cfg(feature = "std")]
+ impl From<Box<BStr>> for Box<[u8]> {
+ #[inline]
+ fn from(s: Box<BStr>) -> Box<[u8]> {
+ BStr::into_boxed_bytes(s)
+ }
+ }
+
+ impl Eq for BStr {}
+
+ impl PartialEq<BStr> for BStr {
+ #[inline]
+ fn eq(&self, other: &BStr) -> bool {
+ self.as_bytes() == other.as_bytes()
+ }
+ }
+
+ impl_partial_eq!(BStr, [u8]);
+ impl_partial_eq!(BStr, &'a [u8]);
+ impl_partial_eq!(BStr, str);
+ impl_partial_eq!(BStr, &'a str);
+
+ #[cfg(feature = "std")]
+ impl_partial_eq!(BStr, Vec<u8>);
+ #[cfg(feature = "std")]
+ impl_partial_eq!(&'a BStr, Vec<u8>);
+ #[cfg(feature = "std")]
+ impl_partial_eq!(BStr, String);
+ #[cfg(feature = "std")]
+ impl_partial_eq!(&'a BStr, String);
+ #[cfg(feature = "std")]
+ impl_partial_eq_cow!(&'a BStr, Cow<'a, BStr>);
+ #[cfg(feature = "std")]
+ impl_partial_eq_cow!(&'a BStr, Cow<'a, str>);
+ #[cfg(feature = "std")]
+ impl_partial_eq_cow!(&'a BStr, Cow<'a, [u8]>);
+
+ impl PartialOrd for BStr {
+ #[inline]
+ fn partial_cmp(&self, other: &BStr) -> Option<Ordering> {
+ PartialOrd::partial_cmp(self.as_bytes(), other.as_bytes())
+ }
+ }
+
+ impl Ord for BStr {
+ #[inline]
+ fn cmp(&self, other: &BStr) -> Ordering {
+ self.partial_cmp(other).unwrap()
+ }
+ }
+
+ impl_partial_ord!(BStr, [u8]);
+ impl_partial_ord!(BStr, &'a [u8]);
+ impl_partial_ord!(BStr, str);
+ impl_partial_ord!(BStr, &'a str);
+
+ #[cfg(feature = "std")]
+ impl_partial_ord!(BStr, Vec<u8>);
+ #[cfg(feature = "std")]
+ impl_partial_ord!(&'a BStr, Vec<u8>);
+ #[cfg(feature = "std")]
+ impl_partial_ord!(BStr, String);
+ #[cfg(feature = "std")]
+ impl_partial_ord!(&'a BStr, String);
+}
+
+#[cfg(feature = "serde1-nostd")]
+mod bstr_serde {
+ use core::fmt;
+
+ use serde::{
+ de::Error, de::Visitor, Deserialize, Deserializer, Serialize,
+ Serializer,
+ };
+
+ use bstr::BStr;
+
+ impl Serialize for BStr {
+ #[inline]
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: Serializer,
+ {
+ serializer.serialize_bytes(self.as_bytes())
+ }
+ }
+
+ impl<'a, 'de: 'a> Deserialize<'de> for &'a BStr {
+ #[inline]
+ fn deserialize<D>(deserializer: D) -> Result<&'a BStr, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ struct BStrVisitor;
+
+ impl<'de> Visitor<'de> for BStrVisitor {
+ type Value = &'de BStr;
+
+ fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.write_str("a borrowed byte string")
+ }
+
+ #[inline]
+ fn visit_borrowed_bytes<E: Error>(
+ self,
+ value: &'de [u8],
+ ) -> Result<&'de BStr, E> {
+ Ok(BStr::new(value))
+ }
+
+ #[inline]
+ fn visit_borrowed_str<E: Error>(
+ self,
+ value: &'de str,
+ ) -> Result<&'de BStr, E> {
+ Ok(BStr::new(value))
+ }
+ }
+
+ deserializer.deserialize_bytes(BStrVisitor)
+ }
+ }
+}
+
+#[cfg(feature = "serde1")]
+mod bstring_serde {
+ use std::cmp;
+ use std::fmt;
+
+ use serde::{
+ de::Error, de::SeqAccess, de::Visitor, Deserialize, Deserializer,
+ Serialize, Serializer,
+ };
+
+ use bstring::BString;
+
+ impl Serialize for BString {
+ #[inline]
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: Serializer,
+ {
+ serializer.serialize_bytes(self.as_bytes())
+ }
+ }
+
+ impl<'de> Deserialize<'de> for BString {
+ #[inline]
+ fn deserialize<D>(deserializer: D) -> Result<BString, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ struct BStringVisitor;
+
+ impl<'de> Visitor<'de> for BStringVisitor {
+ type Value = BString;
+
+ fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.write_str("a byte string")
+ }
+
+ #[inline]
+ fn visit_seq<V: SeqAccess<'de>>(
+ self,
+ mut visitor: V,
+ ) -> Result<BString, V::Error> {
+ let len = cmp::min(visitor.size_hint().unwrap_or(0), 256);
+ let mut bytes = Vec::with_capacity(len);
+ while let Some(v) = visitor.next_element()? {
+ bytes.push(v);
+ }
+ Ok(BString::from(bytes))
+ }
+
+ #[inline]
+ fn visit_bytes<E: Error>(
+ self,
+ value: &[u8],
+ ) -> Result<BString, E> {
+ Ok(BString::from(value))
+ }
+
+ #[inline]
+ fn visit_byte_buf<E: Error>(
+ self,
+ value: Vec<u8>,
+ ) -> Result<BString, E> {
+ Ok(BString::from(value))
+ }
+
+ #[inline]
+ fn visit_str<E: Error>(
+ self,
+ value: &str,
+ ) -> Result<BString, E> {
+ Ok(BString::from(value))
+ }
+
+ #[inline]
+ fn visit_string<E: Error>(
+ self,
+ value: String,
+ ) -> Result<BString, E> {
+ Ok(BString::from(value))
+ }
+ }
+
+ deserializer.deserialize_byte_buf(BStringVisitor)
+ }
+ }
+}
+
+#[cfg(test)]
+mod display {
+ use crate::ByteSlice;
+ use bstring::BString;
+
+ #[test]
+ fn clean() {
+ assert_eq!(&format!("{}", &b"abc".as_bstr()), "abc");
+ assert_eq!(&format!("{}", &b"\xf0\x28\x8c\xbc".as_bstr()), "�(��");
+ }
+
+ #[test]
+ fn width_bigger_than_bstr() {
+ assert_eq!(&format!("{:<7}!", &b"abc".as_bstr()), "abc !");
+ assert_eq!(&format!("{:>7}!", &b"abc".as_bstr()), " abc!");
+ assert_eq!(&format!("{:^7}!", &b"abc".as_bstr()), " abc !");
+ assert_eq!(&format!("{:^6}!", &b"abc".as_bstr()), " abc !");
+ assert_eq!(&format!("{:-<7}!", &b"abc".as_bstr()), "abc----!");
+ assert_eq!(&format!("{:->7}!", &b"abc".as_bstr()), "----abc!");
+ assert_eq!(&format!("{:-^7}!", &b"abc".as_bstr()), "--abc--!");
+ assert_eq!(&format!("{:-^6}!", &b"abc".as_bstr()), "-abc--!");
+
+ assert_eq!(
+ &format!("{:<7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(�� !"
+ );
+ assert_eq!(
+ &format!("{:>7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ " �(��!"
+ );
+ assert_eq!(
+ &format!("{:^7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ " �(�� !"
+ );
+ assert_eq!(
+ &format!("{:^6}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ " �(�� !"
+ );
+
+ assert_eq!(
+ &format!("{:-<7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��---!"
+ );
+ assert_eq!(
+ &format!("{:->7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "---�(��!"
+ );
+ assert_eq!(
+ &format!("{:-^7}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "-�(��--!"
+ );
+ assert_eq!(
+ &format!("{:-^6}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "-�(��-!"
+ );
+ }
+
+ #[test]
+ fn width_lesser_than_bstr() {
+ assert_eq!(&format!("{:<2}!", &b"abc".as_bstr()), "abc!");
+ assert_eq!(&format!("{:>2}!", &b"abc".as_bstr()), "abc!");
+ assert_eq!(&format!("{:^2}!", &b"abc".as_bstr()), "abc!");
+ assert_eq!(&format!("{:-<2}!", &b"abc".as_bstr()), "abc!");
+ assert_eq!(&format!("{:->2}!", &b"abc".as_bstr()), "abc!");
+ assert_eq!(&format!("{:-^2}!", &b"abc".as_bstr()), "abc!");
+
+ assert_eq!(
+ &format!("{:<3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:>3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:^3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:^2}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+
+ assert_eq!(
+ &format!("{:-<3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:->3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:-^3}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ assert_eq!(
+ &format!("{:-^2}!", &b"\xf0\x28\x8c\xbc".as_bstr()),
+ "�(��!"
+ );
+ }
+
+ quickcheck! {
+ fn total_length(bstr: BString) -> bool {
+ let size = bstr.chars().count();
+ format!("{:<1$}", bstr.as_bstr(), size).chars().count() >= size
+ }
+ }
+}
+
+#[cfg(test)]
+mod bstring_arbitrary {
+ use bstring::BString;
+
+ use quickcheck::{Arbitrary, Gen};
+
+ impl Arbitrary for BString {
+ fn arbitrary<G: Gen>(g: &mut G) -> BString {
+ BString::from(Vec::<u8>::arbitrary(g))
+ }
+
+ fn shrink(&self) -> Box<dyn Iterator<Item = BString>> {
+ Box::new(self.bytes.shrink().map(BString::from))
+ }
+ }
+}
+
+#[test]
+fn test_debug() {
+ use crate::{ByteSlice, B};
+
+ assert_eq!(
+ r#""\0\0\0 ftypisom\0\0\x02\0isomiso2avc1mp""#,
+ format!("{:?}", b"\0\0\0 ftypisom\0\0\x02\0isomiso2avc1mp".as_bstr()),
+ );
+
+ // Tests that if the underlying bytes contain the UTF-8 encoding of the
+ // replacement codepoint, then we emit the codepoint just like other
+ // non-printable Unicode characters.
+ assert_eq!(
+ b"\"\\xFF\xEF\xBF\xBD\\xFF\"".as_bstr(),
+ // Before fixing #72, the output here would be:
+ // \\xFF\\xEF\\xBF\\xBD\\xFF
+ B(&format!("{:?}", b"\xFF\xEF\xBF\xBD\xFF".as_bstr())).as_bstr(),
+ );
+}
diff --git a/src/io.rs b/src/io.rs
new file mode 100644
index 0000000..f2b4452
--- /dev/null
+++ b/src/io.rs
@@ -0,0 +1,514 @@
+/*!
+Utilities for working with I/O using byte strings.
+
+This module currently only exports a single trait, `BufReadExt`, which provides
+facilities for conveniently and efficiently working with lines as byte strings.
+
+More APIs may be added in the future.
+*/
+
+use std::io;
+
+use ext_slice::ByteSlice;
+use ext_vec::ByteVec;
+
+/// An extention trait for
+/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html)
+/// which provides convenience APIs for dealing with byte strings.
+pub trait BufReadExt: io::BufRead {
+ /// Returns an iterator over the lines of this reader, where each line
+ /// is represented as a byte string.
+ ///
+ /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
+ /// an error is yielded if there was a problem reading from the underlying
+ /// reader.
+ ///
+ /// On success, the next line in the iterator is returned. The line does
+ /// *not* contain a trailing `\n` or `\r\n`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ ///
+ /// let mut lines = vec![];
+ /// for result in cursor.byte_lines() {
+ /// let line = result?;
+ /// lines.push(line);
+ /// }
+ /// assert_eq!(lines.len(), 3);
+ /// assert_eq!(lines[0], "lorem".as_bytes());
+ /// assert_eq!(lines[1], "ipsum".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn byte_lines(self) -> ByteLines<Self>
+ where
+ Self: Sized,
+ {
+ ByteLines { buf: self }
+ }
+
+ /// Returns an iterator over byte-terminated records of this reader, where
+ /// each record is represented as a byte string.
+ ///
+ /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
+ /// an error is yielded if there was a problem reading from the underlying
+ /// reader.
+ ///
+ /// On success, the next record in the iterator is returned. The record
+ /// does *not* contain its trailing terminator.
+ ///
+ /// Note that calling `byte_records(b'\n')` differs from `byte_lines()` in
+ /// that it has no special handling for `\r`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// for result in cursor.byte_records(b'\x00') {
+ /// let record = result?;
+ /// records.push(record);
+ /// }
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], "lorem".as_bytes());
+ /// assert_eq!(records[1], "ipsum".as_bytes());
+ /// assert_eq!(records[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn byte_records(self, terminator: u8) -> ByteRecords<Self>
+ where
+ Self: Sized,
+ {
+ ByteRecords { terminator, buf: self }
+ }
+
+ /// Executes the given closure on each line in the underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// The closure given is called on exactly the same values as yielded by
+ /// the [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
+ /// iterator. Namely, lines do _not_ contain trailing `\n` or `\r\n` bytes.
+ ///
+ /// This routine is useful for iterating over lines as quickly as
+ /// possible. Namely, a single allocation is reused for each line.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ ///
+ /// let mut lines = vec![];
+ /// cursor.for_byte_line(|line| {
+ /// lines.push(line.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(lines.len(), 3);
+ /// assert_eq!(lines[0], "lorem".as_bytes());
+ /// assert_eq!(lines[1], "ipsum".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_line<F>(self, mut for_each_line: F) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ self.for_byte_line_with_terminator(|line| {
+ for_each_line(&trim_line_slice(&line))
+ })
+ }
+
+ /// Executes the given closure on each byte-terminated record in the
+ /// underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// The closure given is called on exactly the same values as yielded by
+ /// the [`byte_records`](trait.BufReadExt.html#method.byte_records)
+ /// iterator. Namely, records do _not_ contain a trailing terminator byte.
+ ///
+ /// This routine is useful for iterating over records as quickly as
+ /// possible. Namely, a single allocation is reused for each record.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// cursor.for_byte_record(b'\x00', |record| {
+ /// records.push(record.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], "lorem".as_bytes());
+ /// assert_eq!(records[1], "ipsum".as_bytes());
+ /// assert_eq!(records[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_record<F>(
+ self,
+ terminator: u8,
+ mut for_each_record: F,
+ ) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ self.for_byte_record_with_terminator(terminator, |chunk| {
+ for_each_record(&trim_record_slice(&chunk, terminator))
+ })
+ }
+
+ /// Executes the given closure on each line in the underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// Unlike
+ /// [`for_byte_line`](trait.BufReadExt.html#method.for_byte_line),
+ /// the lines given to the closure *do* include the line terminator, if one
+ /// exists.
+ ///
+ /// This routine is useful for iterating over lines as quickly as
+ /// possible. Namely, a single allocation is reused for each line.
+ ///
+ /// This is identical to `for_byte_record_with_terminator` with a
+ /// terminator of `\n`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ ///
+ /// let mut lines = vec![];
+ /// cursor.for_byte_line_with_terminator(|line| {
+ /// lines.push(line.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(lines.len(), 3);
+ /// assert_eq!(lines[0], "lorem\n".as_bytes());
+ /// assert_eq!(lines[1], "ipsum\r\n".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_line_with_terminator<F>(
+ self,
+ for_each_line: F,
+ ) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ self.for_byte_record_with_terminator(b'\n', for_each_line)
+ }
+
+ /// Executes the given closure on each byte-terminated record in the
+ /// underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// Unlike
+ /// [`for_byte_record`](trait.BufReadExt.html#method.for_byte_record),
+ /// the lines given to the closure *do* include the record terminator, if
+ /// one exists.
+ ///
+ /// This routine is useful for iterating over records as quickly as
+ /// possible. Namely, a single allocation is reused for each record.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::B;
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// cursor.for_byte_record_with_terminator(b'\x00', |record| {
+ /// records.push(record.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], B(b"lorem\x00"));
+ /// assert_eq!(records[1], B("ipsum\x00"));
+ /// assert_eq!(records[2], B("dolor"));
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_record_with_terminator<F>(
+ mut self,
+ terminator: u8,
+ mut for_each_record: F,
+ ) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ let mut bytes = vec![];
+ let mut res = Ok(());
+ let mut consumed = 0;
+ 'outer: loop {
+ // Lend out complete record slices from our buffer
+ {
+ let mut buf = self.fill_buf()?;
+ while let Some(index) = buf.find_byte(terminator) {
+ let (record, rest) = buf.split_at(index + 1);
+ buf = rest;
+ consumed += record.len();
+ match for_each_record(&record) {
+ Ok(false) => break 'outer,
+ Err(err) => {
+ res = Err(err);
+ break 'outer;
+ }
+ _ => (),
+ }
+ }
+
+ // Copy the final record fragment to our local buffer. This
+ // saves read_until() from re-scanning a buffer we know
+ // contains no remaining terminators.
+ bytes.extend_from_slice(&buf);
+ consumed += buf.len();
+ }
+
+ self.consume(consumed);
+ consumed = 0;
+
+ // N.B. read_until uses a different version of memchr that may
+ // be slower than the memchr crate that bstr uses. However, this
+ // should only run for a fairly small number of records, assuming a
+ // decent buffer size.
+ self.read_until(terminator, &mut bytes)?;
+ if bytes.is_empty() || !for_each_record(&bytes)? {
+ break;
+ }
+ bytes.clear();
+ }
+ self.consume(consumed);
+ res
+ }
+}
+
+impl<B: io::BufRead> BufReadExt for B {}
+
+/// An iterator over lines from an instance of
+/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
+///
+/// This iterator is generally created by calling the
+/// [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
+/// method on the
+/// [`BufReadExt`](trait.BufReadExt.html)
+/// trait.
+#[derive(Debug)]
+pub struct ByteLines<B> {
+ buf: B,
+}
+
+/// An iterator over records from an instance of
+/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
+///
+/// A byte record is any sequence of bytes terminated by a particular byte
+/// chosen by the caller. For example, NUL separated byte strings are said to
+/// be NUL-terminated byte records.
+///
+/// This iterator is generally created by calling the
+/// [`byte_records`](trait.BufReadExt.html#method.byte_records)
+/// method on the
+/// [`BufReadExt`](trait.BufReadExt.html)
+/// trait.
+#[derive(Debug)]
+pub struct ByteRecords<B> {
+ buf: B,
+ terminator: u8,
+}
+
+impl<B: io::BufRead> Iterator for ByteLines<B> {
+ type Item = io::Result<Vec<u8>>;
+
+ fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
+ let mut bytes = vec![];
+ match self.buf.read_until(b'\n', &mut bytes) {
+ Err(e) => Some(Err(e)),
+ Ok(0) => None,
+ Ok(_) => {
+ trim_line(&mut bytes);
+ Some(Ok(bytes))
+ }
+ }
+ }
+}
+
+impl<B: io::BufRead> Iterator for ByteRecords<B> {
+ type Item = io::Result<Vec<u8>>;
+
+ fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
+ let mut bytes = vec![];
+ match self.buf.read_until(self.terminator, &mut bytes) {
+ Err(e) => Some(Err(e)),
+ Ok(0) => None,
+ Ok(_) => {
+ trim_record(&mut bytes, self.terminator);
+ Some(Ok(bytes))
+ }
+ }
+ }
+}
+
+fn trim_line(line: &mut Vec<u8>) {
+ if line.last_byte() == Some(b'\n') {
+ line.pop_byte();
+ if line.last_byte() == Some(b'\r') {
+ line.pop_byte();
+ }
+ }
+}
+
+fn trim_line_slice(mut line: &[u8]) -> &[u8] {
+ if line.last_byte() == Some(b'\n') {
+ line = &line[..line.len() - 1];
+ if line.last_byte() == Some(b'\r') {
+ line = &line[..line.len() - 1];
+ }
+ }
+ line
+}
+
+fn trim_record(record: &mut Vec<u8>, terminator: u8) {
+ if record.last_byte() == Some(terminator) {
+ record.pop_byte();
+ }
+}
+
+fn trim_record_slice(mut record: &[u8], terminator: u8) -> &[u8] {
+ if record.last_byte() == Some(terminator) {
+ record = &record[..record.len() - 1];
+ }
+ record
+}
+
+#[cfg(test)]
+mod tests {
+ use super::BufReadExt;
+ use bstring::BString;
+
+ fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
+ let mut lines = vec![];
+ slice
+ .as_ref()
+ .for_byte_line(|line| {
+ lines.push(BString::from(line.to_vec()));
+ Ok(true)
+ })
+ .unwrap();
+ lines
+ }
+
+ fn collect_lines_term<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
+ let mut lines = vec![];
+ slice
+ .as_ref()
+ .for_byte_line_with_terminator(|line| {
+ lines.push(BString::from(line.to_vec()));
+ Ok(true)
+ })
+ .unwrap();
+ lines
+ }
+
+ #[test]
+ fn lines_without_terminator() {
+ assert_eq!(collect_lines(""), Vec::<BString>::new());
+
+ assert_eq!(collect_lines("\n"), vec![""]);
+ assert_eq!(collect_lines("\n\n"), vec!["", ""]);
+ assert_eq!(collect_lines("a\nb\n"), vec!["a", "b"]);
+ assert_eq!(collect_lines("a\nb"), vec!["a", "b"]);
+ assert_eq!(collect_lines("abc\nxyz\n"), vec!["abc", "xyz"]);
+ assert_eq!(collect_lines("abc\nxyz"), vec!["abc", "xyz"]);
+
+ assert_eq!(collect_lines("\r\n"), vec![""]);
+ assert_eq!(collect_lines("\r\n\r\n"), vec!["", ""]);
+ assert_eq!(collect_lines("a\r\nb\r\n"), vec!["a", "b"]);
+ assert_eq!(collect_lines("a\r\nb"), vec!["a", "b"]);
+ assert_eq!(collect_lines("abc\r\nxyz\r\n"), vec!["abc", "xyz"]);
+ assert_eq!(collect_lines("abc\r\nxyz"), vec!["abc", "xyz"]);
+
+ assert_eq!(collect_lines("abc\rxyz"), vec!["abc\rxyz"]);
+ }
+
+ #[test]
+ fn lines_with_terminator() {
+ assert_eq!(collect_lines_term(""), Vec::<BString>::new());
+
+ assert_eq!(collect_lines_term("\n"), vec!["\n"]);
+ assert_eq!(collect_lines_term("\n\n"), vec!["\n", "\n"]);
+ assert_eq!(collect_lines_term("a\nb\n"), vec!["a\n", "b\n"]);
+ assert_eq!(collect_lines_term("a\nb"), vec!["a\n", "b"]);
+ assert_eq!(collect_lines_term("abc\nxyz\n"), vec!["abc\n", "xyz\n"]);
+ assert_eq!(collect_lines_term("abc\nxyz"), vec!["abc\n", "xyz"]);
+
+ assert_eq!(collect_lines_term("\r\n"), vec!["\r\n"]);
+ assert_eq!(collect_lines_term("\r\n\r\n"), vec!["\r\n", "\r\n"]);
+ assert_eq!(collect_lines_term("a\r\nb\r\n"), vec!["a\r\n", "b\r\n"]);
+ assert_eq!(collect_lines_term("a\r\nb"), vec!["a\r\n", "b"]);
+ assert_eq!(
+ collect_lines_term("abc\r\nxyz\r\n"),
+ vec!["abc\r\n", "xyz\r\n"]
+ );
+ assert_eq!(collect_lines_term("abc\r\nxyz"), vec!["abc\r\n", "xyz"]);
+
+ assert_eq!(collect_lines_term("abc\rxyz"), vec!["abc\rxyz"]);
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..c240cd1
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,456 @@
+/*!
+An experimental byte string library.
+
+Byte strings are just like standard Unicode strings with one very important
+difference: byte strings are only *conventionally* UTF-8 while Rust's standard
+Unicode strings are *guaranteed* to be valid UTF-8. The primary motivation for
+byte strings is for handling arbitrary bytes that are mostly UTF-8.
+
+# Overview
+
+This crate provides two important traits that provide string oriented methods
+on `&[u8]` and `Vec<u8>` types:
+
+* [`ByteSlice`](trait.ByteSlice.html) extends the `[u8]` type with additional
+ string oriented methods.
+* [`ByteVec`](trait.ByteVec.html) extends the `Vec<u8>` type with additional
+ string oriented methods.
+
+Additionally, this crate provides two concrete byte string types that deref to
+`[u8]` and `Vec<u8>`. These are useful for storing byte string types, and come
+with convenient `std::fmt::Debug` implementations:
+
+* [`BStr`](struct.BStr.html) is a byte string slice, analogous to `str`.
+* [`BString`](struct.BString.html) is an owned growable byte string buffer,
+ analogous to `String`.
+
+Additionally, the free function [`B`](fn.B.html) serves as a convenient short
+hand for writing byte string literals.
+
+# Quick examples
+
+Byte strings build on the existing APIs for `Vec<u8>` and `&[u8]`, with
+additional string oriented methods. Operations such as iterating over
+graphemes, searching for substrings, replacing substrings, trimming and case
+conversion are examples of things not provided on the standard library `&[u8]`
+APIs but are provided by this crate. For example, this code iterates over all
+of occurrences of a subtring:
+
+```
+use bstr::ByteSlice;
+
+let s = b"foo bar foo foo quux foo";
+
+let mut matches = vec![];
+for start in s.find_iter("foo") {
+ matches.push(start);
+}
+assert_eq!(matches, [0, 8, 12, 21]);
+```
+
+Here's another example showing how to do a search and replace (and also showing
+use of the `B` function):
+
+```
+use bstr::{B, ByteSlice};
+
+let old = B("foo ☃☃☃ foo foo quux foo");
+let new = old.replace("foo", "hello");
+assert_eq!(new, B("hello ☃☃☃ hello hello quux hello"));
+```
+
+And here's an example that shows case conversion, even in the presence of
+invalid UTF-8:
+
+```
+use bstr::{ByteSlice, ByteVec};
+
+let mut lower = Vec::from("hello β");
+lower[0] = b'\xFF';
+// lowercase β is uppercased to Β
+assert_eq!(lower.to_uppercase(), b"\xFFELLO \xCE\x92");
+```
+
+# Convenient debug representation
+
+When working with byte strings, it is often useful to be able to print them
+as if they were byte strings and not sequences of integers. While this crate
+cannot affect the `std::fmt::Debug` implementations for `[u8]` and `Vec<u8>`,
+this crate does provide the `BStr` and `BString` types which have convenient
+`std::fmt::Debug` implementations.
+
+For example, this
+
+```
+use bstr::ByteSlice;
+
+let mut bytes = Vec::from("hello β");
+bytes[0] = b'\xFF';
+
+println!("{:?}", bytes.as_bstr());
+```
+
+will output `"\xFFello β"`.
+
+This example works because the
+[`ByteSlice::as_bstr`](trait.ByteSlice.html#method.as_bstr)
+method converts any `&[u8]` to a `&BStr`.
+
+# When should I use byte strings?
+
+This library is somewhat of an experiment that reflects my hypothesis that
+UTF-8 by convention is a better trade off in some circumstances than guaranteed
+UTF-8. It's possible, perhaps even likely, that this is a niche concern for
+folks working closely with core text primitives.
+
+The first time this idea hit me was in the implementation of Rust's regex
+engine. In particular, very little of the internal implementation cares at all
+about searching valid UTF-8 encoded strings. Indeed, internally, the
+implementation converts `&str` from the API to `&[u8]` fairly quickly and
+just deals with raw bytes. UTF-8 match boundaries are then guaranteed by the
+finite state machine itself rather than any specific string type. This makes it
+possible to not only run regexes on `&str` values, but also on `&[u8]` values.
+
+Why would you ever want to run a regex on a `&[u8]` though? Well, `&[u8]` is
+the fundamental way at which one reads data from all sorts of streams, via the
+standard library's [`Read`](https://doc.rust-lang.org/std/io/trait.Read.html)
+trait. In particular, there is no platform independent way to determine whether
+what you're reading from is some binary file or a human readable text file.
+Therefore, if you're writing a program to search files, you probably need to
+deal with `&[u8]` directly unless you're okay with first converting it to a
+`&str` and dropping any bytes that aren't valid UTF-8. (Or otherwise determine
+the encoding---which is often impractical---and perform a transcoding step.)
+Often, the simplest and most robust way to approach this is to simply treat the
+contents of a file as if it were mostly valid UTF-8 and pass through invalid
+UTF-8 untouched. This may not be the most correct approach though!
+
+One case in particular exacerbates these issues, and that's memory mapping
+a file. When you memory map a file, that file may be gigabytes big, but all
+you get is a `&[u8]`. Converting that to a `&str` all in one go is generally
+not a good idea because of the costs associated with doing so, and also
+because it generally causes one to do two passes over the data instead of
+one, which is quite undesirable. It is of course usually possible to do it an
+incremental way by only parsing chunks at a time, but this is often complex to
+do or impractical. For example, many regex engines only accept one contiguous
+sequence of bytes at a time with no way to perform incremental matching.
+
+In summary, the conventional UTF-8 byte strings provided by this library is an
+experiment. They are definitely useful in some limited circumstances, but how
+useful they are more broadly isn't clear yet.
+
+# `bstr` in public APIs
+
+Since this library is still experimental, you should not use it in the public
+API of your crates until it hits `1.0` (unless you're OK with with tracking
+breaking releases of `bstr`).
+
+In general, it should be possible to avoid putting anything in this crate into
+your public APIs. Namely, you should never need to use the `ByteSlice` or
+`ByteVec` traits as bounds on public APIs, since their only purpose is to
+extend the methods on the concrete types `[u8]` and `Vec<u8>`, respectively.
+Similarly, it should not be necessary to put either the `BStr` or `BString`
+types into public APIs. If you want to use them internally, then they can
+be converted to/from `[u8]`/`Vec<u8>` as needed.
+
+# Differences with standard strings
+
+The primary difference between `[u8]` and `str` is that the former is
+conventionally UTF-8 while the latter is guaranteed to be UTF-8. The phrase
+"conventionally UTF-8" means that a `[u8]` may contain bytes that do not form
+a valid UTF-8 sequence, but operations defined on the type in this crate are
+generally most useful on valid UTF-8 sequences. For example, iterating over
+Unicode codepoints or grapheme clusters is an operation that is only defined
+on valid UTF-8. Therefore, when invalid UTF-8 is encountered, the Unicode
+replacement codepoint is substituted. Thus, a byte string that is not UTF-8 at
+all is of limited utility when using these crate.
+
+However, not all operations on byte strings are specifically Unicode aware. For
+example, substring search has no specific Unicode semantics ascribed to it. It
+works just as well for byte strings that are completely valid UTF-8 as for byte
+strings that contain no valid UTF-8 at all. Similarly for replacements and
+various other operations that do not need any Unicode specific tailoring.
+
+Aside from the difference in how UTF-8 is handled, the APIs between `[u8]` and
+`str` (and `Vec<u8>` and `String`) are intentionally very similar, including
+maintaining the same behavior for corner cases in things like substring
+splitting. There are, however, some differences:
+
+* Substring search is not done with `matches`, but instead, `find_iter`.
+ In general, this crate does not define any generic
+ [`Pattern`](https://doc.rust-lang.org/std/str/pattern/trait.Pattern.html)
+ infrastructure, and instead prefers adding new methods for different
+ argument types. For example, `matches` can search by a `char` or a `&str`,
+ where as `find_iter` can only search by a byte string. `find_char` can be
+ used for searching by a `char`.
+* Since `SliceConcatExt` in the standard library is unstable, it is not
+ possible to reuse that to implement `join` and `concat` methods. Instead,
+ [`join`](fn.join.html) and [`concat`](fn.concat.html) are provided as free
+ functions that perform a similar task.
+* This library bundles in a few more Unicode operations, such as grapheme,
+ word and sentence iterators. More operations, such as normalization and
+ case folding, may be provided in the future.
+* Some `String`/`str` APIs will panic if a particular index was not on a valid
+ UTF-8 code unit sequence boundary. Conversely, no such checking is performed
+ in this crate, as is consistent with treating byte strings as a sequence of
+ bytes. This means callers are responsible for maintaining a UTF-8 invariant
+ if that's important.
+* Some routines provided by this crate, such as `starts_with_str`, have a
+ `_str` suffix to differentiate them from similar routines already defined
+ on the `[u8]` type. The difference is that `starts_with` requires its
+ parameter to be a `&[u8]`, where as `starts_with_str` permits its parameter
+ to by anything that implements `AsRef<[u8]>`, which is more flexible. This
+ means you can write `bytes.starts_with_str("☃")` instead of
+ `bytes.starts_with("☃".as_bytes())`.
+
+Otherwise, you should find most of the APIs between this crate and the standard
+library string APIs to be very similar, if not identical.
+
+# Handling of invalid UTF-8
+
+Since byte strings are only *conventionally* UTF-8, there is no guarantee
+that byte strings contain valid UTF-8. Indeed, it is perfectly legal for a
+byte string to contain arbitrary bytes. However, since this library defines
+a *string* type, it provides many operations specified by Unicode. These
+operations are typically only defined over codepoints, and thus have no real
+meaning on bytes that are invalid UTF-8 because they do not map to a particular
+codepoint.
+
+For this reason, whenever operations defined only on codepoints are used, this
+library will automatically convert invalid UTF-8 to the Unicode replacement
+codepoint, `U+FFFD`, which looks like this: `�`. For example, an
+[iterator over codepoints](struct.Chars.html) will yield a Unicode
+replacement codepoint whenever it comes across bytes that are not valid UTF-8:
+
+```
+use bstr::ByteSlice;
+
+let bs = b"a\xFF\xFFz";
+let chars: Vec<char> = bs.chars().collect();
+assert_eq!(vec!['a', '\u{FFFD}', '\u{FFFD}', 'z'], chars);
+```
+
+There are a few ways in which invalid bytes can be substituted with a Unicode
+replacement codepoint. One way, not used by this crate, is to replace every
+individual invalid byte with a single replacement codepoint. In contrast, the
+approach this crate uses is called the "substitution of maximal subparts," as
+specified by the Unicode Standard (Chapter 3, Section 9). (This approach is
+also used by [W3C's Encoding Standard](https://www.w3.org/TR/encoding/).) In
+this strategy, a replacement codepoint is inserted whenever a byte is found
+that cannot possibly lead to a valid UTF-8 code unit sequence. If there were
+previous bytes that represented a *prefix* of a well-formed UTF-8 code unit
+sequence, then all of those bytes (up to 3) are substituted with a single
+replacement codepoint. For example:
+
+```
+use bstr::ByteSlice;
+
+let bs = b"a\xF0\x9F\x87z";
+let chars: Vec<char> = bs.chars().collect();
+// The bytes \xF0\x9F\x87 could lead to a valid UTF-8 sequence, but 3 of them
+// on their own are invalid. Only one replacement codepoint is substituted,
+// which demonstrates the "substitution of maximal subparts" strategy.
+assert_eq!(vec!['a', '\u{FFFD}', 'z'], chars);
+```
+
+If you do need to access the raw bytes for some reason in an iterator like
+`Chars`, then you should use the iterator's "indices" variant, which gives
+the byte offsets containing the invalid UTF-8 bytes that were substituted with
+the replacement codepoint. For example:
+
+```
+use bstr::{B, ByteSlice};
+
+let bs = b"a\xE2\x98z";
+let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
+// Even though the replacement codepoint is encoded as 3 bytes itself, the
+// byte range given here is only two bytes, corresponding to the original
+// raw bytes.
+assert_eq!(vec![(0, 1, 'a'), (1, 3, '\u{FFFD}'), (3, 4, 'z')], chars);
+
+// Thus, getting the original raw bytes is as simple as slicing the original
+// byte string:
+let chars: Vec<&[u8]> = bs.char_indices().map(|(s, e, _)| &bs[s..e]).collect();
+assert_eq!(vec![B("a"), B(b"\xE2\x98"), B("z")], chars);
+```
+
+# File paths and OS strings
+
+One of the premiere features of Rust's standard library is how it handles file
+paths. In particular, it makes it very hard to write incorrect code while
+simultaneously providing a correct cross platform abstraction for manipulating
+file paths. The key challenge that one faces with file paths across platforms
+is derived from the following observations:
+
+* On most Unix-like systems, file paths are an arbitrary sequence of bytes.
+* On Windows, file paths are an arbitrary sequence of 16-bit integers.
+
+(In both cases, certain sequences aren't allowed. For example a `NUL` byte is
+not allowed in either case. But we can ignore this for the purposes of this
+section.)
+
+Byte strings, like the ones provided in this crate, line up really well with
+file paths on Unix like systems, which are themselves just arbitrary sequences
+of bytes. It turns out that if you treat them as "mostly UTF-8," then things
+work out pretty well. On the contrary, byte strings _don't_ really work
+that well on Windows because it's not possible to correctly roundtrip file
+paths between 16-bit integers and something that looks like UTF-8 _without_
+explicitly defining an encoding to do this for you, which is anathema to byte
+strings, which are just bytes.
+
+Rust's standard library elegantly solves this problem by specifying an
+internal encoding for file paths that's only used on Windows called
+[WTF-8](https://simonsapin.github.io/wtf-8/). Its key properties are that they
+permit losslessly roundtripping file paths on Windows by extending UTF-8 to
+support an encoding of surrogate codepoints, while simultaneously supporting
+zero-cost conversion from Rust's Unicode strings to file paths. (Since UTF-8 is
+a proper subset of WTF-8.)
+
+The fundamental point at which the above strategy fails is when you want to
+treat file paths as things that look like strings in a zero cost way. In most
+cases, this is actually the wrong thing to do, but some cases call for it,
+for example, glob or regex matching on file paths. This is because WTF-8 is
+treated as an internal implementation detail, and there is no way to access
+those bytes via a public API. Therefore, such consumers are limited in what
+they can do:
+
+1. One could re-implement WTF-8 and re-encode file paths on Windows to WTF-8
+ by accessing their underlying 16-bit integer representation. Unfortunately,
+ this isn't zero cost (it introduces a second WTF-8 decoding step) and it's
+ not clear this is a good thing to do, since WTF-8 should ideally remain an
+ internal implementation detail.
+2. One could instead declare that they will not handle paths on Windows that
+ are not valid UTF-16, and return an error when one is encountered.
+3. Like (2), but instead of returning an error, lossily decode the file path
+ on Windows that isn't valid UTF-16 into UTF-16 by replacing invalid bytes
+ with the Unicode replacement codepoint.
+
+While this library may provide facilities for (1) in the future, currently,
+this library only provides facilities for (2) and (3). In particular, a suite
+of conversion functions are provided that permit converting between byte
+strings, OS strings and file paths. For owned byte strings, they are:
+
+* [`ByteVec::from_os_string`](trait.ByteVec.html#method.from_os_string)
+* [`ByteVec::from_os_str_lossy`](trait.ByteVec.html#method.from_os_str_lossy)
+* [`ByteVec::from_path_buf`](trait.ByteVec.html#method.from_path_buf)
+* [`ByteVec::from_path_lossy`](trait.ByteVec.html#method.from_path_lossy)
+* [`ByteVec::into_os_string`](trait.ByteVec.html#method.into_os_string)
+* [`ByteVec::into_os_string_lossy`](trait.ByteVec.html#method.into_os_string_lossy)
+* [`ByteVec::into_path_buf`](trait.ByteVec.html#method.into_path_buf)
+* [`ByteVec::into_path_buf_lossy`](trait.ByteVec.html#method.into_path_buf_lossy)
+
+For byte string slices, they are:
+
+* [`ByteSlice::from_os_str`](trait.ByteSlice.html#method.from_os_str)
+* [`ByteSlice::from_path`](trait.ByteSlice.html#method.from_path)
+* [`ByteSlice::to_os_str`](trait.ByteSlice.html#method.to_os_str)
+* [`ByteSlice::to_os_str_lossy`](trait.ByteSlice.html#method.to_os_str_lossy)
+* [`ByteSlice::to_path`](trait.ByteSlice.html#method.to_path)
+* [`ByteSlice::to_path_lossy`](trait.ByteSlice.html#method.to_path_lossy)
+
+On Unix, all of these conversions are rigorously zero cost, which gives one
+a way to ergonomically deal with raw file paths exactly as they are using
+normal string-related functions. On Windows, these conversion routines perform
+a UTF-8 check and either return an error or lossily decode the file path
+into valid UTF-8, depending on which function you use. This means that you
+cannot roundtrip all file paths on Windows correctly using these conversion
+routines. However, this may be an acceptable downside since such file paths
+are exceptionally rare. Moreover, roundtripping isn't always necessary, for
+example, if all you're doing is filtering based on file paths.
+
+The reason why using byte strings for this is potentially superior than the
+standard library's approach is that a lot of Rust code is already lossily
+converting file paths to Rust's Unicode strings, which are required to be valid
+UTF-8, and thus contain latent bugs on Unix where paths with invalid UTF-8 are
+not terribly uncommon. If you instead use byte strings, then you're guaranteed
+to write correct code for Unix, at the cost of getting a corner case wrong on
+Windows.
+*/
+
+#![cfg_attr(not(feature = "std"), no_std)]
+#![allow(dead_code)]
+
+#[cfg(feature = "std")]
+extern crate core;
+
+#[cfg(feature = "unicode")]
+#[macro_use]
+extern crate lazy_static;
+extern crate memchr;
+#[cfg(test)]
+#[macro_use]
+extern crate quickcheck;
+#[cfg(feature = "unicode")]
+extern crate regex_automata;
+#[cfg(feature = "serde1-nostd")]
+extern crate serde;
+#[cfg(test)]
+extern crate ucd_parse;
+
+pub use bstr::BStr;
+#[cfg(feature = "std")]
+pub use bstring::BString;
+pub use ext_slice::{
+ ByteSlice, Bytes, Fields, FieldsWith, Find, FindReverse, Finder,
+ FinderReverse, Lines, LinesWithTerminator, Split, SplitN, SplitNReverse,
+ SplitReverse, B,
+};
+#[cfg(feature = "std")]
+pub use ext_vec::{concat, join, ByteVec, DrainBytes, FromUtf8Error};
+#[cfg(feature = "unicode")]
+pub use unicode::{
+ GraphemeIndices, Graphemes, SentenceIndices, Sentences, WordIndices,
+ Words, WordsWithBreakIndices, WordsWithBreaks,
+};
+pub use utf8::{
+ decode as decode_utf8, decode_last as decode_last_utf8, CharIndices,
+ Chars, Utf8Chunk, Utf8Chunks, Utf8Error,
+};
+
+mod ascii;
+mod bstr;
+#[cfg(feature = "std")]
+mod bstring;
+mod byteset;
+mod cow;
+mod ext_slice;
+#[cfg(feature = "std")]
+mod ext_vec;
+mod impls;
+#[cfg(feature = "std")]
+pub mod io;
+mod search;
+#[cfg(test)]
+mod tests;
+#[cfg(feature = "unicode")]
+mod unicode;
+mod utf8;
+
+#[cfg(test)]
+mod apitests {
+ use bstr::BStr;
+ use bstring::BString;
+ use ext_slice::{Finder, FinderReverse};
+
+ #[test]
+ fn oibits() {
+ use std::panic::{RefUnwindSafe, UnwindSafe};
+
+ fn assert_send<T: Send>() {}
+ fn assert_sync<T: Sync>() {}
+ fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
+
+ assert_send::<&BStr>();
+ assert_sync::<&BStr>();
+ assert_unwind_safe::<&BStr>();
+ assert_send::<BString>();
+ assert_sync::<BString>();
+ assert_unwind_safe::<BString>();
+
+ assert_send::<Finder>();
+ assert_sync::<Finder>();
+ assert_unwind_safe::<Finder>();
+ assert_send::<FinderReverse>();
+ assert_sync::<FinderReverse>();
+ assert_unwind_safe::<FinderReverse>();
+ }
+}
diff --git a/src/search/byte_frequencies.rs b/src/search/byte_frequencies.rs
new file mode 100644
index 0000000..c313b62
--- /dev/null
+++ b/src/search/byte_frequencies.rs
@@ -0,0 +1,258 @@
+pub const BYTE_FREQUENCIES: [u8; 256] = [
+ 55, // '\x00'
+ 52, // '\x01'
+ 51, // '\x02'
+ 50, // '\x03'
+ 49, // '\x04'
+ 48, // '\x05'
+ 47, // '\x06'
+ 46, // '\x07'
+ 45, // '\x08'
+ 103, // '\t'
+ 242, // '\n'
+ 66, // '\x0b'
+ 67, // '\x0c'
+ 229, // '\r'
+ 44, // '\x0e'
+ 43, // '\x0f'
+ 42, // '\x10'
+ 41, // '\x11'
+ 40, // '\x12'
+ 39, // '\x13'
+ 38, // '\x14'
+ 37, // '\x15'
+ 36, // '\x16'
+ 35, // '\x17'
+ 34, // '\x18'
+ 33, // '\x19'
+ 56, // '\x1a'
+ 32, // '\x1b'
+ 31, // '\x1c'
+ 30, // '\x1d'
+ 29, // '\x1e'
+ 28, // '\x1f'
+ 255, // ' '
+ 148, // '!'
+ 164, // '"'
+ 149, // '#'
+ 136, // '$'
+ 160, // '%'
+ 155, // '&'
+ 173, // "'"
+ 221, // '('
+ 222, // ')'
+ 134, // '*'
+ 122, // '+'
+ 232, // ','
+ 202, // '-'
+ 215, // '.'
+ 224, // '/'
+ 208, // '0'
+ 220, // '1'
+ 204, // '2'
+ 187, // '3'
+ 183, // '4'
+ 179, // '5'
+ 177, // '6'
+ 168, // '7'
+ 178, // '8'
+ 200, // '9'
+ 226, // ':'
+ 195, // ';'
+ 154, // '<'
+ 184, // '='
+ 174, // '>'
+ 126, // '?'
+ 120, // '@'
+ 191, // 'A'
+ 157, // 'B'
+ 194, // 'C'
+ 170, // 'D'
+ 189, // 'E'
+ 162, // 'F'
+ 161, // 'G'
+ 150, // 'H'
+ 193, // 'I'
+ 142, // 'J'
+ 137, // 'K'
+ 171, // 'L'
+ 176, // 'M'
+ 185, // 'N'
+ 167, // 'O'
+ 186, // 'P'
+ 112, // 'Q'
+ 175, // 'R'
+ 192, // 'S'
+ 188, // 'T'
+ 156, // 'U'
+ 140, // 'V'
+ 143, // 'W'
+ 123, // 'X'
+ 133, // 'Y'
+ 128, // 'Z'
+ 147, // '['
+ 138, // '\\'
+ 146, // ']'
+ 114, // '^'
+ 223, // '_'
+ 151, // '`'
+ 249, // 'a'
+ 216, // 'b'
+ 238, // 'c'
+ 236, // 'd'
+ 253, // 'e'
+ 227, // 'f'
+ 218, // 'g'
+ 230, // 'h'
+ 247, // 'i'
+ 135, // 'j'
+ 180, // 'k'
+ 241, // 'l'
+ 233, // 'm'
+ 246, // 'n'
+ 244, // 'o'
+ 231, // 'p'
+ 139, // 'q'
+ 245, // 'r'
+ 243, // 's'
+ 251, // 't'
+ 235, // 'u'
+ 201, // 'v'
+ 196, // 'w'
+ 240, // 'x'
+ 214, // 'y'
+ 152, // 'z'
+ 182, // '{'
+ 205, // '|'
+ 181, // '}'
+ 127, // '~'
+ 27, // '\x7f'
+ 212, // '\x80'
+ 211, // '\x81'
+ 210, // '\x82'
+ 213, // '\x83'
+ 228, // '\x84'
+ 197, // '\x85'
+ 169, // '\x86'
+ 159, // '\x87'
+ 131, // '\x88'
+ 172, // '\x89'
+ 105, // '\x8a'
+ 80, // '\x8b'
+ 98, // '\x8c'
+ 96, // '\x8d'
+ 97, // '\x8e'
+ 81, // '\x8f'
+ 207, // '\x90'
+ 145, // '\x91'
+ 116, // '\x92'
+ 115, // '\x93'
+ 144, // '\x94'
+ 130, // '\x95'
+ 153, // '\x96'
+ 121, // '\x97'
+ 107, // '\x98'
+ 132, // '\x99'
+ 109, // '\x9a'
+ 110, // '\x9b'
+ 124, // '\x9c'
+ 111, // '\x9d'
+ 82, // '\x9e'
+ 108, // '\x9f'
+ 118, // '\xa0'
+ 141, // '¡'
+ 113, // '¢'
+ 129, // '£'
+ 119, // '¤'
+ 125, // '¥'
+ 165, // '¦'
+ 117, // '§'
+ 92, // '¨'
+ 106, // '©'
+ 83, // 'ª'
+ 72, // '«'
+ 99, // '¬'
+ 93, // '\xad'
+ 65, // '®'
+ 79, // '¯'
+ 166, // '°'
+ 237, // '±'
+ 163, // '²'
+ 199, // '³'
+ 190, // '´'
+ 225, // 'µ'
+ 209, // '¶'
+ 203, // '·'
+ 198, // '¸'
+ 217, // '¹'
+ 219, // 'º'
+ 206, // '»'
+ 234, // '¼'
+ 248, // '½'
+ 158, // '¾'
+ 239, // '¿'
+ 255, // 'À'
+ 255, // 'Á'
+ 255, // 'Â'
+ 255, // 'Ã'
+ 255, // 'Ä'
+ 255, // 'Å'
+ 255, // 'Æ'
+ 255, // 'Ç'
+ 255, // 'È'
+ 255, // 'É'
+ 255, // 'Ê'
+ 255, // 'Ë'
+ 255, // 'Ì'
+ 255, // 'Í'
+ 255, // 'Î'
+ 255, // 'Ï'
+ 255, // 'Ð'
+ 255, // 'Ñ'
+ 255, // 'Ò'
+ 255, // 'Ó'
+ 255, // 'Ô'
+ 255, // 'Õ'
+ 255, // 'Ö'
+ 255, // '×'
+ 255, // 'Ø'
+ 255, // 'Ù'
+ 255, // 'Ú'
+ 255, // 'Û'
+ 255, // 'Ü'
+ 255, // 'Ý'
+ 255, // 'Þ'
+ 255, // 'ß'
+ 255, // 'à'
+ 255, // 'á'
+ 255, // 'â'
+ 255, // 'ã'
+ 255, // 'ä'
+ 255, // 'å'
+ 255, // 'æ'
+ 255, // 'ç'
+ 255, // 'è'
+ 255, // 'é'
+ 255, // 'ê'
+ 255, // 'ë'
+ 255, // 'ì'
+ 255, // 'í'
+ 255, // 'î'
+ 255, // 'ï'
+ 255, // 'ð'
+ 255, // 'ñ'
+ 255, // 'ò'
+ 255, // 'ó'
+ 255, // 'ô'
+ 255, // 'õ'
+ 255, // 'ö'
+ 255, // '÷'
+ 255, // 'ø'
+ 255, // 'ù'
+ 255, // 'ú'
+ 255, // 'û'
+ 255, // 'ü'
+ 255, // 'ý'
+ 255, // 'þ'
+ 255, // 'ÿ'
+];
diff --git a/src/search/mod.rs b/src/search/mod.rs
new file mode 100644
index 0000000..a0d1b45
--- /dev/null
+++ b/src/search/mod.rs
@@ -0,0 +1,8 @@
+pub use self::prefilter::PrefilterState;
+pub use self::twoway::TwoWay;
+
+mod byte_frequencies;
+mod prefilter;
+#[cfg(test)]
+mod tests;
+mod twoway;
diff --git a/src/search/prefilter.rs b/src/search/prefilter.rs
new file mode 100644
index 0000000..00e6acf
--- /dev/null
+++ b/src/search/prefilter.rs
@@ -0,0 +1,424 @@
+use core::mem;
+
+use ext_slice::ByteSlice;
+use search::byte_frequencies::BYTE_FREQUENCIES;
+
+/// PrefilterState tracks state associated with the effectiveness of a
+/// prefilter. It is used to track how many bytes, on average, are skipped by
+/// the prefilter. If this average dips below a certain threshold over time,
+/// then the state renders the prefilter inert and stops using it.
+///
+/// A prefilter state should be created for each search. (Where creating an
+/// iterator via, e.g., `find_iter`, is treated as a single search.)
+#[derive(Clone, Debug)]
+pub struct PrefilterState {
+ /// The number of skips that has been executed.
+ skips: usize,
+ /// The total number of bytes that have been skipped.
+ skipped: usize,
+ /// The maximum length of a match. This is used to help determine how many
+ /// bytes on average should be skipped in order for a prefilter to be
+ /// effective.
+ max_match_len: usize,
+ /// Once this heuristic has been deemed ineffective, it will be inert
+ /// throughout the rest of its lifetime. This serves as a cheap way to
+ /// check inertness.
+ inert: bool,
+}
+
+impl PrefilterState {
+ /// The minimum number of skip attempts to try before considering whether
+ /// a prefilter is effective or not.
+ const MIN_SKIPS: usize = 50;
+
+ /// The minimum amount of bytes that skipping must average.
+ ///
+ /// This value was chosen based on varying it and checking the bstr/find/
+ /// microbenchmarks. In particular, this can impact the
+ /// pathological/repeated-{huge,small} benchmarks quite a bit if it's
+ /// set too low.
+ const MIN_SKIP_BYTES: usize = 8;
+
+ /// Create a fresh prefilter state.
+ pub fn new(max_match_len: usize) -> PrefilterState {
+ if max_match_len == 0 {
+ return PrefilterState::inert();
+ }
+ PrefilterState { skips: 0, skipped: 0, max_match_len, inert: false }
+ }
+
+ /// Create a fresh prefilter state that is always inert.
+ fn inert() -> PrefilterState {
+ PrefilterState { skips: 0, skipped: 0, max_match_len: 0, inert: true }
+ }
+
+ /// Update this state with the number of bytes skipped on the last
+ /// invocation of the prefilter.
+ #[inline]
+ pub fn update(&mut self, skipped: usize) {
+ self.skips += 1;
+ self.skipped += skipped;
+ }
+
+ /// Return true if and only if this state indicates that a prefilter is
+ /// still effective.
+ #[inline]
+ pub fn is_effective(&mut self) -> bool {
+ if self.inert {
+ return false;
+ }
+ if self.skips < PrefilterState::MIN_SKIPS {
+ return true;
+ }
+ if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips {
+ return true;
+ }
+
+ // We're inert.
+ self.inert = true;
+ false
+ }
+}
+
+/// A heuristic frequency based prefilter for searching a single needle.
+///
+/// This prefilter attempts to pick out the byte in a needle that is predicted
+/// to occur least frequently, and search for that using fast vectorized
+/// routines. If a rare enough byte could not be found, then this prefilter's
+/// constructors will return `None`.
+///
+/// This can be combined with `PrefilterState` to dynamically render this
+/// prefilter inert if it proves to ineffective.
+#[derive(Clone, Debug)]
+pub struct Freqy {
+ /// Whether this prefilter should be used or not.
+ inert: bool,
+ /// The length of the needle we're searching for.
+ needle_len: usize,
+ /// The rarest byte in the needle, according to pre-computed frequency
+ /// analysis.
+ rare1: u8,
+ /// The leftmost offset of the rarest byte in the needle.
+ rare1i: usize,
+ /// The second rarest byte in the needle, according to pre-computed
+ /// frequency analysis. (This may be equivalent to the rarest byte.)
+ ///
+ /// The second rarest byte is used as a type of guard for quickly detecting
+ /// a mismatch after memchr locates an instance of the rarest byte. This
+ /// is a hedge against pathological cases where the pre-computed frequency
+ /// analysis may be off. (But of course, does not prevent *all*
+ /// pathological cases.)
+ rare2: u8,
+ /// The leftmost offset of the second rarest byte in the needle.
+ rare2i: usize,
+}
+
+impl Freqy {
+ /// The maximum frequency rank permitted. If the rarest byte in the needle
+ /// has a frequency rank above this value, then Freqy is not used.
+ const MAX_RANK: usize = 200;
+
+ /// Return a fresh prefilter state that can be used with this prefilter. A
+ /// prefilter state is used to track the effectiveness of a prefilter for
+ /// speeding up searches. Therefore, the prefilter state should generally
+ /// be reused on subsequent searches (such as in an iterator). For searches
+ /// on a different haystack, then a new prefilter state should be used.
+ pub fn prefilter_state(&self) -> PrefilterState {
+ if self.inert {
+ PrefilterState::inert()
+ } else {
+ PrefilterState::new(self.needle_len)
+ }
+ }
+
+ /// Returns a valid but inert prefilter. This is valid for both the forward
+ /// and reverse direction.
+ ///
+ /// It is never correct to use an inert prefilter. The results of finding
+ /// the next (or previous) candidate are unspecified.
+ fn inert() -> Freqy {
+ Freqy {
+ inert: true,
+ needle_len: 0,
+ rare1: 0,
+ rare1i: 0,
+ rare2: 0,
+ rare2i: 0,
+ }
+ }
+
+ /// Return search info for the given needle in the forward direction.
+ pub fn forward(needle: &[u8]) -> Freqy {
+ if needle.is_empty() {
+ return Freqy::inert();
+ }
+
+ // Find the rarest two bytes. Try to make them distinct (but it's not
+ // required).
+ let (mut rare1, mut rare1i) = (needle[0], 0);
+ let (mut rare2, mut rare2i) = (needle[0], 0);
+ if needle.len() >= 2 {
+ rare2 = needle[1];
+ rare2i = 1;
+ }
+ if Freqy::rank(rare2) < Freqy::rank(rare1) {
+ mem::swap(&mut rare1, &mut rare2);
+ mem::swap(&mut rare1i, &mut rare2i);
+ }
+ for (i, b) in needle.bytes().enumerate().skip(2) {
+ if Freqy::rank(b) < Freqy::rank(rare1) {
+ rare2 = rare1;
+ rare2i = rare1i;
+ rare1 = b;
+ rare1i = i;
+ } else if b != rare1 && Freqy::rank(b) < Freqy::rank(rare2) {
+ rare2 = b;
+ rare2i = i;
+ }
+ }
+ if Freqy::rank(rare1) > Freqy::MAX_RANK {
+ return Freqy::inert();
+ }
+ let needle_len = needle.len();
+ Freqy { inert: false, needle_len, rare1, rare1i, rare2, rare2i }
+ }
+
+ /// Return search info for the given needle in the reverse direction.
+ pub fn reverse(needle: &[u8]) -> Freqy {
+ if needle.is_empty() {
+ return Freqy::inert();
+ }
+
+ // Find the rarest two bytes. Try to make them distinct (but it's not
+ // required). In reverse, the offsets correspond to the number of bytes
+ // from the end of the needle. So `0` is the last byte in the needle.
+ let (mut rare1i, mut rare2i) = (0, 0);
+ if needle.len() >= 2 {
+ rare2i += 1;
+ }
+ let mut rare1 = needle[needle.len() - rare1i - 1];
+ let mut rare2 = needle[needle.len() - rare2i - 1];
+ if Freqy::rank(rare2) < Freqy::rank(rare1) {
+ mem::swap(&mut rare1, &mut rare2);
+ mem::swap(&mut rare1i, &mut rare2i);
+ }
+ for (i, b) in needle.bytes().rev().enumerate().skip(2) {
+ if Freqy::rank(b) < Freqy::rank(rare1) {
+ rare2 = rare1;
+ rare2i = rare1i;
+ rare1 = b;
+ rare1i = i;
+ } else if b != rare1 && Freqy::rank(b) < Freqy::rank(rare2) {
+ rare2 = b;
+ rare2i = i;
+ }
+ }
+ if Freqy::rank(rare1) > Freqy::MAX_RANK {
+ return Freqy::inert();
+ }
+ let needle_len = needle.len();
+ Freqy { inert: false, needle_len, rare1, rare1i, rare2, rare2i }
+ }
+
+ /// Look for a possible occurrence of needle. The position returned
+ /// corresponds to the beginning of the occurrence, if one exists.
+ ///
+ /// Callers may assume that this never returns false negatives (i.e., it
+ /// never misses an actual occurrence), but must check that the returned
+ /// position corresponds to a match. That is, it can return false
+ /// positives.
+ ///
+ /// This should only be used when Freqy is constructed for forward
+ /// searching.
+ pub fn find_candidate(
+ &self,
+ prestate: &mut PrefilterState,
+ haystack: &[u8],
+ ) -> Option<usize> {
+ debug_assert!(!self.inert);
+
+ let mut i = 0;
+ while prestate.is_effective() {
+ // Use a fast vectorized implementation to skip to the next
+ // occurrence of the rarest byte (heuristically chosen) in the
+ // needle.
+ i += match haystack[i..].find_byte(self.rare1) {
+ None => return None,
+ Some(found) => {
+ prestate.update(found);
+ found
+ }
+ };
+
+ // If we can't align our first match with the haystack, then a
+ // match is impossible.
+ if i < self.rare1i {
+ i += 1;
+ continue;
+ }
+
+ // Align our rare2 byte with the haystack. A mismatch means that
+ // a match is impossible.
+ let aligned_rare2i = i - self.rare1i + self.rare2i;
+ if haystack.get(aligned_rare2i) != Some(&self.rare2) {
+ i += 1;
+ continue;
+ }
+
+ // We've done what we can. There might be a match here.
+ return Some(i - self.rare1i);
+ }
+ // The only way we get here is if we believe our skipping heuristic
+ // has become ineffective. We're allowed to return false positives,
+ // so return the position at which we advanced to, aligned to the
+ // haystack.
+ Some(i.saturating_sub(self.rare1i))
+ }
+
+ /// Look for a possible occurrence of needle, in reverse, starting from the
+ /// end of the given haystack. The position returned corresponds to the
+ /// position immediately after the end of the occurrence, if one exists.
+ ///
+ /// Callers may assume that this never returns false negatives (i.e., it
+ /// never misses an actual occurrence), but must check that the returned
+ /// position corresponds to a match. That is, it can return false
+ /// positives.
+ ///
+ /// This should only be used when Freqy is constructed for reverse
+ /// searching.
+ pub fn rfind_candidate(
+ &self,
+ prestate: &mut PrefilterState,
+ haystack: &[u8],
+ ) -> Option<usize> {
+ debug_assert!(!self.inert);
+
+ let mut i = haystack.len();
+ while prestate.is_effective() {
+ // Use a fast vectorized implementation to skip to the next
+ // occurrence of the rarest byte (heuristically chosen) in the
+ // needle.
+ i = match haystack[..i].rfind_byte(self.rare1) {
+ None => return None,
+ Some(found) => {
+ prestate.update(i - found);
+ found
+ }
+ };
+
+ // If we can't align our first match with the haystack, then a
+ // match is impossible.
+ if i + self.rare1i + 1 > haystack.len() {
+ continue;
+ }
+
+ // Align our rare2 byte with the haystack. A mismatch means that
+ // a match is impossible.
+ let aligned = match (i + self.rare1i).checked_sub(self.rare2i) {
+ None => continue,
+ Some(aligned) => aligned,
+ };
+ if haystack.get(aligned) != Some(&self.rare2) {
+ continue;
+ }
+
+ // We've done what we can. There might be a match here.
+ return Some(i + self.rare1i + 1);
+ }
+ // The only way we get here is if we believe our skipping heuristic
+ // has become ineffective. We're allowed to return false positives,
+ // so return the position at which we advanced to, aligned to the
+ // haystack.
+ Some(i + self.rare1i + 1)
+ }
+
+ /// Return the heuristical frequency rank of the given byte. A lower rank
+ /// means the byte is believed to occur less frequently.
+ fn rank(b: u8) -> usize {
+ BYTE_FREQUENCIES[b as usize] as usize
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use ext_slice::B;
+
+ #[test]
+ fn freqy_forward() {
+ // N.B. We sometimes use uppercase here since that mostly ensures freqy
+ // will be constructable. Lowercase letters may be too common for freqy
+ // to work.
+
+ let s = Freqy::forward(B("BAR"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(0), s.find_candidate(&mut pre, B("BARFOO")));
+
+ let s = Freqy::forward(B("BAR"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(3), s.find_candidate(&mut pre, B("FOOBAR")));
+
+ let s = Freqy::forward(B("zyzy"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(0), s.find_candidate(&mut pre, B("zyzz")));
+
+ let s = Freqy::forward(B("zyzy"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(2), s.find_candidate(&mut pre, B("zzzy")));
+
+ let s = Freqy::forward(B("zyzy"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(None, s.find_candidate(&mut pre, B("zazb")));
+
+ let s = Freqy::forward(B("yzyz"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(0), s.find_candidate(&mut pre, B("yzyy")));
+
+ let s = Freqy::forward(B("yzyz"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(2), s.find_candidate(&mut pre, B("yyyz")));
+
+ let s = Freqy::forward(B("yzyz"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(None, s.find_candidate(&mut pre, B("yayb")));
+ }
+
+ #[test]
+ fn freqy_reverse() {
+ // N.B. We sometimes use uppercase here since that mostly ensures freqy
+ // will be constructable. Lowercase letters may be too common for freqy
+ // to work.
+
+ let s = Freqy::reverse(B("BAR"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(3), s.rfind_candidate(&mut pre, B("BARFOO")));
+
+ let s = Freqy::reverse(B("BAR"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(6), s.rfind_candidate(&mut pre, B("FOOBAR")));
+
+ let s = Freqy::reverse(B("zyzy"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(2), s.rfind_candidate(&mut pre, B("zyzz")));
+
+ let s = Freqy::reverse(B("zyzy"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(4), s.rfind_candidate(&mut pre, B("zzzy")));
+
+ let s = Freqy::reverse(B("zyzy"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(None, s.rfind_candidate(&mut pre, B("zazb")));
+
+ let s = Freqy::reverse(B("yzyz"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(2), s.rfind_candidate(&mut pre, B("yzyy")));
+
+ let s = Freqy::reverse(B("yzyz"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(Some(4), s.rfind_candidate(&mut pre, B("yyyz")));
+
+ let s = Freqy::reverse(B("yzyz"));
+ let mut pre = s.prefilter_state();
+ assert_eq!(None, s.rfind_candidate(&mut pre, B("yayb")));
+ }
+}
diff --git a/src/search/tests.rs b/src/search/tests.rs
new file mode 100644
index 0000000..827df92
--- /dev/null
+++ b/src/search/tests.rs
@@ -0,0 +1,225 @@
+use search::twoway::TwoWay;
+
+/// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple.
+type SearchTest = (&'static str, &'static str, Option<usize>, Option<usize>);
+
+const SEARCH_TESTS: &'static [SearchTest] = &[
+ ("", "", Some(0), Some(0)),
+ ("", "a", Some(0), Some(1)),
+ ("", "ab", Some(0), Some(2)),
+ ("", "abc", Some(0), Some(3)),
+ ("a", "", None, None),
+ ("a", "a", Some(0), Some(0)),
+ ("a", "aa", Some(0), Some(1)),
+ ("a", "ba", Some(1), Some(1)),
+ ("a", "bba", Some(2), Some(2)),
+ ("a", "bbba", Some(3), Some(3)),
+ ("a", "bbbab", Some(3), Some(3)),
+ ("a", "bbbabb", Some(3), Some(3)),
+ ("a", "bbbabbb", Some(3), Some(3)),
+ ("a", "bbbbbb", None, None),
+ ("ab", "", None, None),
+ ("ab", "a", None, None),
+ ("ab", "b", None, None),
+ ("ab", "ab", Some(0), Some(0)),
+ ("ab", "aab", Some(1), Some(1)),
+ ("ab", "aaab", Some(2), Some(2)),
+ ("ab", "abaab", Some(0), Some(3)),
+ ("ab", "baaab", Some(3), Some(3)),
+ ("ab", "acb", None, None),
+ ("ab", "abba", Some(0), Some(0)),
+ ("abc", "ab", None, None),
+ ("abc", "abc", Some(0), Some(0)),
+ ("abc", "abcz", Some(0), Some(0)),
+ ("abc", "abczz", Some(0), Some(0)),
+ ("abc", "zabc", Some(1), Some(1)),
+ ("abc", "zzabc", Some(2), Some(2)),
+ ("abc", "azbc", None, None),
+ ("abc", "abzc", None, None),
+ ("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)),
+ ("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)),
+ // Failures caught by quickcheck.
+ ("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)),
+ ("\u{0}\u{1e}", "\u{1e}\u{0}", None, None),
+];
+
+#[test]
+fn unit_twoway_fwd() {
+ run_search_tests_fwd("TwoWay", |n, h| TwoWay::forward(n).find(h));
+}
+
+#[test]
+fn unit_twoway_rev() {
+ run_search_tests_rev("TwoWay", |n, h| TwoWay::reverse(n).rfind(h));
+}
+
+/// Run the substring search tests. `name` should be the type of searcher used,
+/// for diagnostics. `search` should be a closure that accepts a needle and a
+/// haystack and returns the starting position of the first occurrence of
+/// needle in the haystack, or `None` if one doesn't exist.
+fn run_search_tests_fwd(
+ name: &str,
+ mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
+) {
+ for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS {
+ let (n, h) = (needle.as_bytes(), haystack.as_bytes());
+ assert_eq!(
+ expected_fwd,
+ search(n, h),
+ "{}: needle: {:?}, haystack: {:?}, expected: {:?}",
+ name,
+ n,
+ h,
+ expected_fwd
+ );
+ }
+}
+
+/// Run the substring search tests. `name` should be the type of searcher used,
+/// for diagnostics. `search` should be a closure that accepts a needle and a
+/// haystack and returns the starting position of the last occurrence of
+/// needle in the haystack, or `None` if one doesn't exist.
+fn run_search_tests_rev(
+ name: &str,
+ mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
+) {
+ for &(needle, haystack, _, expected_rev) in SEARCH_TESTS {
+ let (n, h) = (needle.as_bytes(), haystack.as_bytes());
+ assert_eq!(
+ expected_rev,
+ search(n, h),
+ "{}: needle: {:?}, haystack: {:?}, expected: {:?}",
+ name,
+ n,
+ h,
+ expected_rev
+ );
+ }
+}
+
+quickcheck! {
+ fn qc_twoway_fwd_prefix_is_substring(bs: Vec<u8>) -> bool {
+ prop_prefix_is_substring(false, &bs, |n, h| TwoWay::forward(n).find(h))
+ }
+
+ fn qc_twoway_fwd_suffix_is_substring(bs: Vec<u8>) -> bool {
+ prop_suffix_is_substring(false, &bs, |n, h| TwoWay::forward(n).find(h))
+ }
+
+ fn qc_twoway_rev_prefix_is_substring(bs: Vec<u8>) -> bool {
+ prop_prefix_is_substring(true, &bs, |n, h| TwoWay::reverse(n).rfind(h))
+ }
+
+ fn qc_twoway_rev_suffix_is_substring(bs: Vec<u8>) -> bool {
+ prop_suffix_is_substring(true, &bs, |n, h| TwoWay::reverse(n).rfind(h))
+ }
+
+ fn qc_twoway_fwd_matches_naive(
+ needle: Vec<u8>,
+ haystack: Vec<u8>
+ ) -> bool {
+ prop_matches_naive(
+ false,
+ &needle,
+ &haystack,
+ |n, h| TwoWay::forward(n).find(h),
+ )
+ }
+
+ fn qc_twoway_rev_matches_naive(
+ needle: Vec<u8>,
+ haystack: Vec<u8>
+ ) -> bool {
+ prop_matches_naive(
+ true,
+ &needle,
+ &haystack,
+ |n, h| TwoWay::reverse(n).rfind(h),
+ )
+ }
+}
+
+/// Check that every prefix of the given byte string is a substring.
+fn prop_prefix_is_substring(
+ reverse: bool,
+ bs: &[u8],
+ mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
+) -> bool {
+ if bs.is_empty() {
+ return true;
+ }
+ for i in 0..(bs.len() - 1) {
+ let prefix = &bs[..i];
+ if reverse {
+ assert_eq!(naive_rfind(prefix, bs), search(prefix, bs));
+ } else {
+ assert_eq!(naive_find(prefix, bs), search(prefix, bs));
+ }
+ }
+ true
+}
+
+/// Check that every suffix of the given byte string is a substring.
+fn prop_suffix_is_substring(
+ reverse: bool,
+ bs: &[u8],
+ mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
+) -> bool {
+ if bs.is_empty() {
+ return true;
+ }
+ for i in 0..(bs.len() - 1) {
+ let suffix = &bs[i..];
+ if reverse {
+ assert_eq!(naive_rfind(suffix, bs), search(suffix, bs));
+ } else {
+ assert_eq!(naive_find(suffix, bs), search(suffix, bs));
+ }
+ }
+ true
+}
+
+/// Check that naive substring search matches the result of the given search
+/// algorithm.
+fn prop_matches_naive(
+ reverse: bool,
+ needle: &[u8],
+ haystack: &[u8],
+ mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
+) -> bool {
+ if reverse {
+ naive_rfind(needle, haystack) == search(needle, haystack)
+ } else {
+ naive_find(needle, haystack) == search(needle, haystack)
+ }
+}
+
+/// Naively search forwards for the given needle in the given haystack.
+fn naive_find(needle: &[u8], haystack: &[u8]) -> Option<usize> {
+ if needle.is_empty() {
+ return Some(0);
+ } else if haystack.len() < needle.len() {
+ return None;
+ }
+ for i in 0..(haystack.len() - needle.len() + 1) {
+ if needle == &haystack[i..i + needle.len()] {
+ return Some(i);
+ }
+ }
+ None
+}
+
+/// Naively search in reverse for the given needle in the given haystack.
+fn naive_rfind(needle: &[u8], haystack: &[u8]) -> Option<usize> {
+ if needle.is_empty() {
+ return Some(haystack.len());
+ } else if haystack.len() < needle.len() {
+ return None;
+ }
+ for i in (0..(haystack.len() - needle.len() + 1)).rev() {
+ if needle == &haystack[i..i + needle.len()] {
+ return Some(i);
+ }
+ }
+ None
+}
diff --git a/src/search/twoway.rs b/src/search/twoway.rs
new file mode 100644
index 0000000..5f1e8cf
--- /dev/null
+++ b/src/search/twoway.rs
@@ -0,0 +1,871 @@
+use core::cmp;
+
+use cow::CowBytes;
+use ext_slice::ByteSlice;
+use search::prefilter::{Freqy, PrefilterState};
+
+/// An implementation of the TwoWay substring search algorithm, with heuristics
+/// for accelerating search based on frequency analysis.
+///
+/// This searcher supports forward and reverse search, although not
+/// simultaneously. It runs in O(n + m) time and O(1) space, where
+/// `n ~ len(needle)` and `m ~ len(haystack)`.
+///
+/// The implementation here roughly matches that which was developed by
+/// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The
+/// only change in this implementation is the use of zero-based indices and
+/// the addition of heuristics for a fast skip loop. That is, this will detect
+/// bytes that are believed to be rare in the needle and use fast vectorized
+/// instructions to find their occurrences quickly. The Two-Way algorithm is
+/// then used to confirm whether a match at that location occurred.
+///
+/// The heuristic for fast skipping is automatically shut off if it's
+/// detected to be ineffective at search time. Generally, this only occurs in
+/// pathological cases. But this is generally necessary in order to preserve
+/// a `O(n + m)` time bound.
+///
+/// The code below is fairly complex and not obviously correct at all. It's
+/// likely necessary to read the Two-Way paper cited above in order to fully
+/// grok this code.
+#[derive(Clone, Debug)]
+pub struct TwoWay<'b> {
+ /// The needle that we're looking for.
+ needle: CowBytes<'b>,
+ /// An implementation of a fast skip loop based on hard-coded frequency
+ /// data. This is only used when conditions are deemed favorable.
+ freqy: Freqy,
+ /// A critical position in needle. Specifically, this position corresponds
+ /// to beginning of either the minimal or maximal suffix in needle. (N.B.
+ /// See SuffixType below for why "minimal" isn't quite the correct word
+ /// here.)
+ ///
+ /// This is the position at which every search begins. Namely, search
+ /// starts by scanning text to the right of this position, and only if
+ /// there's a match does the text to the left of this position get scanned.
+ critical_pos: usize,
+ /// The amount we shift by in the Two-Way search algorithm. This
+ /// corresponds to the "small period" and "large period" cases.
+ shift: Shift,
+}
+
+impl<'b> TwoWay<'b> {
+ /// Create a searcher that uses the Two-Way algorithm by searching forwards
+ /// through any haystack.
+ pub fn forward(needle: &'b [u8]) -> TwoWay<'b> {
+ let freqy = Freqy::forward(needle);
+ if needle.is_empty() {
+ return TwoWay {
+ needle: CowBytes::new(needle),
+ freqy,
+ critical_pos: 0,
+ shift: Shift::Large { shift: 0 },
+ };
+ }
+
+ let min_suffix = Suffix::forward(needle, SuffixKind::Minimal);
+ let max_suffix = Suffix::forward(needle, SuffixKind::Maximal);
+ let (period_lower_bound, critical_pos) =
+ if min_suffix.pos > max_suffix.pos {
+ (min_suffix.period, min_suffix.pos)
+ } else {
+ (max_suffix.period, max_suffix.pos)
+ };
+ let shift = Shift::forward(needle, period_lower_bound, critical_pos);
+ let needle = CowBytes::new(needle);
+ TwoWay { needle, freqy, critical_pos, shift }
+ }
+
+ /// Create a searcher that uses the Two-Way algorithm by searching in
+ /// reverse through any haystack.
+ pub fn reverse(needle: &'b [u8]) -> TwoWay<'b> {
+ let freqy = Freqy::reverse(needle);
+ if needle.is_empty() {
+ return TwoWay {
+ needle: CowBytes::new(needle),
+ freqy,
+ critical_pos: 0,
+ shift: Shift::Large { shift: 0 },
+ };
+ }
+
+ let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal);
+ let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal);
+ let (period_lower_bound, critical_pos) =
+ if min_suffix.pos < max_suffix.pos {
+ (min_suffix.period, min_suffix.pos)
+ } else {
+ (max_suffix.period, max_suffix.pos)
+ };
+ let shift = Shift::reverse(needle, period_lower_bound, critical_pos);
+ let needle = CowBytes::new(needle);
+ TwoWay { needle, freqy, critical_pos, shift }
+ }
+
+ /// Return a fresh prefilter state that can be used with this searcher.
+ /// A prefilter state is used to track the effectiveness of a searcher's
+ /// prefilter for speeding up searches. Therefore, the prefilter state
+ /// should generally be reused on subsequent searches (such as in an
+ /// iterator). For searches on a different haystack, then a new prefilter
+ /// state should be used.
+ ///
+ /// This always initializes a valid prefilter state even if this searcher
+ /// does not have a prefilter enabled.
+ pub fn prefilter_state(&self) -> PrefilterState {
+ self.freqy.prefilter_state()
+ }
+
+ /// Return the needle used by this searcher.
+ pub fn needle(&self) -> &[u8] {
+ self.needle.as_slice()
+ }
+
+ /// Convert this searched into an owned version, where the needle is
+ /// copied if it isn't already owned.
+ #[cfg(feature = "std")]
+ pub fn into_owned(self) -> TwoWay<'static> {
+ TwoWay {
+ needle: self.needle.into_owned(),
+ freqy: self.freqy,
+ critical_pos: self.critical_pos,
+ shift: self.shift,
+ }
+ }
+
+ /// Find the position of the first occurrence of this searcher's needle in
+ /// the given haystack. If one does not exist, then return None.
+ ///
+ /// This will automatically initialize prefilter state. This should only
+ /// be used for one-off searches.
+ pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+ self.find_with(&mut self.prefilter_state(), haystack)
+ }
+
+ /// Find the position of the last occurrence of this searcher's needle
+ /// in the given haystack. If one does not exist, then return None.
+ ///
+ /// This will automatically initialize prefilter state. This should only
+ /// be used for one-off searches.
+ pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+ self.rfind_with(&mut self.prefilter_state(), haystack)
+ }
+
+ /// Find the position of the first occurrence of this searcher's needle in
+ /// the given haystack. If one does not exist, then return None.
+ ///
+ /// This accepts prefilter state that is useful when using the same
+ /// searcher multiple times, such as in an iterator.
+ pub fn find_with(
+ &self,
+ prestate: &mut PrefilterState,
+ haystack: &[u8],
+ ) -> Option<usize> {
+ if self.needle.is_empty() {
+ return Some(0);
+ } else if haystack.len() < self.needle.len() {
+ return None;
+ } else if self.needle.len() == 1 {
+ return haystack.find_byte(self.needle[0]);
+ }
+ match self.shift {
+ Shift::Small { period } => {
+ self.find_small(prestate, haystack, period)
+ }
+ Shift::Large { shift } => {
+ self.find_large(prestate, haystack, shift)
+ }
+ }
+ }
+
+ /// Find the position of the last occurrence of this searcher's needle
+ /// in the given haystack. If one does not exist, then return None.
+ ///
+ /// This accepts prefilter state that is useful when using the same
+ /// searcher multiple times, such as in an iterator.
+ pub fn rfind_with(
+ &self,
+ prestate: &mut PrefilterState,
+ haystack: &[u8],
+ ) -> Option<usize> {
+ if self.needle.is_empty() {
+ return Some(haystack.len());
+ } else if haystack.len() < self.needle.len() {
+ return None;
+ } else if self.needle.len() == 1 {
+ return haystack.rfind_byte(self.needle[0]);
+ }
+ match self.shift {
+ Shift::Small { period } => {
+ self.rfind_small(prestate, haystack, period)
+ }
+ Shift::Large { shift } => {
+ self.rfind_large(prestate, haystack, shift)
+ }
+ }
+ }
+
+ // Below is the actual implementation of TwoWay searching, including both
+ // forwards and backwards searching. Each forward and reverse search has
+ // two fairly similar implementations, each handling the small and large
+ // period cases, for a total 4 different search routines.
+ //
+ // On top of that, each search implementation can be accelerated by a
+ // Freqy prefilter, but it is not always enabled. To avoid its overhead
+ // when its disabled, we explicitly inline each search implementation based
+ // on whether Freqy will be used or not. This brings us up to a total of
+ // 8 monomorphized versions of the search code.
+
+ #[inline(never)]
+ fn find_small(
+ &self,
+ prestate: &mut PrefilterState,
+ haystack: &[u8],
+ period: usize,
+ ) -> Option<usize> {
+ if prestate.is_effective() {
+ self.find_small_imp(prestate, true, haystack, period)
+ } else {
+ self.find_small_imp(prestate, false, haystack, period)
+ }
+ }
+
+ #[inline(always)]
+ fn find_small_imp(
+ &self,
+ prestate: &mut PrefilterState,
+ prefilter: bool,
+ haystack: &[u8],
+ period: usize,
+ ) -> Option<usize> {
+ let needle = self.needle.as_slice();
+ let mut pos = 0;
+ let mut shift = 0;
+ while pos + needle.len() <= haystack.len() {
+ let mut i = cmp::max(self.critical_pos, shift);
+ if prefilter && prestate.is_effective() {
+ match self.freqy.find_candidate(prestate, &haystack[pos..]) {
+ None => return None,
+ Some(found) => {
+ shift = 0;
+ i = self.critical_pos;
+ pos += found;
+ if pos + needle.len() > haystack.len() {
+ return None;
+ }
+ }
+ }
+ }
+ while i < needle.len() && needle[i] == haystack[pos + i] {
+ i += 1;
+ }
+ if i < needle.len() {
+ pos += i - self.critical_pos + 1;
+ shift = 0;
+ } else {
+ let mut j = self.critical_pos;
+ while j > shift && needle[j] == haystack[pos + j] {
+ j -= 1;
+ }
+ if j <= shift && needle[shift] == haystack[pos + shift] {
+ return Some(pos);
+ }
+ pos += period;
+ shift = needle.len() - period;
+ }
+ }
+ None
+ }
+
+ #[inline(never)]
+ fn find_large(
+ &self,
+ prestate: &mut PrefilterState,
+ haystack: &[u8],
+ shift: usize,
+ ) -> Option<usize> {
+ if prestate.is_effective() {
+ self.find_large_imp(prestate, true, haystack, shift)
+ } else {
+ self.find_large_imp(prestate, false, haystack, shift)
+ }
+ }
+
+ #[inline(always)]
+ fn find_large_imp(
+ &self,
+ prestate: &mut PrefilterState,
+ prefilter: bool,
+ haystack: &[u8],
+ shift: usize,
+ ) -> Option<usize> {
+ let needle = self.needle.as_slice();
+ let mut pos = 0;
+ while pos + needle.len() <= haystack.len() {
+ let mut i = self.critical_pos;
+ if prefilter && prestate.is_effective() {
+ match self.freqy.find_candidate(prestate, &haystack[pos..]) {
+ None => return None,
+ Some(found) => {
+ pos += found;
+ if pos + needle.len() > haystack.len() {
+ return None;
+ }
+ }
+ }
+ }
+ while i < needle.len() && needle[i] == haystack[pos + i] {
+ i += 1;
+ }
+ if i < needle.len() {
+ pos += i - self.critical_pos + 1;
+ } else {
+ let mut j = self.critical_pos;
+ while j > 0 && needle[j] == haystack[pos + j] {
+ j -= 1;
+ }
+ if j == 0 && needle[0] == haystack[pos] {
+ return Some(pos);
+ }
+ pos += shift;
+ }
+ }
+ None
+ }
+
+ #[inline(never)]
+ fn rfind_small(
+ &self,
+ prestate: &mut PrefilterState,
+ haystack: &[u8],
+ period: usize,
+ ) -> Option<usize> {
+ if prestate.is_effective() {
+ self.rfind_small_imp(prestate, true, haystack, period)
+ } else {
+ self.rfind_small_imp(prestate, false, haystack, period)
+ }
+ }
+
+ #[inline(always)]
+ fn rfind_small_imp(
+ &self,
+ prestate: &mut PrefilterState,
+ prefilter: bool,
+ haystack: &[u8],
+ period: usize,
+ ) -> Option<usize> {
+ let needle = &*self.needle;
+ let nlen = needle.len();
+ let mut pos = haystack.len();
+ let mut shift = nlen;
+ while pos >= nlen {
+ let mut i = cmp::min(self.critical_pos, shift);
+ if prefilter && prestate.is_effective() {
+ match self.freqy.rfind_candidate(prestate, &haystack[..pos]) {
+ None => return None,
+ Some(found) => {
+ shift = nlen;
+ i = self.critical_pos;
+ pos = found;
+ if pos < nlen {
+ return None;
+ }
+ }
+ }
+ }
+ while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
+ i -= 1;
+ }
+ if i > 0 || needle[0] != haystack[pos - nlen] {
+ pos -= self.critical_pos - i + 1;
+ shift = nlen;
+ } else {
+ let mut j = self.critical_pos;
+ while j < shift && needle[j] == haystack[pos - nlen + j] {
+ j += 1;
+ }
+ if j == shift {
+ return Some(pos - nlen);
+ }
+ pos -= period;
+ shift = period;
+ }
+ }
+ None
+ }
+
+ #[inline(never)]
+ fn rfind_large(
+ &self,
+ prestate: &mut PrefilterState,
+ haystack: &[u8],
+ shift: usize,
+ ) -> Option<usize> {
+ if prestate.is_effective() {
+ self.rfind_large_imp(prestate, true, haystack, shift)
+ } else {
+ self.rfind_large_imp(prestate, false, haystack, shift)
+ }
+ }
+
+ #[inline(always)]
+ fn rfind_large_imp(
+ &self,
+ prestate: &mut PrefilterState,
+ prefilter: bool,
+ haystack: &[u8],
+ shift: usize,
+ ) -> Option<usize> {
+ let needle = &*self.needle;
+ let nlen = needle.len();
+ let mut pos = haystack.len();
+ while pos >= nlen {
+ if prefilter && prestate.is_effective() {
+ match self.freqy.rfind_candidate(prestate, &haystack[..pos]) {
+ None => return None,
+ Some(found) => {
+ pos = found;
+ if pos < nlen {
+ return None;
+ }
+ }
+ }
+ }
+
+ let mut i = self.critical_pos;
+ while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
+ i -= 1;
+ }
+ if i > 0 || needle[0] != haystack[pos - nlen] {
+ pos -= self.critical_pos - i + 1;
+ } else {
+ let mut j = self.critical_pos;
+ while j < nlen && needle[j] == haystack[pos - nlen + j] {
+ j += 1;
+ }
+ if j == nlen {
+ return Some(pos - nlen);
+ }
+ pos -= shift;
+ }
+ }
+ None
+ }
+}
+
+/// A representation of the amount we're allowed to shift by during Two-Way
+/// search.
+///
+/// When computing a critical factorization of the needle, we find the position
+/// of the critical factorization by finding the needle's maximal (or minimal)
+/// suffix, along with the period of that suffix. It turns out that the period
+/// of that suffix is a lower bound on the period of the needle itself.
+///
+/// This lower bound is equivalent to the actual period of the needle in
+/// some cases. To describe that case, we denote the needle as `x` where
+/// `x = uv` and `v` is the lexicographic maximal suffix of `v`. The lower
+/// bound given here is always the period of `v`, which is `<= period(x)`. The
+/// case where `period(v) == period(x)` occurs when `len(u) < (len(x) / 2)` and
+/// where `u` is a suffix of `v[0..period(v)]`.
+///
+/// This case is important because the search algorithm for when the
+/// periods are equivalent is slightly different than the search algorithm
+/// for when the periods are not equivalent. In particular, when they aren't
+/// equivalent, we know that the period of the needle is no less than half its
+/// length. In this case, we shift by an amount less than or equal to the
+/// period of the needle (determined by the maximum length of the components
+/// of the critical factorization of `x`, i.e., `max(len(u), len(v))`)..
+///
+/// The above two cases are represented by the variants below. Each entails
+/// a different instantiation of the Two-Way search algorithm.
+///
+/// N.B. If we could find a way to compute the exact period in all cases,
+/// then we could collapse this case analysis and simplify the algorithm. The
+/// Two-Way paper suggests this is possible, but more reading is required to
+/// grok why the authors didn't pursue that path.
+#[derive(Clone, Debug)]
+enum Shift {
+ Small { period: usize },
+ Large { shift: usize },
+}
+
+impl Shift {
+ /// Compute the shift for a given needle in the forward direction.
+ ///
+ /// This requires a lower bound on the period and a critical position.
+ /// These can be computed by extracting both the minimal and maximal
+ /// lexicographic suffixes, and choosing the right-most starting position.
+ /// The lower bound on the period is then the period of the chosen suffix.
+ fn forward(
+ needle: &[u8],
+ period_lower_bound: usize,
+ critical_pos: usize,
+ ) -> Shift {
+ let large = cmp::max(critical_pos, needle.len() - critical_pos);
+ if critical_pos * 2 >= needle.len() {
+ return Shift::Large { shift: large };
+ }
+
+ let (u, v) = needle.split_at(critical_pos);
+ if !v[..period_lower_bound].ends_with(u) {
+ return Shift::Large { shift: large };
+ }
+ Shift::Small { period: period_lower_bound }
+ }
+
+ /// Compute the shift for a given needle in the reverse direction.
+ ///
+ /// This requires a lower bound on the period and a critical position.
+ /// These can be computed by extracting both the minimal and maximal
+ /// lexicographic suffixes, and choosing the left-most starting position.
+ /// The lower bound on the period is then the period of the chosen suffix.
+ fn reverse(
+ needle: &[u8],
+ period_lower_bound: usize,
+ critical_pos: usize,
+ ) -> Shift {
+ let large = cmp::max(critical_pos, needle.len() - critical_pos);
+ if (needle.len() - critical_pos) * 2 >= needle.len() {
+ return Shift::Large { shift: large };
+ }
+
+ let (v, u) = needle.split_at(critical_pos);
+ if !v[v.len() - period_lower_bound..].starts_with(u) {
+ return Shift::Large { shift: large };
+ }
+ Shift::Small { period: period_lower_bound }
+ }
+}
+
+/// A suffix extracted from a needle along with its period.
+#[derive(Debug)]
+struct Suffix {
+ /// The starting position of this suffix.
+ ///
+ /// If this is a forward suffix, then `&bytes[pos..]` can be used. If this
+ /// is a reverse suffix, then `&bytes[..pos]` can be used. That is, for
+ /// forward suffixes, this is an inclusive starting position, where as for
+ /// reverse suffixes, this is an exclusive ending position.
+ pos: usize,
+ /// The period of this suffix.
+ ///
+ /// Note that this is NOT necessarily the period of the string from which
+ /// this suffix comes from. (It is always less than or equal to the period
+ /// of the original string.)
+ period: usize,
+}
+
+impl Suffix {
+ fn forward(needle: &[u8], kind: SuffixKind) -> Suffix {
+ debug_assert!(!needle.is_empty());
+
+ // suffix represents our maximal (or minimal) suffix, along with
+ // its period.
+ let mut suffix = Suffix { pos: 0, period: 1 };
+ // The start of a suffix in `needle` that we are considering as a
+ // more maximal (or minimal) suffix than what's in `suffix`.
+ let mut candidate_start = 1;
+ // The current offset of our suffixes that we're comparing.
+ //
+ // When the characters at this offset are the same, then we mush on
+ // to the next position since no decision is possible. When the
+ // candidate's character is greater (or lesser) than the corresponding
+ // character than our current maximal (or minimal) suffix, then the
+ // current suffix is changed over to the candidate and we restart our
+ // search. Otherwise, the candidate suffix is no good and we restart
+ // our search on the next candidate.
+ //
+ // The three cases above correspond to the three cases in the loop
+ // below.
+ let mut offset = 0;
+
+ while candidate_start + offset < needle.len() {
+ let current = needle[suffix.pos + offset];
+ let candidate = needle[candidate_start + offset];
+ match kind.cmp(current, candidate) {
+ SuffixOrdering::Accept => {
+ suffix = Suffix { pos: candidate_start, period: 1 };
+ candidate_start += 1;
+ offset = 0;
+ }
+ SuffixOrdering::Skip => {
+ candidate_start += offset + 1;
+ offset = 0;
+ suffix.period = candidate_start - suffix.pos;
+ }
+ SuffixOrdering::Push => {
+ if offset + 1 == suffix.period {
+ candidate_start += suffix.period;
+ offset = 0;
+ } else {
+ offset += 1;
+ }
+ }
+ }
+ }
+ suffix
+ }
+
+ fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix {
+ debug_assert!(!needle.is_empty());
+
+ // See the comments in `forward` for how this works.
+ let mut suffix = Suffix { pos: needle.len(), period: 1 };
+ if needle.len() == 1 {
+ return suffix;
+ }
+ let mut candidate_start = needle.len() - 1;
+ let mut offset = 0;
+
+ while offset < candidate_start {
+ let current = needle[suffix.pos - offset - 1];
+ let candidate = needle[candidate_start - offset - 1];
+ match kind.cmp(current, candidate) {
+ SuffixOrdering::Accept => {
+ suffix = Suffix { pos: candidate_start, period: 1 };
+ candidate_start -= 1;
+ offset = 0;
+ }
+ SuffixOrdering::Skip => {
+ candidate_start -= offset + 1;
+ offset = 0;
+ suffix.period = suffix.pos - candidate_start;
+ }
+ SuffixOrdering::Push => {
+ if offset + 1 == suffix.period {
+ candidate_start -= suffix.period;
+ offset = 0;
+ } else {
+ offset += 1;
+ }
+ }
+ }
+ }
+ suffix
+ }
+}
+
+/// The kind of suffix to extract.
+#[derive(Clone, Copy, Debug)]
+enum SuffixKind {
+ /// Extract the smallest lexicographic suffix from a string.
+ ///
+ /// Technically, this doesn't actually pick the smallest lexicographic
+ /// suffix. e.g., Given the choice between `a` and `aa`, this will choose
+ /// the latter over the former, even though `a < aa`. The reasoning for
+ /// this isn't clear from the paper, but it still smells like a minimal
+ /// suffix.
+ Minimal,
+ /// Extract the largest lexicographic suffix from a string.
+ ///
+ /// Unlike `Minimal`, this really does pick the maximum suffix. e.g., Given
+ /// the choice between `z` and `zz`, this will choose the latter over the
+ /// former.
+ Maximal,
+}
+
+/// The result of comparing corresponding bytes between two suffixes.
+#[derive(Clone, Copy, Debug)]
+enum SuffixOrdering {
+ /// This occurs when the given candidate byte indicates that the candidate
+ /// suffix is better than the current maximal (or minimal) suffix. That is,
+ /// the current candidate suffix should supplant the current maximal (or
+ /// minimal) suffix.
+ Accept,
+ /// This occurs when the given candidate byte excludes the candidate suffix
+ /// from being better than the current maximal (or minimal) suffix. That
+ /// is, the current candidate suffix should be dropped and the next one
+ /// should be considered.
+ Skip,
+ /// This occurs when no decision to accept or skip the candidate suffix
+ /// can be made, e.g., when corresponding bytes are equivalent. In this
+ /// case, the next corresponding bytes should be compared.
+ Push,
+}
+
+impl SuffixKind {
+ /// Returns true if and only if the given candidate byte indicates that
+ /// it should replace the current suffix as the maximal (or minimal)
+ /// suffix.
+ fn cmp(self, current: u8, candidate: u8) -> SuffixOrdering {
+ use self::SuffixOrdering::*;
+
+ match self {
+ SuffixKind::Minimal if candidate < current => Accept,
+ SuffixKind::Minimal if candidate > current => Skip,
+ SuffixKind::Minimal => Push,
+ SuffixKind::Maximal if candidate > current => Accept,
+ SuffixKind::Maximal if candidate < current => Skip,
+ SuffixKind::Maximal => Push,
+ }
+ }
+}
+
+// N.B. There are more holistic tests in src/search/tests.rs.
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use ext_slice::B;
+
+ /// Convenience wrapper for computing the suffix as a byte string.
+ fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) {
+ let s = Suffix::forward(needle, kind);
+ (&needle[s.pos..], s.period)
+ }
+
+ /// Convenience wrapper for computing the reverse suffix as a byte string.
+ fn get_suffix_reverse(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) {
+ let s = Suffix::reverse(needle, kind);
+ (&needle[..s.pos], s.period)
+ }
+
+ /// Return all of the non-empty suffixes in the given byte string.
+ fn suffixes(bytes: &[u8]) -> Vec<&[u8]> {
+ (0..bytes.len()).map(|i| &bytes[i..]).collect()
+ }
+
+ /// Return the lexicographically maximal suffix of the given byte string.
+ fn naive_maximal_suffix_forward(needle: &[u8]) -> &[u8] {
+ let mut sufs = suffixes(needle);
+ sufs.sort();
+ sufs.pop().unwrap()
+ }
+
+ /// Return the lexicographically maximal suffix of the reverse of the given
+ /// byte string.
+ fn naive_maximal_suffix_reverse(needle: &[u8]) -> Vec<u8> {
+ let mut reversed = needle.to_vec();
+ reversed.reverse();
+ let mut got = naive_maximal_suffix_forward(&reversed).to_vec();
+ got.reverse();
+ got
+ }
+
+ #[test]
+ fn suffix_forward() {
+ macro_rules! assert_suffix_min {
+ ($given:expr, $expected:expr, $period:expr) => {
+ let (got_suffix, got_period) =
+ get_suffix_forward($given.as_bytes(), SuffixKind::Minimal);
+ assert_eq!((B($expected), $period), (got_suffix, got_period));
+ };
+ }
+
+ macro_rules! assert_suffix_max {
+ ($given:expr, $expected:expr, $period:expr) => {
+ let (got_suffix, got_period) =
+ get_suffix_forward($given.as_bytes(), SuffixKind::Maximal);
+ assert_eq!((B($expected), $period), (got_suffix, got_period));
+ };
+ }
+
+ assert_suffix_min!("a", "a", 1);
+ assert_suffix_max!("a", "a", 1);
+
+ assert_suffix_min!("ab", "ab", 2);
+ assert_suffix_max!("ab", "b", 1);
+
+ assert_suffix_min!("ba", "a", 1);
+ assert_suffix_max!("ba", "ba", 2);
+
+ assert_suffix_min!("abc", "abc", 3);
+ assert_suffix_max!("abc", "c", 1);
+
+ assert_suffix_min!("acb", "acb", 3);
+ assert_suffix_max!("acb", "cb", 2);
+
+ assert_suffix_min!("cba", "a", 1);
+ assert_suffix_max!("cba", "cba", 3);
+
+ assert_suffix_min!("abcabc", "abcabc", 3);
+ assert_suffix_max!("abcabc", "cabc", 3);
+
+ assert_suffix_min!("abcabcabc", "abcabcabc", 3);
+ assert_suffix_max!("abcabcabc", "cabcabc", 3);
+
+ assert_suffix_min!("abczz", "abczz", 5);
+ assert_suffix_max!("abczz", "zz", 1);
+
+ assert_suffix_min!("zzabc", "abc", 3);
+ assert_suffix_max!("zzabc", "zzabc", 5);
+
+ assert_suffix_min!("aaa", "aaa", 1);
+ assert_suffix_max!("aaa", "aaa", 1);
+
+ assert_suffix_min!("foobar", "ar", 2);
+ assert_suffix_max!("foobar", "r", 1);
+ }
+
+ #[test]
+ fn suffix_reverse() {
+ macro_rules! assert_suffix_min {
+ ($given:expr, $expected:expr, $period:expr) => {
+ let (got_suffix, got_period) =
+ get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal);
+ assert_eq!((B($expected), $period), (got_suffix, got_period));
+ };
+ }
+
+ macro_rules! assert_suffix_max {
+ ($given:expr, $expected:expr, $period:expr) => {
+ let (got_suffix, got_period) =
+ get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal);
+ assert_eq!((B($expected), $period), (got_suffix, got_period));
+ };
+ }
+
+ assert_suffix_min!("a", "a", 1);
+ assert_suffix_max!("a", "a", 1);
+
+ assert_suffix_min!("ab", "a", 1);
+ assert_suffix_max!("ab", "ab", 2);
+
+ assert_suffix_min!("ba", "ba", 2);
+ assert_suffix_max!("ba", "b", 1);
+
+ assert_suffix_min!("abc", "a", 1);
+ assert_suffix_max!("abc", "abc", 3);
+
+ assert_suffix_min!("acb", "a", 1);
+ assert_suffix_max!("acb", "ac", 2);
+
+ assert_suffix_min!("cba", "cba", 3);
+ assert_suffix_max!("cba", "c", 1);
+
+ assert_suffix_min!("abcabc", "abca", 3);
+ assert_suffix_max!("abcabc", "abcabc", 3);
+
+ assert_suffix_min!("abcabcabc", "abcabca", 3);
+ assert_suffix_max!("abcabcabc", "abcabcabc", 3);
+
+ assert_suffix_min!("abczz", "a", 1);
+ assert_suffix_max!("abczz", "abczz", 5);
+
+ assert_suffix_min!("zzabc", "zza", 3);
+ assert_suffix_max!("zzabc", "zz", 1);
+
+ assert_suffix_min!("aaa", "aaa", 1);
+ assert_suffix_max!("aaa", "aaa", 1);
+ }
+
+ quickcheck! {
+ fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool {
+ if bytes.is_empty() {
+ return true;
+ }
+
+ let (got, _) = get_suffix_forward(&bytes, SuffixKind::Maximal);
+ let expected = naive_maximal_suffix_forward(&bytes);
+ got == expected
+ }
+
+ fn qc_suffix_reverse_maximal(bytes: Vec<u8>) -> bool {
+ if bytes.is_empty() {
+ return true;
+ }
+
+ let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal);
+ let expected = naive_maximal_suffix_reverse(&bytes);
+ expected == got
+ }
+ }
+}
diff --git a/src/tests.rs b/src/tests.rs
new file mode 100644
index 0000000..f4179fd
--- /dev/null
+++ b/src/tests.rs
@@ -0,0 +1,32 @@
+/// A sequence of tests for checking whether lossy decoding uses the maximal
+/// subpart strategy correctly. Namely, if a sequence of otherwise invalid
+/// UTF-8 bytes is a valid prefix of a valid UTF-8 sequence, then the entire
+/// prefix is replaced by a single replacement codepoint. In all other cases,
+/// each invalid byte is replaced by a single replacement codepoint.
+///
+/// The first element in each tuple is the expected result of lossy decoding,
+/// while the second element is the input given.
+pub const LOSSY_TESTS: &[(&str, &[u8])] = &[
+ ("a", b"a"),
+ ("\u{FFFD}", b"\xFF"),
+ ("\u{FFFD}\u{FFFD}", b"\xFF\xFF"),
+ ("β\u{FFFD}", b"\xCE\xB2\xFF"),
+ ("☃\u{FFFD}", b"\xE2\x98\x83\xFF"),
+ ("𝝱\u{FFFD}", b"\xF0\x9D\x9D\xB1\xFF"),
+ ("\u{FFFD}\u{FFFD}", b"\xCE\xF0"),
+ ("\u{FFFD}\u{FFFD}", b"\xCE\xFF"),
+ ("\u{FFFD}\u{FFFD}", b"\xE2\x98\xF0"),
+ ("\u{FFFD}\u{FFFD}", b"\xE2\x98\xFF"),
+ ("\u{FFFD}", b"\xF0\x9D\x9D"),
+ ("\u{FFFD}\u{FFFD}", b"\xF0\x9D\x9D\xF0"),
+ ("\u{FFFD}\u{FFFD}", b"\xF0\x9D\x9D\xFF"),
+ ("\u{FFFD}", b"\xCE"),
+ ("a\u{FFFD}", b"a\xCE"),
+ ("\u{FFFD}", b"\xE2\x98"),
+ ("a\u{FFFD}", b"a\xE2\x98"),
+ ("\u{FFFD}", b"\xF0\x9D\x9C"),
+ ("a\u{FFFD}", b"a\xF0\x9D\x9C"),
+ ("a\u{FFFD}\u{FFFD}\u{FFFD}z", b"a\xED\xA0\x80z"),
+ ("☃βツ\u{FFFD}", b"\xe2\x98\x83\xce\xb2\xe3\x83\x84\xFF"),
+ ("a\u{FFFD}\u{FFFD}\u{FFFD}b", b"\x61\xF1\x80\x80\xE1\x80\xC2\x62"),
+];
diff --git a/src/unicode/data/GraphemeBreakTest.txt b/src/unicode/data/GraphemeBreakTest.txt
new file mode 100644
index 0000000..fb4fec9
--- /dev/null
+++ b/src/unicode/data/GraphemeBreakTest.txt
@@ -0,0 +1,630 @@
+# GraphemeBreakTest-12.1.0.txt
+# Date: 2019-03-10, 10:53:12 GMT
+# © 2019 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Default Grapheme_Cluster_Break Test
+#
+# Format:
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
+# × wherever there is not.
+# <comment> the format can change, but currently it shows:
+# - the sample character name
+# - (x) the Grapheme_Cluster_Break property value for the sample character
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of GraphemeBreakTest.html
+#
+# These samples may be extended or changed in the future.
+#
+÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0020 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0020 × 0308 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000D ÷ 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000D ÷ 0308 × 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000A ÷ 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000A ÷ 0308 × 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0001 ÷ 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0001 ÷ 0308 × 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0001 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0001 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 034F ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 034F × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 034F ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 034F × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 034F ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 034F × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 034F ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 034F × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 034F × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 034F × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 034F ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 034F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 034F ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 034F × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 034F × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 034F × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 034F ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 034F × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 034F ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 034F × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 034F ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 034F × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 034F ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 034F × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 034F ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 034F × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 034F ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 034F × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 034F × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 034F × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 034F × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 034F × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 034F ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 034F × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1F1E6 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1F1E6 × 0308 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0600 × 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] SPACE (Other) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0600 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0600 × 0308 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0600 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0600 × 0308 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0600 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0600 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0600 × 0308 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0600 × 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0600 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0600 × 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0600 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0600 × 0308 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0600 × 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0600 × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0600 × 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0600 × 0308 ÷ 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0600 × 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0600 × 0308 ÷ 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0600 × AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0600 × 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3]
+÷ 0600 × 0308 ÷ 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0600 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0903 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0903 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0903 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1100 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1100 × 0308 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1100 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1100 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1100 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1160 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1160 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1160 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1160 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1160 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 11A8 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 11A8 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 11A8 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 11A8 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC00 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC00 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC00 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC00 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC00 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC01 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC01 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC01 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC01 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC01 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 231A × 0308 ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 231A × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 231A × 0308 × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 231A × 0308 ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 231A × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 231A × 0308 × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 231A ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 231A × 0308 ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 231A ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 231A × 0308 ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 231A ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 231A × 0308 ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 231A ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 231A × 0308 ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 231A ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 231A × 0308 ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 231A ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 231A × 0308 ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0300 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0300 × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 200D × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 200D × 0308 × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 200D × 0308 ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200D × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200D ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200D × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200D ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200D ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0378 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0378 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0378 × 0308 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0378 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0378 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0378 × 0308 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3]
+÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [0.3]
+÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 0061 × 200D ÷ 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+#
+# Lines: 602
+#
+# EOF
diff --git a/src/unicode/data/LICENSE-UNICODE b/src/unicode/data/LICENSE-UNICODE
new file mode 100644
index 0000000..ad06935
--- /dev/null
+++ b/src/unicode/data/LICENSE-UNICODE
@@ -0,0 +1,45 @@
+UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
+See Terms of Use for definitions of Unicode Inc.'s
+Data Files and Software.
+
+NOTICE TO USER: Carefully read the following legal agreement.
+BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
+DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
+YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT.
+IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
+THE DATA FILES OR SOFTWARE.
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 1991-2019 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
diff --git a/src/unicode/data/SentenceBreakTest.txt b/src/unicode/data/SentenceBreakTest.txt
new file mode 100644
index 0000000..7c1c34a
--- /dev/null
+++ b/src/unicode/data/SentenceBreakTest.txt
@@ -0,0 +1,530 @@
+# SentenceBreakTest-12.1.0.txt
+# Date: 2019-03-10, 10:53:28 GMT
+# © 2019 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Default Sentence_Break Test
+#
+# Format:
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
+# × wherever there is not.
+# <comment> the format can change, but currently it shows:
+# - the sample character name
+# - (x) the Sentence_Break property value for the sample character
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of SentenceBreakTest.html
+#
+# These samples may be extended or changed in the future.
+#
+÷ 0001 × 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 × 0308 × 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 × 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 × 0308 × 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 × 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 × 0308 × 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 × 0085 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0001 × 0308 × 0085 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0001 × 0009 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0001 × 0308 × 0009 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0001 × 0061 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0001 × 0308 × 0061 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0001 × 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0001 × 0308 × 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0001 × 01BB ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0001 × 0308 × 01BB ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0001 × 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 × 0308 × 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 × 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0001 × 0308 × 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0001 × 0021 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0001 × 0308 × 0021 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0001 × 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0001 × 0308 × 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0001 × 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0001 × 0308 × 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0001 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0001 × 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 0308 × 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D ÷ 0308 × 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0308 × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0085 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000D ÷ 0308 × 0085 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000D ÷ 0009 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000D ÷ 0308 × 0009 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000D ÷ 0308 × 0061 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000D ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000D ÷ 0308 × 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000D ÷ 01BB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000D ÷ 0308 × 01BB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000D ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 0308 × 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000D ÷ 0308 × 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000D ÷ 0021 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000D ÷ 0308 × 0021 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000D ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000D ÷ 0308 × 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000D ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
+÷ 000D ÷ 0308 × 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 000D ÷ 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 0308 × 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 0308 × 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0308 × 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0085 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000A ÷ 0308 × 0085 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000A ÷ 0009 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000A ÷ 0308 × 0009 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000A ÷ 0308 × 0061 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000A ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000A ÷ 0308 × 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000A ÷ 01BB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000A ÷ 0308 × 01BB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000A ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 0308 × 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000A ÷ 0308 × 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000A ÷ 0021 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000A ÷ 0308 × 0021 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000A ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000A ÷ 0308 × 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000A ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
+÷ 000A ÷ 0308 × 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 000A ÷ 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0085 ÷ 0001 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0001 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0085 ÷ 000D ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0085 ÷ 0308 × 000D ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0085 ÷ 000A ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0085 ÷ 0308 × 000A ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0085 ÷ 0085 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0085 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0085 ÷ 0009 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0009 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0085 ÷ 0061 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0061 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0085 ÷ 0041 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0041 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0085 ÷ 01BB ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0085 ÷ 0308 × 01BB ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0085 ÷ 0030 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0030 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0085 ÷ 002E ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0085 ÷ 0308 × 002E ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0085 ÷ 0021 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0021 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0085 ÷ 0022 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0022 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0085 ÷ 002C ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
+÷ 0085 ÷ 0308 × 002C ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0085 ÷ 00AD ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0085 ÷ 0308 × 00AD ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0085 ÷ 0300 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0300 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0009 × 0001 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0009 × 0308 × 0001 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0009 × 000D ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0009 × 0308 × 000D ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0009 × 000A ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0009 × 0308 × 000A ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0009 × 0085 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0009 × 0308 × 0085 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0009 × 0009 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0009 × 0308 × 0009 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0009 × 0061 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0009 × 0308 × 0061 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0009 × 0041 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0009 × 0308 × 0041 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0009 × 01BB ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0009 × 0308 × 01BB ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0009 × 0030 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0009 × 0308 × 0030 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0009 × 002E ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0009 × 0308 × 002E ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0009 × 0021 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0009 × 0308 × 0021 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0009 × 0022 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0009 × 0308 × 0022 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0009 × 002C ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0009 × 0308 × 002C ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0009 × 00AD ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0009 × 0308 × 00AD ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0009 × 0300 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0009 × 0308 × 0300 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 0308 × 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 0308 × 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 0308 × 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 0085 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0061 × 0308 × 0085 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0061 × 0009 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0061 × 0308 × 0009 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0061 × 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0061 × 0308 × 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0061 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0061 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0061 × 01BB ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0061 × 0308 × 01BB ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0061 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0061 × 0308 × 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0061 × 0021 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0061 × 0308 × 0021 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0061 × 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0061 × 0308 × 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0061 × 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0061 × 0308 × 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0061 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 × 0308 × 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 × 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 × 0308 × 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 × 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 × 0308 × 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 × 0085 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0041 × 0308 × 0085 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0041 × 0009 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0041 × 0308 × 0009 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0041 × 0061 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0041 × 0308 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0041 × 01BB ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0041 × 0308 × 01BB ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0041 × 0308 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0041 × 0021 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0041 × 0308 × 0021 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0041 × 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0041 × 0308 × 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0041 × 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0041 × 0308 × 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 01BB × 0001 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 01BB × 0308 × 0001 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 01BB × 000D ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 01BB × 0308 × 000D ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 01BB × 000A ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 01BB × 0308 × 000A ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 01BB × 0085 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 01BB × 0308 × 0085 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 01BB × 0009 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 01BB × 0308 × 0009 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 01BB × 0061 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 01BB × 0308 × 0061 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 01BB × 0041 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 01BB × 0308 × 0041 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 01BB × 01BB ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 01BB × 0308 × 01BB ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 01BB × 0030 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 01BB × 0308 × 0030 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 01BB × 002E ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 01BB × 0308 × 002E ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 01BB × 0021 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 01BB × 0308 × 0021 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 01BB × 0022 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 01BB × 0308 × 0022 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 01BB × 002C ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 01BB × 0308 × 002C ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 01BB × 00AD ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 01BB × 0308 × 00AD ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 01BB × 0300 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 01BB × 0308 × 0300 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 × 0308 × 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 × 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 × 0308 × 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 × 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 × 0308 × 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 × 0085 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0030 × 0308 × 0085 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0030 × 0009 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0030 × 0308 × 0009 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0030 × 0061 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0030 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0030 × 0308 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0030 × 01BB ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0030 × 0308 × 01BB ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0030 × 0308 × 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0030 × 0021 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0030 × 0308 × 0021 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0030 × 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0030 × 0308 × 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0030 × 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0030 × 0308 × 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E ÷ 0001 ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E × 0308 ÷ 0001 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E × 000D ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E × 0308 × 000D ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E × 000A ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E × 0308 × 000A ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E × 0085 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002E × 0308 × 0085 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002E × 0009 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002E × 0308 × 0009 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002E × 0061 ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002E × 0308 × 0061 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002E ÷ 0041 ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002E × 0308 ÷ 0041 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002E ÷ 01BB ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 01BB ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002E × 0030 ÷ # ÷ [0.2] FULL STOP (ATerm) × [6.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E × 0308 × 0030 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [6.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E × 002E ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 002E × 0308 × 002E ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 002E × 0021 ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002E × 0308 × 0021 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002E × 0022 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002E × 0308 × 0022 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002E × 002C ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 002E × 0308 × 002C ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 002E × 00AD ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0308 × 00AD ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0300 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E × 0308 × 0300 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0021 ÷ 0001 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0001 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0021 × 000D ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0021 × 0308 × 000D ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0021 × 000A ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0021 × 0308 × 000A ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0021 × 0085 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0021 × 0308 × 0085 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0021 × 0009 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0021 × 0308 × 0009 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0021 ÷ 0061 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0061 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0021 ÷ 0041 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0041 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0021 ÷ 01BB ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0021 × 0308 ÷ 01BB ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0021 ÷ 0030 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0030 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0021 × 002E ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 0021 × 0308 × 002E ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 0021 × 0021 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0021 × 0308 × 0021 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0021 × 0022 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0021 × 0308 × 0022 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0021 × 002C ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 0021 × 0308 × 002C ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 0021 × 00AD ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0021 × 0308 × 00AD ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0021 × 0300 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0021 × 0308 × 0300 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 0001 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 × 0308 × 0001 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 × 000D ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 × 0308 × 000D ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 × 000A ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 × 0308 × 000A ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 × 0085 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0022 × 0308 × 0085 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0022 × 0009 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0022 × 0308 × 0009 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0022 × 0061 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0022 × 0308 × 0061 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0022 × 0041 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0022 × 0308 × 0041 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0022 × 01BB ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0022 × 0308 × 01BB ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0022 × 0030 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 × 0308 × 0030 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 × 002E ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0022 × 0308 × 002E ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0022 × 0021 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0022 × 0308 × 0021 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0022 × 0022 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0022 × 0308 × 0022 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0022 × 002C ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0022 × 0308 × 002C ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0022 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 0308 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 0001 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C × 0308 × 0001 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C × 000D ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C × 0308 × 000D ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C × 000A ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C × 0308 × 000A ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C × 0085 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002C × 0308 × 0085 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002C × 0009 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002C × 0308 × 0009 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002C × 0061 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002C × 0308 × 0061 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002C × 0041 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002C × 0308 × 0041 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002C × 01BB ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002C × 0308 × 01BB ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002C × 0030 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C × 0308 × 0030 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C × 002E ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 002C × 0308 × 002E ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 002C × 0021 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002C × 0308 × 0021 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002C × 0022 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002C × 0308 × 0022 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002C × 002C ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 002C × 0308 × 002C ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD × 0308 × 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD × 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD × 0308 × 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD × 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD × 0308 × 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD × 0085 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 00AD × 0308 × 0085 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 00AD × 0009 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 00AD × 0308 × 0009 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 00AD × 0061 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 00AD × 0308 × 0061 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 00AD × 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 00AD × 0308 × 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 00AD × 01BB ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 00AD × 0308 × 01BB ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 00AD × 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD × 0308 × 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD × 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 00AD × 0308 × 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 00AD × 0021 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 00AD × 0308 × 0021 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 00AD × 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 00AD × 0308 × 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 00AD × 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 00AD × 0308 × 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 0308 × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 × 0308 × 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 × 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 0308 × 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0308 × 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0085 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0300 × 0308 × 0085 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0300 × 0009 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0300 × 0308 × 0009 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0300 × 0061 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0300 × 0308 × 0061 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0300 × 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0300 × 0308 × 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0300 × 01BB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0300 × 0308 × 01BB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0300 × 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 × 0308 × 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 × 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0300 × 0308 × 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0300 × 0021 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0300 × 0308 × 0021 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0300 × 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0300 × 0308 × 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0300 × 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0300 × 0308 × 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D × 000A ÷ 0061 × 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) × [998.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0020 × 200D × 0646 ÷ # ÷ [0.2] SPACE (Sp) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [998.0] ARABIC LETTER NOON (OLetter) ÷ [0.3]
+÷ 0646 × 200D × 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (OLetter) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [998.0] SPACE (Sp) ÷ [0.3]
+÷ 0028 × 0022 × 0047 × 006F × 002E × 0022 × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [998.0] QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER G (Upper) × [998.0] LATIN SMALL LETTER O (Lower) × [998.0] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3]
+÷ 0028 × 201C × 0047 × 006F × 003F × 201D × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [998.0] LEFT DOUBLE QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER G (Upper) × [998.0] LATIN SMALL LETTER O (Lower) × [998.0] QUESTION MARK (STerm) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3]
+÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E × 0020 × 0069 × 0073 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER S (Lower) ÷ [0.3]
+÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 003F × 0020 ÷ 0048 × 0065 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUESTION MARK (STerm) × [9.0] SPACE (Sp) ÷ [11.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0033 × 002E × 0034 ÷ # ÷ [0.2] DIGIT THREE (Numeric) × [998.0] FULL STOP (ATerm) × [6.0] DIGIT FOUR (Numeric) ÷ [0.3]
+÷ 0063 × 002E × 0064 ÷ # ÷ [0.2] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER D (Lower) ÷ [0.3]
+÷ 0043 × 002E × 0064 ÷ # ÷ [0.2] LATIN CAPITAL LETTER C (Upper) × [998.0] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER D (Lower) ÷ [0.3]
+÷ 0063 × 002E × 0044 ÷ # ÷ [0.2] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER D (Upper) ÷ [0.3]
+÷ 0043 × 002E × 0044 ÷ # ÷ [0.2] LATIN CAPITAL LETTER C (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER D (Upper) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [8.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 2018 × 0028 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [8.0] LEFT SINGLE QUOTATION MARK (Close) × [998.0] LEFT PARENTHESIS (Close) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 2018 × 0028 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [998.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 000A ÷ 0308 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0074 × 0068 × 0065 × 0020 × 0072 × 0065 × 0073 × 0070 × 002E × 0020 × 006C × 0065 × 0061 × 0064 × 0065 × 0072 × 0073 × 0020 × 0061 × 0072 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER S (Lower) × [998.0] LATIN SMALL LETTER P (Lower) × [998.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER L (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER S (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 5B57 × 002E ÷ 5B57 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [998.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E ÷ 5B83 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 3002 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.1] IDEOGRAPHIC FULL STOP (STerm) ÷ [0.3]
+÷ 5B57 × 3002 ÷ 5B83 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [998.0] IDEOGRAPHIC FULL STOP (STerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3]
+÷ 0021 × 0020 × 0020 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] SPACE (Sp) × [10.0] SPACE (Sp) ÷ [0.3]
+÷ 2060 × 0028 × 2060 × 0022 × 2060 × 0047 × 2060 × 006F × 2060 × 002E × 2060 × 0022 × 2060 × 0029 × 2060 × 0020 × 2060 ÷ 0028 × 2060 × 0048 × 2060 × 0065 × 2060 × 0020 × 2060 × 0064 × 2060 × 0069 × 2060 × 0064 × 2060 × 002E × 2060 × 0029 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER G (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER O (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0028 × 2060 × 201C × 2060 × 0047 × 2060 × 006F × 2060 × 003F × 2060 × 201D × 2060 × 0029 × 2060 × 0020 × 2060 ÷ 0028 × 2060 × 0048 × 2060 × 0065 × 2060 × 0020 × 2060 × 0064 × 2060 × 0069 × 2060 × 0064 × 2060 × 002E × 2060 × 0029 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT DOUBLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER G (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER O (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] QUESTION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 002E × 2060 × 0020 × 2060 × 0069 × 2060 × 0073 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 003F × 2060 × 0020 × 2060 ÷ 0048 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUESTION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 002E × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0033 × 2060 × 002E × 2060 × 0034 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] DIGIT THREE (Numeric) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [6.0] DIGIT FOUR (Numeric) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0063 × 2060 × 002E × 2060 × 0064 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0043 × 2060 × 002E × 2060 × 0064 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER C (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0063 × 2060 × 002E × 2060 × 0044 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER D (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0043 × 2060 × 002E × 2060 × 0044 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER C (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER D (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 2018 × 2060 × 0028 × 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 2018 × 2060 × 0028 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 000A ÷ 2060 × 0308 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 0020 × 2060 × 0072 × 2060 × 0065 × 2060 × 0073 × 2060 × 0070 × 2060 × 002E × 2060 × 0020 × 2060 × 006C × 2060 × 0065 × 2060 × 0061 × 2060 × 0064 × 2060 × 0065 × 2060 × 0072 × 2060 × 0073 × 2060 × 0020 × 2060 × 0061 × 2060 × 0072 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER P (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER L (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 5B57 × 2060 × 002E × 2060 ÷ 5B57 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 3002 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.1] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 5B57 × 2060 × 3002 × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [998.0] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0021 × 2060 × 0020 × 2060 × 0020 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] EXCLAMATION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [10.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+#
+# Lines: 502
+#
+# EOF
diff --git a/src/unicode/data/WordBreakTest.txt b/src/unicode/data/WordBreakTest.txt
new file mode 100644
index 0000000..facd892
--- /dev/null
+++ b/src/unicode/data/WordBreakTest.txt
@@ -0,0 +1,1851 @@
+# WordBreakTest-12.1.0.txt
+# Date: 2019-03-10, 10:53:29 GMT
+# © 2019 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Default Word_Break Test
+#
+# Format:
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
+# × wherever there is not.
+# <comment> the format can change, but currently it shows:
+# - the sample character name
+# - (x) the Word_Break property value for the sample character
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of WordBreakTest.html
+#
+# These samples may be extended or changed in the future.
+#
+÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 × 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 × 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 ÷ 000B ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0001 × 0308 ÷ 000B ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0001 ÷ 3031 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0001 × 0308 ÷ 3031 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0001 ÷ 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0001 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 × 0308 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 ÷ 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0001 × 0308 ÷ 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0001 ÷ 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 ÷ 005F ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0001 × 0308 ÷ 005F ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 ÷ 05D0 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 05D0 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0001 ÷ 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0001 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 × 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0001 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0001 × 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0001 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0001 × 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0001 ÷ 0061 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 000B ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000D ÷ 3031 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000D ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000D ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000D ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 005F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 05D0 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000D ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000D ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 000D ÷ 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000D ÷ 0061 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 000B ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000A ÷ 3031 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000A ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000A ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000A ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 005F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 05D0 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000A ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000A ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 000A ÷ 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000A ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0001 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000B ÷ 000D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000B ÷ 000A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000B ÷ 000B ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000B ÷ 3031 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000B ÷ 0041 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000B ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 002E ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000B ÷ 0030 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000B ÷ 005F ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000B ÷ 1F1E6 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000B ÷ 05D0 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000B ÷ 0022 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000B ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 231A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] WATCH (ExtPict) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000B ÷ 0020 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 000B ÷ 00AD ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0300 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000B ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000B ÷ 200D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000B ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000B ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 3031 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 3031 × 0308 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 3031 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 3031 × 0308 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 3031 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 3031 × 0308 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 3031 × 0308 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 3031 ÷ 0041 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0041 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 3031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 × 0308 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 ÷ 002E ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 3031 × 0308 ÷ 002E ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 3031 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 3031 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 3031 × 0308 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 3031 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 3031 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 3031 ÷ 05D0 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 05D0 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 3031 ÷ 0022 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0022 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 3031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 ÷ 231A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 3031 × 0308 ÷ 231A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 3031 ÷ 0020 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0020 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 3031 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 3031 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 3031 × 0308 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 3031 × 200D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 3031 × 0308 × 200D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 3031 ÷ 0061 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 ÷ 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 ÷ 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 ÷ 000B ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0041 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0041 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0041 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 × 0308 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 ÷ 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0041 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0041 × 0308 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0041 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0041 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0041 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0041 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0041 ÷ 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0041 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 ÷ 231A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0041 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0041 ÷ 0020 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 200D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0041 × 0308 × 200D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0041 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 003A ÷ 000D ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 003A ÷ 000A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 003A ÷ 000B ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 003A ÷ 3031 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 003A ÷ 0041 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 0041 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 003A ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A ÷ 002E ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 003A ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 003A ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 003A ÷ 05D0 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 003A × 0308 ÷ 05D0 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 003A ÷ 0022 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 003A ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A ÷ 231A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 003A ÷ 0020 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 003A × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 003A × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 003A × 200D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 003A × 0308 × 200D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 003A ÷ 0061 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C ÷ 0001 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C ÷ 000D ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C ÷ 000A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C ÷ 000B ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002C ÷ 3031 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002C ÷ 0041 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002C ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C ÷ 002E ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002C ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002C ÷ 05D0 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002C ÷ 0022 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002C ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C ÷ 231A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002C ÷ 0020 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 200D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002C × 0308 × 200D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E ÷ 0001 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E × 0308 ÷ 0001 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E ÷ 000D ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E × 0308 ÷ 000D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E ÷ 000A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E × 0308 ÷ 000A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E ÷ 000B ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002E × 0308 ÷ 000B ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002E ÷ 3031 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002E × 0308 ÷ 3031 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002E ÷ 0041 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 0041 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002E ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E × 0308 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E ÷ 002E ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002E × 0308 ÷ 002E ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002E ÷ 0030 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E × 0308 ÷ 0030 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E ÷ 005F ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002E × 0308 ÷ 005F ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002E ÷ 1F1E6 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002E × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002E ÷ 05D0 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002E × 0308 ÷ 05D0 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002E ÷ 0022 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0022 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002E ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E ÷ 231A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002E × 0308 ÷ 231A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002E ÷ 0020 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002E × 0308 ÷ 0020 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002E × 00AD ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0308 × 00AD ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0300 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E × 0308 × 0300 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E × 200D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002E × 0308 × 200D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002E ÷ 0061 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 ÷ 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 ÷ 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 ÷ 000B ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0030 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0030 ÷ 3031 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0030 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0030 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0030 × 0308 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0030 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 ÷ 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0030 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0030 × 0308 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0030 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0030 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0030 × 05D0 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0030 × 0308 × 05D0 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0030 ÷ 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0030 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 ÷ 231A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0030 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0030 ÷ 0020 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 200D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0030 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0030 × 0061 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F ÷ 0001 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 005F × 0308 ÷ 0001 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 005F ÷ 000D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 005F × 0308 ÷ 000D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 005F ÷ 000A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 005F × 0308 ÷ 000A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 005F ÷ 000B ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 005F × 0308 ÷ 000B ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 005F × 3031 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 005F × 0308 × 3031 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 005F × 0041 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 005F × 0308 × 0041 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 005F ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0308 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0308 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F ÷ 002E ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 005F × 0308 ÷ 002E ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 005F × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 005F × 0308 × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 005F × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 005F × 0308 × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 005F ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 005F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 005F × 05D0 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 005F × 0308 × 05D0 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 005F ÷ 0022 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 005F × 0308 ÷ 0022 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 005F ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0308 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F ÷ 231A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 005F × 0308 ÷ 231A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 005F ÷ 0020 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 005F × 0308 ÷ 0020 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 005F × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 005F × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 005F × 0308 × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 005F × 200D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 005F × 0308 × 200D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 005F × 0061 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0061 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0061 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0031 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 1F1E6 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 1F1E6 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 002E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 002E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 1F1E6 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 1F1E6 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [15.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 × 0308 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [15.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 ÷ 05D0 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 05D0 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 1F1E6 ÷ 0022 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0022 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 1F1E6 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 ÷ 0001 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 0001 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 05D0 ÷ 000D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 000D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 05D0 ÷ 000A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 000A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 05D0 ÷ 000B ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 000B ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 05D0 ÷ 3031 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 3031 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 05D0 × 0041 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 05D0 × 0308 × 0041 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 05D0 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 ÷ 002E ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 002E ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 05D0 × 0030 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 05D0 × 0308 × 0030 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 05D0 × 005F ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 05D0 × 0308 × 005F ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 05D0 ÷ 1F1E6 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 05D0 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 05D0 × 0308 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 05D0 ÷ 0022 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 0022 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 05D0 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0308 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 ÷ 231A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 231A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 05D0 ÷ 0020 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 0020 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 05D0 × 00AD ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 00AD ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 05D0 × 0300 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0300 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 05D0 × 200D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 200D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 05D0 × 0061 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 ÷ 0001 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0001 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 ÷ 000D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 × 0308 ÷ 000D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 ÷ 000A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 × 0308 ÷ 000A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 ÷ 000B ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0022 × 0308 ÷ 000B ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0022 ÷ 3031 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0022 × 0308 ÷ 3031 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0022 ÷ 0041 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0041 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0022 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 × 0308 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 ÷ 002E ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0022 × 0308 ÷ 002E ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0022 ÷ 0030 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0030 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 ÷ 005F ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0022 × 0308 ÷ 005F ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0022 ÷ 1F1E6 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0022 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0022 ÷ 05D0 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 05D0 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0022 ÷ 0022 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0022 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0022 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 ÷ 231A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0022 × 0308 ÷ 231A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0022 ÷ 0020 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0020 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0022 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 0308 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 200D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0022 × 0308 × 200D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0022 ÷ 0061 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0027 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0027 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0027 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0027 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0027 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0027 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 ÷ 002E ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0027 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0027 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0027 ÷ 05D0 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 05D0 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0027 ÷ 0022 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0027 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 ÷ 231A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0027 ÷ 0020 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0027 × 00AD ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0027 × 0300 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0027 × 200D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 231A × 0308 ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A ÷ 000B ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 231A × 0308 ÷ 000B ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 231A ÷ 3031 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 231A × 0308 ÷ 3031 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 231A ÷ 0041 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 0041 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 231A ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A × 0308 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A ÷ 002E ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 231A × 0308 ÷ 002E ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 231A ÷ 0030 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 231A × 0308 ÷ 0030 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 231A ÷ 005F ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 231A × 0308 ÷ 005F ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A ÷ 05D0 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 231A × 0308 ÷ 05D0 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 231A ÷ 0022 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0022 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 231A ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 231A × 00AD ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 231A × 0308 × 00AD ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 231A ÷ 0061 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 ÷ 000B ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000B ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0020 ÷ 3031 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0020 × 0308 ÷ 3031 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0020 ÷ 0041 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0041 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0020 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 × 0308 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 ÷ 002E ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0020 × 0308 ÷ 002E ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0020 ÷ 0030 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0030 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0020 ÷ 005F ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0020 × 0308 ÷ 005F ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 ÷ 05D0 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 05D0 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0020 ÷ 0022 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0022 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0020 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0020 ÷ # ÷ [0.2] SPACE (WSegSpace) × [3.4] SPACE (WSegSpace) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0020 × 00AD ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 × 00AD ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0020 ÷ 0061 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD × 0308 ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD ÷ 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD × 0308 ÷ 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD ÷ 000B ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 00AD × 0308 ÷ 000B ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 00AD ÷ 3031 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 00AD × 0308 ÷ 3031 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 00AD ÷ 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 00AD ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD × 0308 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD ÷ 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 00AD × 0308 ÷ 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 00AD ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 00AD × 0308 ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 00AD ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 00AD × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 00AD ÷ 05D0 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 05D0 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 00AD ÷ 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 00AD ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD ÷ 231A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 00AD × 0308 ÷ 231A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 00AD ÷ 0020 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0020 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 0308 × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 200D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 00AD × 0308 × 200D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 00AD ÷ 0061 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 ÷ 000B ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000B ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0300 ÷ 3031 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0300 × 0308 ÷ 3031 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0300 ÷ 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0300 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 × 0308 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 ÷ 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0300 × 0308 ÷ 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0300 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0300 × 0308 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 ÷ 05D0 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 05D0 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0300 ÷ 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0300 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0300 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D ÷ 000B ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 200D × 0308 ÷ 000B ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 200D ÷ 3031 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 200D × 0308 ÷ 3031 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 200D ÷ 0041 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 0041 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 200D ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D × 0308 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D ÷ 002E ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 200D × 0308 ÷ 002E ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 200D ÷ 0030 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 200D × 0308 ÷ 0030 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 200D ÷ 005F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 200D × 0308 ÷ 005F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D ÷ 05D0 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 200D × 0308 ÷ 05D0 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 200D ÷ 0022 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0022 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 200D ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] WATCH (ExtPict) ÷ [0.3]
+÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 200D × 00AD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 200D × 0308 × 00AD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 200D ÷ 0061 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 × 2060 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 2060 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 2060 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 2060 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 003A × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 003A × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 003A × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 003A × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 0027 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 0027 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 002C × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 003A × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 × 0027 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 0027 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 × 002C × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002C × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] ARABIC LETTER NOON (ALetter) ÷ [0.3]
+÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0041 × 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 × 003A × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 ÷ 003A ÷ 003A ÷ 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 05D0 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0022 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.2] QUOTATION MARK (Double_Quote) × [7.3] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0041 × 0030 × 0030 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) × [8.0] DIGIT ZERO (Numeric) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0030 × 002C × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 ÷ 002C ÷ 002C ÷ 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0041 × 005F × 0030 × 005F × 3031 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ZERO (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0041 × 005F × 005F × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [15.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 200D × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 0061 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+÷ 0061 × 200D × 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
+÷ 1F6D1 × 1F3FF ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
+÷ 200D × 1F6D1 × 1F3FF ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
+÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 1F6D1 ÷ 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 0061 × 0308 × 200D × 0308 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 0020 × 0020 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] SPACE (WSegSpace) × [3.4] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+#
+# Lines: 1823
+#
+# EOF
diff --git a/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa b/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
new file mode 100644
index 0000000..0efaaf2
--- /dev/null
+++ b/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa b/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
new file mode 100644
index 0000000..eb24025
--- /dev/null
+++ b/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/grapheme_break_fwd.rs b/src/unicode/fsm/grapheme_break_fwd.rs
new file mode 100644
index 0000000..317ba96
--- /dev/null
+++ b/src/unicode/fsm/grapheme_break_fwd.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name GRAPHEME_BREAK_FWD --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static! {
+ pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static! {
+ pub static ref GRAPHEME_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
diff --git a/src/unicode/fsm/grapheme_break_rev.bigendian.dfa b/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
new file mode 100644
index 0000000..d42cd36
--- /dev/null
+++ b/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/grapheme_break_rev.littleendian.dfa b/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
new file mode 100644
index 0000000..c75ea5f
--- /dev/null
+++ b/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/grapheme_break_rev.rs b/src/unicode/fsm/grapheme_break_rev.rs
new file mode 100644
index 0000000..db6b6ee
--- /dev/null
+++ b/src/unicode/fsm/grapheme_break_rev.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name GRAPHEME_BREAK_REV --reverse --longest --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static! {
+ pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static! {
+ pub static ref GRAPHEME_BREAK_REV: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
diff --git a/src/unicode/fsm/mod.rs b/src/unicode/fsm/mod.rs
new file mode 100644
index 0000000..ae6c499
--- /dev/null
+++ b/src/unicode/fsm/mod.rs
@@ -0,0 +1,8 @@
+pub mod grapheme_break_fwd;
+pub mod grapheme_break_rev;
+pub mod regional_indicator_rev;
+pub mod sentence_break_fwd;
+pub mod simple_word_fwd;
+pub mod whitespace_anchored_fwd;
+pub mod whitespace_anchored_rev;
+pub mod word_break_fwd;
diff --git a/src/unicode/fsm/regional_indicator_rev.bigendian.dfa b/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
new file mode 100644
index 0000000..1a3357f
--- /dev/null
+++ b/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/regional_indicator_rev.littleendian.dfa b/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
new file mode 100644
index 0000000..e437aae
--- /dev/null
+++ b/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/regional_indicator_rev.rs b/src/unicode/fsm/regional_indicator_rev.rs
new file mode 100644
index 0000000..3b6beff
--- /dev/null
+++ b/src/unicode/fsm/regional_indicator_rev.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name REGIONAL_INDICATOR_REV --reverse --classes --minimize --anchored --premultiply --state-size 1 src/unicode/fsm/ \p{gcb=Regional_Indicator}
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static! {
+ pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static! {
+ pub static ref REGIONAL_INDICATOR_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
diff --git a/src/unicode/fsm/sentence_break_fwd.bigendian.dfa b/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
new file mode 100644
index 0000000..a1813d7
--- /dev/null
+++ b/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/sentence_break_fwd.littleendian.dfa b/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
new file mode 100644
index 0000000..2763583
--- /dev/null
+++ b/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/sentence_break_fwd.rs b/src/unicode/fsm/sentence_break_fwd.rs
new file mode 100644
index 0000000..46ecfcf
--- /dev/null
+++ b/src/unicode/fsm/sentence_break_fwd.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name SENTENCE_BREAK_FWD --minimize --sparse --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static! {
+ pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static! {
+ pub static ref SENTENCE_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
diff --git a/src/unicode/fsm/simple_word_fwd.bigendian.dfa b/src/unicode/fsm/simple_word_fwd.bigendian.dfa
new file mode 100644
index 0000000..adc64c1
--- /dev/null
+++ b/src/unicode/fsm/simple_word_fwd.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/simple_word_fwd.littleendian.dfa b/src/unicode/fsm/simple_word_fwd.littleendian.dfa
new file mode 100644
index 0000000..dd48386
--- /dev/null
+++ b/src/unicode/fsm/simple_word_fwd.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/simple_word_fwd.rs b/src/unicode/fsm/simple_word_fwd.rs
new file mode 100644
index 0000000..c5fabe3
--- /dev/null
+++ b/src/unicode/fsm/simple_word_fwd.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name SIMPLE_WORD_FWD --sparse --minimize --state-size 2 src/unicode/fsm/ \w
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static! {
+ pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static! {
+ pub static ref SIMPLE_WORD_FWD: ::regex_automata::SparseDFA<&'static [u8], u16> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
diff --git a/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa b/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
new file mode 100644
index 0000000..bcfc4e9
--- /dev/null
+++ b/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa b/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
new file mode 100644
index 0000000..d534a46
--- /dev/null
+++ b/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/whitespace_anchored_fwd.rs b/src/unicode/fsm/whitespace_anchored_fwd.rs
new file mode 100644
index 0000000..ea68582
--- /dev/null
+++ b/src/unicode/fsm/whitespace_anchored_fwd.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name WHITESPACE_ANCHORED_FWD --anchored --classes --premultiply --minimize --state-size 1 src/unicode/fsm/ \s+
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static! {
+ pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static! {
+ pub static ref WHITESPACE_ANCHORED_FWD: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
diff --git a/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa b/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
new file mode 100644
index 0000000..bb217f1
--- /dev/null
+++ b/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa b/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
new file mode 100644
index 0000000..a7cb5a7
--- /dev/null
+++ b/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/whitespace_anchored_rev.rs b/src/unicode/fsm/whitespace_anchored_rev.rs
new file mode 100644
index 0000000..72b444e
--- /dev/null
+++ b/src/unicode/fsm/whitespace_anchored_rev.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name WHITESPACE_ANCHORED_REV --reverse --anchored --classes --minimize --state-size 1 src/unicode/fsm/ \s+
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static! {
+ pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static! {
+ pub static ref WHITESPACE_ANCHORED_REV: ::regex_automata::DenseDFA<&'static [u8], u8> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
diff --git a/src/unicode/fsm/word_break_fwd.bigendian.dfa b/src/unicode/fsm/word_break_fwd.bigendian.dfa
new file mode 100644
index 0000000..1e75db6
--- /dev/null
+++ b/src/unicode/fsm/word_break_fwd.bigendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/word_break_fwd.littleendian.dfa b/src/unicode/fsm/word_break_fwd.littleendian.dfa
new file mode 100644
index 0000000..e3093a3
--- /dev/null
+++ b/src/unicode/fsm/word_break_fwd.littleendian.dfa
Binary files differ
diff --git a/src/unicode/fsm/word_break_fwd.rs b/src/unicode/fsm/word_break_fwd.rs
new file mode 100644
index 0000000..52e6bc2
--- /dev/null
+++ b/src/unicode/fsm/word_break_fwd.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name WORD_BREAK_FWD --sparse --minimize --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.8 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+lazy_static! {
+ pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("word_break_fwd.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
+
+#[cfg(target_endian = "little")]
+lazy_static! {
+ pub static ref WORD_BREAK_FWD: ::regex_automata::SparseDFA<&'static [u8], u32> = {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("word_break_fwd.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+ };
+}
diff --git a/src/unicode/grapheme.rs b/src/unicode/grapheme.rs
new file mode 100644
index 0000000..e40a0de
--- /dev/null
+++ b/src/unicode/grapheme.rs
@@ -0,0 +1,355 @@
+use regex_automata::DFA;
+
+use ext_slice::ByteSlice;
+use unicode::fsm::grapheme_break_fwd::GRAPHEME_BREAK_FWD;
+use unicode::fsm::grapheme_break_rev::GRAPHEME_BREAK_REV;
+use unicode::fsm::regional_indicator_rev::REGIONAL_INDICATOR_REV;
+use utf8;
+
+/// An iterator over grapheme clusters in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::graphemes`](trait.ByteSlice.html#method.graphemes).
+///
+/// Unicode defines a grapheme cluster as an *approximation* to a single user
+/// visible character. A grapheme cluster, or just "grapheme," is made up of
+/// one or more codepoints. For end user oriented tasks, one should generally
+/// prefer using graphemes instead of [`Chars`](struct.Chars.html), which
+/// always yields one codepoint at a time.
+///
+/// Since graphemes are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator can be used in reverse. When reversed, exactly the same
+/// set of grapheme clusters are yielded, but in reverse order.
+///
+/// This iterator only yields *extended* grapheme clusters, in accordance with
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Grapheme_Cluster_Boundaries).
+#[derive(Clone, Debug)]
+pub struct Graphemes<'a> {
+ bs: &'a [u8],
+}
+
+impl<'a> Graphemes<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> Graphemes<'a> {
+ Graphemes { bs }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"abc".graphemes();
+ ///
+ /// assert_eq!(b"abc", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"bc", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for Graphemes<'a> {
+ type Item = &'a str;
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ let (grapheme, size) = decode_grapheme(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ Some(grapheme)
+ }
+}
+
+impl<'a> DoubleEndedIterator for Graphemes<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<&'a str> {
+ let (grapheme, size) = decode_last_grapheme(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[..self.bs.len() - size];
+ Some(grapheme)
+ }
+}
+
+/// An iterator over grapheme clusters in a byte string and their byte index
+/// positions.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::grapheme_indices`](trait.ByteSlice.html#method.grapheme_indices).
+///
+/// Unicode defines a grapheme cluster as an *approximation* to a single user
+/// visible character. A grapheme cluster, or just "grapheme," is made up of
+/// one or more codepoints. For end user oriented tasks, one should generally
+/// prefer using graphemes instead of [`Chars`](struct.Chars.html), which
+/// always yields one codepoint at a time.
+///
+/// Since graphemes are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// grapheme cluster yielded with those indices. For example, when this
+/// iterator encounters `\xFF` in the byte string, then it will yield a pair
+/// of indices ranging over a single byte, but will provide an `&str`
+/// equivalent to `"\u{FFFD}"`, which is three bytes in length. However, when
+/// given only valid UTF-8, then all indices are in exact correspondence with
+/// their paired grapheme cluster.
+///
+/// This iterator can be used in reverse. When reversed, exactly the same
+/// set of grapheme clusters are yielded, but in reverse order.
+///
+/// This iterator only yields *extended* grapheme clusters, in accordance with
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Grapheme_Cluster_Boundaries).
+#[derive(Clone, Debug)]
+pub struct GraphemeIndices<'a> {
+ bs: &'a [u8],
+ forward_index: usize,
+ reverse_index: usize,
+}
+
+impl<'a> GraphemeIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> GraphemeIndices<'a> {
+ GraphemeIndices { bs: bs, forward_index: 0, reverse_index: bs.len() }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"abc".grapheme_indices();
+ ///
+ /// assert_eq!(b"abc", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"bc", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for GraphemeIndices<'a> {
+ type Item = (usize, usize, &'a str);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+ let index = self.forward_index;
+ let (grapheme, size) = decode_grapheme(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ self.forward_index += size;
+ Some((index, index + size, grapheme))
+ }
+}
+
+impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<(usize, usize, &'a str)> {
+ let (grapheme, size) = decode_last_grapheme(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[..self.bs.len() - size];
+ self.reverse_index -= size;
+ Some((self.reverse_index, self.reverse_index + size, grapheme))
+ }
+}
+
+/// Decode a grapheme from the given byte string.
+///
+/// This returns the resulting grapheme (which may be a Unicode replacement
+/// codepoint if invalid UTF-8 was found), along with the number of bytes
+/// decoded in the byte string. The number of bytes decoded may not be the
+/// same as the length of grapheme in the case where invalid UTF-8 is found.
+pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
+ if bs.is_empty() {
+ ("", 0)
+ } else if let Some(end) = GRAPHEME_BREAK_FWD.find(bs) {
+ // Safe because a match can only occur for valid UTF-8.
+ let grapheme = unsafe { bs[..end].to_str_unchecked() };
+ (grapheme, grapheme.len())
+ } else {
+ const INVALID: &'static str = "\u{FFFD}";
+ // No match on non-empty bytes implies we found invalid UTF-8.
+ let (_, size) = utf8::decode_lossy(bs);
+ (INVALID, size)
+ }
+}
+
+fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) {
+ if bs.is_empty() {
+ ("", 0)
+ } else if let Some(mut start) = GRAPHEME_BREAK_REV.rfind(bs) {
+ start = adjust_rev_for_regional_indicator(bs, start);
+ // Safe because a match can only occur for valid UTF-8.
+ let grapheme = unsafe { bs[start..].to_str_unchecked() };
+ (grapheme, grapheme.len())
+ } else {
+ const INVALID: &'static str = "\u{FFFD}";
+ // No match on non-empty bytes implies we found invalid UTF-8.
+ let (_, size) = utf8::decode_last_lossy(bs);
+ (INVALID, size)
+ }
+}
+
+/// Return the correct offset for the next grapheme decoded at the end of the
+/// given byte string, where `i` is the initial guess. In particular,
+/// `&bs[i..]` represents the candidate grapheme.
+///
+/// `i` is returned by this function in all cases except when `&bs[i..]` is
+/// a pair of regional indicator codepoints. In that case, if an odd number of
+/// additional regional indicator codepoints precedes `i`, then `i` is
+/// adjusted such that it points to only a single regional indicator.
+///
+/// This "fixing" is necessary to handle the requirement that a break cannot
+/// occur between regional indicators where it would cause an odd number of
+/// regional indicators to exist before the break from the *start* of the
+/// string. A reverse regex cannot detect this case easily without look-around.
+fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {
+ // All regional indicators use a 4 byte encoding, and we only care about
+ // the case where we found a pair of regional indicators.
+ if bs.len() - i != 8 {
+ return i;
+ }
+ // Count all contiguous occurrences of regional indicators. If there's an
+ // even number of them, then we can accept the pair we found. Otherwise,
+ // we can only take one of them.
+ //
+ // FIXME: This is quadratic in the worst case, e.g., a string of just
+ // regional indicator codepoints. A fix probably requires refactoring this
+ // code a bit such that we don't rescan regional indicators.
+ let mut count = 0;
+ while let Some(start) = REGIONAL_INDICATOR_REV.rfind(bs) {
+ bs = &bs[..start];
+ count += 1;
+ }
+ if count % 2 == 0 {
+ i
+ } else {
+ i + 4
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use ucd_parse::GraphemeClusterBreakTest;
+
+ use super::*;
+ use ext_slice::ByteSlice;
+ use tests::LOSSY_TESTS;
+
+ #[test]
+ fn forward_ucd() {
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.grapheme_clusters.concat();
+ let got: Vec<String> = Graphemes::new(given.as_bytes())
+ .map(|cluster| cluster.to_string())
+ .collect();
+ assert_eq!(
+ test.grapheme_clusters,
+ got,
+ "\ngrapheme forward break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ uniescape(&given),
+ uniescape_vec(&test.grapheme_clusters),
+ uniescape_vec(&got),
+ );
+ }
+ }
+
+ #[test]
+ fn reverse_ucd() {
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.grapheme_clusters.concat();
+ let mut got: Vec<String> = Graphemes::new(given.as_bytes())
+ .rev()
+ .map(|cluster| cluster.to_string())
+ .collect();
+ got.reverse();
+ assert_eq!(
+ test.grapheme_clusters,
+ got,
+ "\n\ngrapheme reverse break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ uniescape(&given),
+ uniescape_vec(&test.grapheme_clusters),
+ uniescape_vec(&got),
+ );
+ }
+ }
+
+ #[test]
+ fn forward_lossy() {
+ for &(expected, input) in LOSSY_TESTS {
+ let got = Graphemes::new(input.as_bytes()).collect::<String>();
+ assert_eq!(expected, got);
+ }
+ }
+
+ #[test]
+ fn reverse_lossy() {
+ for &(expected, input) in LOSSY_TESTS {
+ let expected: String = expected.chars().rev().collect();
+ let got =
+ Graphemes::new(input.as_bytes()).rev().collect::<String>();
+ assert_eq!(expected, got);
+ }
+ }
+
+ fn uniescape(s: &str) -> String {
+ s.chars().flat_map(|c| c.escape_unicode()).collect::<String>()
+ }
+
+ fn uniescape_vec(strs: &[String]) -> Vec<String> {
+ strs.iter().map(|s| uniescape(s)).collect()
+ }
+
+ /// Return all of the UCD for grapheme breaks.
+ fn ucdtests() -> Vec<GraphemeClusterBreakTest> {
+ const TESTDATA: &'static str =
+ include_str!("data/GraphemeBreakTest.txt");
+
+ let mut tests = vec![];
+ for mut line in TESTDATA.lines() {
+ line = line.trim();
+ if line.starts_with("#") || line.contains("surrogate") {
+ continue;
+ }
+ tests.push(line.parse().unwrap());
+ }
+ tests
+ }
+}
diff --git a/src/unicode/mod.rs b/src/unicode/mod.rs
new file mode 100644
index 0000000..60318f4
--- /dev/null
+++ b/src/unicode/mod.rs
@@ -0,0 +1,12 @@
+pub use self::grapheme::{decode_grapheme, GraphemeIndices, Graphemes};
+pub use self::sentence::{SentenceIndices, Sentences};
+pub use self::whitespace::{whitespace_len_fwd, whitespace_len_rev};
+pub use self::word::{
+ WordIndices, Words, WordsWithBreakIndices, WordsWithBreaks,
+};
+
+mod fsm;
+mod grapheme;
+mod sentence;
+mod whitespace;
+mod word;
diff --git a/src/unicode/sentence.rs b/src/unicode/sentence.rs
new file mode 100644
index 0000000..01f5473
--- /dev/null
+++ b/src/unicode/sentence.rs
@@ -0,0 +1,220 @@
+use regex_automata::DFA;
+
+use ext_slice::ByteSlice;
+use unicode::fsm::sentence_break_fwd::SENTENCE_BREAK_FWD;
+use utf8;
+
+/// An iterator over sentences in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::sentences`](trait.ByteSlice.html#method.sentences).
+///
+/// Sentences typically include their trailing punctuation and whitespace.
+///
+/// Since sentences are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator yields words in accordance with the default sentence boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Sentence_Boundaries).
+#[derive(Clone, Debug)]
+pub struct Sentences<'a> {
+ bs: &'a [u8],
+}
+
+impl<'a> Sentences<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> Sentences<'a> {
+ Sentences { bs }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"I want this. Not that. Right now.".sentences();
+ ///
+ /// assert_eq!(&b"I want this. Not that. Right now."[..], it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"Not that. Right now.", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for Sentences<'a> {
+ type Item = &'a str;
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ let (sentence, size) = decode_sentence(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ Some(sentence)
+ }
+}
+
+/// An iterator over sentences in a byte string, along with their byte offsets.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::sentence_indices`](trait.ByteSlice.html#method.sentence_indices).
+///
+/// Sentences typically include their trailing punctuation and whitespace.
+///
+/// Since sentences are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// sentence yielded with those indices. For example, when this iterator
+/// encounters `\xFF` in the byte string, then it will yield a pair of indices
+/// ranging over a single byte, but will provide an `&str` equivalent to
+/// `"\u{FFFD}"`, which is three bytes in length. However, when given only
+/// valid UTF-8, then all indices are in exact correspondence with their paired
+/// word.
+///
+/// This iterator yields words in accordance with the default sentence boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Sentence_Boundaries).
+#[derive(Clone, Debug)]
+pub struct SentenceIndices<'a> {
+ bs: &'a [u8],
+ forward_index: usize,
+}
+
+impl<'a> SentenceIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> SentenceIndices<'a> {
+ SentenceIndices { bs: bs, forward_index: 0 }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"I want this. Not that. Right now.".sentence_indices();
+ ///
+ /// assert_eq!(&b"I want this. Not that. Right now."[..], it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"Not that. Right now.", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for SentenceIndices<'a> {
+ type Item = (usize, usize, &'a str);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+ let index = self.forward_index;
+ let (word, size) = decode_sentence(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ self.forward_index += size;
+ Some((index, index + size, word))
+ }
+}
+
+fn decode_sentence(bs: &[u8]) -> (&str, usize) {
+ if bs.is_empty() {
+ ("", 0)
+ } else if let Some(end) = SENTENCE_BREAK_FWD.find(bs) {
+ // Safe because a match can only occur for valid UTF-8.
+ let sentence = unsafe { bs[..end].to_str_unchecked() };
+ (sentence, sentence.len())
+ } else {
+ const INVALID: &'static str = "\u{FFFD}";
+ // No match on non-empty bytes implies we found invalid UTF-8.
+ let (_, size) = utf8::decode_lossy(bs);
+ (INVALID, size)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use ucd_parse::SentenceBreakTest;
+
+ use ext_slice::ByteSlice;
+
+ #[test]
+ fn forward_ucd() {
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.sentences.concat();
+ let got = sentences(given.as_bytes());
+ assert_eq!(
+ test.sentences,
+ got,
+ "\n\nsentence forward break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ given,
+ strs_to_bstrs(&test.sentences),
+ strs_to_bstrs(&got),
+ );
+ }
+ }
+
+ // Some additional tests that don't seem to be covered by the UCD tests.
+ #[test]
+ fn forward_additional() {
+ assert_eq!(vec!["a.. ", "A"], sentences(b"a.. A"));
+ assert_eq!(vec!["a.. a"], sentences(b"a.. a"));
+
+ assert_eq!(vec!["a... ", "A"], sentences(b"a... A"));
+ assert_eq!(vec!["a... a"], sentences(b"a... a"));
+
+ assert_eq!(vec!["a...,..., a"], sentences(b"a...,..., a"));
+ }
+
+ fn sentences(bytes: &[u8]) -> Vec<&str> {
+ bytes.sentences().collect()
+ }
+
+ fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
+ strs.iter().map(|s| s.as_ref().as_bytes()).collect()
+ }
+
+ /// Return all of the UCD for sentence breaks.
+ fn ucdtests() -> Vec<SentenceBreakTest> {
+ const TESTDATA: &'static str =
+ include_str!("data/SentenceBreakTest.txt");
+
+ let mut tests = vec![];
+ for mut line in TESTDATA.lines() {
+ line = line.trim();
+ if line.starts_with("#") || line.contains("surrogate") {
+ continue;
+ }
+ tests.push(line.parse().unwrap());
+ }
+ tests
+ }
+}
diff --git a/src/unicode/whitespace.rs b/src/unicode/whitespace.rs
new file mode 100644
index 0000000..a8da144
--- /dev/null
+++ b/src/unicode/whitespace.rs
@@ -0,0 +1,14 @@
+use regex_automata::DFA;
+
+use unicode::fsm::whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD;
+use unicode::fsm::whitespace_anchored_rev::WHITESPACE_ANCHORED_REV;
+
+/// Return the first position of a non-whitespace character.
+pub fn whitespace_len_fwd(slice: &[u8]) -> usize {
+ WHITESPACE_ANCHORED_FWD.find(slice).unwrap_or(0)
+}
+
+/// Return the last position of a non-whitespace character.
+pub fn whitespace_len_rev(slice: &[u8]) -> usize {
+ WHITESPACE_ANCHORED_REV.rfind(slice).unwrap_or(slice.len())
+}
diff --git a/src/unicode/word.rs b/src/unicode/word.rs
new file mode 100644
index 0000000..1260e52
--- /dev/null
+++ b/src/unicode/word.rs
@@ -0,0 +1,406 @@
+use regex_automata::DFA;
+
+use ext_slice::ByteSlice;
+use unicode::fsm::simple_word_fwd::SIMPLE_WORD_FWD;
+use unicode::fsm::word_break_fwd::WORD_BREAK_FWD;
+use utf8;
+
+/// An iterator over words in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::words`](trait.ByteSlice.html#method.words).
+///
+/// This is similar to the [`WordsWithBreaks`](struct.WordsWithBreaks.html)
+/// iterator, except it only returns elements that contain a "word" character.
+/// A word character is defined by UTS #18 (Annex C) to be the combination
+/// of the `Alphabetic` and `Join_Control` properties, along with the
+/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general categories.
+///
+/// Since words are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct Words<'a>(WordsWithBreaks<'a>);
+
+impl<'a> Words<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> Words<'a> {
+ Words(WordsWithBreaks::new(bs))
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".words();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.0.as_bytes()
+ }
+}
+
+impl<'a> Iterator for Words<'a> {
+ type Item = &'a str;
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ while let Some(word) = self.0.next() {
+ if SIMPLE_WORD_FWD.is_match(word.as_bytes()) {
+ return Some(word);
+ }
+ }
+ None
+ }
+}
+
+/// An iterator over words in a byte string and their byte index positions.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::word_indices`](trait.ByteSlice.html#method.word_indices).
+///
+/// This is similar to the
+/// [`WordsWithBreakIndices`](struct.WordsWithBreakIndices.html) iterator,
+/// except it only returns elements that contain a "word" character. A
+/// word character is defined by UTS #18 (Annex C) to be the combination
+/// of the `Alphabetic` and `Join_Control` properties, along with the
+/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general categories.
+///
+/// Since words are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// word yielded with those indices. For example, when this iterator encounters
+/// `\xFF` in the byte string, then it will yield a pair of indices ranging
+/// over a single byte, but will provide an `&str` equivalent to `"\u{FFFD}"`,
+/// which is three bytes in length. However, when given only valid UTF-8, then
+/// all indices are in exact correspondence with their paired word.
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct WordIndices<'a>(WordsWithBreakIndices<'a>);
+
+impl<'a> WordIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> WordIndices<'a> {
+ WordIndices(WordsWithBreakIndices::new(bs))
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".word_indices();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.0.as_bytes()
+ }
+}
+
+impl<'a> Iterator for WordIndices<'a> {
+ type Item = (usize, usize, &'a str);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+ while let Some((start, end, word)) = self.0.next() {
+ if SIMPLE_WORD_FWD.is_match(word.as_bytes()) {
+ return Some((start, end, word));
+ }
+ }
+ None
+ }
+}
+
+/// An iterator over all word breaks in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks).
+///
+/// This iterator yields not only all words, but the content that comes between
+/// words. In particular, if all elements yielded by this iterator are
+/// concatenated, then the result is the original string (subject to Unicode
+/// replacement codepoint substitutions).
+///
+/// Since words are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct WordsWithBreaks<'a> {
+ bs: &'a [u8],
+}
+
+impl<'a> WordsWithBreaks<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> WordsWithBreaks<'a> {
+ WordsWithBreaks { bs }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".words_with_breaks();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b" bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for WordsWithBreaks<'a> {
+ type Item = &'a str;
+
+ #[inline]
+ fn next(&mut self) -> Option<&'a str> {
+ let (word, size) = decode_word(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ Some(word)
+ }
+}
+
+/// An iterator over all word breaks in a byte string, along with their byte
+/// index positions.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices).
+///
+/// This iterator yields not only all words, but the content that comes between
+/// words. In particular, if all elements yielded by this iterator are
+/// concatenated, then the result is the original string (subject to Unicode
+/// replacement codepoint substitutions).
+///
+/// Since words are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// word yielded with those indices. For example, when this iterator encounters
+/// `\xFF` in the byte string, then it will yield a pair of indices ranging
+/// over a single byte, but will provide an `&str` equivalent to `"\u{FFFD}"`,
+/// which is three bytes in length. However, when given only valid UTF-8, then
+/// all indices are in exact correspondence with their paired word.
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct WordsWithBreakIndices<'a> {
+ bs: &'a [u8],
+ forward_index: usize,
+}
+
+impl<'a> WordsWithBreakIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> WordsWithBreakIndices<'a> {
+ WordsWithBreakIndices { bs: bs, forward_index: 0 }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".words_with_break_indices();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b" bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for WordsWithBreakIndices<'a> {
+ type Item = (usize, usize, &'a str);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+ let index = self.forward_index;
+ let (word, size) = decode_word(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ self.forward_index += size;
+ Some((index, index + size, word))
+ }
+}
+
+fn decode_word(bs: &[u8]) -> (&str, usize) {
+ if bs.is_empty() {
+ ("", 0)
+ } else if let Some(end) = WORD_BREAK_FWD.find(bs) {
+ // Safe because a match can only occur for valid UTF-8.
+ let word = unsafe { bs[..end].to_str_unchecked() };
+ (word, word.len())
+ } else {
+ const INVALID: &'static str = "\u{FFFD}";
+ // No match on non-empty bytes implies we found invalid UTF-8.
+ let (_, size) = utf8::decode_lossy(bs);
+ (INVALID, size)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use ucd_parse::WordBreakTest;
+
+ use ext_slice::ByteSlice;
+
+ #[test]
+ fn forward_ucd() {
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.words.concat();
+ let got = words(given.as_bytes());
+ assert_eq!(
+ test.words,
+ got,
+ "\n\nword forward break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ given,
+ strs_to_bstrs(&test.words),
+ strs_to_bstrs(&got),
+ );
+ }
+ }
+
+ // Some additional tests that don't seem to be covered by the UCD tests.
+ //
+ // It's pretty amazing that the UCD tests miss these cases. I only found
+ // them by running this crate's segmenter and ICU's segmenter on the same
+ // text and comparing the output.
+ #[test]
+ fn forward_additional() {
+ assert_eq!(vec!["a", ".", " ", "Y"], words(b"a. Y"));
+ assert_eq!(vec!["r", ".", " ", "Yo"], words(b"r. Yo"));
+ assert_eq!(
+ vec!["whatsoever", ".", " ", "You", " ", "may"],
+ words(b"whatsoever. You may")
+ );
+ assert_eq!(
+ vec!["21stcentury'syesterday"],
+ words(b"21stcentury'syesterday")
+ );
+
+ assert_eq!(vec!["Bonta_", "'", "s"], words(b"Bonta_'s"));
+ assert_eq!(vec!["_vhat's"], words(b"_vhat's"));
+ assert_eq!(vec!["__on'anima"], words(b"__on'anima"));
+ assert_eq!(vec!["123_", "'", "4"], words(b"123_'4"));
+ assert_eq!(vec!["_123'4"], words(b"_123'4"));
+ assert_eq!(vec!["__12'345"], words(b"__12'345"));
+
+ assert_eq!(
+ vec!["tomorrowat4", ":", "00", ","],
+ words(b"tomorrowat4:00,")
+ );
+ assert_eq!(vec!["RS1", "'", "s"], words(b"RS1's"));
+ assert_eq!(vec!["X38"], words(b"X38"));
+
+ assert_eq!(vec!["4abc", ":", "00", ","], words(b"4abc:00,"));
+ assert_eq!(vec!["12S", "'", "1"], words(b"12S'1"));
+ assert_eq!(vec!["1XY"], words(b"1XY"));
+
+ assert_eq!(vec!["\u{FEFF}", "Ты"], words("\u{FEFF}Ты".as_bytes()));
+ }
+
+ fn words(bytes: &[u8]) -> Vec<&str> {
+ bytes.words_with_breaks().collect()
+ }
+
+ fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
+ strs.iter().map(|s| s.as_ref().as_bytes()).collect()
+ }
+
+ /// Return all of the UCD for word breaks.
+ fn ucdtests() -> Vec<WordBreakTest> {
+ const TESTDATA: &'static str = include_str!("data/WordBreakTest.txt");
+
+ let mut tests = vec![];
+ for mut line in TESTDATA.lines() {
+ line = line.trim();
+ if line.starts_with("#") || line.contains("surrogate") {
+ continue;
+ }
+ tests.push(line.parse().unwrap());
+ }
+ tests
+ }
+}
diff --git a/src/utf8.rs b/src/utf8.rs
new file mode 100644
index 0000000..2d4035d
--- /dev/null
+++ b/src/utf8.rs
@@ -0,0 +1,1370 @@
+use core::char;
+use core::cmp;
+use core::fmt;
+use core::str;
+#[cfg(feature = "std")]
+use std::error;
+
+use ascii;
+use bstr::BStr;
+use ext_slice::ByteSlice;
+
+// The UTF-8 decoder provided here is based on the one presented here:
+// https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+//
+// We *could* have done UTF-8 decoding by using a DFA generated by `\p{any}`
+// using regex-automata that is roughly the same size. The real benefit of
+// Hoehrmann's formulation is that the byte class mapping below is manually
+// tailored such that each byte's class doubles as a shift to mask out the
+// bits necessary for constructing the leading bits of each codepoint value
+// from the initial byte.
+//
+// There are some minor differences between this implementation and Hoehrmann's
+// formulation.
+//
+// Firstly, we make REJECT have state ID 0, since it makes the state table
+// itself a little easier to read and is consistent with the notion that 0
+// means "false" or "bad."
+//
+// Secondly, when doing bulk decoding, we add a SIMD accelerated ASCII fast
+// path.
+//
+// Thirdly, we pre-multiply the state IDs to avoid a multiplication instruction
+// in the core decoding loop. (Which is what regex-automata would do by
+// default.)
+//
+// Fourthly, we split the byte class mapping and transition table into two
+// arrays because it's clearer.
+//
+// It is unlikely that this is the fastest way to do UTF-8 decoding, however,
+// it is fairly simple.
+
+const ACCEPT: usize = 12;
+const REJECT: usize = 0;
+
+/// SAFETY: The decode below function relies on the correctness of these
+/// equivalence classes.
+#[cfg_attr(rustfmt, rustfmt::skip)]
+const CLASSES: [u8; 256] = [
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+];
+
+/// SAFETY: The decode below function relies on the correctness of this state
+/// machine.
+#[cfg_attr(rustfmt, rustfmt::skip)]
+const STATES_FORWARD: &'static [u8] = &[
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 12, 0, 24, 36, 60, 96, 84, 0, 0, 0, 48, 72,
+ 0, 12, 0, 0, 0, 0, 0, 12, 0, 12, 0, 0,
+ 0, 24, 0, 0, 0, 0, 0, 24, 0, 24, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0,
+ 0, 24, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 36, 0, 36, 0, 0,
+ 0, 36, 0, 0, 0, 0, 0, 36, 0, 36, 0, 0,
+ 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
+/// An iterator over Unicode scalar values in a byte string.
+///
+/// When invalid UTF-8 byte sequences are found, they are substituted with the
+/// Unicode replacement codepoint (`U+FFFD`) using the
+/// ["maximal subpart" strategy](http://www.unicode.org/review/pr-121.html).
+///
+/// This iterator is created by the
+/// [`chars`](trait.ByteSlice.html#method.chars) method provided by the
+/// [`ByteSlice`](trait.ByteSlice.html) extension trait for `&[u8]`.
+#[derive(Clone, Debug)]
+pub struct Chars<'a> {
+ bs: &'a [u8],
+}
+
+impl<'a> Chars<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> Chars<'a> {
+ Chars { bs }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut chars = b"abc".chars();
+ ///
+ /// assert_eq!(b"abc", chars.as_bytes());
+ /// chars.next();
+ /// assert_eq!(b"bc", chars.as_bytes());
+ /// chars.next();
+ /// chars.next();
+ /// assert_eq!(b"", chars.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for Chars<'a> {
+ type Item = char;
+
+ #[inline]
+ fn next(&mut self) -> Option<char> {
+ let (ch, size) = decode_lossy(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ Some(ch)
+ }
+}
+
+impl<'a> DoubleEndedIterator for Chars<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<char> {
+ let (ch, size) = decode_last_lossy(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[..self.bs.len() - size];
+ Some(ch)
+ }
+}
+
+/// An iterator over Unicode scalar values in a byte string and their
+/// byte index positions.
+///
+/// When invalid UTF-8 byte sequences are found, they are substituted with the
+/// Unicode replacement codepoint (`U+FFFD`) using the
+/// ["maximal subpart" strategy](http://www.unicode.org/review/pr-121.html).
+///
+/// Note that this is slightly different from the `CharIndices` iterator
+/// provided by the standard library. Aside from working on possibly invalid
+/// UTF-8, this iterator provides both the corresponding starting and ending
+/// byte indices of each codepoint yielded. The ending position is necessary to
+/// slice the original byte string when invalid UTF-8 bytes are converted into
+/// a Unicode replacement codepoint, since a single replacement codepoint can
+/// substitute anywhere from 1 to 3 invalid bytes (inclusive).
+///
+/// This iterator is created by the
+/// [`char_indices`](trait.ByteSlice.html#method.char_indices) method provided
+/// by the [`ByteSlice`](trait.ByteSlice.html) extension trait for `&[u8]`.
+#[derive(Clone, Debug)]
+pub struct CharIndices<'a> {
+ bs: &'a [u8],
+ forward_index: usize,
+ reverse_index: usize,
+}
+
+impl<'a> CharIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> CharIndices<'a> {
+ CharIndices { bs: bs, forward_index: 0, reverse_index: bs.len() }
+ }
+
+ /// View the underlying data as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"abc".char_indices();
+ ///
+ /// assert_eq!(b"abc", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"bc", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for CharIndices<'a> {
+ type Item = (usize, usize, char);
+
+ #[inline]
+ fn next(&mut self) -> Option<(usize, usize, char)> {
+ let index = self.forward_index;
+ let (ch, size) = decode_lossy(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[size..];
+ self.forward_index += size;
+ Some((index, index + size, ch))
+ }
+}
+
+impl<'a> DoubleEndedIterator for CharIndices<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<(usize, usize, char)> {
+ let (ch, size) = decode_last_lossy(self.bs);
+ if size == 0 {
+ return None;
+ }
+ self.bs = &self.bs[..self.bs.len() - size];
+ self.reverse_index -= size;
+ Some((self.reverse_index, self.reverse_index + size, ch))
+ }
+}
+
+impl<'a> ::core::iter::FusedIterator for CharIndices<'a> {}
+
+/// An iterator over chunks of valid UTF-8 in a byte slice.
+///
+/// See [`utf8_chunks`](trait.ByteSlice.html#method.utf8_chunks).
+#[derive(Clone, Debug)]
+pub struct Utf8Chunks<'a> {
+ pub(super) bytes: &'a [u8],
+}
+
+/// A chunk of valid UTF-8, possibly followed by invalid UTF-8 bytes.
+///
+/// This is yielded by the
+/// [`Utf8Chunks`](struct.Utf8Chunks.html)
+/// iterator, which can be created via the
+/// [`ByteSlice::utf8_chunks`](trait.ByteSlice.html#method.utf8_chunks)
+/// method.
+///
+/// The `'a` lifetime parameter corresponds to the lifetime of the bytes that
+/// are being iterated over.
+#[cfg_attr(test, derive(Debug, PartialEq))]
+pub struct Utf8Chunk<'a> {
+ /// A valid UTF-8 piece, at the start, end, or between invalid UTF-8 bytes.
+ ///
+ /// This is empty between adjacent invalid UTF-8 byte sequences.
+ valid: &'a str,
+ /// A sequence of invalid UTF-8 bytes.
+ ///
+ /// Can only be empty in the last chunk.
+ ///
+ /// Should be replaced by a single unicode replacement character, if not
+ /// empty.
+ invalid: &'a BStr,
+ /// Indicates whether the invalid sequence could've been valid if there
+ /// were more bytes.
+ ///
+ /// Can only be true in the last chunk.
+ incomplete: bool,
+}
+
+impl<'a> Utf8Chunk<'a> {
+ /// Returns the (possibly empty) valid UTF-8 bytes in this chunk.
+ ///
+ /// This may be empty if there are consecutive sequences of invalid UTF-8
+ /// bytes.
+ #[inline]
+ pub fn valid(&self) -> &'a str {
+ self.valid
+ }
+
+ /// Returns the (possibly empty) invalid UTF-8 bytes in this chunk that
+ /// immediately follow the valid UTF-8 bytes in this chunk.
+ ///
+ /// This is only empty when this chunk corresponds to the last chunk in
+ /// the original bytes.
+ ///
+ /// The maximum length of this slice is 3. That is, invalid UTF-8 byte
+ /// sequences greater than 1 always correspond to a valid _prefix_ of
+ /// a valid UTF-8 encoded codepoint. This corresponds to the "substitution
+ /// of maximal subparts" strategy that is described in more detail in the
+ /// docs for the
+ /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
+ /// method.
+ #[inline]
+ pub fn invalid(&self) -> &'a [u8] {
+ self.invalid.as_bytes()
+ }
+
+ /// Returns whether the invalid sequence might still become valid if more
+ /// bytes are added.
+ ///
+ /// Returns true if the end of the input was reached unexpectedly,
+ /// without encountering an unexpected byte.
+ ///
+ /// This can only be the case for the last chunk.
+ #[inline]
+ pub fn incomplete(&self) -> bool {
+ self.incomplete
+ }
+}
+
+impl<'a> Iterator for Utf8Chunks<'a> {
+ type Item = Utf8Chunk<'a>;
+
+ #[inline]
+ fn next(&mut self) -> Option<Utf8Chunk<'a>> {
+ if self.bytes.is_empty() {
+ return None;
+ }
+ match validate(self.bytes) {
+ Ok(()) => {
+ let valid = self.bytes;
+ self.bytes = &[];
+ Some(Utf8Chunk {
+ // SAFETY: This is safe because of the guarantees provided
+ // by utf8::validate.
+ valid: unsafe { str::from_utf8_unchecked(valid) },
+ invalid: [].as_bstr(),
+ incomplete: false,
+ })
+ }
+ Err(e) => {
+ let (valid, rest) = self.bytes.split_at(e.valid_up_to());
+ // SAFETY: This is safe because of the guarantees provided by
+ // utf8::validate.
+ let valid = unsafe { str::from_utf8_unchecked(valid) };
+ let (invalid_len, incomplete) = match e.error_len() {
+ Some(n) => (n, false),
+ None => (rest.len(), true),
+ };
+ let (invalid, rest) = rest.split_at(invalid_len);
+ self.bytes = rest;
+ Some(Utf8Chunk {
+ valid,
+ invalid: invalid.as_bstr(),
+ incomplete,
+ })
+ }
+ }
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ if self.bytes.is_empty() {
+ (0, Some(0))
+ } else {
+ (1, Some(self.bytes.len()))
+ }
+ }
+}
+
+impl<'a> ::core::iter::FusedIterator for Utf8Chunks<'a> {}
+
+/// An error that occurs when UTF-8 decoding fails.
+///
+/// This error occurs when attempting to convert a non-UTF-8 byte
+/// string to a Rust string that must be valid UTF-8. For example,
+/// [`to_str`](trait.ByteSlice.html#method.to_str) is one such method.
+///
+/// # Example
+///
+/// This example shows what happens when a given byte sequence is invalid,
+/// but ends with a sequence that is a possible prefix of valid UTF-8.
+///
+/// ```
+/// use bstr::{B, ByteSlice};
+///
+/// let s = B(b"foobar\xF1\x80\x80");
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// assert_eq!(err.error_len(), None);
+/// ```
+///
+/// This example shows what happens when a given byte sequence contains
+/// invalid UTF-8.
+///
+/// ```
+/// use bstr::ByteSlice;
+///
+/// let s = b"foobar\xF1\x80\x80quux";
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// // The error length reports the maximum number of bytes that correspond to
+/// // a valid prefix of a UTF-8 encoded codepoint.
+/// assert_eq!(err.error_len(), Some(3));
+///
+/// // In contrast to the above which contains a single invalid prefix,
+/// // consider the case of multiple individal bytes that are never valid
+/// // prefixes. Note how the value of error_len changes!
+/// let s = b"foobar\xFF\xFFquux";
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// assert_eq!(err.error_len(), Some(1));
+///
+/// // The fact that it's an invalid prefix does not change error_len even
+/// // when it immediately precedes the end of the string.
+/// let s = b"foobar\xFF";
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// assert_eq!(err.error_len(), Some(1));
+/// ```
+#[derive(Debug, Eq, PartialEq)]
+pub struct Utf8Error {
+ valid_up_to: usize,
+ error_len: Option<usize>,
+}
+
+impl Utf8Error {
+ /// Returns the byte index of the position immediately following the last
+ /// valid UTF-8 byte.
+ ///
+ /// # Example
+ ///
+ /// This examples shows how `valid_up_to` can be used to retrieve a
+ /// possibly empty prefix that is guaranteed to be valid UTF-8:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foobar\xF1\x80\x80quux";
+ /// let err = s.to_str().unwrap_err();
+ ///
+ /// // This is guaranteed to never panic.
+ /// let string = s[..err.valid_up_to()].to_str().unwrap();
+ /// assert_eq!(string, "foobar");
+ /// ```
+ #[inline]
+ pub fn valid_up_to(&self) -> usize {
+ self.valid_up_to
+ }
+
+ /// Returns the total number of invalid UTF-8 bytes immediately following
+ /// the position returned by `valid_up_to`. This value is always at least
+ /// `1`, but can be up to `3` if bytes form a valid prefix of some UTF-8
+ /// encoded codepoint.
+ ///
+ /// If the end of the original input was found before a valid UTF-8 encoded
+ /// codepoint could be completed, then this returns `None`. This is useful
+ /// when processing streams, where a `None` value signals that more input
+ /// might be needed.
+ #[inline]
+ pub fn error_len(&self) -> Option<usize> {
+ self.error_len
+ }
+}
+
+#[cfg(feature = "std")]
+impl error::Error for Utf8Error {
+ fn description(&self) -> &str {
+ "invalid UTF-8"
+ }
+}
+
+impl fmt::Display for Utf8Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "invalid UTF-8 found at byte offset {}", self.valid_up_to)
+ }
+}
+
+/// Returns OK if and only if the given slice is completely valid UTF-8.
+///
+/// If the slice isn't valid UTF-8, then an error is returned that explains
+/// the first location at which invalid UTF-8 was detected.
+pub fn validate(slice: &[u8]) -> Result<(), Utf8Error> {
+ // The fast path for validating UTF-8. It steps through a UTF-8 automaton
+ // and uses a SIMD accelerated ASCII fast path on x86_64. If an error is
+ // detected, it backs up and runs the slower version of the UTF-8 automaton
+ // to determine correct error information.
+ fn fast(slice: &[u8]) -> Result<(), Utf8Error> {
+ let mut state = ACCEPT;
+ let mut i = 0;
+
+ while i < slice.len() {
+ let b = slice[i];
+
+ // ASCII fast path. If we see two consecutive ASCII bytes, then try
+ // to validate as much ASCII as possible very quickly.
+ if state == ACCEPT
+ && b <= 0x7F
+ && slice.get(i + 1).map_or(false, |&b| b <= 0x7F)
+ {
+ i += ascii::first_non_ascii_byte(&slice[i..]);
+ continue;
+ }
+
+ state = step(state, b);
+ if state == REJECT {
+ return Err(find_valid_up_to(slice, i));
+ }
+ i += 1;
+ }
+ if state != ACCEPT {
+ Err(find_valid_up_to(slice, slice.len()))
+ } else {
+ Ok(())
+ }
+ }
+
+ // Given the first position at which a UTF-8 sequence was determined to be
+ // invalid, return an error that correctly reports the position at which
+ // the last complete UTF-8 sequence ends.
+ #[inline(never)]
+ fn find_valid_up_to(slice: &[u8], rejected_at: usize) -> Utf8Error {
+ // In order to find the last valid byte, we need to back up an amount
+ // that guarantees every preceding byte is part of a valid UTF-8
+ // code unit sequence. To do this, we simply locate the last leading
+ // byte that occurs before rejected_at.
+ let mut backup = rejected_at.saturating_sub(1);
+ while backup > 0 && !is_leading_or_invalid_utf8_byte(slice[backup]) {
+ backup -= 1;
+ }
+ let upto = cmp::min(slice.len(), rejected_at.saturating_add(1));
+ let mut err = slow(&slice[backup..upto]).unwrap_err();
+ err.valid_up_to += backup;
+ err
+ }
+
+ // Like top-level UTF-8 decoding, except it correctly reports a UTF-8 error
+ // when an invalid sequence is found. This is split out from validate so
+ // that the fast path doesn't need to keep track of the position of the
+ // last valid UTF-8 byte. In particular, tracking this requires checking
+ // for an ACCEPT state on each byte, which degrades throughput pretty
+ // badly.
+ fn slow(slice: &[u8]) -> Result<(), Utf8Error> {
+ let mut state = ACCEPT;
+ let mut valid_up_to = 0;
+ for (i, &b) in slice.iter().enumerate() {
+ state = step(state, b);
+ if state == ACCEPT {
+ valid_up_to = i + 1;
+ } else if state == REJECT {
+ // Our error length must always be at least 1.
+ let error_len = Some(cmp::max(1, i - valid_up_to));
+ return Err(Utf8Error { valid_up_to, error_len });
+ }
+ }
+ if state != ACCEPT {
+ Err(Utf8Error { valid_up_to, error_len: None })
+ } else {
+ Ok(())
+ }
+ }
+
+ // Advance to the next state given the current state and current byte.
+ fn step(state: usize, b: u8) -> usize {
+ let class = CLASSES[b as usize];
+ // SAFETY: This is safe because 'class' is always <=11 and 'state' is
+ // always <=96. Therefore, the maximal index is 96+11 = 107, where
+ // STATES_FORWARD.len() = 108 such that every index is guaranteed to be
+ // valid by construction of the state machine and the byte equivalence
+ // classes.
+ unsafe {
+ *STATES_FORWARD.get_unchecked(state + class as usize) as usize
+ }
+ }
+
+ fast(slice)
+}
+
+/// UTF-8 decode a single Unicode scalar value from the beginning of a slice.
+///
+/// When successful, the corresponding Unicode scalar value is returned along
+/// with the number of bytes it was encoded with. The number of bytes consumed
+/// for a successful decode is always between 1 and 4, inclusive.
+///
+/// When unsuccessful, `None` is returned along with the number of bytes that
+/// make up a maximal prefix of a valid UTF-8 code unit sequence. In this case,
+/// the number of bytes consumed is always between 0 and 3, inclusive, where
+/// 0 is only returned when `slice` is empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::decode_utf8;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_utf8(b"\xE2\x98\x83");
+/// assert_eq!(Some('☃'), ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_utf8(b"\xE2\x98");
+/// assert_eq!(None, ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// This example shows how to iterate over all codepoints in UTF-8 encoded
+/// bytes, while replacing invalid UTF-8 sequences with the replacement
+/// codepoint:
+///
+/// ```
+/// use bstr::{B, decode_utf8};
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+/// let (ch, size) = decode_utf8(bytes);
+/// bytes = &bytes[size..];
+/// chars.push(ch.unwrap_or('\u{FFFD}'));
+/// }
+/// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
+/// ```
+#[inline]
+pub fn decode<B: AsRef<[u8]>>(slice: B) -> (Option<char>, usize) {
+ let slice = slice.as_ref();
+ match slice.get(0) {
+ None => return (None, 0),
+ Some(&b) if b <= 0x7F => return (Some(b as char), 1),
+ _ => {}
+ }
+
+ let (mut state, mut cp, mut i) = (ACCEPT, 0, 0);
+ while i < slice.len() {
+ decode_step(&mut state, &mut cp, slice[i]);
+ i += 1;
+
+ if state == ACCEPT {
+ // SAFETY: This is safe because `decode_step` guarantees that
+ // `cp` is a valid Unicode scalar value in an ACCEPT state.
+ let ch = unsafe { char::from_u32_unchecked(cp) };
+ return (Some(ch), i);
+ } else if state == REJECT {
+ // At this point, we always want to advance at least one byte.
+ return (None, cmp::max(1, i.saturating_sub(1)));
+ }
+ }
+ (None, i)
+}
+
+/// Lossily UTF-8 decode a single Unicode scalar value from the beginning of a
+/// slice.
+///
+/// When successful, the corresponding Unicode scalar value is returned along
+/// with the number of bytes it was encoded with. The number of bytes consumed
+/// for a successful decode is always between 1 and 4, inclusive.
+///
+/// When unsuccessful, the Unicode replacement codepoint (`U+FFFD`) is returned
+/// along with the number of bytes that make up a maximal prefix of a valid
+/// UTF-8 code unit sequence. In this case, the number of bytes consumed is
+/// always between 0 and 3, inclusive, where 0 is only returned when `slice` is
+/// empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```ignore
+/// use bstr::decode_utf8_lossy;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_utf8_lossy(b"\xE2\x98\x83");
+/// assert_eq!('☃', ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_utf8_lossy(b"\xE2\x98");
+/// assert_eq!('\u{FFFD}', ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// This example shows how to iterate over all codepoints in UTF-8 encoded
+/// bytes, while replacing invalid UTF-8 sequences with the replacement
+/// codepoint:
+///
+/// ```ignore
+/// use bstr::{B, decode_utf8_lossy};
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+/// let (ch, size) = decode_utf8_lossy(bytes);
+/// bytes = &bytes[size..];
+/// chars.push(ch);
+/// }
+/// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
+/// ```
+#[inline]
+pub fn decode_lossy<B: AsRef<[u8]>>(slice: B) -> (char, usize) {
+ match decode(slice) {
+ (Some(ch), size) => (ch, size),
+ (None, size) => ('\u{FFFD}', size),
+ }
+}
+
+/// UTF-8 decode a single Unicode scalar value from the end of a slice.
+///
+/// When successful, the corresponding Unicode scalar value is returned along
+/// with the number of bytes it was encoded with. The number of bytes consumed
+/// for a successful decode is always between 1 and 4, inclusive.
+///
+/// When unsuccessful, `None` is returned along with the number of bytes that
+/// make up a maximal prefix of a valid UTF-8 code unit sequence. In this case,
+/// the number of bytes consumed is always between 0 and 3, inclusive, where
+/// 0 is only returned when `slice` is empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::decode_last_utf8;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_last_utf8(b"\xE2\x98\x83");
+/// assert_eq!(Some('☃'), ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_last_utf8(b"\xE2\x98");
+/// assert_eq!(None, ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// This example shows how to iterate over all codepoints in UTF-8 encoded
+/// bytes in reverse, while replacing invalid UTF-8 sequences with the
+/// replacement codepoint:
+///
+/// ```
+/// use bstr::{B, decode_last_utf8};
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+/// let (ch, size) = decode_last_utf8(bytes);
+/// bytes = &bytes[..bytes.len()-size];
+/// chars.push(ch.unwrap_or('\u{FFFD}'));
+/// }
+/// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
+/// ```
+#[inline]
+pub fn decode_last<B: AsRef<[u8]>>(slice: B) -> (Option<char>, usize) {
+ // TODO: We could implement this by reversing the UTF-8 automaton, but for
+ // now, we do it the slow way by using the forward automaton.
+
+ let slice = slice.as_ref();
+ if slice.is_empty() {
+ return (None, 0);
+ }
+ let mut start = slice.len() - 1;
+ let limit = slice.len().saturating_sub(4);
+ while start > limit && !is_leading_or_invalid_utf8_byte(slice[start]) {
+ start -= 1;
+ }
+ let (ch, size) = decode(&slice[start..]);
+ // If we didn't consume all of the bytes, then that means there's at least
+ // one stray byte that never occurs in a valid code unit prefix, so we can
+ // advance by one byte.
+ if start + size != slice.len() {
+ (None, 1)
+ } else {
+ (ch, size)
+ }
+}
+
+/// Lossily UTF-8 decode a single Unicode scalar value from the end of a slice.
+///
+/// When successful, the corresponding Unicode scalar value is returned along
+/// with the number of bytes it was encoded with. The number of bytes consumed
+/// for a successful decode is always between 1 and 4, inclusive.
+///
+/// When unsuccessful, the Unicode replacement codepoint (`U+FFFD`) is returned
+/// along with the number of bytes that make up a maximal prefix of a valid
+/// UTF-8 code unit sequence. In this case, the number of bytes consumed is
+/// always between 0 and 3, inclusive, where 0 is only returned when `slice` is
+/// empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```ignore
+/// use bstr::decode_last_utf8_lossy;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_last_utf8_lossy(b"\xE2\x98\x83");
+/// assert_eq!('☃', ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_last_utf8_lossy(b"\xE2\x98");
+/// assert_eq!('\u{FFFD}', ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// This example shows how to iterate over all codepoints in UTF-8 encoded
+/// bytes in reverse, while replacing invalid UTF-8 sequences with the
+/// replacement codepoint:
+///
+/// ```ignore
+/// use bstr::decode_last_utf8_lossy;
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+/// let (ch, size) = decode_last_utf8_lossy(bytes);
+/// bytes = &bytes[..bytes.len()-size];
+/// chars.push(ch);
+/// }
+/// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
+/// ```
+#[inline]
+pub fn decode_last_lossy<B: AsRef<[u8]>>(slice: B) -> (char, usize) {
+ match decode_last(slice) {
+ (Some(ch), size) => (ch, size),
+ (None, size) => ('\u{FFFD}', size),
+ }
+}
+
+/// SAFETY: The decode function relies on state being equal to ACCEPT only if
+/// cp is a valid Unicode scalar value.
+#[inline]
+pub fn decode_step(state: &mut usize, cp: &mut u32, b: u8) {
+ let class = CLASSES[b as usize];
+ if *state == ACCEPT {
+ *cp = (0xFF >> class) & (b as u32);
+ } else {
+ *cp = (b as u32 & 0b111111) | (*cp << 6);
+ }
+ *state = STATES_FORWARD[*state + class as usize] as usize;
+}
+
+/// Returns true if and only if the given byte is either a valid leading UTF-8
+/// byte, or is otherwise an invalid byte that can never appear anywhere in a
+/// valid UTF-8 sequence.
+fn is_leading_or_invalid_utf8_byte(b: u8) -> bool {
+ // In the ASCII case, the most significant bit is never set. The leading
+ // byte of a 2/3/4-byte sequence always has the top two most significant
+ // bits set. For bytes that can never appear anywhere in valid UTF-8, this
+ // also returns true, since every such byte has its two most significant
+ // bits set:
+ //
+ // \xC0 :: 11000000
+ // \xC1 :: 11000001
+ // \xF5 :: 11110101
+ // \xF6 :: 11110110
+ // \xF7 :: 11110111
+ // \xF8 :: 11111000
+ // \xF9 :: 11111001
+ // \xFA :: 11111010
+ // \xFB :: 11111011
+ // \xFC :: 11111100
+ // \xFD :: 11111101
+ // \xFE :: 11111110
+ // \xFF :: 11111111
+ (b & 0b1100_0000) != 0b1000_0000
+}
+
+#[cfg(test)]
+mod tests {
+ use std::char;
+
+ use ext_slice::{ByteSlice, B};
+ use tests::LOSSY_TESTS;
+ use utf8::{self, Utf8Error};
+
+ fn utf8e(valid_up_to: usize) -> Utf8Error {
+ Utf8Error { valid_up_to, error_len: None }
+ }
+
+ fn utf8e2(valid_up_to: usize, error_len: usize) -> Utf8Error {
+ Utf8Error { valid_up_to, error_len: Some(error_len) }
+ }
+
+ #[test]
+ fn validate_all_codepoints() {
+ for i in 0..(0x10FFFF + 1) {
+ let cp = match char::from_u32(i) {
+ None => continue,
+ Some(cp) => cp,
+ };
+ let mut buf = [0; 4];
+ let s = cp.encode_utf8(&mut buf);
+ assert_eq!(Ok(()), utf8::validate(s.as_bytes()));
+ }
+ }
+
+ #[test]
+ fn validate_multiple_codepoints() {
+ assert_eq!(Ok(()), utf8::validate(b"abc"));
+ assert_eq!(Ok(()), utf8::validate(b"a\xE2\x98\x83a"));
+ assert_eq!(Ok(()), utf8::validate(b"a\xF0\x9D\x9C\xB7a"));
+ assert_eq!(Ok(()), utf8::validate(b"\xE2\x98\x83\xF0\x9D\x9C\xB7",));
+ assert_eq!(
+ Ok(()),
+ utf8::validate(b"a\xE2\x98\x83a\xF0\x9D\x9C\xB7a",)
+ );
+ assert_eq!(
+ Ok(()),
+ utf8::validate(b"\xEF\xBF\xBD\xE2\x98\x83\xEF\xBF\xBD",)
+ );
+ }
+
+ #[test]
+ fn validate_errors() {
+ // single invalid byte
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xFF"));
+ // single invalid byte after ASCII
+ assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xFF"));
+ // single invalid byte after 2 byte sequence
+ assert_eq!(Err(utf8e2(2, 1)), utf8::validate(b"\xCE\xB2\xFF"));
+ // single invalid byte after 3 byte sequence
+ assert_eq!(Err(utf8e2(3, 1)), utf8::validate(b"\xE2\x98\x83\xFF"));
+ // single invalid byte after 4 byte sequence
+ assert_eq!(Err(utf8e2(4, 1)), utf8::validate(b"\xF0\x9D\x9D\xB1\xFF"));
+
+ // An invalid 2-byte sequence with a valid 1-byte prefix.
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xCE\xF0"));
+ // An invalid 3-byte sequence with a valid 2-byte prefix.
+ assert_eq!(Err(utf8e2(0, 2)), utf8::validate(b"\xE2\x98\xF0"));
+ // An invalid 4-byte sequence with a valid 3-byte prefix.
+ assert_eq!(Err(utf8e2(0, 3)), utf8::validate(b"\xF0\x9D\x9D\xF0"));
+
+ // An overlong sequence. Should be \xE2\x82\xAC, but we encode the
+ // same codepoint value in 4 bytes. This not only tests that we reject
+ // overlong sequences, but that we get valid_up_to correct.
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xF0\x82\x82\xAC"));
+ assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xF0\x82\x82\xAC"));
+ assert_eq!(
+ Err(utf8e2(3, 1)),
+ utf8::validate(b"\xE2\x98\x83\xF0\x82\x82\xAC",)
+ );
+
+ // Check that encoding a surrogate codepoint using the UTF-8 scheme
+ // fails validation.
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xED\xA0\x80"));
+ assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xED\xA0\x80"));
+ assert_eq!(
+ Err(utf8e2(3, 1)),
+ utf8::validate(b"\xE2\x98\x83\xED\xA0\x80",)
+ );
+
+ // Check that an incomplete 2-byte sequence fails.
+ assert_eq!(Err(utf8e2(0, 1)), utf8::validate(b"\xCEa"));
+ assert_eq!(Err(utf8e2(1, 1)), utf8::validate(b"a\xCEa"));
+ assert_eq!(
+ Err(utf8e2(3, 1)),
+ utf8::validate(b"\xE2\x98\x83\xCE\xE2\x98\x83",)
+ );
+ // Check that an incomplete 3-byte sequence fails.
+ assert_eq!(Err(utf8e2(0, 2)), utf8::validate(b"\xE2\x98a"));
+ assert_eq!(Err(utf8e2(1, 2)), utf8::validate(b"a\xE2\x98a"));
+ assert_eq!(
+ Err(utf8e2(3, 2)),
+ utf8::validate(b"\xE2\x98\x83\xE2\x98\xE2\x98\x83",)
+ );
+ // Check that an incomplete 4-byte sequence fails.
+ assert_eq!(Err(utf8e2(0, 3)), utf8::validate(b"\xF0\x9D\x9Ca"));
+ assert_eq!(Err(utf8e2(1, 3)), utf8::validate(b"a\xF0\x9D\x9Ca"));
+ assert_eq!(
+ Err(utf8e2(4, 3)),
+ utf8::validate(b"\xF0\x9D\x9C\xB1\xF0\x9D\x9C\xE2\x98\x83",)
+ );
+ assert_eq!(
+ Err(utf8e2(6, 3)),
+ utf8::validate(b"foobar\xF1\x80\x80quux",)
+ );
+
+ // Check that an incomplete (EOF) 2-byte sequence fails.
+ assert_eq!(Err(utf8e(0)), utf8::validate(b"\xCE"));
+ assert_eq!(Err(utf8e(1)), utf8::validate(b"a\xCE"));
+ assert_eq!(Err(utf8e(3)), utf8::validate(b"\xE2\x98\x83\xCE"));
+ // Check that an incomplete (EOF) 3-byte sequence fails.
+ assert_eq!(Err(utf8e(0)), utf8::validate(b"\xE2\x98"));
+ assert_eq!(Err(utf8e(1)), utf8::validate(b"a\xE2\x98"));
+ assert_eq!(Err(utf8e(3)), utf8::validate(b"\xE2\x98\x83\xE2\x98"));
+ // Check that an incomplete (EOF) 4-byte sequence fails.
+ assert_eq!(Err(utf8e(0)), utf8::validate(b"\xF0\x9D\x9C"));
+ assert_eq!(Err(utf8e(1)), utf8::validate(b"a\xF0\x9D\x9C"));
+ assert_eq!(
+ Err(utf8e(4)),
+ utf8::validate(b"\xF0\x9D\x9C\xB1\xF0\x9D\x9C",)
+ );
+
+ // Test that we errors correct even after long valid sequences. This
+ // checks that our "backup" logic for detecting errors is correct.
+ assert_eq!(
+ Err(utf8e2(8, 1)),
+ utf8::validate(b"\xe2\x98\x83\xce\xb2\xe3\x83\x84\xFF",)
+ );
+ }
+
+ #[test]
+ fn decode_valid() {
+ fn d(mut s: &str) -> Vec<char> {
+ let mut chars = vec![];
+ while !s.is_empty() {
+ let (ch, size) = utf8::decode(s.as_bytes());
+ s = &s[size..];
+ chars.push(ch.unwrap());
+ }
+ chars
+ }
+
+ assert_eq!(vec!['☃'], d("☃"));
+ assert_eq!(vec!['☃', '☃'], d("☃☃"));
+ assert_eq!(vec!['α', 'β', 'γ', 'δ', 'ε'], d("αβγδε"));
+ assert_eq!(vec!['☃', '⛄', '⛇'], d("☃⛄⛇"));
+ assert_eq!(vec!['𝗮', '𝗯', '𝗰', '𝗱', '𝗲'], d("𝗮𝗯𝗰𝗱𝗲"));
+ }
+
+ #[test]
+ fn decode_invalid() {
+ let (ch, size) = utf8::decode(b"");
+ assert_eq!(None, ch);
+ assert_eq!(0, size);
+
+ let (ch, size) = utf8::decode(b"\xFF");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode(b"\xCE\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode(b"\xE2\x98\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode(b"\xF0\x9D\x9D");
+ assert_eq!(None, ch);
+ assert_eq!(3, size);
+
+ let (ch, size) = utf8::decode(b"\xF0\x9D\x9D\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(3, size);
+
+ let (ch, size) = utf8::decode(b"\xF0\x82\x82\xAC");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode(b"\xED\xA0\x80");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode(b"\xCEa");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode(b"\xE2\x98a");
+ assert_eq!(None, ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode(b"\xF0\x9D\x9Ca");
+ assert_eq!(None, ch);
+ assert_eq!(3, size);
+ }
+
+ #[test]
+ fn decode_lossy() {
+ let (ch, size) = utf8::decode_lossy(b"");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(0, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xFF");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xCE\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xE2\x98\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xF0\x9D\x9D\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(3, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xF0\x82\x82\xAC");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xED\xA0\x80");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xCEa");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xE2\x98a");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_lossy(b"\xF0\x9D\x9Ca");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(3, size);
+ }
+
+ #[test]
+ fn decode_last_valid() {
+ fn d(mut s: &str) -> Vec<char> {
+ let mut chars = vec![];
+ while !s.is_empty() {
+ let (ch, size) = utf8::decode_last(s.as_bytes());
+ s = &s[..s.len() - size];
+ chars.push(ch.unwrap());
+ }
+ chars
+ }
+
+ assert_eq!(vec!['☃'], d("☃"));
+ assert_eq!(vec!['☃', '☃'], d("☃☃"));
+ assert_eq!(vec!['ε', 'δ', 'γ', 'β', 'α'], d("αβγδε"));
+ assert_eq!(vec!['⛇', '⛄', '☃'], d("☃⛄⛇"));
+ assert_eq!(vec!['𝗲', '𝗱', '𝗰', '𝗯', '𝗮'], d("𝗮𝗯𝗰𝗱𝗲"));
+ }
+
+ #[test]
+ fn decode_last_invalid() {
+ let (ch, size) = utf8::decode_last(b"");
+ assert_eq!(None, ch);
+ assert_eq!(0, size);
+
+ let (ch, size) = utf8::decode_last(b"\xFF");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xCE\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xCE");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xE2\x98\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xE2\x98");
+ assert_eq!(None, ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_last(b"\xF0\x9D\x9D\xF0");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xF0\x9D\x9D");
+ assert_eq!(None, ch);
+ assert_eq!(3, size);
+
+ let (ch, size) = utf8::decode_last(b"\xF0\x82\x82\xAC");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xED\xA0\x80");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xED\xA0");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"\xED");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"a\xCE");
+ assert_eq!(None, ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last(b"a\xE2\x98");
+ assert_eq!(None, ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_last(b"a\xF0\x9D\x9C");
+ assert_eq!(None, ch);
+ assert_eq!(3, size);
+ }
+
+ #[test]
+ fn decode_last_lossy() {
+ let (ch, size) = utf8::decode_last_lossy(b"");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(0, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xFF");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xCE\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xCE");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xE2\x98\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xE2\x98");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xF0\x9D\x9D\xF0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xF0\x9D\x9D");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(3, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xF0\x82\x82\xAC");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xED\xA0\x80");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xED\xA0");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"\xED");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"a\xCE");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(1, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"a\xE2\x98");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(2, size);
+
+ let (ch, size) = utf8::decode_last_lossy(b"a\xF0\x9D\x9C");
+ assert_eq!('\u{FFFD}', ch);
+ assert_eq!(3, size);
+ }
+
+ #[test]
+ fn chars() {
+ for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
+ let got: String = B(input).chars().collect();
+ assert_eq!(
+ expected, got,
+ "chars(ith: {:?}, given: {:?})",
+ i, input,
+ );
+ let got: String =
+ B(input).char_indices().map(|(_, _, ch)| ch).collect();
+ assert_eq!(
+ expected, got,
+ "char_indices(ith: {:?}, given: {:?})",
+ i, input,
+ );
+
+ let expected: String = expected.chars().rev().collect();
+
+ let got: String = B(input).chars().rev().collect();
+ assert_eq!(
+ expected, got,
+ "chars.rev(ith: {:?}, given: {:?})",
+ i, input,
+ );
+ let got: String =
+ B(input).char_indices().rev().map(|(_, _, ch)| ch).collect();
+ assert_eq!(
+ expected, got,
+ "char_indices.rev(ith: {:?}, given: {:?})",
+ i, input,
+ );
+ }
+ }
+
+ #[test]
+ fn utf8_chunks() {
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xC0" };
+ assert_eq!(
+ (c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xC0".as_bstr(),
+ incomplete: false,
+ }),
+ None,
+ )
+ );
+
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xFF\xFF" };
+ assert_eq!(
+ (c.next(), c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xFF".as_bstr(),
+ incomplete: false,
+ }),
+ Some(utf8::Utf8Chunk {
+ valid: "",
+ invalid: b"\xFF".as_bstr(),
+ incomplete: false,
+ }),
+ None,
+ )
+ );
+
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xD0" };
+ assert_eq!(
+ (c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xD0".as_bstr(),
+ incomplete: true,
+ }),
+ None,
+ )
+ );
+
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xD0456" };
+ assert_eq!(
+ (c.next(), c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xD0".as_bstr(),
+ incomplete: false,
+ }),
+ Some(utf8::Utf8Chunk {
+ valid: "456",
+ invalid: b"".as_bstr(),
+ incomplete: false,
+ }),
+ None,
+ )
+ );
+
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xE2\x98" };
+ assert_eq!(
+ (c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xE2\x98".as_bstr(),
+ incomplete: true,
+ }),
+ None,
+ )
+ );
+
+ let mut c = utf8::Utf8Chunks { bytes: b"123\xF4\x8F\xBF" };
+ assert_eq!(
+ (c.next(), c.next()),
+ (
+ Some(utf8::Utf8Chunk {
+ valid: "123",
+ invalid: b"\xF4\x8F\xBF".as_bstr(),
+ incomplete: true,
+ }),
+ None,
+ )
+ );
+ }
+}