diff options
author | Treehugger Robot <treehugger-gerrit@google.com> | 2022-12-19 10:02:37 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2022-12-19 10:02:37 +0000 |
commit | aa41e2c8d75821113c2adbdaf8e5d0409584474f (patch) | |
tree | ed85d852ab68d24da88ef68983dbce316adfe707 | |
parent | 5a03c8a832818e874f3dbf2f23910d2576575926 (diff) | |
parent | 7d0d0484f77273c5977664d0830960140f99d4a5 (diff) | |
download | regex-syntax-aa41e2c8d75821113c2adbdaf8e5d0409584474f.tar.gz |
Merge "Upgrade regex-syntax to 0.6.28"main-16k-with-phones
34 files changed, 2700 insertions, 1463 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 3d660d5..50bfde1 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,6 @@ { "git": { - "sha1": "3ea9e3eca7b762c30fbc09205522e3935cd70052" - } -} + "sha1": "ea3b1320807741aae8b5db926f6b54b99e65bce6" + }, + "path_in_vcs": "regex-syntax" +}
\ No newline at end of file @@ -40,11 +40,10 @@ license { rust_library { name: "libregex_syntax", - // has rustc warnings host_supported: true, crate_name: "regex_syntax", cargo_env_compat: true, - cargo_pkg_version: "0.6.25", + cargo_pkg_version: "0.6.28", srcs: ["src/lib.rs"], edition: "2018", features: [ @@ -68,11 +67,10 @@ rust_library { rust_test { name: "regex-syntax_test_src_lib", - // has rustc warnings host_supported: true, crate_name: "regex_syntax", cargo_env_compat: true, - cargo_pkg_version: "0.6.25", + cargo_pkg_version: "0.6.28", srcs: ["src/lib.rs"], test_suites: ["general-tests"], auto_gen_config: true, @@ -3,27 +3,35 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "regex-syntax" -version = "0.6.25" +version = "0.6.28" authors = ["The Rust Project Developers"] description = "A regular expression parser." homepage = "https://github.com/rust-lang/regex" documentation = "https://docs.rs/regex-syntax" -license = "MIT/Apache-2.0" +readme = "README.md" +license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex" [features] default = ["unicode"] -unicode = ["unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] +unicode = [ + "unicode-age", + "unicode-bool", + "unicode-case", + "unicode-gencat", + "unicode-perl", + "unicode-script", + "unicode-segment", +] unicode-age = [] unicode-bool = [] unicode-case = [] diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 1359aa1..e491bf1 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,8 +1,8 @@ [package] name = "regex-syntax" -version = "0.6.25" #:version +version = "0.6.28" #:version authors = ["The Rust Project Developers"] -license = "MIT/Apache-2.0" +license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex" documentation = "https://docs.rs/regex-syntax" homepage = "https://github.com/rust-lang/regex" @@ -1,3 +1,7 @@ +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update rust/crates/regex-syntax +# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md + name: "regex-syntax" description: "A regular expression parser." third_party { @@ -7,13 +11,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/regex-syntax/regex-syntax-0.6.25.crate" + value: "https://static.crates.io/crates/regex-syntax/regex-syntax-0.6.28.crate" } - version: "0.6.25" + version: "0.6.28" license_type: NOTICE last_upgrade_date { - year: 2021 - month: 5 - day: 19 + year: 2022 + month: 12 + day: 13 } } @@ -2,9 +2,8 @@ regex-syntax ============ This crate provides a robust regular expression parser. -[![Build status](https://travis-ci.com/rust-lang/regex.svg?branch=master)](https://travis-ci.com/rust-lang/regex) -[![Build status](https://ci.appveyor.com/api/projects/status/github/rust-lang/regex?svg=true)](https://ci.appveyor.com/project/rust-lang-libs/regex) -[![](https://meritbadge.herokuapp.com/regex-syntax)](https://crates.io/crates/regex-syntax) +[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions) +[![Crates.io](https://img.shields.io/crates/v/regex-syntax.svg)](https://crates.io/crates/regex-syntax) [![Rust](https://img.shields.io/badge/rust-1.28.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex) @@ -53,7 +52,7 @@ for extreme optimization, and therefore, use of `unsafe`. The standard for using `unsafe` in this crate is extremely high because this crate is intended to be reasonably safe to use with user supplied regular -expressions. Therefore, while their may be bugs in the regex parser itself, +expressions. Therefore, while there may be bugs in the regex parser itself, they should _never_ result in memory unsafety unless there is either a bug in the compiler or the standard library. (Since `regex-syntax` has zero dependencies.) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9b9127b..387ea3a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -15,7 +15,7 @@ mod visitor; /// An error that occurred while parsing a regular expression into an abstract /// syntax tree. /// -/// Note that note all ASTs represents a valid regular expression. For example, +/// Note that not all ASTs represents a valid regular expression. For example, /// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a /// valid Unicode property name. That particular error is reported when /// translating an AST to the high-level intermediate representation (`HIR`). @@ -385,7 +385,7 @@ impl PartialOrd for Position { impl Span { /// Create a new span with the given positions. pub fn new(start: Position, end: Position) -> Span { - Span { start: start, end: end } + Span { start, end } } /// Create a new span using the given position as the start and end. @@ -427,7 +427,7 @@ impl Position { /// /// `column` is the approximate column number, starting at `1`. pub fn new(offset: usize, line: usize, column: usize) -> Position { - Position { offset: offset, line: line, column: column } + Position { offset, line, column } } } diff --git a/src/ast/parse.rs b/src/ast/parse.rs index e62a7c2..6e9c9ac 100644 --- a/src/ast/parse.rs +++ b/src/ast/parse.rs @@ -167,7 +167,7 @@ impl ParserBuilder { /// they should impose a limit on the length, in bytes, of the concrete /// pattern string. In particular, this is viable since this parser /// implementation will limit itself to heap space proportional to the - /// lenth of the pattern string. + /// length of the pattern string. /// /// Note that a nest limit of `0` will return a nest limit error for most /// patterns but not all. For example, a nest limit of `0` permits `a` but @@ -202,7 +202,7 @@ impl ParserBuilder { /// Enable verbose mode in the regular expression. /// - /// When enabled, verbose mode permits insigificant whitespace in many + /// When enabled, verbose mode permits insignificant whitespace in many /// places in the regular expression, as well as comments. Comments are /// started using `#` and continue until the end of the line. /// @@ -236,7 +236,7 @@ pub struct Parser { /// supported. octal: bool, /// The initial setting for `ignore_whitespace` as provided by - /// Th`ParserBuilder`. is is used when reseting the parser's state. + /// `ParserBuilder`. It is used when resetting the parser's state. initial_ignore_whitespace: bool, /// Whether whitespace should be ignored. When enabled, comments are /// also permitted. @@ -366,7 +366,7 @@ impl Parser { impl<'s, P: Borrow<Parser>> ParserI<'s, P> { /// Build an internal parser from a parser configuration and a pattern. fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> { - ParserI { parser: parser, pattern: pattern } + ParserI { parser, pattern } } /// Return a reference to the parser state. @@ -381,11 +381,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { /// Create a new error with the given span and error type. fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error { - ast::Error { - kind: kind, - pattern: self.pattern().to_string(), - span: span, - } + ast::Error { kind, pattern: self.pattern().to_string(), span } } /// Return the current offset of the parser. @@ -481,11 +477,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { column = column.checked_add(1).unwrap(); } offset += self.char().len_utf8(); - self.parser().pos.set(Position { - offset: offset, - line: line, - column: column, - }); + self.parser().pos.set(Position { offset, line, column }); self.pattern()[self.offset()..].chars().next().is_some() } @@ -703,8 +695,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { .unwrap_or(old_ignore_whitespace); self.parser().stack_group.borrow_mut().push( GroupState::Group { - concat: concat, - group: group, + concat, + group, ignore_whitespace: old_ignore_whitespace, }, ); @@ -899,12 +891,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { #[inline(never)] fn unclosed_class_error(&self) -> ast::Error { for state in self.parser().stack_class.borrow().iter().rev() { - match *state { - ClassState::Open { ref set, .. } => { - return self - .error(set.span, ast::ErrorKind::ClassUnclosed); - } - _ => {} + if let ClassState::Open { ref set, .. } = *state { + return self.error(set.span, ast::ErrorKind::ClassUnclosed); } } // We are guaranteed to have a non-empty stack with at least @@ -950,8 +938,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { }; let span = Span::new(lhs.span().start, rhs.span().end); ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span: span, - kind: kind, + span, + kind, lhs: Box::new(lhs), rhs: Box::new(rhs), }) @@ -1010,7 +998,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { let ast = self.pop_group_end(concat)?; NestLimiter::new(self).check(&ast)?; Ok(ast::WithComments { - ast: ast, + ast, comments: mem::replace( &mut *self.parser().comments.borrow_mut(), vec![], @@ -1023,7 +1011,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { /// The given `kind` should correspond to the operator observed by the /// caller. /// - /// This assumes that the paser is currently positioned at the repetition + /// This assumes that the parser is currently positioned at the repetition /// operator and advances the parser to the first character after the /// operator. (Note that the operator may include a single additional `?`, /// which makes the operator ungreedy.) @@ -1066,9 +1054,9 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { span: ast.span().with_end(self.pos()), op: ast::RepetitionOp { span: Span::new(op_start, self.pos()), - kind: kind, + kind, }, - greedy: greedy, + greedy, ast: Box::new(ast), })); Ok(concat) @@ -1078,7 +1066,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { /// corresponds to the {m,n} syntax, and does not include the ?, * or + /// operators. /// - /// This assumes that the paser is currently positioned at the opening `{` + /// This assumes that the parser is currently positioned at the opening `{` /// and advances the parser to the first character after the operator. /// (Note that the operator may include a single additional `?`, which /// makes the operator ungreedy.) @@ -1170,7 +1158,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { span: op_span, kind: ast::RepetitionKind::Range(range), }, - greedy: greedy, + greedy, ast: Box::new(ast), })); Ok(concat) @@ -1235,7 +1223,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { } Ok(Either::Left(ast::SetFlags { span: Span { end: self.pos(), ..open_span }, - flags: flags, + flags, })) } else { assert_eq!(char_end, ':'); @@ -1428,7 +1416,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { let ast = Primitive::Literal(ast::Literal { span: self.span_char(), kind: ast::LiteralKind::Verbatim, - c: c, + c, }); self.bump(); Ok(ast) @@ -1494,16 +1482,16 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { let span = Span::new(start, self.pos()); if is_meta_character(c) { return Ok(Primitive::Literal(ast::Literal { - span: span, + span, kind: ast::LiteralKind::Punctuation, - c: c, + c, })); } let special = |kind, c| { Ok(Primitive::Literal(ast::Literal { - span: span, + span, kind: ast::LiteralKind::Special(kind), - c: c, + c, })) }; match c { @@ -1517,19 +1505,19 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { special(ast::SpecialLiteralKind::Space, ' ') } 'A' => Ok(Primitive::Assertion(ast::Assertion { - span: span, + span, kind: ast::AssertionKind::StartText, })), 'z' => Ok(Primitive::Assertion(ast::Assertion { - span: span, + span, kind: ast::AssertionKind::EndText, })), 'b' => Ok(Primitive::Assertion(ast::Assertion { - span: span, + span, kind: ast::AssertionKind::WordBoundary, })), 'B' => Ok(Primitive::Assertion(ast::Assertion { - span: span, + span, kind: ast::AssertionKind::NotWordBoundary, })), _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)), @@ -1569,7 +1557,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { ast::Literal { span: Span::new(start, end), kind: ast::LiteralKind::Octal, - c: c, + c, } } @@ -1645,7 +1633,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { Some(c) => Ok(ast::Literal { span: Span::new(start, end), kind: ast::LiteralKind::HexFixed(kind), - c: c, + c, }), } } @@ -1700,7 +1688,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { Some(c) => Ok(ast::Literal { span: Span::new(start, self.pos()), kind: ast::LiteralKind::HexBrace(kind), - c: c, + c, }), } } @@ -1927,7 +1915,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { })); if !self.bump_and_bump_space() { return Err(self.error( - Span::new(start, self.pos()), + Span::new(start, start), ast::ErrorKind::ClassUnclosed, )); } @@ -1949,7 +1937,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { } let set = ast::ClassBracketed { span: Span::new(start, self.pos()), - negated: negated, + negated, kind: ast::ClassSet::union(ast::ClassSetUnion { span: Span::new(union.span.start, union.span.start), items: vec![], @@ -2026,8 +2014,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { }; Some(ast::ClassAscii { span: Span::new(start, self.pos()), - kind: kind, - negated: negated, + kind, + negated, }) } @@ -2108,8 +2096,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { }; Ok(ast::ClassUnicode { span: Span::new(start, self.pos()), - negated: negated, - kind: kind, + negated, + kind, }) } @@ -2130,7 +2118,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> { 'W' => (true, ast::ClassPerlKind::Word), c => panic!("expected valid Perl class but got '{}'", c), }; - ast::ClassPerl { span: span, kind: kind, negated: negated } + ast::ClassPerl { span, kind, negated } } } @@ -2146,7 +2134,7 @@ struct NestLimiter<'p, 's, P> { impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> { fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> { - NestLimiter { p: p, depth: 0 } + NestLimiter { p, depth: 0 } } #[inline(never)] @@ -2429,18 +2417,18 @@ mod tests { /// Create a punctuation literal starting at the given position. fn punct_lit(c: char, span: Span) -> Ast { Ast::Literal(ast::Literal { - span: span, + span, kind: ast::LiteralKind::Punctuation, - c: c, + c, }) } /// Create a verbatim literal with the given span. fn lit_with(c: char, span: Span) -> Ast { Ast::Literal(ast::Literal { - span: span, + span, kind: ast::LiteralKind::Verbatim, - c: c, + c, }) } @@ -2451,12 +2439,12 @@ mod tests { /// Create a concatenation with the given span. fn concat_with(span: Span, asts: Vec<Ast>) -> Ast { - Ast::Concat(ast::Concat { span: span, asts: asts }) + Ast::Concat(ast::Concat { span, asts }) } /// Create an alternation with the given span. fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast { - Ast::Alternation(ast::Alternation { span: span(range), asts: asts }) + Ast::Alternation(ast::Alternation { span: span(range), asts }) } /// Create a capturing group with the given span. @@ -2498,7 +2486,7 @@ mod tests { span: span_range(pat, range.clone()), flags: ast::Flags { span: span_range(pat, (range.start + 2)..(range.end - 1)), - items: items, + items, }, }) } @@ -4208,7 +4196,7 @@ bar Ok(Primitive::Literal(ast::Literal { span: span(0..2), kind: ast::LiteralKind::Special(kind.clone()), - c: c, + c, })) ); } @@ -4402,7 +4390,7 @@ bar kind: ast::LiteralKind::HexFixed( ast::HexLiteralKind::UnicodeShort ), - c: c, + c, })) ); } @@ -4466,7 +4454,7 @@ bar kind: ast::LiteralKind::HexFixed( ast::HexLiteralKind::UnicodeLong ), - c: c, + c, })) ); } @@ -4667,10 +4655,7 @@ bar #[test] fn parse_set_class() { fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet { - ast::ClassSet::union(ast::ClassSetUnion { - span: span, - items: items, - }) + ast::ClassSet::union(ast::ClassSetUnion { span, items }) } fn intersection( @@ -4679,7 +4664,7 @@ bar rhs: ast::ClassSet, ) -> ast::ClassSet { ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span: span, + span, kind: ast::ClassSetBinaryOpKind::Intersection, lhs: Box::new(lhs), rhs: Box::new(rhs), @@ -4692,7 +4677,7 @@ bar rhs: ast::ClassSet, ) -> ast::ClassSet { ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span: span, + span, kind: ast::ClassSetBinaryOpKind::Difference, lhs: Box::new(lhs), rhs: Box::new(rhs), @@ -4705,7 +4690,7 @@ bar rhs: ast::ClassSet, ) -> ast::ClassSet { ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span: span, + span, kind: ast::ClassSetBinaryOpKind::SymmetricDifference, lhs: Box::new(lhs), rhs: Box::new(rhs), @@ -4734,9 +4719,9 @@ bar fn lit(span: Span, c: char) -> ast::ClassSetItem { ast::ClassSetItem::Literal(ast::Literal { - span: span, + span, kind: ast::LiteralKind::Verbatim, - c: c, + c, }) } @@ -4756,7 +4741,7 @@ bar ..span.end }; ast::ClassSetItem::Range(ast::ClassSetRange { - span: span, + span, start: ast::Literal { span: Span { end: pos1, ..span }, kind: ast::LiteralKind::Verbatim, @@ -4771,19 +4756,11 @@ bar } fn alnum(span: Span, negated: bool) -> ast::ClassAscii { - ast::ClassAscii { - span: span, - kind: ast::ClassAsciiKind::Alnum, - negated: negated, - } + ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated } } fn lower(span: Span, negated: bool) -> ast::ClassAscii { - ast::ClassAscii { - span: span, - kind: ast::ClassAsciiKind::Lower, - negated: negated, - } + ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated } } assert_eq!( @@ -5515,14 +5492,23 @@ bar assert_eq!( parser("[-").parse_set_class_open().unwrap_err(), TestError { - span: span(0..2), + span: span(0..0), kind: ast::ErrorKind::ClassUnclosed, } ); assert_eq!( parser("[--").parse_set_class_open().unwrap_err(), TestError { - span: span(0..3), + span: span(0..0), + kind: ast::ErrorKind::ClassUnclosed, + } + ); + + // See: https://github.com/rust-lang/regex/issues/792 + assert_eq!( + parser("(?x)[-#]").parse_with_comments().unwrap_err(), + TestError { + span: span(4..4), kind: ast::ErrorKind::ClassUnclosed, } ); diff --git a/src/ast/print.rs b/src/ast/print.rs index 283ce4c..045de2e 100644 --- a/src/ast/print.rs +++ b/src/ast/print.rs @@ -57,17 +57,16 @@ impl Printer { /// here are a `fmt::Formatter` (which is available in `fmt::Display` /// implementations) or a `&mut String`. pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result { - visitor::visit(ast, Writer { printer: self, wtr: wtr }) + visitor::visit(ast, Writer { wtr }) } } #[derive(Debug)] -struct Writer<'p, W> { - printer: &'p mut Printer, +struct Writer<W> { wtr: W, } -impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { +impl<W: fmt::Write> Visitor for Writer<W> { type Output = (); type Err = fmt::Error; @@ -153,7 +152,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { } } -impl<'p, W: fmt::Write> Writer<'p, W> { +impl<W: fmt::Write> Writer<W> { fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result { use crate::ast::GroupKind::*; match ast.kind { diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index a0d1e7d..78ee487 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -388,7 +388,7 @@ impl<'a> HeapVisitor<'a> { Some(ClassFrame::Union { head: item, tail: &[] }) } ast::ClassSet::BinaryOp(ref op) => { - Some(ClassFrame::Binary { op: op }) + Some(ClassFrame::Binary { op }) } } } @@ -402,11 +402,9 @@ impl<'a> HeapVisitor<'a> { }) } } - ClassInduct::BinaryOp(op) => Some(ClassFrame::BinaryLHS { - op: op, - lhs: &op.lhs, - rhs: &op.rhs, - }), + ClassInduct::BinaryOp(op) => { + Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs }) + } _ => None, } } @@ -427,7 +425,7 @@ impl<'a> HeapVisitor<'a> { } ClassFrame::Binary { .. } => None, ClassFrame::BinaryLHS { op, rhs, .. } => { - Some(ClassFrame::BinaryRHS { op: op, rhs: rhs }) + Some(ClassFrame::BinaryRHS { op, rhs }) } ClassFrame::BinaryRHS { .. } => None, } diff --git a/src/error.rs b/src/error.rs index 71cfa42..1230d2f 100644 --- a/src/error.rs +++ b/src/error.rs @@ -182,7 +182,7 @@ impl<'p> Spans<'p> { if line_count <= 1 { 0 } else { line_count.to_string().len() }; let mut spans = Spans { pattern: &fmter.pattern, - line_number_width: line_number_width, + line_number_width, by_line: vec![vec![]; line_count], multi_line: vec![], }; @@ -288,7 +288,7 @@ fn repeat_char(c: char, count: usize) -> String { mod tests { use crate::ast::parse::Parser; - fn assert_panic_message(pattern: &str, expected_msg: &str) -> () { + fn assert_panic_message(pattern: &str, expected_msg: &str) { let result = Parser::new().parse(pattern); match result { Ok(_) => { diff --git a/src/hir/interval.rs b/src/hir/interval.rs index cfaa2cb..56698c5 100644 --- a/src/hir/interval.rs +++ b/src/hir/interval.rs @@ -114,8 +114,8 @@ impl<I: Interval> IntervalSet<I> { // we're done. let drain_end = self.ranges.len(); - let mut ita = (0..drain_end).into_iter(); - let mut itb = (0..other.ranges.len()).into_iter(); + let mut ita = 0..drain_end; + let mut itb = 0..other.ranges.len(); let mut a = ita.next().unwrap(); let mut b = itb.next().unwrap(); loop { diff --git a/src/hir/literal/mod.rs b/src/hir/literal/mod.rs index 25ee88b..fbc5d3c 100644 --- a/src/hir/literal/mod.rs +++ b/src/hir/literal/mod.rs @@ -225,7 +225,7 @@ impl Literals { if self.lits.is_empty() { return self.to_empty(); } - let mut old: Vec<Literal> = self.lits.iter().cloned().collect(); + let mut old = self.lits.to_vec(); let mut new = self.to_empty(); 'OUTER: while let Some(mut candidate) = old.pop() { if candidate.is_empty() { @@ -256,15 +256,13 @@ impl Literals { old.push(lit3); lit2.clear(); } - } else { - if let Some(i) = position(&lit2, &candidate) { - lit2.cut(); - let mut new_candidate = candidate.clone(); - new_candidate.truncate(i); - new_candidate.cut(); - old.push(new_candidate); - candidate.clear(); - } + } else if let Some(i) = position(&lit2, &candidate) { + lit2.cut(); + let mut new_candidate = candidate.clone(); + new_candidate.truncate(i); + new_candidate.cut(); + old.push(new_candidate); + candidate.clear(); } // Oops, the candidate is already represented in the set. if candidate.is_empty() { @@ -735,18 +733,18 @@ fn repeat_zero_or_one_literals<F: FnMut(&Hir, &mut Literals)>( lits: &mut Literals, mut f: F, ) { - let (mut lits2, mut lits3) = (lits.clone(), lits.to_empty()); - lits3.set_limit_size(lits.limit_size() / 2); - f(e, &mut lits3); - - if lits3.is_empty() || !lits2.cross_product(&lits3) { - lits.cut(); - return; - } - lits2.add(Literal::empty()); - if !lits.union(lits2) { - lits.cut(); - } + f( + &Hir::repetition(hir::Repetition { + kind: hir::RepetitionKind::ZeroOrMore, + // FIXME: Our literal extraction doesn't care about greediness. + // Which is partially why we're treating 'e?' as 'e*'. Namely, + // 'ab??' yields [Complete(ab), Complete(a)], but it should yield + // [Complete(a), Complete(ab)] because of the non-greediness. + greedy: true, + hir: Box::new(e.clone()), + }), + lits, + ); } fn repeat_zero_or_more_literals<F: FnMut(&Hir, &mut Literals)>( @@ -793,7 +791,7 @@ fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>( f( &Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::ZeroOrMore, - greedy: greedy, + greedy, hir: Box::new(e.clone()), }), lits, @@ -932,12 +930,10 @@ fn escape_unicode(bytes: &[u8]) -> String { if c.is_whitespace() { let escaped = if c as u32 <= 0x7F { escape_byte(c as u8) + } else if c as u32 <= 0xFFFF { + format!(r"\u{{{:04x}}}", c as u32) } else { - if c as u32 <= 0xFFFF { - format!(r"\u{{{:04x}}}", c as u32) - } else { - format!(r"\U{{{:08x}}}", c as u32) - } + format!(r"\U{{{:08x}}}", c as u32) }; space_escaped.push_str(&escaped); } else { @@ -1141,6 +1137,11 @@ mod tests { test_lit!(pfx_group1, prefixes, "(a)", M("a")); test_lit!(pfx_rep_zero_or_one1, prefixes, "a?"); test_lit!(pfx_rep_zero_or_one2, prefixes, "(?:abc)?"); + test_lit!(pfx_rep_zero_or_one_cat1, prefixes, "ab?", C("ab"), M("a")); + // FIXME: This should return [M("a"), M("ab")] because of the non-greedy + // repetition. As a work-around, we rewrite ab?? as ab*?, and thus we get + // a cut literal. + test_lit!(pfx_rep_zero_or_one_cat2, prefixes, "ab??", C("ab"), M("a")); test_lit!(pfx_rep_zero_or_more1, prefixes, "a*"); test_lit!(pfx_rep_zero_or_more2, prefixes, "(?:abc)*"); test_lit!(pfx_rep_one_or_more1, prefixes, "a+", C("a")); @@ -1249,8 +1250,8 @@ mod tests { pfx_crazy1, prefixes, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - C("Mo\\'am"), - C("Mu\\'am"), + C("Mo\\'"), + C("Mu\\'"), C("Moam"), C("Muam") ); diff --git a/src/hir/mod.rs b/src/hir/mod.rs index 4969f12..1096e9f 100644 --- a/src/hir/mod.rs +++ b/src/hir/mod.rs @@ -243,7 +243,7 @@ impl Hir { info.set_match_empty(true); info.set_literal(false); info.set_alternation_literal(false); - Hir { kind: HirKind::Empty, info: info } + Hir { kind: HirKind::Empty, info } } /// Creates a literal HIR expression. @@ -268,7 +268,7 @@ impl Hir { info.set_match_empty(false); info.set_literal(true); info.set_alternation_literal(true); - Hir { kind: HirKind::Literal(lit), info: info } + Hir { kind: HirKind::Literal(lit), info } } /// Creates a class HIR expression. @@ -285,7 +285,7 @@ impl Hir { info.set_match_empty(false); info.set_literal(false); info.set_alternation_literal(false); - Hir { kind: HirKind::Class(class), info: info } + Hir { kind: HirKind::Class(class), info } } /// Creates an anchor assertion HIR expression. @@ -318,7 +318,7 @@ impl Hir { if let Anchor::EndLine = anchor { info.set_line_anchored_end(true); } - Hir { kind: HirKind::Anchor(anchor), info: info } + Hir { kind: HirKind::Anchor(anchor), info } } /// Creates a word boundary assertion HIR expression. @@ -334,14 +334,18 @@ impl Hir { info.set_any_anchored_end(false); info.set_literal(false); info.set_alternation_literal(false); - // A negated word boundary matches the empty string, but a normal - // word boundary does not! - info.set_match_empty(word_boundary.is_negated()); + // A negated word boundary matches '', so that's fine. But \b does not + // match \b, so why do we say it can match the empty string? Well, + // because, if you search for \b against 'a', it will report [0, 0) and + // [1, 1) as matches, and both of those matches correspond to the empty + // string. Thus, only *certain* empty strings match \b, which similarly + // applies to \B. + info.set_match_empty(true); // Negated ASCII word boundaries can match invalid UTF-8. if let WordBoundary::AsciiNegate = word_boundary { info.set_always_utf8(false); } - Hir { kind: HirKind::WordBoundary(word_boundary), info: info } + Hir { kind: HirKind::WordBoundary(word_boundary), info } } /// Creates a repetition HIR expression. @@ -368,7 +372,7 @@ impl Hir { info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty()); info.set_literal(false); info.set_alternation_literal(false); - Hir { kind: HirKind::Repetition(rep), info: info } + Hir { kind: HirKind::Repetition(rep), info } } /// Creates a group HIR expression. @@ -385,7 +389,7 @@ impl Hir { info.set_match_empty(group.hir.is_match_empty()); info.set_literal(false); info.set_alternation_literal(false); - Hir { kind: HirKind::Group(group), info: info } + Hir { kind: HirKind::Group(group), info } } /// Returns the concatenation of the given expressions. @@ -476,7 +480,7 @@ impl Hir { }) .any(|e| e.is_line_anchored_end()), ); - Hir { kind: HirKind::Concat(exprs), info: info } + Hir { kind: HirKind::Concat(exprs), info } } } } @@ -538,7 +542,7 @@ impl Hir { let x = info.is_alternation_literal() && e.is_literal(); info.set_alternation_literal(x); } - Hir { kind: HirKind::Alternation(exprs), info: info } + Hir { kind: HirKind::Alternation(exprs), info } } } } @@ -661,8 +665,8 @@ impl Hir { /// Return true if and only if the empty string is part of the language /// matched by this regular expression. /// - /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\B`, - /// but not `a`, `a+` or `\b`. + /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\b` + /// and `\B`, but not `a` or `a+`. pub fn is_match_empty(&self) -> bool { self.info.is_match_empty() } diff --git a/src/hir/print.rs b/src/hir/print.rs index ff18c6e..b71f389 100644 --- a/src/hir/print.rs +++ b/src/hir/print.rs @@ -65,17 +65,16 @@ impl Printer { /// here are a `fmt::Formatter` (which is available in `fmt::Display` /// implementations) or a `&mut String`. pub fn print<W: fmt::Write>(&mut self, hir: &Hir, wtr: W) -> fmt::Result { - visitor::visit(hir, Writer { printer: self, wtr: wtr }) + visitor::visit(hir, Writer { wtr }) } } #[derive(Debug)] -struct Writer<'p, W> { - printer: &'p mut Printer, +struct Writer<W> { wtr: W, } -impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { +impl<W: fmt::Write> Visitor for Writer<W> { type Output = (); type Err = fmt::Error; @@ -209,7 +208,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { } } -impl<'p, W: fmt::Write> Writer<'p, W> { +impl<W: fmt::Write> Writer<W> { fn write_literal_char(&mut self, c: char) -> fmt::Result { if is_meta_character(c) { self.wtr.write_str("\\")?; diff --git a/src/hir/translate.rs b/src/hir/translate.rs index 99c9493..890e160 100644 --- a/src/hir/translate.rs +++ b/src/hir/translate.rs @@ -434,20 +434,14 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { } ast::ClassSetItem::Ascii(ref x) => { if self.flags().unicode() { + let xcls = self.hir_ascii_unicode_class(x)?; let mut cls = self.pop().unwrap().unwrap_class_unicode(); - for &(s, e) in ascii_class(&x.kind) { - cls.push(hir::ClassUnicodeRange::new(s, e)); - } - self.unicode_fold_and_negate( - &x.span, x.negated, &mut cls, - )?; + cls.union(&xcls); self.push(HirFrame::ClassUnicode(cls)); } else { + let xcls = self.hir_ascii_byte_class(x)?; let mut cls = self.pop().unwrap().unwrap_class_bytes(); - for &(s, e) in ascii_class(&x.kind) { - cls.push(hir::ClassBytesRange::new(s as u8, e as u8)); - } - self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?; + cls.union(&xcls); self.push(HirFrame::ClassBytes(cls)); } } @@ -595,7 +589,7 @@ struct TranslatorI<'t, 'p> { impl<'t, 'p> TranslatorI<'t, 'p> { /// Build a new internal translator. fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> { - TranslatorI { trans: trans, pattern: pattern } + TranslatorI { trans, pattern } } /// Return a reference to the underlying translator. @@ -615,7 +609,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { /// Create a new error with the given span and error type. fn error(&self, span: Span, kind: ErrorKind) -> Error { - Error { kind: kind, pattern: self.pattern.to_string(), span: span } + Error { kind, pattern: self.pattern.to_string(), span } } /// Return a copy of the active flags. @@ -785,7 +779,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { } ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing, }; - Hir::group(hir::Group { kind: kind, hir: Box::new(expr) }) + Hir::group(hir::Group { kind, hir: Box::new(expr) }) } fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { @@ -808,11 +802,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { }; let greedy = if self.flags().swap_greed() { !rep.greedy } else { rep.greedy }; - Hir::repetition(hir::Repetition { - kind: kind, - greedy: greedy, - hir: Box::new(expr), - }) + Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) }) } fn hir_unicode_class( @@ -853,6 +843,32 @@ impl<'t, 'p> TranslatorI<'t, 'p> { result } + fn hir_ascii_unicode_class( + &self, + ast: &ast::ClassAscii, + ) -> Result<hir::ClassUnicode> { + let mut cls = hir::ClassUnicode::new( + ascii_class(&ast.kind) + .iter() + .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)), + ); + self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?; + Ok(cls) + } + + fn hir_ascii_byte_class( + &self, + ast: &ast::ClassAscii, + ) -> Result<hir::ClassBytes> { + let mut cls = hir::ClassBytes::new( + ascii_class(&ast.kind) + .iter() + .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)), + ); + self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?; + Ok(cls) + } + fn hir_perl_unicode_class( &self, ast_class: &ast::ClassPerl, @@ -948,7 +964,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { class: &mut hir::ClassBytes, ) -> Result<()> { // Note that we must apply case folding before negation! - // Consider `(?i)[^x]`. If we applied negation field, then + // Consider `(?i)[^x]`. If we applied negation first, then // the result would be the character class that matched any // Unicode scalar value. if self.flags().case_insensitive() { @@ -1218,7 +1234,7 @@ mod tests { fn hir_quest(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::ZeroOrOne, - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1226,7 +1242,7 @@ mod tests { fn hir_star(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::ZeroOrMore, - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1234,7 +1250,7 @@ mod tests { fn hir_plus(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::OneOrMore, - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1242,7 +1258,7 @@ mod tests { fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::Range(range), - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1944,6 +1960,25 @@ mod tests { } #[test] + fn class_ascii_multiple() { + // See: https://github.com/rust-lang/regex/issues/680 + assert_eq!( + t("[[:alnum:][:^ascii:]]"), + hir_union( + hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)), + hir_uclass(&[('\u{80}', '\u{10FFFF}')]), + ), + ); + assert_eq!( + t_bytes("(?-u)[[:alnum:][:^ascii:]]"), + hir_union( + hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)), + hir_bclass(&[(0x80, 0xFF)]), + ), + ); + } + + #[test] #[cfg(feature = "unicode-perl")] fn class_perl() { // Unicode @@ -3100,6 +3135,9 @@ mod tests { assert!(t(r"\pL*").is_match_empty()); assert!(t(r"a*|b").is_match_empty()); assert!(t(r"b|a*").is_match_empty()); + assert!(t(r"a|").is_match_empty()); + assert!(t(r"|a").is_match_empty()); + assert!(t(r"a||b").is_match_empty()); assert!(t(r"a*a?(abcd)*").is_match_empty()); assert!(t(r"^").is_match_empty()); assert!(t(r"$").is_match_empty()); @@ -3109,6 +3147,8 @@ mod tests { assert!(t(r"\z").is_match_empty()); assert!(t(r"\B").is_match_empty()); assert!(t_bytes(r"(?-u)\B").is_match_empty()); + assert!(t(r"\b").is_match_empty()); + assert!(t(r"(?-u)\b").is_match_empty()); // Negative examples. assert!(!t(r"a+").is_match_empty()); @@ -3118,8 +3158,6 @@ mod tests { assert!(!t(r"a{1,10}").is_match_empty()); assert!(!t(r"b|a").is_match_empty()); assert!(!t(r"a*a+(abcd)*").is_match_empty()); - assert!(!t(r"\b").is_match_empty()); - assert!(!t(r"(?-u)\b").is_match_empty()); } #[test] @@ -195,7 +195,7 @@ pub fn escape_into(text: &str, buf: &mut String) { } } -/// Returns true if the give character has significance in a regex. +/// Returns true if the given character has significance in a regex. /// /// These are the only characters that are allowed to be escaped, with one /// exception: an ASCII space character may be escaped when extended mode (with diff --git a/src/parser.rs b/src/parser.rs index eb363ca..ded95b2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -45,7 +45,7 @@ impl ParserBuilder { /// they should impose a limit on the length, in bytes, of the concrete /// pattern string. In particular, this is viable since this parser /// implementation will limit itself to heap space proportional to the - /// lenth of the pattern string. + /// length of the pattern string. /// /// Note that a nest limit of `0` will return a nest limit error for most /// patterns but not all. For example, a nest limit of `0` permits `a` but @@ -96,7 +96,7 @@ impl ParserBuilder { /// Enable verbose mode in the regular expression. /// - /// When enabled, verbose mode permits insigificant whitespace in many + /// When enabled, verbose mode permits insignificant whitespace in many /// places in the regular expression, as well as comments. Comments are /// started using `#` and continue until the end of the line. /// diff --git a/src/unicode.rs b/src/unicode.rs index b894c7d..8194d7f 100644 --- a/src/unicode.rs +++ b/src/unicode.rs @@ -99,7 +99,7 @@ pub fn simple_fold( Ok(CASE_FOLDING_SIMPLE .binary_search_by_key(&c, |&(c1, _)| c1) - .map(|i| CASE_FOLDING_SIMPLE[i].1.iter().map(|&c| c)) + .map(|i| CASE_FOLDING_SIMPLE[i].1.iter().copied()) .map_err(|i| { if i >= CASE_FOLDING_SIMPLE.len() { None @@ -580,7 +580,7 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> { fn imp(canonical_age: &str) -> Result<impl Iterator<Item = Range>> { use crate::unicode_tables::age; - const AGES: &'static [(&'static str, Range)] = &[ + const AGES: &[(&str, Range)] = &[ ("V1_1", age::V1_1), ("V2_0", age::V2_0), ("V2_1", age::V2_1), @@ -604,13 +604,15 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> { ("V12_0", age::V12_0), ("V12_1", age::V12_1), ("V13_0", age::V13_0), + ("V14_0", age::V14_0), + ("V15_0", age::V15_0), ]; assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync"); let pos = AGES.iter().position(|&(age, _)| canonical_age == age); match pos { None => Err(Error::PropertyValueNotFound), - Some(i) => Ok(AGES[..i + 1].iter().map(|&(_, classes)| classes)), + Some(i) => Ok(AGES[..=i].iter().map(|&(_, classes)| classes)), } } diff --git a/src/unicode_tables/age.rs b/src/unicode_tables/age.rs index 7772919..71f4861 100644 --- a/src/unicode_tables/age.rs +++ b/src/unicode_tables/age.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate age ucd-13.0.0 --chars +// ucd-generate age ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("V10_0", V10_0), @@ -12,6 +12,8 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("V12_0", V12_0), ("V12_1", V12_1), ("V13_0", V13_0), + ("V14_0", V14_0), + ("V15_0", V15_0), ("V1_1", V1_1), ("V2_0", V2_0), ("V2_1", V2_1), @@ -203,69 +205,185 @@ pub const V12_0: &'static [(char, char)] = &[ pub const V12_1: &'static [(char, char)] = &[('㋿', '㋿')]; pub const V13_0: &'static [(char, char)] = &[ - ('\u{8be}', '\u{8c7}'), + ('ࢾ', 'ࣇ'), ('\u{b55}', '\u{b55}'), - ('\u{d04}', '\u{d04}'), + ('ഄ', 'ഄ'), ('\u{d81}', '\u{d81}'), ('\u{1abf}', '\u{1ac0}'), - ('\u{2b97}', '\u{2b97}'), - ('\u{2e50}', '\u{2e52}'), - ('\u{31bb}', '\u{31bf}'), - ('\u{4db6}', '\u{4dbf}'), - ('\u{9ff0}', '\u{9ffc}'), - ('\u{a7c7}', '\u{a7ca}'), - ('\u{a7f5}', '\u{a7f6}'), + ('⮗', '⮗'), + ('⹐', '⹒'), + ('ㆻ', 'ㆿ'), + ('䶶', '䶿'), + ('鿰', '鿼'), + ('Ꟈ', 'ꟊ'), + ('Ꟶ', 'ꟶ'), ('\u{a82c}', '\u{a82c}'), - ('\u{ab68}', '\u{ab6b}'), - ('\u{1019c}', '\u{1019c}'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eab}', '\u{10ead}'), - ('\u{10eb0}', '\u{10eb1}'), - ('\u{10fb0}', '\u{10fcb}'), - ('\u{11147}', '\u{11147}'), - ('\u{111ce}', '\u{111cf}'), - ('\u{1145a}', '\u{1145a}'), - ('\u{11460}', '\u{11461}'), - ('\u{11900}', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193b}', '\u{11946}'), - ('\u{11950}', '\u{11959}'), - ('\u{11fb0}', '\u{11fb0}'), + ('ꭨ', '꭫'), + ('𐆜', '𐆜'), + ('𐺀', '𐺩'), + ('\u{10eab}', '𐺭'), + ('𐺰', '𐺱'), + ('𐾰', '𐿋'), + ('𑅇', '𑅇'), + ('𑇎', '\u{111cf}'), + ('𑑚', '𑑚'), + ('𑑠', '𑑡'), + ('𑤀', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤵'), + ('𑤷', '𑤸'), + ('\u{1193b}', '𑥆'), + ('𑥐', '𑥙'), + ('𑾰', '𑾰'), ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{18af3}', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('\u{1f10d}', '\u{1f10f}'), - ('\u{1f16d}', '\u{1f16f}'), - ('\u{1f1ad}', '\u{1f1ad}'), - ('\u{1f6d6}', '\u{1f6d7}'), - ('\u{1f6fb}', '\u{1f6fc}'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('\u{1f90c}', '\u{1f90c}'), - ('\u{1f972}', '\u{1f972}'), - ('\u{1f977}', '\u{1f978}'), - ('\u{1f9a3}', '\u{1f9a4}'), - ('\u{1f9ab}', '\u{1f9ad}'), - ('\u{1f9cb}', '\u{1f9cb}'), - ('\u{1fa74}', '\u{1fa74}'), - ('\u{1fa83}', '\u{1fa86}'), - ('\u{1fa96}', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), - ('\u{1fbf0}', '\u{1fbf9}'), - ('\u{2a6d7}', '\u{2a6dd}'), - ('\u{30000}', '\u{3134a}'), + ('𖿰', '𖿱'), + ('𘫳', '𘳕'), + ('𘴀', '𘴈'), + ('🄍', '🄏'), + ('🅭', '🅯'), + ('🆭', '🆭'), + ('🛖', '🛗'), + ('🛻', '🛼'), + ('🢰', '🢱'), + ('🤌', '🤌'), + ('🥲', '🥲'), + ('🥷', '🥸'), + ('🦣', '🦤'), + ('🦫', '🦭'), + ('🧋', '🧋'), + ('🩴', '🩴'), + ('🪃', '🪆'), + ('🪖', '🪨'), + ('🪰', '🪶'), + ('🫀', '🫂'), + ('🫐', '🫖'), + ('🬀', '🮒'), + ('🮔', '🯊'), + ('🯰', '🯹'), + ('𪛗', '𪛝'), + ('𰀀', '𱍊'), +]; + +pub const V14_0: &'static [(char, char)] = &[ + ('؝', '؝'), + ('ࡰ', 'ࢎ'), + ('\u{890}', '\u{891}'), + ('\u{898}', '\u{89f}'), + ('ࢵ', 'ࢵ'), + ('ࣈ', '\u{8d2}'), + ('\u{c3c}', '\u{c3c}'), + ('ౝ', 'ౝ'), + ('ೝ', 'ೝ'), + ('ᜍ', 'ᜍ'), + ('᜕', '᜕'), + ('ᜟ', 'ᜟ'), + ('\u{180f}', '\u{180f}'), + ('\u{1ac1}', '\u{1ace}'), + ('ᭌ', 'ᭌ'), + ('᭽', '᭾'), + ('\u{1dfa}', '\u{1dfa}'), + ('⃀', '⃀'), + ('Ⱟ', 'Ⱟ'), + ('ⱟ', 'ⱟ'), + ('⹓', '⹝'), + ('鿽', '鿿'), + ('Ꟁ', 'ꟁ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꟴ'), + ('﯂', '﯂'), + ('﵀', '﵏'), + ('﷏', '﷏'), + ('﷾', '﷿'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), + ('𐽰', '𐾉'), + ('\u{11070}', '𑁵'), + ('\u{110c2}', '\u{110c2}'), + ('𑚹', '𑚹'), + ('𑝀', '𑝆'), + ('𑪰', '𑪿'), + ('𒾐', '𒿲'), + ('𖩰', '𖪾'), + ('𖫀', '𖫉'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛄟', '𛄢'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), + ('𜽐', '𜿃'), + ('𝇩', '𝇪'), + ('𝼀', '𝼞'), + ('𞊐', '\u{1e2ae}'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), + ('🛝', '🛟'), + ('🟰', '🟰'), + ('🥹', '🥹'), + ('🧌', '🧌'), + ('🩻', '🩼'), + ('🪩', '🪬'), + ('🪷', '🪺'), + ('🫃', '🫅'), + ('🫗', '🫙'), + ('🫠', '🫧'), + ('🫰', '🫶'), + ('𪛞', '𪛟'), + ('𫜵', '𫜸'), +]; + +pub const V15_0: &'static [(char, char)] = &[ + ('ೳ', 'ೳ'), + ('\u{ece}', '\u{ece}'), + ('\u{10efd}', '\u{10eff}'), + ('𑈿', '\u{11241}'), + ('𑬀', '𑬉'), + ('\u{11f00}', '𑼐'), + ('𑼒', '\u{11f3a}'), + ('𑼾', '𑽙'), + ('𓐯', '𓐯'), + ('\u{13439}', '\u{13455}'), + ('𛄲', '𛄲'), + ('𛅕', '𛅕'), + ('𝋀', '𝋓'), + ('𝼥', '𝼪'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), + ('𞓐', '𞓹'), + ('🛜', '🛜'), + ('🝴', '🝶'), + ('🝻', '🝿'), + ('🟙', '🟙'), + ('🩵', '🩷'), + ('🪇', '🪈'), + ('🪭', '🪯'), + ('🪻', '🪽'), + ('🪿', '🪿'), + ('🫎', '🫏'), + ('🫚', '🫛'), + ('🫨', '🫨'), + ('🫷', '🫸'), + ('𫜹', '𫜹'), + ('𱍐', '𲎯'), ]; pub const V1_1: &'static [(char, char)] = &[ - ('\u{0}', 'ǵ'), + ('\0', 'ǵ'), ('Ǻ', 'ȗ'), ('ɐ', 'ʨ'), ('ʰ', '˞'), diff --git a/src/unicode_tables/case_folding_simple.rs b/src/unicode_tables/case_folding_simple.rs index cfb83f3..23f9364 100644 --- a/src/unicode_tables/case_folding_simple.rs +++ b/src/unicode_tables/case_folding_simple.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate case-folding-simple ucd-13.0.0 --chars --all-pairs +// ucd-generate case-folding-simple ucd-15.0.0 --chars --all-pairs // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('A', &['a']), @@ -1781,6 +1781,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('Ⱜ', &['ⱜ']), ('Ⱝ', &['ⱝ']), ('Ⱞ', &['ⱞ']), + ('Ⱟ', &['ⱟ']), ('ⰰ', &['Ⰰ']), ('ⰱ', &['Ⰱ']), ('ⰲ', &['Ⰲ']), @@ -1828,6 +1829,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ⱜ', &['Ⱜ']), ('ⱝ', &['Ⱝ']), ('ⱞ', &['Ⱞ']), + ('ⱟ', &['Ⱟ']), ('Ⱡ', &['ⱡ']), ('ⱡ', &['Ⱡ']), ('Ɫ', &['ɫ']), @@ -2211,17 +2213,25 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ꞽ', &['Ꞽ']), ('Ꞿ', &['ꞿ']), ('ꞿ', &['Ꞿ']), + ('Ꟁ', &['ꟁ']), + ('ꟁ', &['Ꟁ']), ('Ꟃ', &['ꟃ']), ('ꟃ', &['Ꟃ']), ('Ꞔ', &['ꞔ']), ('Ʂ', &['ʂ']), ('Ᶎ', &['ᶎ']), - ('\u{a7c7}', &['\u{a7c8}']), - ('\u{a7c8}', &['\u{a7c7}']), - ('\u{a7c9}', &['\u{a7ca}']), - ('\u{a7ca}', &['\u{a7c9}']), - ('\u{a7f5}', &['\u{a7f6}']), - ('\u{a7f6}', &['\u{a7f5}']), + ('Ꟈ', &['ꟈ']), + ('ꟈ', &['Ꟈ']), + ('Ꟊ', &['ꟊ']), + ('ꟊ', &['Ꟊ']), + ('Ꟑ', &['ꟑ']), + ('ꟑ', &['Ꟑ']), + ('Ꟗ', &['ꟗ']), + ('ꟗ', &['Ꟗ']), + ('Ꟙ', &['ꟙ']), + ('ꟙ', &['Ꟙ']), + ('Ꟶ', &['ꟶ']), + ('ꟶ', &['Ꟶ']), ('ꭓ', &['Ꭓ']), ('ꭰ', &['Ꭰ']), ('ꭱ', &['Ꭱ']), @@ -2507,6 +2517,76 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('𐓹', &['𐓑']), ('𐓺', &['𐓒']), ('𐓻', &['𐓓']), + ('𐕰', &['𐖗']), + ('𐕱', &['𐖘']), + ('𐕲', &['𐖙']), + ('𐕳', &['𐖚']), + ('𐕴', &['𐖛']), + ('𐕵', &['𐖜']), + ('𐕶', &['𐖝']), + ('𐕷', &['𐖞']), + ('𐕸', &['𐖟']), + ('𐕹', &['𐖠']), + ('𐕺', &['𐖡']), + ('𐕼', &['𐖣']), + ('𐕽', &['𐖤']), + ('𐕾', &['𐖥']), + ('𐕿', &['𐖦']), + ('𐖀', &['𐖧']), + ('𐖁', &['𐖨']), + ('𐖂', &['𐖩']), + ('𐖃', &['𐖪']), + ('𐖄', &['𐖫']), + ('𐖅', &['𐖬']), + ('𐖆', &['𐖭']), + ('𐖇', &['𐖮']), + ('𐖈', &['𐖯']), + ('𐖉', &['𐖰']), + ('𐖊', &['𐖱']), + ('𐖌', &['𐖳']), + ('𐖍', &['𐖴']), + ('𐖎', &['𐖵']), + ('𐖏', &['𐖶']), + ('𐖐', &['𐖷']), + ('𐖑', &['𐖸']), + ('𐖒', &['𐖹']), + ('𐖔', &['𐖻']), + ('𐖕', &['𐖼']), + ('𐖗', &['𐕰']), + ('𐖘', &['𐕱']), + ('𐖙', &['𐕲']), + ('𐖚', &['𐕳']), + ('𐖛', &['𐕴']), + ('𐖜', &['𐕵']), + ('𐖝', &['𐕶']), + ('𐖞', &['𐕷']), + ('𐖟', &['𐕸']), + ('𐖠', &['𐕹']), + ('𐖡', &['𐕺']), + ('𐖣', &['𐕼']), + ('𐖤', &['𐕽']), + ('𐖥', &['𐕾']), + ('𐖦', &['𐕿']), + ('𐖧', &['𐖀']), + ('𐖨', &['𐖁']), + ('𐖩', &['𐖂']), + ('𐖪', &['𐖃']), + ('𐖫', &['𐖄']), + ('𐖬', &['𐖅']), + ('𐖭', &['𐖆']), + ('𐖮', &['𐖇']), + ('𐖯', &['𐖈']), + ('𐖰', &['𐖉']), + ('𐖱', &['𐖊']), + ('𐖳', &['𐖌']), + ('𐖴', &['𐖍']), + ('𐖵', &['𐖎']), + ('𐖶', &['𐖏']), + ('𐖷', &['𐖐']), + ('𐖸', &['𐖑']), + ('𐖹', &['𐖒']), + ('𐖻', &['𐖔']), + ('𐖼', &['𐖕']), ('𐲀', &['𐳀']), ('𐲁', &['𐳁']), ('𐲂', &['𐳂']), diff --git a/src/unicode_tables/general_category.rs b/src/unicode_tables/general_category.rs index 33b7b7e..8fc9289 100644 --- a/src/unicode_tables/general_category.rs +++ b/src/unicode_tables/general_category.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate general-category ucd-13.0.0 --chars --exclude surrogate +// ucd-generate general-category ucd-15.0.0 --chars --exclude surrogate // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Cased_Letter", CASED_LETTER), @@ -116,9 +116,7 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ↄ', 'ↄ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⱻ'), + ('Ⰰ', 'ⱻ'), ('Ȿ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), @@ -130,12 +128,14 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('Ꜣ', 'ꝯ'), ('ꝱ', 'ꞇ'), ('Ꞌ', 'ꞎ'), - ('Ꞑ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', '\u{a7f6}'), + ('Ꞑ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('Ꟶ', 'ꟶ'), ('ꟺ', 'ꟺ'), ('ꬰ', 'ꭚ'), - ('ꭠ', '\u{ab68}'), + ('ꭠ', 'ꭨ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), ('ﬓ', 'ﬗ'), @@ -144,6 +144,14 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('𐐀', '𐑏'), ('𐒰', '𐓓'), ('𐓘', '𐓻'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑢠', '𑣟'), @@ -178,6 +186,9 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), + ('𝼀', '𝼉'), + ('𝼋', '𝼞'), + ('𝼥', '𝼪'), ('𞤀', '𞥃'), ]; @@ -225,6 +236,10 @@ pub const CLOSE_PUNCTUATION: &'static [(char, char)] = &[ ('⸥', '⸥'), ('⸧', '⸧'), ('⸩', '⸩'), + ('⹖', '⹖'), + ('⹘', '⹘'), + ('⹚', '⹚'), + ('⹜', '⹜'), ('〉', '〉'), ('》', '》'), ('」', '」'), @@ -266,7 +281,7 @@ pub const CONNECTOR_PUNCTUATION: &'static [(char, char)] = &[ ]; pub const CONTROL: &'static [(char, char)] = - &[('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}')]; + &[('\0', '\u{1f}'), ('\u{7f}', '\u{9f}')]; pub const CURRENCY_SYMBOL: &'static [(char, char)] = &[ ('$', '$'), @@ -280,7 +295,7 @@ pub const CURRENCY_SYMBOL: &'static [(char, char)] = &[ ('௹', '௹'), ('฿', '฿'), ('៛', '៛'), - ('₠', '₿'), + ('₠', '⃀'), ('꠸', '꠸'), ('﷼', '﷼'), ('﹩', '﹩'), @@ -303,6 +318,7 @@ pub const DASH_PUNCTUATION: &'static [(char, char)] = &[ ('⸚', '⸚'), ('⸺', '⸻'), ('⹀', '⹀'), + ('⹝', '⹝'), ('〜', '〜'), ('〰', '〰'), ('゠', '゠'), @@ -310,7 +326,7 @@ pub const DASH_PUNCTUATION: &'static [(char, char)] = &[ ('﹘', '﹘'), ('﹣', '﹣'), ('-', '-'), - ('\u{10ead}', '\u{10ead}'), + ('𐺭', '𐺭'), ]; pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ @@ -364,17 +380,20 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('𑛀', '𑛉'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), - ('\u{11950}', '\u{11959}'), + ('𑥐', '𑥙'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), + ('𑽐', '𑽙'), ('𖩠', '𖩩'), + ('𖫀', '𖫉'), ('𖭐', '𖭙'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), + ('𞓰', '𞓹'), ('𞥐', '𞥙'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('🯰', '🯹'), ]; pub const ENCLOSING_MARK: &'static [(char, char)] = &[ @@ -404,6 +423,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{61c}', '\u{61c}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), ('\u{8e2}', '\u{8e2}'), ('\u{180e}', '\u{180e}'), ('\u{200b}', '\u{200f}'), @@ -414,7 +434,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{fff9}', '\u{fffb}'), ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), - ('\u{13430}', '\u{13438}'), + ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), ('\u{e0001}', '\u{e0001}'), @@ -485,8 +505,9 @@ pub const LETTER: &'static [(char, char)] = &[ ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('ࢠ', 'ࣉ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'), ('ॐ', 'ॐ'), @@ -551,6 +572,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('ప', 'హ'), ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'), ('ಅ', 'ಌ'), @@ -559,10 +581,10 @@ pub const LETTER: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ಽ'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', 'ೡ'), ('ೱ', 'ೲ'), - ('\u{d04}', 'ഌ'), + ('ഄ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ഽ'), @@ -630,9 +652,8 @@ pub const LETTER: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛱ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', 'ᜑ'), - ('ᜠ', 'ᜱ'), + ('ᜀ', 'ᜑ'), + ('ᜟ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -653,7 +674,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('ᨠ', 'ᩔ'), ('ᪧ', 'ᪧ'), ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭋ'), + ('ᭅ', 'ᭌ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), @@ -704,9 +725,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ↄ', 'ↄ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⳤ'), + ('Ⰰ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), @@ -733,11 +752,10 @@ pub const LETTER: &'static [(char, char)] = &[ ('ー', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('ㇰ', 'ㇿ'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), - ('ꀀ', 'ꒌ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'), @@ -747,9 +765,11 @@ pub const LETTER: &'static [(char, char)] = &[ ('ꚠ', 'ꛥ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꠁ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠢ'), @@ -786,7 +806,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), - ('ꭜ', '\u{ab69}'), + ('ꭜ', 'ꭩ'), ('ꭰ', 'ꯢ'), ('가', '힣'), ('ힰ', 'ퟆ'), @@ -837,9 +857,20 @@ pub const LETTER: &'static [(char, char)] = &[ ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), @@ -870,19 +901,22 @@ pub const LETTER: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('𐺀', '𐺩'), + ('𐺰', '𐺱'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), - ('\u{10fb0}', '\u{10fc4}'), + ('𐽰', '𐾁'), + ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀃', '𑀷'), + ('𑁱', '𑁲'), + ('𑁵', '𑁵'), ('𑂃', '𑂯'), ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅄', '𑅄'), - ('\u{11147}', '\u{11147}'), + ('𑅇', '𑅇'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'), @@ -891,6 +925,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'), + ('𑈿', '𑉀'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -908,7 +943,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𑍝', '𑍡'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), - ('𑑟', '\u{11461}'), + ('𑑟', '𑑡'), ('𑒀', '𑒯'), ('𑓄', '𑓅'), ('𑓇', '𑓇'), @@ -919,15 +954,16 @@ pub const LETTER: &'static [(char, char)] = &[ ('𑚀', '𑚪'), ('𑚸', '𑚸'), ('𑜀', '𑜚'), + ('𑝀', '𑝆'), ('𑠀', '𑠫'), ('𑢠', '𑣟'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤯'), + ('𑤿', '𑤿'), + ('𑥁', '𑥁'), ('𑦠', '𑦧'), ('𑦪', '𑧐'), ('𑧡', '𑧡'), @@ -938,7 +974,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𑩐', '𑩐'), ('𑩜', '𑪉'), ('𑪝', '𑪝'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -952,13 +988,19 @@ pub const LETTER: &'static [(char, char)] = &[ ('𑵪', '𑶉'), ('𑶘', '𑶘'), ('𑻠', '𑻲'), - ('\u{11fb0}', '\u{11fb0}'), + ('𑼂', '𑼂'), + ('𑼄', '𑼐'), + ('𑼒', '𑼳'), + ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('𓑁', '𓑆'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), + ('𖩰', '𖪾'), ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), @@ -971,10 +1013,15 @@ pub const LETTER: &'static [(char, char)] = &[ ('𖿠', '𖿡'), ('𖿣', '𖿣'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('𛀀', '𛄞'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), @@ -1011,10 +1058,19 @@ pub const LETTER: &'static [(char, char)] = &[ ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), + ('𞀰', '𞁭'), ('𞄀', '𞄬'), ('𞄷', '𞄽'), ('𞅎', '𞅎'), + ('𞊐', '𞊭'), ('𞋀', '𞋫'), + ('𞓐', '𞓫'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('𞤀', '𞥃'), ('𞥋', '𞥋'), @@ -1051,13 +1107,14 @@ pub const LETTER: &'static [(char, char)] = &[ ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; pub const LETTER_NUMBER: &'static [(char, char)] = &[ @@ -1510,7 +1567,7 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('ⅆ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('ↄ', 'ↄ'), - ('ⰰ', 'ⱞ'), + ('ⰰ', 'ⱟ'), ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), @@ -1679,19 +1736,29 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('ꞻ', 'ꞻ'), ('ꞽ', 'ꞽ'), ('ꞿ', 'ꞿ'), + ('ꟁ', 'ꟁ'), ('ꟃ', 'ꟃ'), - ('\u{a7c8}', '\u{a7c8}'), - ('\u{a7ca}', '\u{a7ca}'), - ('\u{a7f6}', '\u{a7f6}'), + ('ꟈ', 'ꟈ'), + ('ꟊ', 'ꟊ'), + ('ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟕ'), + ('ꟗ', 'ꟗ'), + ('ꟙ', 'ꟙ'), + ('ꟶ', 'ꟶ'), ('ꟺ', 'ꟺ'), ('ꬰ', 'ꭚ'), - ('ꭠ', '\u{ab68}'), + ('ꭠ', 'ꭨ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('a', 'z'), ('𐐨', '𐑏'), ('𐓘', '𐓻'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐳀', '𐳲'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), @@ -1723,6 +1790,9 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('𝞪', '𝟂'), ('𝟄', '𝟉'), ('𝟋', '𝟋'), + ('𝼀', '𝼉'), + ('𝼋', '𝼞'), + ('𝼥', '𝼪'), ('𞤢', '𞥃'), ]; @@ -1751,7 +1821,8 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'ः'), ('\u{93a}', '\u{93c}'), ('ा', 'ॏ'), @@ -1793,6 +1864,7 @@ pub const MARK: &'static [(char, char)] = &[ ('ொ', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -1805,6 +1877,7 @@ pub const MARK: &'static [(char, char)] = &[ ('ೊ', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), + ('ೳ', 'ೳ'), ('\u{d00}', 'ഃ'), ('\u{d3b}', '\u{d3c}'), ('\u{d3e}', '\u{d44}'), @@ -1823,7 +1896,7 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -1844,13 +1917,14 @@ pub const MARK: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1712}', '᜕'), + ('\u{1732}', '᜴'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', 'ᤫ'), @@ -1859,7 +1933,7 @@ pub const MARK: &'static [(char, char)] = &[ ('ᩕ', '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), ('\u{1b34}', '᭄'), ('\u{1b6b}', '\u{1b73}'), @@ -1872,8 +1946,7 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('᳷', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), ('\u{2d7f}', '\u{2d7f}'), @@ -1925,11 +1998,16 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('𑀀', '𑀂'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '𑂂'), ('𑂰', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{11134}'), ('𑅅', '𑅆'), @@ -1937,9 +2015,10 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{11180}', '𑆂'), ('𑆳', '𑇀'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', '\u{111cf}'), + ('𑇎', '\u{111cf}'), ('𑈬', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112ea}'), ('\u{11300}', '𑌃'), ('\u{1133b}', '\u{1133c}'), @@ -1960,11 +2039,11 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{116ab}', '\u{116b7}'), ('\u{1171d}', '\u{1172b}'), ('𑠬', '\u{1183a}'), - ('\u{11930}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('\u{11930}', '𑤵'), + ('𑤷', '𑤸'), ('\u{1193b}', '\u{1193e}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11943}'), + ('𑥀', '𑥀'), + ('𑥂', '\u{11943}'), ('𑧑', '\u{119d7}'), ('\u{119da}', '\u{119e0}'), ('𑧤', '𑧤'), @@ -1987,14 +2066,22 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{11d90}', '\u{11d91}'), ('𑶓', '\u{11d97}'), ('\u{11ef3}', '𑻶'), + ('\u{11f00}', '\u{11f01}'), + ('𑼃', '𑼃'), + ('𑼴', '\u{11f3a}'), + ('𑼾', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('𝅭', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -2012,8 +2099,11 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0100}', '\u{e01ef}'), @@ -2102,6 +2192,7 @@ pub const MODIFIER_LETTER: &'static [(char, char)] = &[ ('ࠚ', 'ࠚ'), ('ࠤ', 'ࠤ'), ('ࠨ', 'ࠨ'), + ('ࣉ', 'ࣉ'), ('ॱ', 'ॱ'), ('ๆ', 'ๆ'), ('ໆ', 'ໆ'), @@ -2132,6 +2223,7 @@ pub const MODIFIER_LETTER: &'static [(char, char)] = &[ ('ꜗ', 'ꜟ'), ('ꝰ', 'ꝰ'), ('ꞈ', 'ꞈ'), + ('ꟲ', 'ꟴ'), ('ꟸ', 'ꟹ'), ('ꧏ', 'ꧏ'), ('ꧦ', 'ꧦ'), @@ -2139,14 +2231,22 @@ pub const MODIFIER_LETTER: &'static [(char, char)] = &[ ('ꫝ', 'ꫝ'), ('ꫳ', 'ꫴ'), ('ꭜ', 'ꭟ'), - ('\u{ab69}', '\u{ab69}'), + ('ꭩ', 'ꭩ'), ('ー', 'ー'), ('\u{ff9e}', '\u{ff9f}'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𖭀', '𖭃'), ('𖾓', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '𖿣'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𞀰', '𞁭'), ('𞄷', '𞄽'), + ('𞓫', '𞓫'), ('𞥋', '𞥋'), ]; @@ -2164,6 +2264,7 @@ pub const MODIFIER_SYMBOL: &'static [(char, char)] = &[ ('˯', '˿'), ('͵', '͵'), ('΄', '΅'), + ('࢈', '࢈'), ('᾽', '᾽'), ('᾿', '῁'), ('῍', '῏'), @@ -2175,8 +2276,8 @@ pub const MODIFIER_SYMBOL: &'static [(char, char)] = &[ ('꜠', '꜡'), ('꞉', '꞊'), ('꭛', '꭛'), - ('\u{ab6a}', '\u{ab6b}'), - ('﮲', '﯁'), + ('꭪', '꭫'), + ('﮲', '﯂'), ('^', '^'), ('`', '`'), (' ̄', ' ̄'), @@ -2208,7 +2309,8 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), @@ -2249,6 +2351,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{bcd}', '\u{bcd}'), ('\u{c00}', '\u{c00}'), ('\u{c04}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -2274,7 +2377,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -2298,7 +2401,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1732}', '\u{1733}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -2307,6 +2410,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{17c9}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', '\u{1922}'), @@ -2323,7 +2427,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{1a73}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1abd}'), - ('\u{1abf}', '\u{1ac0}'), + ('\u{1abf}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b34}'), ('\u{1b36}', '\u{1b3a}'), @@ -2346,8 +2450,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{20d0}', '\u{20dc}'), ('\u{20e1}', '\u{20e1}'), ('\u{20e5}', '\u{20f0}'), @@ -2405,12 +2508,17 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '\u{11081}'), ('\u{110b3}', '\u{110b6}'), ('\u{110b9}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{1112b}'), ('\u{1112d}', '\u{11134}'), @@ -2423,6 +2531,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{11234}', '\u{11234}'), ('\u{11236}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}', '\u{112ea}'), ('\u{11300}', '\u{11301}'), @@ -2484,12 +2593,20 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{11d95}', '\u{11d95}'), ('\u{11d97}', '\u{11d97}'), ('\u{11ef3}', '\u{11ef4}'), + ('\u{11f00}', '\u{11f01}'), + ('\u{11f36}', '\u{11f3a}'), + ('\u{11f40}', '\u{11f40}'), + ('\u{11f42}', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), @@ -2506,8 +2623,11 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0100}', '\u{e01ef}'), @@ -2610,7 +2730,7 @@ pub const NUMBER: &'static [(char, char)] = &[ ('𐹠', '𐹾'), ('𐼝', '𐼦'), ('𐽑', '𐽔'), - ('\u{10fc5}', '\u{10fcb}'), + ('𐿅', '𐿋'), ('𑁒', '𑁯'), ('𑃰', '𑃹'), ('𑄶', '𑄿'), @@ -2623,21 +2743,25 @@ pub const NUMBER: &'static [(char, char)] = &[ ('𑛀', '𑛉'), ('𑜰', '𑜻'), ('𑣠', '𑣲'), - ('\u{11950}', '\u{11959}'), + ('𑥐', '𑥙'), ('𑱐', '𑱬'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), + ('𑽐', '𑽙'), ('𑿀', '𑿔'), ('𒐀', '𒑮'), ('𖩠', '𖩩'), + ('𖫀', '𖫉'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖺀', '𖺖'), + ('𝋀', '𝋓'), ('𝋠', '𝋳'), ('𝍠', '𝍸'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), + ('𞓰', '𞓹'), ('𞣇', '𞣏'), ('𞥐', '𞥙'), ('𞱱', '𞲫'), @@ -2646,7 +2770,7 @@ pub const NUMBER: &'static [(char, char)] = &[ ('𞴁', '𞴭'), ('𞴯', '𞴽'), ('🄀', '🄌'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('🯰', '🯹'), ]; pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[ @@ -2696,6 +2820,10 @@ pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[ ('⸦', '⸦'), ('⸨', '⸨'), ('⹂', '⹂'), + ('⹕', '⹕'), + ('⹗', '⹗'), + ('⹙', '⹙'), + ('⹛', '⹛'), ('〈', '〈'), ('《', '《'), ('「', '「'), @@ -2728,7 +2856,7 @@ pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[ ]; pub const OTHER: &'static [(char, char)] = &[ - ('\u{0}', '\u{1f}'), + ('\0', '\u{1f}'), ('\u{7f}', '\u{9f}'), ('\u{ad}', '\u{ad}'), ('\u{378}', '\u{379}'), @@ -2743,7 +2871,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{5c8}', '\u{5cf}'), ('\u{5eb}', '\u{5ee}'), ('\u{5f5}', '\u{605}'), - ('\u{61c}', '\u{61d}'), + ('\u{61c}', '\u{61c}'), ('\u{6dd}', '\u{6dd}'), ('\u{70e}', '\u{70f}'), ('\u{74b}', '\u{74c}'), @@ -2753,9 +2881,8 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{83f}', '\u{83f}'), ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'), - ('\u{86b}', '\u{89f}'), - ('\u{8b5}', '\u{8b5}'), - ('\u{8c8}', '\u{8d2}'), + ('\u{86b}', '\u{86f}'), + ('\u{88f}', '\u{897}'), ('\u{8e2}', '\u{8e2}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'), @@ -2834,12 +2961,13 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{c0d}', '\u{c0d}'), ('\u{c11}', '\u{c11}'), ('\u{c29}', '\u{c29}'), - ('\u{c3a}', '\u{c3c}'), + ('\u{c3a}', '\u{c3b}'), ('\u{c45}', '\u{c45}'), ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'), ('\u{c57}', '\u{c57}'), - ('\u{c5b}', '\u{c5f}'), + ('\u{c5b}', '\u{c5c}'), + ('\u{c5e}', '\u{c5f}'), ('\u{c64}', '\u{c65}'), ('\u{c70}', '\u{c76}'), ('\u{c8d}', '\u{c8d}'), @@ -2850,11 +2978,11 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'), ('\u{cce}', '\u{cd4}'), - ('\u{cd7}', '\u{cdd}'), + ('\u{cd7}', '\u{cdc}'), ('\u{cdf}', '\u{cdf}'), ('\u{ce4}', '\u{ce5}'), ('\u{cf0}', '\u{cf0}'), - ('\u{cf3}', '\u{cff}'), + ('\u{cf4}', '\u{cff}'), ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'), ('\u{d45}', '\u{d45}'), @@ -2884,7 +3012,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'), ('\u{ec7}', '\u{ec7}'), - ('\u{ece}', '\u{ecf}'), + ('\u{ecf}', '\u{ecf}'), ('\u{eda}', '\u{edb}'), ('\u{ee0}', '\u{eff}'), ('\u{f48}', '\u{f48}'), @@ -2918,8 +3046,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{13fe}', '\u{13ff}'), ('\u{169d}', '\u{169f}'), ('\u{16f9}', '\u{16ff}'), - ('\u{170d}', '\u{170d}'), - ('\u{1715}', '\u{171f}'), + ('\u{1716}', '\u{171e}'), ('\u{1737}', '\u{173f}'), ('\u{1754}', '\u{175f}'), ('\u{176d}', '\u{176d}'), @@ -2928,7 +3055,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{17de}', '\u{17df}'), ('\u{17ea}', '\u{17ef}'), ('\u{17fa}', '\u{17ff}'), - ('\u{180e}', '\u{180f}'), + ('\u{180e}', '\u{180e}'), ('\u{181a}', '\u{181f}'), ('\u{1879}', '\u{187f}'), ('\u{18ab}', '\u{18af}'), @@ -2948,9 +3075,9 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1a8a}', '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'), ('\u{1aae}', '\u{1aaf}'), - ('\u{1ac1}', '\u{1aff}'), - ('\u{1b4c}', '\u{1b4f}'), - ('\u{1b7d}', '\u{1b7f}'), + ('\u{1acf}', '\u{1aff}'), + ('\u{1b4d}', '\u{1b4f}'), + ('\u{1b7f}', '\u{1b7f}'), ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'), ('\u{1c4a}', '\u{1c4c}'), @@ -2958,7 +3085,6 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1cbb}', '\u{1cbc}'), ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfb}', '\u{1cff}'), - ('\u{1dfa}', '\u{1dfa}'), ('\u{1f16}', '\u{1f17}'), ('\u{1f1e}', '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'), @@ -2981,15 +3107,13 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{2072}', '\u{2073}'), ('\u{208f}', '\u{208f}'), ('\u{209d}', '\u{209f}'), - ('\u{20c0}', '\u{20cf}'), + ('\u{20c1}', '\u{20cf}'), ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'), ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'), ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b96}'), - ('\u{2c2f}', '\u{2c2f}'), - ('\u{2c5f}', '\u{2c5f}'), ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'), ('\u{2d28}', '\u{2d2c}'), @@ -3005,7 +3129,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{2dcf}', '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'), ('\u{2ddf}', '\u{2ddf}'), - ('\u{2e53}', '\u{2e7f}'), + ('\u{2e5e}', '\u{2e7f}'), ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), @@ -3017,13 +3141,14 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{318f}', '\u{318f}'), ('\u{31e4}', '\u{31ef}'), ('\u{321f}', '\u{321f}'), - ('\u{9ffd}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'), - ('\u{a7c0}', '\u{a7c1}'), - ('\u{a7cb}', '\u{a7f4}'), + ('\u{a7cb}', '\u{a7cf}'), + ('\u{a7d2}', '\u{a7d2}'), + ('\u{a7d4}', '\u{a7d4}'), + ('\u{a7da}', '\u{a7f1}'), ('\u{a82d}', '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'), @@ -3059,11 +3184,10 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{fb3f}', '\u{fb3f}'), ('\u{fb42}', '\u{fb42}'), ('\u{fb45}', '\u{fb45}'), - ('\u{fbc2}', '\u{fbd2}'), - ('\u{fd40}', '\u{fd4f}'), + ('\u{fbc3}', '\u{fbd2}'), ('\u{fd90}', '\u{fd91}'), - ('\u{fdc8}', '\u{fdef}'), - ('\u{fdfe}', '\u{fdff}'), + ('\u{fdc8}', '\u{fdce}'), + ('\u{fdd0}', '\u{fdef}'), ('\u{fe1a}', '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'), ('\u{fe67}', '\u{fe67}'), @@ -3106,10 +3230,20 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{104fc}', '\u{104ff}'), ('\u{10528}', '\u{1052f}'), ('\u{10564}', '\u{1056e}'), - ('\u{10570}', '\u{105ff}'), + ('\u{1057b}', '\u{1057b}'), + ('\u{1058b}', '\u{1058b}'), + ('\u{10593}', '\u{10593}'), + ('\u{10596}', '\u{10596}'), + ('\u{105a2}', '\u{105a2}'), + ('\u{105b2}', '\u{105b2}'), + ('\u{105ba}', '\u{105ba}'), + ('\u{105bd}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'), - ('\u{10768}', '\u{107ff}'), + ('\u{10768}', '\u{1077f}'), + ('\u{10786}', '\u{10786}'), + ('\u{107b1}', '\u{107b1}'), + ('\u{107bb}', '\u{107ff}'), ('\u{10806}', '\u{10807}'), ('\u{10809}', '\u{10809}'), ('\u{10836}', '\u{10836}'), @@ -3150,15 +3284,16 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{10e7f}', '\u{10e7f}'), ('\u{10eaa}', '\u{10eaa}'), ('\u{10eae}', '\u{10eaf}'), - ('\u{10eb2}', '\u{10eff}'), + ('\u{10eb2}', '\u{10efc}'), ('\u{10f28}', '\u{10f2f}'), - ('\u{10f5a}', '\u{10faf}'), + ('\u{10f5a}', '\u{10f6f}'), + ('\u{10f8a}', '\u{10faf}'), ('\u{10fcc}', '\u{10fdf}'), ('\u{10ff7}', '\u{10fff}'), ('\u{1104e}', '\u{11051}'), - ('\u{11070}', '\u{1107e}'), + ('\u{11076}', '\u{1107e}'), ('\u{110bd}', '\u{110bd}'), - ('\u{110c2}', '\u{110cf}'), + ('\u{110c3}', '\u{110cf}'), ('\u{110e9}', '\u{110ef}'), ('\u{110fa}', '\u{110ff}'), ('\u{11135}', '\u{11135}'), @@ -3167,7 +3302,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{111e0}', '\u{111e0}'), ('\u{111f5}', '\u{111ff}'), ('\u{11212}', '\u{11212}'), - ('\u{1123f}', '\u{1127f}'), + ('\u{11242}', '\u{1127f}'), ('\u{11287}', '\u{11287}'), ('\u{11289}', '\u{11289}'), ('\u{1128e}', '\u{1128e}'), @@ -3199,11 +3334,11 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1165f}'), ('\u{1166d}', '\u{1167f}'), - ('\u{116b9}', '\u{116bf}'), + ('\u{116ba}', '\u{116bf}'), ('\u{116ca}', '\u{116ff}'), ('\u{1171b}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'), - ('\u{11740}', '\u{117ff}'), + ('\u{11747}', '\u{117ff}'), ('\u{1183c}', '\u{1189f}'), ('\u{118f3}', '\u{118fe}'), ('\u{11907}', '\u{11908}'), @@ -3218,8 +3353,9 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{119d8}', '\u{119d9}'), ('\u{119e5}', '\u{119ff}'), ('\u{11a48}', '\u{11a4f}'), - ('\u{11aa3}', '\u{11abf}'), - ('\u{11af9}', '\u{11bff}'), + ('\u{11aa3}', '\u{11aaf}'), + ('\u{11af9}', '\u{11aff}'), + ('\u{11b0a}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'), ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'), @@ -3240,19 +3376,25 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{11d92}', '\u{11d92}'), ('\u{11d99}', '\u{11d9f}'), ('\u{11daa}', '\u{11edf}'), - ('\u{11ef9}', '\u{11faf}'), + ('\u{11ef9}', '\u{11eff}'), + ('\u{11f11}', '\u{11f11}'), + ('\u{11f3b}', '\u{11f3d}'), + ('\u{11f5a}', '\u{11faf}'), ('\u{11fb1}', '\u{11fbf}'), ('\u{11ff2}', '\u{11ffe}'), ('\u{1239a}', '\u{123ff}'), ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'), - ('\u{12544}', '\u{12fff}'), - ('\u{1342f}', '\u{143ff}'), + ('\u{12544}', '\u{12f8f}'), + ('\u{12ff3}', '\u{12fff}'), + ('\u{13430}', '\u{1343f}'), + ('\u{13456}', '\u{143ff}'), ('\u{14647}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'), - ('\u{16a70}', '\u{16acf}'), + ('\u{16abf}', '\u{16abf}'), + ('\u{16aca}', '\u{16acf}'), ('\u{16aee}', '\u{16aef}'), ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'), @@ -3268,21 +3410,30 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{16ff2}', '\u{16fff}'), ('\u{187f8}', '\u{187ff}'), ('\u{18cd6}', '\u{18cff}'), - ('\u{18d09}', '\u{1afff}'), - ('\u{1b11f}', '\u{1b14f}'), - ('\u{1b153}', '\u{1b163}'), + ('\u{18d09}', '\u{1afef}'), + ('\u{1aff4}', '\u{1aff4}'), + ('\u{1affc}', '\u{1affc}'), + ('\u{1afff}', '\u{1afff}'), + ('\u{1b123}', '\u{1b131}'), + ('\u{1b133}', '\u{1b14f}'), + ('\u{1b153}', '\u{1b154}'), + ('\u{1b156}', '\u{1b163}'), ('\u{1b168}', '\u{1b16f}'), ('\u{1b2fc}', '\u{1bbff}'), ('\u{1bc6b}', '\u{1bc6f}'), ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'), - ('\u{1bca0}', '\u{1cfff}'), + ('\u{1bca0}', '\u{1ceff}'), + ('\u{1cf2e}', '\u{1cf2f}'), + ('\u{1cf47}', '\u{1cf4f}'), + ('\u{1cfc4}', '\u{1cfff}'), ('\u{1d0f6}', '\u{1d0ff}'), ('\u{1d127}', '\u{1d128}'), ('\u{1d173}', '\u{1d17a}'), - ('\u{1d1e9}', '\u{1d1ff}'), - ('\u{1d246}', '\u{1d2df}'), + ('\u{1d1eb}', '\u{1d1ff}'), + ('\u{1d246}', '\u{1d2bf}'), + ('\u{1d2d4}', '\u{1d2df}'), ('\u{1d2f4}', '\u{1d2ff}'), ('\u{1d357}', '\u{1d35f}'), ('\u{1d379}', '\u{1d3ff}'), @@ -3308,18 +3459,28 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1d7cc}', '\u{1d7cd}'), ('\u{1da8c}', '\u{1da9a}'), ('\u{1daa0}', '\u{1daa0}'), - ('\u{1dab0}', '\u{1dfff}'), + ('\u{1dab0}', '\u{1deff}'), + ('\u{1df1f}', '\u{1df24}'), + ('\u{1df2b}', '\u{1dfff}'), ('\u{1e007}', '\u{1e007}'), ('\u{1e019}', '\u{1e01a}'), ('\u{1e022}', '\u{1e022}'), ('\u{1e025}', '\u{1e025}'), - ('\u{1e02b}', '\u{1e0ff}'), + ('\u{1e02b}', '\u{1e02f}'), + ('\u{1e06e}', '\u{1e08e}'), + ('\u{1e090}', '\u{1e0ff}'), ('\u{1e12d}', '\u{1e12f}'), ('\u{1e13e}', '\u{1e13f}'), ('\u{1e14a}', '\u{1e14d}'), - ('\u{1e150}', '\u{1e2bf}'), + ('\u{1e150}', '\u{1e28f}'), + ('\u{1e2af}', '\u{1e2bf}'), ('\u{1e2fa}', '\u{1e2fe}'), - ('\u{1e300}', '\u{1e7ff}'), + ('\u{1e300}', '\u{1e4cf}'), + ('\u{1e4fa}', '\u{1e7df}'), + ('\u{1e7e7}', '\u{1e7e7}'), + ('\u{1e7ec}', '\u{1e7ec}'), + ('\u{1e7ef}', '\u{1e7ef}'), + ('\u{1e7ff}', '\u{1e7ff}'), ('\u{1e8c5}', '\u{1e8c6}'), ('\u{1e8d7}', '\u{1e8ff}'), ('\u{1e94c}', '\u{1e94f}'), @@ -3373,39 +3534,39 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1f249}', '\u{1f24f}'), ('\u{1f252}', '\u{1f25f}'), ('\u{1f266}', '\u{1f2ff}'), - ('\u{1f6d8}', '\u{1f6df}'), + ('\u{1f6d8}', '\u{1f6db}'), ('\u{1f6ed}', '\u{1f6ef}'), ('\u{1f6fd}', '\u{1f6ff}'), - ('\u{1f774}', '\u{1f77f}'), - ('\u{1f7d9}', '\u{1f7df}'), - ('\u{1f7ec}', '\u{1f7ff}'), + ('\u{1f777}', '\u{1f77a}'), + ('\u{1f7da}', '\u{1f7df}'), + ('\u{1f7ec}', '\u{1f7ef}'), + ('\u{1f7f1}', '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}', '\u{1f84f}'), ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8af}'), ('\u{1f8b2}', '\u{1f8ff}'), - ('\u{1f979}', '\u{1f979}'), - ('\u{1f9cc}', '\u{1f9cc}'), ('\u{1fa54}', '\u{1fa5f}'), ('\u{1fa6e}', '\u{1fa6f}'), - ('\u{1fa75}', '\u{1fa77}'), - ('\u{1fa7b}', '\u{1fa7f}'), - ('\u{1fa87}', '\u{1fa8f}'), - ('\u{1faa9}', '\u{1faaf}'), - ('\u{1fab7}', '\u{1fabf}'), - ('\u{1fac3}', '\u{1facf}'), - ('\u{1fad7}', '\u{1faff}'), + ('\u{1fa7d}', '\u{1fa7f}'), + ('\u{1fa89}', '\u{1fa8f}'), + ('\u{1fabe}', '\u{1fabe}'), + ('\u{1fac6}', '\u{1facd}'), + ('\u{1fadc}', '\u{1fadf}'), + ('\u{1fae9}', '\u{1faef}'), + ('\u{1faf9}', '\u{1faff}'), ('\u{1fb93}', '\u{1fb93}'), ('\u{1fbcb}', '\u{1fbef}'), ('\u{1fbfa}', '\u{1ffff}'), - ('\u{2a6de}', '\u{2a6ff}'), - ('\u{2b735}', '\u{2b73f}'), + ('\u{2a6e0}', '\u{2a6ff}'), + ('\u{2b73a}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'), ('\u{2ebe1}', '\u{2f7ff}'), ('\u{2fa1e}', '\u{2ffff}'), - ('\u{3134b}', '\u{e00ff}'), + ('\u{3134b}', '\u{3134f}'), + ('\u{323b0}', '\u{e00ff}'), ('\u{e01f0}', '\u{10ffff}'), ]; @@ -3433,8 +3594,9 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('ࠀ', 'ࠕ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('ࢠ', 'ࣈ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'), ('ॐ', 'ॐ'), @@ -3499,6 +3661,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('ప', 'హ'), ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'), ('ಅ', 'ಌ'), @@ -3507,10 +3670,10 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ಽ'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', 'ೡ'), ('ೱ', 'ೲ'), - ('\u{d04}', 'ഌ'), + ('ഄ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ഽ'), @@ -3571,9 +3734,8 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛱ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', 'ᜑ'), - ('ᜠ', 'ᜱ'), + ('ᜀ', 'ᜑ'), + ('ᜟ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -3593,7 +3755,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('ᨀ', 'ᨖ'), ('ᨠ', 'ᩔ'), ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭋ'), + ('ᭅ', 'ᭌ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), @@ -3623,11 +3785,10 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('ヿ', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('ㇰ', 'ㇿ'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), - ('ꀀ', 'ꀔ'), + ('㐀', '䶿'), + ('一', 'ꀔ'), ('ꀖ', 'ꒌ'), ('ꓐ', 'ꓷ'), ('ꔀ', 'ꘋ'), @@ -3750,19 +3911,22 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐴀', '𐴣'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('𐺀', '𐺩'), + ('𐺰', '𐺱'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), - ('\u{10fb0}', '\u{10fc4}'), + ('𐽰', '𐾁'), + ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀃', '𑀷'), + ('𑁱', '𑁲'), + ('𑁵', '𑁵'), ('𑂃', '𑂯'), ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅄', '𑅄'), - ('\u{11147}', '\u{11147}'), + ('𑅇', '𑅇'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'), @@ -3771,6 +3935,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'), + ('𑈿', '𑉀'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -3788,7 +3953,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𑍝', '𑍡'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), - ('𑑟', '\u{11461}'), + ('𑑟', '𑑡'), ('𑒀', '𑒯'), ('𑓄', '𑓅'), ('𑓇', '𑓇'), @@ -3799,14 +3964,15 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𑚀', '𑚪'), ('𑚸', '𑚸'), ('𑜀', '𑜚'), + ('𑝀', '𑝆'), ('𑠀', '𑠫'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤯'), + ('𑤿', '𑤿'), + ('𑥁', '𑥁'), ('𑦠', '𑦧'), ('𑦪', '𑧐'), ('𑧡', '𑧡'), @@ -3817,7 +3983,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𑩐', '𑩐'), ('𑩜', '𑪉'), ('𑪝', '𑪝'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -3831,13 +3997,19 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𑵪', '𑶉'), ('𑶘', '𑶘'), ('𑻠', '𑻲'), - ('\u{11fb0}', '\u{11fb0}'), + ('𑼂', '𑼂'), + ('𑼄', '𑼐'), + ('𑼒', '𑼳'), + ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('𓑁', '𓑆'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), + ('𖩰', '𖪾'), ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭣', '𖭷'), @@ -3845,19 +4017,28 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𖼀', '𖽊'), ('𖽐', '𖽐'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('𛀀', '𛄞'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), + ('𝼊', '𝼊'), ('𞄀', '𞄬'), ('𞅎', '𞅎'), + ('𞊐', '𞊭'), ('𞋀', '𞋫'), + ('𞓐', '𞓪'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), @@ -3892,13 +4073,14 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; pub const OTHER_NUMBER: &'static [(char, char)] = &[ @@ -3955,7 +4137,7 @@ pub const OTHER_NUMBER: &'static [(char, char)] = &[ ('𐹠', '𐹾'), ('𐼝', '𐼦'), ('𐽑', '𐽔'), - ('\u{10fc5}', '\u{10fcb}'), + ('𐿅', '𐿋'), ('𑁒', '𑁥'), ('𑇡', '𑇴'), ('𑜺', '𑜻'), @@ -3964,6 +4146,7 @@ pub const OTHER_NUMBER: &'static [(char, char)] = &[ ('𑿀', '𑿔'), ('𖭛', '𖭡'), ('𖺀', '𖺖'), + ('𝋀', '𝋓'), ('𝋠', '𝋳'), ('𝍠', '𝍸'), ('𞣇', '𞣏'), @@ -3999,7 +4182,7 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('؉', '؊'), ('،', '؍'), ('؛', '؛'), - ('؞', '؟'), + ('؝', '؟'), ('٪', '٭'), ('۔', '۔'), ('܀', '܍'), @@ -4036,6 +4219,7 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('᪠', '᪦'), ('᪨', '᪭'), ('᭚', '᭠'), + ('᭽', '᭾'), ('᯼', '᯿'), ('᰻', '᰿'), ('᱾', '᱿'), @@ -4064,7 +4248,7 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('⸼', '⸿'), ('⹁', '⹁'), ('⹃', '⹏'), - ('\u{2e52}', '\u{2e52}'), + ('⹒', '⹔'), ('、', '〃'), ('〽', '〽'), ('・', '・'), @@ -4118,6 +4302,7 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('𐬹', '𐬿'), ('𐮙', '𐮜'), ('𐽕', '𐽙'), + ('𐾆', '𐾉'), ('𑁇', '𑁍'), ('𑂻', '𑂼'), ('𑂾', '𑃁'), @@ -4130,24 +4315,28 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('𑈸', '𑈽'), ('𑊩', '𑊩'), ('𑑋', '𑑏'), - ('\u{1145a}', '𑑛'), + ('𑑚', '𑑛'), ('𑑝', '𑑝'), ('𑓆', '𑓆'), ('𑗁', '𑗗'), ('𑙁', '𑙃'), ('𑙠', '𑙬'), + ('𑚹', '𑚹'), ('𑜼', '𑜾'), ('𑠻', '𑠻'), - ('\u{11944}', '\u{11946}'), + ('𑥄', '𑥆'), ('𑧢', '𑧢'), ('𑨿', '𑩆'), ('𑪚', '𑪜'), ('𑪞', '𑪢'), + ('𑬀', '𑬉'), ('𑱁', '𑱅'), ('𑱰', '𑱱'), ('𑻷', '𑻸'), + ('𑽃', '𑽏'), ('𑿿', '𑿿'), ('𒑰', '𒑴'), + ('𒿱', '𒿲'), ('𖩮', '𖩯'), ('𖫵', '𖫵'), ('𖬷', '𖬻'), @@ -4240,9 +4429,9 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('⭅', '⭆'), ('⭍', '⭳'), ('⭶', '⮕'), - ('\u{2b97}', '⯿'), + ('⮗', '⯿'), ('⳥', '⳪'), - ('\u{2e50}', '\u{2e51}'), + ('⹐', '⹑'), ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), @@ -4267,7 +4456,9 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('꠶', '꠷'), ('꠹', '꠹'), ('꩷', '꩹'), - ('﷽', '﷽'), + ('﵀', '﵏'), + ('﷏', '﷏'), + ('﷽', '﷿'), ('¦', '¦'), ('│', '│'), ('■', '○'), @@ -4275,7 +4466,7 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('𐄷', '𐄿'), ('𐅹', '𐆉'), ('𐆌', '𐆎'), - ('𐆐', '\u{1019c}'), + ('𐆐', '𐆜'), ('𐆠', '𐆠'), ('𐇐', '𐇼'), ('𐡷', '𐡸'), @@ -4286,13 +4477,14 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('𖬼', '𖬿'), ('𖭅', '𖭅'), ('𛲜', '𛲜'), + ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'), ('𝅪', '𝅬'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), - ('𝆮', '𝇨'), + ('𝆮', '𝇪'), ('𝈀', '𝉁'), ('𝉅', '𝉅'), ('𝌀', '𝍖'), @@ -4310,38 +4502,37 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('🂱', '🂿'), ('🃁', '🃏'), ('🃑', '🃵'), - ('\u{1f10d}', '\u{1f1ad}'), + ('🄍', '🆭'), ('🇦', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉐', '🉑'), ('🉠', '🉥'), ('🌀', '🏺'), - ('🐀', '\u{1f6d7}'), - ('🛠', '🛬'), - ('🛰', '\u{1f6fc}'), - ('🜀', '🝳'), - ('🞀', '🟘'), + ('🐀', '🛗'), + ('🛜', '🛬'), + ('🛰', '🛼'), + ('🜀', '🝶'), + ('🝻', '🟙'), ('🟠', '🟫'), + ('🟰', '🟰'), ('🠀', '🠋'), ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('🤀', '\u{1f978}'), - ('🥺', '\u{1f9cb}'), - ('🧍', '🩓'), + ('🢰', '🢱'), + ('🤀', '🩓'), ('🩠', '🩭'), - ('🩰', '\u{1fa74}'), - ('🩸', '🩺'), - ('🪀', '\u{1fa86}'), - ('🪐', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), + ('🩰', '🩼'), + ('🪀', '🪈'), + ('🪐', '🪽'), + ('🪿', '🫅'), + ('🫎', '🫛'), + ('🫠', '🫨'), + ('🫰', '🫸'), + ('🬀', '🮒'), + ('🮔', '🯊'), ]; pub const PARAGRAPH_SEPARATOR: &'static [(char, char)] = @@ -4381,7 +4572,7 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('؉', '؊'), ('،', '؍'), ('؛', '؛'), - ('؞', '؟'), + ('؝', '؟'), ('٪', '٭'), ('۔', '۔'), ('܀', '܍'), @@ -4420,6 +4611,7 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('᪠', '᪦'), ('᪨', '᪭'), ('᭚', '᭠'), + ('᭽', '᭾'), ('᯼', '᯿'), ('᰻', '᰿'), ('᱾', '᱿'), @@ -4444,7 +4636,7 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('⵰', '⵰'), ('⸀', '⸮'), ('⸰', '⹏'), - ('\u{2e52}', '\u{2e52}'), + ('⹒', '⹝'), ('、', '〃'), ('〈', '】'), ('〔', '〟'), @@ -4498,8 +4690,9 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('𐫰', '𐫶'), ('𐬹', '𐬿'), ('𐮙', '𐮜'), - ('\u{10ead}', '\u{10ead}'), + ('𐺭', '𐺭'), ('𐽕', '𐽙'), + ('𐾆', '𐾉'), ('𑁇', '𑁍'), ('𑂻', '𑂼'), ('𑂾', '𑃁'), @@ -4512,24 +4705,28 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('𑈸', '𑈽'), ('𑊩', '𑊩'), ('𑑋', '𑑏'), - ('\u{1145a}', '𑑛'), + ('𑑚', '𑑛'), ('𑑝', '𑑝'), ('𑓆', '𑓆'), ('𑗁', '𑗗'), ('𑙁', '𑙃'), ('𑙠', '𑙬'), + ('𑚹', '𑚹'), ('𑜼', '𑜾'), ('𑠻', '𑠻'), - ('\u{11944}', '\u{11946}'), + ('𑥄', '𑥆'), ('𑧢', '𑧢'), ('𑨿', '𑩆'), ('𑪚', '𑪜'), ('𑪞', '𑪢'), + ('𑬀', '𑬉'), ('𑱁', '𑱅'), ('𑱰', '𑱱'), ('𑻷', '𑻸'), + ('𑽃', '𑽏'), ('𑿿', '𑿿'), ('𒑰', '𒑴'), + ('𒿱', '𒿲'), ('𖩮', '𖩯'), ('𖫵', '𖫵'), ('𖬷', '𖬻'), @@ -4598,6 +4795,7 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('ೇ', 'ೈ'), ('ೊ', 'ೋ'), ('\u{cd5}', '\u{cd6}'), + ('ೳ', 'ೳ'), ('ം', 'ഃ'), ('\u{d3e}', 'ീ'), ('െ', 'ൈ'), @@ -4620,6 +4818,8 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('ႇ', 'ႌ'), ('ႏ', 'ႏ'), ('ႚ', 'ႜ'), + ('᜕', '᜕'), + ('᜴', '᜴'), ('ា', 'ា'), ('ើ', 'ៅ'), ('ះ', 'ៈ'), @@ -4682,7 +4882,7 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('𑆂', '𑆂'), ('𑆳', '𑆵'), ('𑆿', '𑇀'), - ('\u{111ce}', '\u{111ce}'), + ('𑇎', '𑇎'), ('𑈬', '𑈮'), ('𑈲', '𑈳'), ('𑈵', '𑈵'), @@ -4714,11 +4914,11 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('𑜦', '𑜦'), ('𑠬', '𑠮'), ('𑠸', '𑠸'), - ('\u{11930}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193d}', '\u{1193d}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11942}'), + ('\u{11930}', '𑤵'), + ('𑤷', '𑤸'), + ('𑤽', '𑤽'), + ('𑥀', '𑥀'), + ('𑥂', '𑥂'), ('𑧑', '𑧓'), ('𑧜', '𑧟'), ('𑧤', '𑧤'), @@ -4734,8 +4934,12 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('𑶓', '𑶔'), ('𑶖', '𑶖'), ('𑻵', '𑻶'), + ('𑼃', '𑼃'), + ('𑼴', '𑼵'), + ('𑼾', '𑼿'), + ('𑽁', '𑽁'), ('𖽑', '𖾇'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('\u{1d165}', '𝅦'), ('𝅭', '\u{1d172}'), ]; @@ -4774,6 +4978,7 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('۽', '۾'), ('߶', '߶'), ('߾', '߿'), + ('࢈', '࢈'), ('৲', '৳'), ('৺', '৻'), ('૱', '૱'), @@ -4812,7 +5017,7 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('⁒', '⁒'), ('⁺', '⁼'), ('₊', '₌'), - ('₠', '₿'), + ('₠', '⃀'), ('℀', '℁'), ('℃', '℆'), ('℈', '℉'), @@ -4841,9 +5046,9 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('⧜', '⧻'), ('⧾', '⭳'), ('⭶', '⮕'), - ('\u{2b97}', '⯿'), + ('⮗', '⯿'), ('⳥', '⳪'), - ('\u{2e50}', '\u{2e51}'), + ('⹐', '⹑'), ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), @@ -4872,10 +5077,12 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('꠶', '꠹'), ('꩷', '꩹'), ('꭛', '꭛'), - ('\u{ab6a}', '\u{ab6b}'), + ('꭪', '꭫'), ('﬩', '﬩'), - ('﮲', '﯁'), - ('﷼', '﷽'), + ('﮲', '﯂'), + ('﵀', '﵏'), + ('﷏', '﷏'), + ('﷼', '﷿'), ('﹢', '﹢'), ('﹤', '﹦'), ('﹩', '﹩'), @@ -4892,7 +5099,7 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('𐄷', '𐄿'), ('𐅹', '𐆉'), ('𐆌', '𐆎'), - ('𐆐', '\u{1019c}'), + ('𐆐', '𐆜'), ('𐆠', '𐆠'), ('𐇐', '𐇼'), ('𐡷', '𐡸'), @@ -4902,13 +5109,14 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('𖬼', '𖬿'), ('𖭅', '𖭅'), ('𛲜', '𛲜'), + ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'), ('𝅪', '𝅬'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), - ('𝆮', '𝇨'), + ('𝆮', '𝇪'), ('𝈀', '𝉁'), ('𝉅', '𝉅'), ('𝌀', '𝍖'), @@ -4939,37 +5147,36 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('🂱', '🂿'), ('🃁', '🃏'), ('🃑', '🃵'), - ('\u{1f10d}', '\u{1f1ad}'), + ('🄍', '🆭'), ('🇦', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉐', '🉑'), ('🉠', '🉥'), - ('🌀', '\u{1f6d7}'), - ('🛠', '🛬'), - ('🛰', '\u{1f6fc}'), - ('🜀', '🝳'), - ('🞀', '🟘'), + ('🌀', '🛗'), + ('🛜', '🛬'), + ('🛰', '🛼'), + ('🜀', '🝶'), + ('🝻', '🟙'), ('🟠', '🟫'), + ('🟰', '🟰'), ('🠀', '🠋'), ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('🤀', '\u{1f978}'), - ('🥺', '\u{1f9cb}'), - ('🧍', '🩓'), + ('🢰', '🢱'), + ('🤀', '🩓'), ('🩠', '🩭'), - ('🩰', '\u{1fa74}'), - ('🩸', '🩺'), - ('🪀', '\u{1fa86}'), - ('🪐', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), + ('🩰', '🩼'), + ('🪀', '🪈'), + ('🪐', '🪽'), + ('🪿', '🫅'), + ('🫎', '🫛'), + ('🫠', '🫨'), + ('🫰', '🫸'), + ('🬀', '🮒'), + ('🮔', '🯊'), ]; pub const TITLECASE_LETTER: &'static [(char, char)] = &[ @@ -4998,7 +5205,6 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{5c8}', '\u{5cf}'), ('\u{5eb}', '\u{5ee}'), ('\u{5f5}', '\u{5ff}'), - ('\u{61d}', '\u{61d}'), ('\u{70e}', '\u{70e}'), ('\u{74b}', '\u{74c}'), ('\u{7b2}', '\u{7bf}'), @@ -5007,9 +5213,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{83f}', '\u{83f}'), ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'), - ('\u{86b}', '\u{89f}'), - ('\u{8b5}', '\u{8b5}'), - ('\u{8c8}', '\u{8d2}'), + ('\u{86b}', '\u{86f}'), + ('\u{88f}', '\u{88f}'), + ('\u{892}', '\u{897}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'), ('\u{991}', '\u{992}'), @@ -5087,12 +5293,13 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{c0d}', '\u{c0d}'), ('\u{c11}', '\u{c11}'), ('\u{c29}', '\u{c29}'), - ('\u{c3a}', '\u{c3c}'), + ('\u{c3a}', '\u{c3b}'), ('\u{c45}', '\u{c45}'), ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'), ('\u{c57}', '\u{c57}'), - ('\u{c5b}', '\u{c5f}'), + ('\u{c5b}', '\u{c5c}'), + ('\u{c5e}', '\u{c5f}'), ('\u{c64}', '\u{c65}'), ('\u{c70}', '\u{c76}'), ('\u{c8d}', '\u{c8d}'), @@ -5103,11 +5310,11 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'), ('\u{cce}', '\u{cd4}'), - ('\u{cd7}', '\u{cdd}'), + ('\u{cd7}', '\u{cdc}'), ('\u{cdf}', '\u{cdf}'), ('\u{ce4}', '\u{ce5}'), ('\u{cf0}', '\u{cf0}'), - ('\u{cf3}', '\u{cff}'), + ('\u{cf4}', '\u{cff}'), ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'), ('\u{d45}', '\u{d45}'), @@ -5137,7 +5344,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'), ('\u{ec7}', '\u{ec7}'), - ('\u{ece}', '\u{ecf}'), + ('\u{ecf}', '\u{ecf}'), ('\u{eda}', '\u{edb}'), ('\u{ee0}', '\u{eff}'), ('\u{f48}', '\u{f48}'), @@ -5171,8 +5378,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{13fe}', '\u{13ff}'), ('\u{169d}', '\u{169f}'), ('\u{16f9}', '\u{16ff}'), - ('\u{170d}', '\u{170d}'), - ('\u{1715}', '\u{171f}'), + ('\u{1716}', '\u{171e}'), ('\u{1737}', '\u{173f}'), ('\u{1754}', '\u{175f}'), ('\u{176d}', '\u{176d}'), @@ -5181,7 +5387,6 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{17de}', '\u{17df}'), ('\u{17ea}', '\u{17ef}'), ('\u{17fa}', '\u{17ff}'), - ('\u{180f}', '\u{180f}'), ('\u{181a}', '\u{181f}'), ('\u{1879}', '\u{187f}'), ('\u{18ab}', '\u{18af}'), @@ -5201,9 +5406,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1a8a}', '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'), ('\u{1aae}', '\u{1aaf}'), - ('\u{1ac1}', '\u{1aff}'), - ('\u{1b4c}', '\u{1b4f}'), - ('\u{1b7d}', '\u{1b7f}'), + ('\u{1acf}', '\u{1aff}'), + ('\u{1b4d}', '\u{1b4f}'), + ('\u{1b7f}', '\u{1b7f}'), ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'), ('\u{1c4a}', '\u{1c4c}'), @@ -5211,7 +5416,6 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1cbb}', '\u{1cbc}'), ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfb}', '\u{1cff}'), - ('\u{1dfa}', '\u{1dfa}'), ('\u{1f16}', '\u{1f17}'), ('\u{1f1e}', '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'), @@ -5232,15 +5436,13 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{2072}', '\u{2073}'), ('\u{208f}', '\u{208f}'), ('\u{209d}', '\u{209f}'), - ('\u{20c0}', '\u{20cf}'), + ('\u{20c1}', '\u{20cf}'), ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'), ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'), ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b96}'), - ('\u{2c2f}', '\u{2c2f}'), - ('\u{2c5f}', '\u{2c5f}'), ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'), ('\u{2d28}', '\u{2d2c}'), @@ -5256,7 +5458,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{2dcf}', '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'), ('\u{2ddf}', '\u{2ddf}'), - ('\u{2e53}', '\u{2e7f}'), + ('\u{2e5e}', '\u{2e7f}'), ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), @@ -5268,13 +5470,14 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{318f}', '\u{318f}'), ('\u{31e4}', '\u{31ef}'), ('\u{321f}', '\u{321f}'), - ('\u{9ffd}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'), - ('\u{a7c0}', '\u{a7c1}'), - ('\u{a7cb}', '\u{a7f4}'), + ('\u{a7cb}', '\u{a7cf}'), + ('\u{a7d2}', '\u{a7d2}'), + ('\u{a7d4}', '\u{a7d4}'), + ('\u{a7da}', '\u{a7f1}'), ('\u{a82d}', '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'), @@ -5310,11 +5513,10 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{fb3f}', '\u{fb3f}'), ('\u{fb42}', '\u{fb42}'), ('\u{fb45}', '\u{fb45}'), - ('\u{fbc2}', '\u{fbd2}'), - ('\u{fd40}', '\u{fd4f}'), + ('\u{fbc3}', '\u{fbd2}'), ('\u{fd90}', '\u{fd91}'), - ('\u{fdc8}', '\u{fdef}'), - ('\u{fdfe}', '\u{fdff}'), + ('\u{fdc8}', '\u{fdce}'), + ('\u{fdd0}', '\u{fdef}'), ('\u{fe1a}', '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'), ('\u{fe67}', '\u{fe67}'), @@ -5358,10 +5560,20 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{104fc}', '\u{104ff}'), ('\u{10528}', '\u{1052f}'), ('\u{10564}', '\u{1056e}'), - ('\u{10570}', '\u{105ff}'), + ('\u{1057b}', '\u{1057b}'), + ('\u{1058b}', '\u{1058b}'), + ('\u{10593}', '\u{10593}'), + ('\u{10596}', '\u{10596}'), + ('\u{105a2}', '\u{105a2}'), + ('\u{105b2}', '\u{105b2}'), + ('\u{105ba}', '\u{105ba}'), + ('\u{105bd}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'), - ('\u{10768}', '\u{107ff}'), + ('\u{10768}', '\u{1077f}'), + ('\u{10786}', '\u{10786}'), + ('\u{107b1}', '\u{107b1}'), + ('\u{107bb}', '\u{107ff}'), ('\u{10806}', '\u{10807}'), ('\u{10809}', '\u{10809}'), ('\u{10836}', '\u{10836}'), @@ -5402,14 +5614,15 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{10e7f}', '\u{10e7f}'), ('\u{10eaa}', '\u{10eaa}'), ('\u{10eae}', '\u{10eaf}'), - ('\u{10eb2}', '\u{10eff}'), + ('\u{10eb2}', '\u{10efc}'), ('\u{10f28}', '\u{10f2f}'), - ('\u{10f5a}', '\u{10faf}'), + ('\u{10f5a}', '\u{10f6f}'), + ('\u{10f8a}', '\u{10faf}'), ('\u{10fcc}', '\u{10fdf}'), ('\u{10ff7}', '\u{10fff}'), ('\u{1104e}', '\u{11051}'), - ('\u{11070}', '\u{1107e}'), - ('\u{110c2}', '\u{110cc}'), + ('\u{11076}', '\u{1107e}'), + ('\u{110c3}', '\u{110cc}'), ('\u{110ce}', '\u{110cf}'), ('\u{110e9}', '\u{110ef}'), ('\u{110fa}', '\u{110ff}'), @@ -5419,7 +5632,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{111e0}', '\u{111e0}'), ('\u{111f5}', '\u{111ff}'), ('\u{11212}', '\u{11212}'), - ('\u{1123f}', '\u{1127f}'), + ('\u{11242}', '\u{1127f}'), ('\u{11287}', '\u{11287}'), ('\u{11289}', '\u{11289}'), ('\u{1128e}', '\u{1128e}'), @@ -5451,11 +5664,11 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1165f}'), ('\u{1166d}', '\u{1167f}'), - ('\u{116b9}', '\u{116bf}'), + ('\u{116ba}', '\u{116bf}'), ('\u{116ca}', '\u{116ff}'), ('\u{1171b}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'), - ('\u{11740}', '\u{117ff}'), + ('\u{11747}', '\u{117ff}'), ('\u{1183c}', '\u{1189f}'), ('\u{118f3}', '\u{118fe}'), ('\u{11907}', '\u{11908}'), @@ -5470,8 +5683,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{119d8}', '\u{119d9}'), ('\u{119e5}', '\u{119ff}'), ('\u{11a48}', '\u{11a4f}'), - ('\u{11aa3}', '\u{11abf}'), - ('\u{11af9}', '\u{11bff}'), + ('\u{11aa3}', '\u{11aaf}'), + ('\u{11af9}', '\u{11aff}'), + ('\u{11b0a}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'), ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'), @@ -5492,20 +5706,24 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{11d92}', '\u{11d92}'), ('\u{11d99}', '\u{11d9f}'), ('\u{11daa}', '\u{11edf}'), - ('\u{11ef9}', '\u{11faf}'), + ('\u{11ef9}', '\u{11eff}'), + ('\u{11f11}', '\u{11f11}'), + ('\u{11f3b}', '\u{11f3d}'), + ('\u{11f5a}', '\u{11faf}'), ('\u{11fb1}', '\u{11fbf}'), ('\u{11ff2}', '\u{11ffe}'), ('\u{1239a}', '\u{123ff}'), ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'), - ('\u{12544}', '\u{12fff}'), - ('\u{1342f}', '\u{1342f}'), - ('\u{13439}', '\u{143ff}'), + ('\u{12544}', '\u{12f8f}'), + ('\u{12ff3}', '\u{12fff}'), + ('\u{13456}', '\u{143ff}'), ('\u{14647}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'), - ('\u{16a70}', '\u{16acf}'), + ('\u{16abf}', '\u{16abf}'), + ('\u{16aca}', '\u{16acf}'), ('\u{16aee}', '\u{16aef}'), ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'), @@ -5521,20 +5739,29 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{16ff2}', '\u{16fff}'), ('\u{187f8}', '\u{187ff}'), ('\u{18cd6}', '\u{18cff}'), - ('\u{18d09}', '\u{1afff}'), - ('\u{1b11f}', '\u{1b14f}'), - ('\u{1b153}', '\u{1b163}'), + ('\u{18d09}', '\u{1afef}'), + ('\u{1aff4}', '\u{1aff4}'), + ('\u{1affc}', '\u{1affc}'), + ('\u{1afff}', '\u{1afff}'), + ('\u{1b123}', '\u{1b131}'), + ('\u{1b133}', '\u{1b14f}'), + ('\u{1b153}', '\u{1b154}'), + ('\u{1b156}', '\u{1b163}'), ('\u{1b168}', '\u{1b16f}'), ('\u{1b2fc}', '\u{1bbff}'), ('\u{1bc6b}', '\u{1bc6f}'), ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'), - ('\u{1bca4}', '\u{1cfff}'), + ('\u{1bca4}', '\u{1ceff}'), + ('\u{1cf2e}', '\u{1cf2f}'), + ('\u{1cf47}', '\u{1cf4f}'), + ('\u{1cfc4}', '\u{1cfff}'), ('\u{1d0f6}', '\u{1d0ff}'), ('\u{1d127}', '\u{1d128}'), - ('\u{1d1e9}', '\u{1d1ff}'), - ('\u{1d246}', '\u{1d2df}'), + ('\u{1d1eb}', '\u{1d1ff}'), + ('\u{1d246}', '\u{1d2bf}'), + ('\u{1d2d4}', '\u{1d2df}'), ('\u{1d2f4}', '\u{1d2ff}'), ('\u{1d357}', '\u{1d35f}'), ('\u{1d379}', '\u{1d3ff}'), @@ -5560,18 +5787,28 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1d7cc}', '\u{1d7cd}'), ('\u{1da8c}', '\u{1da9a}'), ('\u{1daa0}', '\u{1daa0}'), - ('\u{1dab0}', '\u{1dfff}'), + ('\u{1dab0}', '\u{1deff}'), + ('\u{1df1f}', '\u{1df24}'), + ('\u{1df2b}', '\u{1dfff}'), ('\u{1e007}', '\u{1e007}'), ('\u{1e019}', '\u{1e01a}'), ('\u{1e022}', '\u{1e022}'), ('\u{1e025}', '\u{1e025}'), - ('\u{1e02b}', '\u{1e0ff}'), + ('\u{1e02b}', '\u{1e02f}'), + ('\u{1e06e}', '\u{1e08e}'), + ('\u{1e090}', '\u{1e0ff}'), ('\u{1e12d}', '\u{1e12f}'), ('\u{1e13e}', '\u{1e13f}'), ('\u{1e14a}', '\u{1e14d}'), - ('\u{1e150}', '\u{1e2bf}'), + ('\u{1e150}', '\u{1e28f}'), + ('\u{1e2af}', '\u{1e2bf}'), ('\u{1e2fa}', '\u{1e2fe}'), - ('\u{1e300}', '\u{1e7ff}'), + ('\u{1e300}', '\u{1e4cf}'), + ('\u{1e4fa}', '\u{1e7df}'), + ('\u{1e7e7}', '\u{1e7e7}'), + ('\u{1e7ec}', '\u{1e7ec}'), + ('\u{1e7ef}', '\u{1e7ef}'), + ('\u{1e7ff}', '\u{1e7ff}'), ('\u{1e8c5}', '\u{1e8c6}'), ('\u{1e8d7}', '\u{1e8ff}'), ('\u{1e94c}', '\u{1e94f}'), @@ -5625,39 +5862,39 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1f249}', '\u{1f24f}'), ('\u{1f252}', '\u{1f25f}'), ('\u{1f266}', '\u{1f2ff}'), - ('\u{1f6d8}', '\u{1f6df}'), + ('\u{1f6d8}', '\u{1f6db}'), ('\u{1f6ed}', '\u{1f6ef}'), ('\u{1f6fd}', '\u{1f6ff}'), - ('\u{1f774}', '\u{1f77f}'), - ('\u{1f7d9}', '\u{1f7df}'), - ('\u{1f7ec}', '\u{1f7ff}'), + ('\u{1f777}', '\u{1f77a}'), + ('\u{1f7da}', '\u{1f7df}'), + ('\u{1f7ec}', '\u{1f7ef}'), + ('\u{1f7f1}', '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}', '\u{1f84f}'), ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8af}'), ('\u{1f8b2}', '\u{1f8ff}'), - ('\u{1f979}', '\u{1f979}'), - ('\u{1f9cc}', '\u{1f9cc}'), ('\u{1fa54}', '\u{1fa5f}'), ('\u{1fa6e}', '\u{1fa6f}'), - ('\u{1fa75}', '\u{1fa77}'), - ('\u{1fa7b}', '\u{1fa7f}'), - ('\u{1fa87}', '\u{1fa8f}'), - ('\u{1faa9}', '\u{1faaf}'), - ('\u{1fab7}', '\u{1fabf}'), - ('\u{1fac3}', '\u{1facf}'), - ('\u{1fad7}', '\u{1faff}'), + ('\u{1fa7d}', '\u{1fa7f}'), + ('\u{1fa89}', '\u{1fa8f}'), + ('\u{1fabe}', '\u{1fabe}'), + ('\u{1fac6}', '\u{1facd}'), + ('\u{1fadc}', '\u{1fadf}'), + ('\u{1fae9}', '\u{1faef}'), + ('\u{1faf9}', '\u{1faff}'), ('\u{1fb93}', '\u{1fb93}'), ('\u{1fbcb}', '\u{1fbef}'), ('\u{1fbfa}', '\u{1ffff}'), - ('\u{2a6de}', '\u{2a6ff}'), - ('\u{2b735}', '\u{2b73f}'), + ('\u{2a6e0}', '\u{2a6ff}'), + ('\u{2b73a}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'), ('\u{2ebe1}', '\u{2f7ff}'), ('\u{2fa1e}', '\u{2ffff}'), - ('\u{3134b}', '\u{e0000}'), + ('\u{3134b}', '\u{3134f}'), + ('\u{323b0}', '\u{e0000}'), ('\u{e0002}', '\u{e001f}'), ('\u{e0080}', '\u{e00ff}'), ('\u{e01f0}', '\u{effff}'), @@ -6097,7 +6334,7 @@ pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ ('ℾ', 'ℿ'), ('ⅅ', 'ⅅ'), ('Ↄ', 'Ↄ'), - ('Ⰰ', 'Ⱞ'), + ('Ⰰ', 'Ⱟ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'), ('Ⱨ', 'Ⱨ'), @@ -6262,13 +6499,21 @@ pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ ('Ꞻ', 'Ꞻ'), ('Ꞽ', 'Ꞽ'), ('Ꞿ', 'Ꞿ'), + ('Ꟁ', 'Ꟁ'), ('Ꟃ', 'Ꟃ'), - ('Ꞔ', '\u{a7c7}'), - ('\u{a7c9}', '\u{a7c9}'), - ('\u{a7f5}', '\u{a7f5}'), + ('Ꞔ', 'Ꟈ'), + ('Ꟊ', 'Ꟊ'), + ('Ꟑ', 'Ꟑ'), + ('Ꟗ', 'Ꟗ'), + ('Ꟙ', 'Ꟙ'), + ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), ('𐒰', '𐓓'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), diff --git a/src/unicode_tables/grapheme_cluster_break.rs b/src/unicode_tables/grapheme_cluster_break.rs index 7df9d2b..294dfbd 100644 --- a/src/unicode_tables/grapheme_cluster_break.rs +++ b/src/unicode_tables/grapheme_cluster_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate grapheme-cluster-break ucd-13.0.0 --chars +// ucd-generate grapheme-cluster-break ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("CR", CR), @@ -25,7 +25,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ pub const CR: &'static [(char, char)] = &[('\r', '\r')]; pub const CONTROL: &'static [(char, char)] = &[ - ('\u{0}', '\t'), + ('\0', '\t'), ('\u{b}', '\u{c}'), ('\u{e}', '\u{1f}'), ('\u{7f}', '\u{9f}'), @@ -38,7 +38,7 @@ pub const CONTROL: &'static [(char, char)] = &[ ('\u{2060}', '\u{206f}'), ('\u{feff}', '\u{feff}'), ('\u{fff0}', '\u{fffb}'), - ('\u{13430}', '\u{13438}'), + ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), ('\u{e0000}', '\u{e001f}'), @@ -71,7 +71,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), @@ -116,6 +117,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c00}'), ('\u{c04}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -147,7 +149,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -171,7 +173,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1732}', '\u{1733}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -180,6 +182,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{17c9}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', '\u{1922}'), @@ -195,7 +198,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1a65}', '\u{1a6c}'), ('\u{1a73}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b3a}'), ('\u{1b3c}', '\u{1b3c}'), @@ -217,8 +220,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{200c}', '\u{200c}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), @@ -276,12 +278,17 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '\u{11081}'), ('\u{110b3}', '\u{110b6}'), ('\u{110b9}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{1112b}'), ('\u{1112d}', '\u{11134}'), @@ -294,6 +301,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11234}', '\u{11234}'), ('\u{11236}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}', '\u{112ea}'), ('\u{11300}', '\u{11301}'), @@ -361,12 +369,20 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11d95}', '\u{11d95}'), ('\u{11d97}', '\u{11d97}'), ('\u{11ef3}', '\u{11ef4}'), + ('\u{11f00}', '\u{11f01}'), + ('\u{11f36}', '\u{11f3a}'), + ('\u{11f40}', '\u{11f40}'), + ('\u{11f42}', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d165}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d16e}', '\u{1d172}'), @@ -385,8 +401,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('🏻', '🏿'), @@ -1206,16 +1225,18 @@ pub const PREPEND: &'static [(char, char)] = &[ ('\u{600}', '\u{605}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), ('\u{8e2}', '\u{8e2}'), ('ൎ', 'ൎ'), ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), ('𑇂', '𑇃'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('𑤿', '𑤿'), + ('𑥁', '𑥁'), ('𑨺', '𑨺'), ('𑪄', '𑪉'), ('𑵆', '𑵆'), + ('𑼂', '𑼂'), ]; pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')]; @@ -1252,6 +1273,7 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('ೃ', 'ೄ'), ('ೇ', 'ೈ'), ('ೊ', 'ೋ'), + ('ೳ', 'ೳ'), ('ം', 'ഃ'), ('ി', 'ീ'), ('െ', 'ൈ'), @@ -1268,6 +1290,8 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('ျ', 'ြ'), ('ၖ', 'ၗ'), ('ႄ', 'ႄ'), + ('᜕', '᜕'), + ('᜴', '᜴'), ('ា', 'ា'), ('ើ', 'ៅ'), ('ះ', 'ៈ'), @@ -1324,7 +1348,7 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('𑆂', '𑆂'), ('𑆳', '𑆵'), ('𑆿', '𑇀'), - ('\u{111ce}', '\u{111ce}'), + ('𑇎', '𑇎'), ('𑈬', '𑈮'), ('𑈲', '𑈳'), ('𑈵', '𑈵'), @@ -1352,15 +1376,14 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('𑚬', '𑚬'), ('𑚮', '𑚯'), ('𑚶', '𑚶'), - ('𑜠', '𑜡'), ('𑜦', '𑜦'), ('𑠬', '𑠮'), ('𑠸', '𑠸'), - ('\u{11931}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193d}', '\u{1193d}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11942}'), + ('𑤱', '𑤵'), + ('𑤷', '𑤸'), + ('𑤽', '𑤽'), + ('𑥀', '𑥀'), + ('𑥂', '𑥂'), ('𑧑', '𑧓'), ('𑧜', '𑧟'), ('𑧤', '𑧤'), @@ -1376,8 +1399,12 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('𑶓', '𑶔'), ('𑶖', '𑶖'), ('𑻵', '𑻶'), + ('𑼃', '𑼃'), + ('𑼴', '𑼵'), + ('𑼾', '𑼿'), + ('𑽁', '𑽁'), ('𖽑', '𖾇'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('𝅦', '𝅦'), ('𝅭', '𝅭'), ]; diff --git a/src/unicode_tables/perl_decimal.rs b/src/unicode_tables/perl_decimal.rs index 2a09259..4f4c08a 100644 --- a/src/unicode_tables/perl_decimal.rs +++ b/src/unicode_tables/perl_decimal.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate general-category ucd-13.0.0 --chars --include decimalnumber +// ucd-generate general-category ucd-15.0.0 --chars --include decimalnumber // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[("Decimal_Number", DECIMAL_NUMBER)]; @@ -60,15 +60,18 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('𑛀', '𑛉'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), - ('\u{11950}', '\u{11959}'), + ('𑥐', '𑥙'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), + ('𑽐', '𑽙'), ('𖩠', '𖩩'), + ('𖫀', '𖫉'), ('𖭐', '𖭙'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), + ('𞓰', '𞓹'), ('𞥐', '𞥙'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('🯰', '🯹'), ]; diff --git a/src/unicode_tables/perl_space.rs b/src/unicode_tables/perl_space.rs index c112dd1..1741695 100644 --- a/src/unicode_tables/perl_space.rs +++ b/src/unicode_tables/perl_space.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-bool ucd-13.0.0 --chars --include whitespace +// ucd-generate property-bool ucd-15.0.0 --chars --include whitespace // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[("White_Space", WHITE_SPACE)]; diff --git a/src/unicode_tables/perl_word.rs b/src/unicode_tables/perl_word.rs index df9eac7..c1b66bd 100644 --- a/src/unicode_tables/perl_word.rs +++ b/src/unicode_tables/perl_word.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate perl-word ucd-13.0.0 --chars +// ucd-generate perl-word ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const PERL_WORD: &'static [(char, char)] = &[ ('0', '9'), @@ -57,9 +57,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ࠀ', '\u{82d}'), ('ࡀ', '\u{85b}'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), - ('\u{8d3}', '\u{8e1}'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('\u{898}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -143,11 +143,12 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), - ('ఽ', 'ౄ'), + ('\u{c3c}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', '\u{c63}'), ('౦', '౯'), ('ಀ', 'ಃ'), @@ -160,10 +161,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{cc6}', 'ೈ'), ('ೊ', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), ('೦', '೯'), - ('ೱ', 'ೲ'), + ('ೱ', 'ೳ'), ('\u{d00}', 'ഌ'), ('എ', 'ഐ'), ('ഒ', '\u{d44}'), @@ -196,7 +197,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ວ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('໐', '໙'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), @@ -242,9 +243,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', '\u{1714}'), - ('ᜠ', '\u{1734}'), + ('ᜀ', '᜕'), + ('ᜟ', '᜴'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -254,7 +254,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ៜ', '\u{17dd}'), ('០', '៩'), ('\u{180b}', '\u{180d}'), - ('᠐', '᠙'), + ('\u{180f}', '᠙'), ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢪ'), ('ᢰ', 'ᣵ'), @@ -272,8 +272,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{1a7f}', '᪉'), ('᪐', '᪙'), ('ᪧ', 'ᪧ'), - ('\u{1ab0}', '\u{1ac0}'), - ('\u{1b00}', 'ᭋ'), + ('\u{1ab0}', '\u{1ace}'), + ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '᯳'), @@ -285,8 +285,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', 'ᳺ'), - ('ᴀ', '\u{1df9}'), - ('\u{1dfb}', 'ἕ'), + ('ᴀ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), @@ -327,9 +326,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), ('Ⓐ', 'ⓩ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⳤ'), + ('Ⰰ', 'ⳤ'), ('Ⳬ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), @@ -358,11 +355,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ー', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('ㇰ', 'ㇿ'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), - ('ꀀ', 'ꒌ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘫ'), @@ -371,9 +367,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꠧ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), ('ꢀ', '\u{a8c5}'), @@ -400,7 +398,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), - ('ꭜ', '\u{ab69}'), + ('ꭜ', 'ꭩ'), ('ꭰ', 'ꯪ'), ('꯬', '\u{abed}'), ('꯰', '꯹'), @@ -462,9 +460,20 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), @@ -499,31 +508,33 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), - ('\u{10e80}', '\u{10ea9}'), + ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), - ('\u{10eb0}', '\u{10eb1}'), - ('𐼀', '𐼜'), + ('𐺰', '𐺱'), + ('\u{10efd}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), - ('\u{10fb0}', '\u{10fc4}'), + ('𐽰', '\u{10f85}'), + ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀀', '\u{11046}'), - ('𑁦', '𑁯'), + ('𑁦', '𑁵'), ('\u{1107f}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('𑃐', '𑃨'), ('𑃰', '𑃹'), ('\u{11100}', '\u{11134}'), ('𑄶', '𑄿'), - ('𑅄', '\u{11147}'), + ('𑅄', '𑅇'), ('𑅐', '\u{11173}'), ('𑅶', '𑅶'), ('\u{11180}', '𑇄'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', '𑇚'), + ('𑇎', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), + ('\u{1123e}', '\u{11241}'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -548,7 +559,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{11370}', '\u{11374}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), - ('\u{1145e}', '\u{11461}'), + ('\u{1145e}', '𑑡'), ('𑒀', '𑓅'), ('𑓇', '𑓇'), ('𑓐', '𑓙'), @@ -563,16 +574,17 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), + ('𑝀', '𑝆'), ('𑠀', '\u{1183a}'), ('𑢠', '𑣩'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤵'), + ('𑤷', '𑤸'), ('\u{1193b}', '\u{11943}'), - ('\u{11950}', '\u{11959}'), + ('𑥐', '𑥙'), ('𑦠', '𑦧'), ('𑦪', '\u{119d7}'), ('\u{119da}', '𑧡'), @@ -581,7 +593,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{11a47}', '\u{11a47}'), ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -603,15 +615,23 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑶓', '𑶘'), ('𑶠', '𑶩'), ('𑻠', '𑻶'), - ('\u{11fb0}', '\u{11fb0}'), + ('\u{11f00}', '𑼐'), + ('𑼒', '\u{11f3a}'), + ('𑼾', '\u{11f42}'), + ('𑽐', '𑽙'), + ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('\u{13440}', '\u{13455}'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), + ('𖩰', '𖪾'), + ('𖫀', '𖫉'), ('𖫐', '𖫭'), ('\u{16af0}', '\u{16af4}'), ('𖬀', '\u{16b36}'), @@ -625,12 +645,17 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('𛀀', '𛄞'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), @@ -638,6 +663,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('𝅭', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -681,16 +708,26 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), ('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅎'), + ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), + ('𞓐', '𞓹'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('\u{1e8d0}', '\u{1e8d6}'), ('𞤀', '𞥋'), @@ -731,13 +768,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉'), - ('\u{1fbf0}', '\u{1fbf9}'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('🯰', '🯹'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ('\u{e0100}', '\u{e01ef}'), ]; diff --git a/src/unicode_tables/property_bool.rs b/src/unicode_tables/property_bool.rs index 21cbaf9..a3e84b5 100644 --- a/src/unicode_tables/property_bool.rs +++ b/src/unicode_tables/property_bool.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-bool ucd-13.0.0 --chars +// ucd-generate property-bool ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ASCII_Hex_Digit", ASCII_HEX_DIGIT), @@ -125,8 +125,9 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ࠚ', '\u{82c}'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('ࢠ', 'ࣉ'), ('\u{8d4}', '\u{8df}'), ('\u{8e3}', '\u{8e9}'), ('\u{8f0}', 'ऻ'), @@ -206,8 +207,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ொ', 'ௌ'), ('ௐ', 'ௐ'), ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', 'ః'), - ('అ', 'ఌ'), + ('\u{c00}', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), @@ -216,6 +216,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('\u{c4a}', '\u{c4c}'), ('\u{c55}', '\u{c56}'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', '\u{c63}'), ('ಀ', 'ಃ'), ('ಅ', 'ಌ'), @@ -227,9 +228,9 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('\u{cc6}', 'ೈ'), ('ೊ', '\u{ccc}'), ('\u{cd5}', '\u{cd6}'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), - ('ೱ', 'ೲ'), + ('ೱ', 'ೳ'), ('\u{d00}', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), @@ -267,7 +268,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ༀ', 'ༀ'), ('ཀ', 'ཇ'), ('ཉ', 'ཬ'), - ('\u{f71}', '\u{f81}'), + ('\u{f71}', '\u{f83}'), ('ྈ', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('က', '\u{1036}'), @@ -303,9 +304,8 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', '\u{1713}'), - ('ᜠ', '\u{1733}'), + ('ᜀ', '\u{1713}'), + ('ᜟ', '\u{1733}'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -329,9 +329,10 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ᩡ', '\u{1a74}'), ('ᪧ', 'ᪧ'), ('\u{1abf}', '\u{1ac0}'), + ('\u{1acc}', '\u{1ace}'), ('\u{1b00}', 'ᬳ'), ('\u{1b35}', 'ᭃ'), - ('ᭅ', 'ᭋ'), + ('ᭅ', 'ᭌ'), ('\u{1b80}', '\u{1ba9}'), ('\u{1bac}', 'ᮯ'), ('ᮺ', 'ᯥ'), @@ -385,9 +386,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), ('Ⓐ', 'ⓩ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⳤ'), + ('Ⰰ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), @@ -416,11 +415,10 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ー', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('ㇰ', 'ㇿ'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), - ('ꀀ', 'ꒌ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'), @@ -430,9 +428,11 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ꙿ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꠅ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠅ'), ('ꠇ', 'ꠧ'), ('ꡀ', 'ꡳ'), ('ꢀ', 'ꣃ'), @@ -463,7 +463,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), - ('ꭜ', '\u{ab69}'), + ('ꭜ', 'ꭩ'), ('ꭰ', 'ꯪ'), ('가', '힣'), ('ힰ', 'ퟆ'), @@ -514,9 +514,20 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), @@ -548,30 +559,33 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), - ('\u{10e80}', '\u{10ea9}'), + ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), - ('\u{10eb0}', '\u{10eb1}'), + ('𐺰', '𐺱'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), - ('\u{10fb0}', '\u{10fc4}'), + ('𐽰', '𐾁'), + ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀀', '\u{11045}'), - ('𑂂', '𑂸'), + ('𑁱', '𑁵'), + ('\u{11080}', '𑂸'), + ('\u{110c2}', '\u{110c2}'), ('𑃐', '𑃨'), ('\u{11100}', '\u{11132}'), - ('𑅄', '\u{11147}'), + ('𑅄', '𑅇'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('\u{11180}', '𑆿'), ('𑇁', '𑇄'), - ('\u{111ce}', '\u{111cf}'), + ('𑇎', '\u{111cf}'), ('𑇚', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '\u{11234}'), ('\u{11237}', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), + ('\u{1123e}', '\u{11241}'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -594,7 +608,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𑐀', '𑑁'), ('\u{11443}', '𑑅'), ('𑑇', '𑑊'), - ('𑑟', '\u{11461}'), + ('𑑟', '𑑡'), ('𑒀', '𑓁'), ('𑓄', '𑓅'), ('𑓇', '𑓇'), @@ -608,16 +622,17 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𑚸', '𑚸'), ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172a}'), + ('𑝀', '𑝆'), ('𑠀', '𑠸'), ('𑢠', '𑣟'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤵'), + ('𑤷', '𑤸'), ('\u{1193b}', '\u{1193c}'), - ('\u{1193f}', '\u{11942}'), + ('𑤿', '𑥂'), ('𑦠', '𑦧'), ('𑦪', '\u{119d7}'), ('\u{119da}', '𑧟'), @@ -627,7 +642,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('\u{11a35}', '\u{11a3e}'), ('𑩐', '𑪗'), ('𑪝', '𑪝'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑰾'), @@ -650,14 +665,20 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𑶓', '𑶖'), ('𑶘', '𑶘'), ('𑻠', '𑻶'), - ('\u{11fb0}', '\u{11fb0}'), + ('\u{11f00}', '𑼐'), + ('𑼒', '\u{11f3a}'), + ('𑼾', '\u{11f40}'), + ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('𓑁', '𓑆'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), + ('𖩰', '𖪾'), ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), @@ -669,12 +690,17 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '𖿣'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('𛀀', '𛄞'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), @@ -712,15 +738,25 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), ('𞄀', '𞄬'), ('𞄷', '𞄽'), ('𞅎', '𞅎'), + ('𞊐', '𞊭'), ('𞋀', '𞋫'), + ('𞓐', '𞓫'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('𞤀', '𞥃'), ('\u{1e947}', '\u{1e947}'), @@ -761,13 +797,14 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; pub const BIDI_CONTROL: &'static [(char, char)] = &[ @@ -873,6 +910,7 @@ pub const BIDI_MIRRORED: &'static [(char, char)] = &[ ('⸌', '⸍'), ('⸜', '⸝'), ('⸠', '⸩'), + ('⹕', '⹜'), ('〈', '】'), ('〔', '〛'), ('﹙', '﹞'), @@ -936,7 +974,10 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{7fd}', '\u{7fd}'), ('\u{816}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{902}'), + ('࢈', '࢈'), + ('\u{890}', '\u{891}'), + ('\u{898}', '\u{89f}'), + ('ࣉ', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), ('\u{941}', '\u{948}'), @@ -977,6 +1018,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{bcd}', '\u{bcd}'), ('\u{c00}', '\u{c00}'), ('\u{c04}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -1003,7 +1045,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -1028,7 +1070,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('ჼ', 'ჼ'), ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1732}', '\u{1733}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -1037,7 +1079,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{17c9}', '\u{17d3}'), ('ៗ', 'ៗ'), ('\u{17dd}', '\u{17dd}'), - ('\u{180b}', '\u{180e}'), + ('\u{180b}', '\u{180f}'), ('ᡃ', 'ᡃ'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), @@ -1055,7 +1097,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{1a73}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), ('ᪧ', 'ᪧ'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b34}'), ('\u{1b36}', '\u{1b3a}'), @@ -1081,8 +1123,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{1cf8}', '\u{1cf9}'), ('ᴬ', 'ᵪ'), ('ᵸ', 'ᵸ'), - ('ᶛ', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('ᶛ', '\u{1dff}'), ('᾽', '᾽'), ('᾿', '῁'), ('῍', '῏'), @@ -1123,6 +1164,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('꜀', '꜡'), ('ꝰ', 'ꝰ'), ('ꞈ', '꞊'), + ('ꟲ', 'ꟴ'), ('ꟸ', 'ꟹ'), ('\u{a802}', '\u{a802}'), ('\u{a806}', '\u{a806}'), @@ -1157,12 +1199,12 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('ꫳ', 'ꫴ'), ('\u{aaf6}', '\u{aaf6}'), ('꭛', 'ꭟ'), - ('\u{ab69}', '\u{ab6b}'), + ('ꭩ', '꭫'), ('\u{abe5}', '\u{abe5}'), ('\u{abe8}', '\u{abe8}'), ('\u{abed}', '\u{abed}'), ('\u{fb1e}', '\u{fb1e}'), - ('﮲', '﯁'), + ('﮲', '﯂'), ('\u{fe00}', '\u{fe0f}'), ('︓', '︓'), ('\u{fe20}', '\u{fe2f}'), @@ -1181,6 +1223,9 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{101fd}', '\u{101fd}'), ('\u{102e0}', '\u{102e0}'), ('\u{10376}', '\u{1037a}'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('\u{10a01}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}', '\u{10a0f}'), @@ -1189,13 +1234,18 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '\u{11081}'), ('\u{110b3}', '\u{110b6}'), ('\u{110b9}', '\u{110ba}'), ('\u{110bd}', '\u{110bd}'), + ('\u{110c2}', '\u{110c2}'), ('\u{110cd}', '\u{110cd}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{1112b}'), @@ -1209,6 +1259,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{11234}', '\u{11234}'), ('\u{11236}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}', '\u{112ea}'), ('\u{11300}', '\u{11301}'), @@ -1270,7 +1321,12 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{11d95}', '\u{11d95}'), ('\u{11d97}', '\u{11d97}'), ('\u{11ef3}', '\u{11ef4}'), - ('\u{13430}', '\u{13438}'), + ('\u{11f00}', '\u{11f01}'), + ('\u{11f36}', '\u{11f3a}'), + ('\u{11f40}', '\u{11f40}'), + ('\u{11f42}', '\u{11f42}'), + ('\u{13430}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('𖭀', '𖭃'), @@ -1278,8 +1334,13 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1bca0}', '\u{1bca3}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d173}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), @@ -1296,8 +1357,12 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '𞄽'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('𞓫', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '𞥋'), ('🏻', '🏿'), @@ -1338,7 +1403,7 @@ pub const CASED: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), - ('ჽ', 'ჿ'), + ('ჼ', 'ჿ'), ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), @@ -1384,9 +1449,7 @@ pub const CASED: &'static [(char, char)] = &[ ('Ⅰ', 'ⅿ'), ('Ↄ', 'ↄ'), ('Ⓐ', 'ⓩ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⳤ'), + ('Ⰰ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), @@ -1396,12 +1459,14 @@ pub const CASED: &'static [(char, char)] = &[ ('Ꚁ', 'ꚝ'), ('Ꜣ', 'ꞇ'), ('Ꞌ', 'ꞎ'), - ('Ꞑ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', '\u{a7f6}'), + ('Ꞑ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꟶ'), ('ꟸ', 'ꟺ'), ('ꬰ', 'ꭚ'), - ('ꭜ', '\u{ab68}'), + ('ꭜ', 'ꭩ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), ('ﬓ', 'ﬗ'), @@ -1410,6 +1475,18 @@ pub const CASED: &'static [(char, char)] = &[ ('𐐀', '𐑏'), ('𐒰', '𐓓'), ('𐓘', '𐓻'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), + ('𐞀', '𐞀'), + ('𐞃', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑢠', '𑣟'), @@ -1444,6 +1521,10 @@ pub const CASED: &'static [(char, char)] = &[ ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), + ('𝼀', '𝼉'), + ('𝼋', '𝼞'), + ('𝼥', '𝼪'), + ('𞀰', '𞁭'), ('𞤀', '𞥃'), ('🄰', '🅉'), ('🅐', '🅩'), @@ -1886,7 +1967,7 @@ pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ ('Ⅰ', 'Ⅿ'), ('Ↄ', 'Ↄ'), ('Ⓐ', 'Ⓩ'), - ('Ⰰ', 'Ⱞ'), + ('Ⰰ', 'Ⱟ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'), ('Ⱨ', 'Ⱨ'), @@ -2051,16 +2132,24 @@ pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ ('Ꞻ', 'Ꞻ'), ('Ꞽ', 'Ꞽ'), ('Ꞿ', 'Ꞿ'), + ('Ꟁ', 'Ꟁ'), ('Ꟃ', 'Ꟃ'), - ('Ꞔ', '\u{a7c7}'), - ('\u{a7c9}', '\u{a7c9}'), - ('\u{a7f5}', '\u{a7f5}'), + ('Ꞔ', 'Ꟈ'), + ('Ꟊ', 'Ꟊ'), + ('Ꟑ', 'Ꟑ'), + ('Ꟗ', 'Ꟗ'), + ('Ꟙ', 'Ꟙ'), + ('Ꟶ', 'Ꟶ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('A', 'Z'), ('𐐀', '𐐧'), ('𐒰', '𐓓'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), @@ -2156,9 +2245,7 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('Ⅰ', 'ⅿ'), ('Ↄ', 'ↄ'), ('Ⓐ', 'ⓩ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'Ɒ'), + ('Ⰰ', 'Ɒ'), ('Ⱳ', 'ⱳ'), ('Ⱶ', 'ⱶ'), ('Ȿ', 'ⳣ'), @@ -2175,9 +2262,10 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('Ꞌ', 'Ɥ'), ('Ꞑ', 'ꞔ'), ('Ꞗ', 'Ɪ'), - ('Ʞ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', '\u{a7f6}'), + ('Ʞ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('Ꟗ', 'ꟙ'), + ('Ꟶ', 'ꟶ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), @@ -2187,6 +2275,14 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('𐐀', '𐑏'), ('𐒰', '𐓓'), ('𐓘', '𐓻'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑢠', '𑣟'), @@ -2620,7 +2716,7 @@ pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ ('Ⅰ', 'Ⅿ'), ('Ↄ', 'Ↄ'), ('Ⓐ', 'Ⓩ'), - ('Ⰰ', 'Ⱞ'), + ('Ⰰ', 'Ⱟ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'), ('Ⱨ', 'Ⱨ'), @@ -2785,13 +2881,21 @@ pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ ('Ꞻ', 'Ꞻ'), ('Ꞽ', 'Ꞽ'), ('Ꞿ', 'Ꞿ'), + ('Ꟁ', 'Ꟁ'), ('Ꟃ', 'Ꟃ'), - ('Ꞔ', '\u{a7c7}'), - ('\u{a7c9}', '\u{a7c9}'), - ('\u{a7f5}', '\u{a7f5}'), + ('Ꞔ', 'Ꟈ'), + ('Ꟊ', 'Ꟊ'), + ('Ꟑ', 'Ꟑ'), + ('Ꟗ', 'Ꟗ'), + ('Ꟙ', 'Ꟙ'), + ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), ('𐒰', '𐓓'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), @@ -3237,7 +3341,7 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ⅰ', 'ⅿ'), ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'), - ('ⰰ', 'ⱞ'), + ('ⰰ', 'ⱟ'), ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), @@ -3402,10 +3506,14 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ꞻ', 'ꞻ'), ('ꞽ', 'ꞽ'), ('ꞿ', 'ꞿ'), + ('ꟁ', 'ꟁ'), ('ꟃ', 'ꟃ'), - ('\u{a7c8}', '\u{a7c8}'), - ('\u{a7ca}', '\u{a7ca}'), - ('\u{a7f6}', '\u{a7f6}'), + ('ꟈ', 'ꟈ'), + ('ꟊ', 'ꟊ'), + ('ꟑ', 'ꟑ'), + ('ꟗ', 'ꟗ'), + ('ꟙ', 'ꟙ'), + ('ꟶ', 'ꟶ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), @@ -3413,6 +3521,10 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('a', 'z'), ('𐐨', '𐑏'), ('𐓘', '𐓻'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐳀', '𐳲'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), @@ -3859,7 +3971,7 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ⅰ', 'ⅿ'), ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'), - ('ⰰ', 'ⱞ'), + ('ⰰ', 'ⱟ'), ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), @@ -4024,10 +4136,14 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ꞻ', 'ꞻ'), ('ꞽ', 'ꞽ'), ('ꞿ', 'ꞿ'), + ('ꟁ', 'ꟁ'), ('ꟃ', 'ꟃ'), - ('\u{a7c8}', '\u{a7c8}'), - ('\u{a7ca}', '\u{a7ca}'), - ('\u{a7f6}', '\u{a7f6}'), + ('ꟈ', 'ꟈ'), + ('ꟊ', 'ꟊ'), + ('ꟑ', 'ꟑ'), + ('ꟗ', 'ꟗ'), + ('ꟙ', 'ꟙ'), + ('ꟶ', 'ꟶ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), @@ -4035,6 +4151,10 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('a', 'z'), ('𐐨', '𐑏'), ('𐓘', '𐓻'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐳀', '𐳲'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), @@ -4056,6 +4176,7 @@ pub const DASH: &'static [(char, char)] = &[ ('⸚', '⸚'), ('⸺', '⸻'), ('⹀', '⹀'), + ('⹝', '⹝'), ('〜', '〜'), ('〰', '〰'), ('゠', '゠'), @@ -4063,7 +4184,7 @@ pub const DASH: &'static [(char, char)] = &[ ('﹘', '﹘'), ('﹣', '﹣'), ('-', '-'), - ('\u{10ead}', '\u{10ead}'), + ('𐺭', '𐺭'), ]; pub const DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[ @@ -4072,7 +4193,7 @@ pub const DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[ ('\u{61c}', '\u{61c}'), ('ᅟ', 'ᅠ'), ('\u{17b4}', '\u{17b5}'), - ('\u{180b}', '\u{180e}'), + ('\u{180b}', '\u{180f}'), ('\u{200b}', '\u{200f}'), ('\u{202a}', '\u{202e}'), ('\u{2060}', '\u{206f}'), @@ -4126,6 +4247,8 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', 'ߵ'), ('\u{818}', '\u{819}'), + ('\u{898}', '\u{89f}'), + ('ࣉ', '\u{8d2}'), ('\u{8e3}', '\u{8fe}'), ('\u{93c}', '\u{93c}'), ('\u{94d}', '\u{94d}'), @@ -4142,6 +4265,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{b4d}', '\u{b4d}'), ('\u{b55}', '\u{b55}'), ('\u{bcd}', '\u{bcd}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c4d}', '\u{c4d}'), ('\u{cbc}', '\u{cbc}'), ('\u{ccd}', '\u{ccd}'), @@ -4168,12 +4292,14 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', 'ႛ'), ('\u{135d}', '\u{135f}'), + ('\u{1714}', '᜕'), ('\u{17c9}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{1939}', '\u{193b}'), ('\u{1a75}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1abd}'), + ('\u{1ab0}', '\u{1abe}'), + ('\u{1ac1}', '\u{1acb}'), ('\u{1b34}', '\u{1b34}'), ('᭄', '᭄'), ('\u{1b6b}', '\u{1b73}'), @@ -4186,8 +4312,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('᳷', '\u{1cf9}'), ('ᴬ', 'ᵪ'), ('\u{1dc4}', '\u{1dcf}'), - ('\u{1df5}', '\u{1df9}'), - ('\u{1dfd}', '\u{1dff}'), + ('\u{1df5}', '\u{1dff}'), ('᾽', '᾽'), ('᾿', '῁'), ('῍', '῏'), @@ -4218,7 +4343,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{aabf}', 'ꫂ'), ('\u{aaf6}', '\u{aaf6}'), ('꭛', 'ꭟ'), - ('\u{ab69}', '\u{ab6b}'), + ('ꭩ', '꭫'), ('꯬', '\u{abed}'), ('\u{fb1e}', '\u{fb1e}'), ('\u{fe20}', '\u{fe2f}'), @@ -4228,9 +4353,16 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{ff9e}', '\u{ff9f}'), (' ̄', ' ̄'), ('\u{102e0}', '\u{102e0}'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('\u{10ae5}', '\u{10ae6}'), ('𐴢', '\u{10d27}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), + ('\u{11046}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), ('\u{110b9}', '\u{110ba}'), ('\u{11133}', '\u{11134}'), ('\u{11173}', '\u{11173}'), @@ -4250,7 +4382,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('𑚶', '\u{116b7}'), ('\u{1172b}', '\u{1172b}'), ('\u{11839}', '\u{1183a}'), - ('\u{1193d}', '\u{1193e}'), + ('𑤽', '\u{1193e}'), ('\u{11943}', '\u{11943}'), ('\u{119e0}', '\u{119e0}'), ('\u{11a34}', '\u{11a34}'), @@ -4260,16 +4392,24 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{11d42}', '\u{11d42}'), ('\u{11d44}', '\u{11d45}'), ('\u{11d97}', '\u{11d97}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f8f}', '𖾟'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), ('𝅭', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), + ('𞀰', '𞁭'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e946}'), @@ -4410,25 +4550,24 @@ pub const EMOJI: &'static [(char, char)] = &[ ('🗺', '🙏'), ('🚀', '🛅'), ('🛋', '🛒'), - ('🛕', '\u{1f6d7}'), - ('🛠', '🛥'), + ('🛕', '🛗'), + ('🛜', '🛥'), ('🛩', '🛩'), ('🛫', '🛬'), ('🛰', '🛰'), - ('🛳', '\u{1f6fc}'), + ('🛳', '🛼'), ('🟠', '🟫'), - ('\u{1f90c}', '🤺'), + ('🟰', '🟰'), + ('🤌', '🤺'), ('🤼', '🥅'), - ('🥇', '\u{1f978}'), - ('🥺', '\u{1f9cb}'), - ('🧍', '🧿'), - ('🩰', '\u{1fa74}'), - ('🩸', '🩺'), - ('🪀', '\u{1fa86}'), - ('🪐', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), + ('🥇', '🧿'), + ('🩰', '🩼'), + ('🪀', '🪈'), + ('🪐', '🪽'), + ('🪿', '🫅'), + ('🫎', '🫛'), + ('🫠', '🫨'), + ('🫰', '🫸'), ]; pub const EMOJI_COMPONENT: &'static [(char, char)] = &[ @@ -4473,18 +4612,20 @@ pub const EMOJI_MODIFIER_BASE: &'static [(char, char)] = &[ ('🚴', '🚶'), ('🛀', '🛀'), ('🛌', '🛌'), - ('\u{1f90c}', '\u{1f90c}'), + ('🤌', '🤌'), ('🤏', '🤏'), ('🤘', '🤟'), ('🤦', '🤦'), ('🤰', '🤹'), ('🤼', '🤾'), - ('\u{1f977}', '\u{1f977}'), + ('🥷', '🥷'), ('🦵', '🦶'), ('🦸', '🦹'), ('🦻', '🦻'), ('🧍', '🧏'), ('🧑', '🧝'), + ('🫃', '🫅'), + ('🫰', '🫸'), ]; pub const EMOJI_PRESENTATION: &'static [(char, char)] = &[ @@ -4553,22 +4694,22 @@ pub const EMOJI_PRESENTATION: &'static [(char, char)] = &[ ('🚀', '🛅'), ('🛌', '🛌'), ('🛐', '🛒'), - ('🛕', '\u{1f6d7}'), + ('🛕', '🛗'), + ('🛜', '🛟'), ('🛫', '🛬'), - ('🛴', '\u{1f6fc}'), + ('🛴', '🛼'), ('🟠', '🟫'), - ('\u{1f90c}', '🤺'), + ('🟰', '🟰'), + ('🤌', '🤺'), ('🤼', '🥅'), - ('🥇', '\u{1f978}'), - ('🥺', '\u{1f9cb}'), - ('🧍', '🧿'), - ('🩰', '\u{1fa74}'), - ('🩸', '🩺'), - ('🪀', '\u{1fa86}'), - ('🪐', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), + ('🥇', '🧿'), + ('🩰', '🩼'), + ('🪀', '🪈'), + ('🪐', '🪽'), + ('🪿', '🫅'), + ('🫎', '🫛'), + ('🫠', '🫨'), + ('🫰', '🫸'), ]; pub const EXTENDED_PICTOGRAPHIC: &'static [(char, char)] = &[ @@ -4623,13 +4764,13 @@ pub const EXTENDED_PICTOGRAPHIC: &'static [(char, char)] = &[ ('㊗', '㊗'), ('㊙', '㊙'), ('🀀', '\u{1f0ff}'), - ('\u{1f10d}', '\u{1f10f}'), + ('🄍', '🄏'), ('🄯', '🄯'), ('🅬', '🅱'), ('🅾', '🅿'), ('🆎', '🆎'), ('🆑', '🆚'), - ('\u{1f1ad}', '\u{1f1e5}'), + ('🆭', '\u{1f1e5}'), ('🈁', '\u{1f20f}'), ('🈚', '🈚'), ('🈯', '🈯'), @@ -4639,14 +4780,14 @@ pub const EXTENDED_PICTOGRAPHIC: &'static [(char, char)] = &[ ('🐀', '🔽'), ('🕆', '🙏'), ('🚀', '\u{1f6ff}'), - ('\u{1f774}', '\u{1f77f}'), + ('🝴', '🝿'), ('🟕', '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}', '\u{1f84f}'), ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8ff}'), - ('\u{1f90c}', '🤺'), + ('🤌', '🤺'), ('🤼', '🥅'), ('🥇', '\u{1faff}'), ('\u{1fc00}', '\u{1fffd}'), @@ -4677,6 +4818,7 @@ pub const EXTENDER: &'static [(char, char)] = &[ ('ꫝ', 'ꫝ'), ('ꫳ', 'ꫴ'), ('ー', 'ー'), + ('𐞁', '𐞂'), ('𑍝', '𑍝'), ('𑗆', '𑗈'), ('\u{11a98}', '\u{11a98}'), @@ -4709,7 +4851,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ׯ', '״'), ('؆', '؏'), ('؛', '؛'), - ('؞', 'ي'), + ('؝', 'ي'), ('٠', 'ٯ'), ('ٱ', 'ە'), ('۞', '۞'), @@ -4730,8 +4872,8 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ࡀ', 'ࡘ'), ('࡞', '࡞'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), + ('ࡰ', 'ࢎ'), + ('ࢠ', 'ࣉ'), ('ः', 'ह'), ('ऻ', 'ऻ'), ('ऽ', 'ी'), @@ -4820,6 +4962,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ఽ', 'ఽ'), ('ు', 'ౄ'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', 'ౡ'), ('౦', '౯'), ('౷', 'ಀ'), @@ -4833,10 +4976,10 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ೃ', 'ೄ'), ('ೇ', 'ೈ'), ('ೊ', 'ೋ'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', 'ೡ'), ('೦', '೯'), - ('ೱ', 'ೲ'), + ('ೱ', 'ೳ'), ('ം', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), @@ -4922,10 +5065,10 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ᏸ', 'ᏽ'), ('᐀', '᚜'), ('ᚠ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', 'ᜑ'), - ('ᜠ', 'ᜱ'), - ('᜵', '᜶'), + ('ᜀ', 'ᜑ'), + ('᜕', '᜕'), + ('ᜟ', 'ᜱ'), + ('᜴', '᜶'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -4967,9 +5110,9 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ᬄ', 'ᬳ'), ('ᬻ', 'ᬻ'), ('ᬽ', 'ᭁ'), - ('ᭃ', 'ᭋ'), + ('ᭃ', 'ᭌ'), ('᭐', '᭪'), - ('᭴', '᭼'), + ('᭴', '᭾'), ('ᮂ', 'ᮡ'), ('ᮦ', 'ᮧ'), ('᮪', '᮪'), @@ -5013,15 +5156,13 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('⁰', 'ⁱ'), ('⁴', '₎'), ('ₐ', 'ₜ'), - ('₠', '₿'), + ('₠', '⃀'), ('℀', '↋'), ('←', '␦'), ('⑀', '⑊'), ('①', '⭳'), ('⭶', '⮕'), - ('\u{2b97}', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⳮ'), + ('⮗', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('⳹', 'ⴥ'), ('ⴧ', 'ⴧ'), @@ -5037,7 +5178,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), - ('⸀', '\u{2e52}'), + ('⸀', '⹝'), ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), @@ -5050,8 +5191,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ㄱ', 'ㆎ'), ('㆐', '㇣'), ('ㇰ', '㈞'), - ('㈠', '\u{9ffc}'), - ('ꀀ', 'ꒌ'), + ('㈠', 'ꒌ'), ('꒐', '꓆'), ('ꓐ', 'ꘫ'), ('Ꙁ', 'ꙮ'), @@ -5059,9 +5199,11 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('꙾', 'ꚝ'), ('ꚠ', 'ꛯ'), ('꛲', '꛷'), - ('꜀', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꠁ'), + ('꜀', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠤ'), @@ -5103,7 +5245,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), - ('ꬰ', '\u{ab6b}'), + ('ꬰ', '꭫'), ('ꭰ', 'ꯤ'), ('ꯦ', 'ꯧ'), ('ꯩ', '꯬'), @@ -5121,11 +5263,11 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'), - ('צּ', '﯁'), - ('ﯓ', '﴿'), - ('ﵐ', 'ﶏ'), + ('צּ', '﯂'), + ('ﯓ', 'ﶏ'), ('ﶒ', 'ﷇ'), - ('ﷰ', '﷽'), + ('﷏', '﷏'), + ('ﷰ', '﷿'), ('︐', '︙'), ('︰', '﹒'), ('﹔', '﹦'), @@ -5151,7 +5293,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𐄀', '𐄂'), ('𐄇', '𐄳'), ('𐄷', '𐆎'), - ('𐆐', '\u{1019c}'), + ('𐆐', '𐆜'), ('𐆠', '𐆠'), ('𐇐', '𐇼'), ('𐊀', '𐊜'), @@ -5169,10 +5311,20 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), - ('𐕯', '𐕯'), + ('𐕯', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), @@ -5209,18 +5361,22 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𐳺', '𐴣'), ('𐴰', '𐴹'), ('𐹠', '𐹾'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10ead}', '\u{10ead}'), - ('\u{10eb0}', '\u{10eb1}'), + ('𐺀', '𐺩'), + ('𐺭', '𐺭'), + ('𐺰', '𐺱'), ('𐼀', '𐼧'), ('𐼰', '𐽅'), ('𐽑', '𐽙'), - ('\u{10fb0}', '\u{10fcb}'), + ('𐽰', '𐾁'), + ('𐾆', '𐾉'), + ('𐾰', '𐿋'), ('𐿠', '𐿶'), ('𑀀', '𑀀'), ('𑀂', '𑀷'), ('𑁇', '𑁍'), ('𑁒', '𑁯'), + ('𑁱', '𑁲'), + ('𑁵', '𑁵'), ('𑂂', '𑂲'), ('𑂷', '𑂸'), ('𑂻', '𑂼'), @@ -5229,12 +5385,12 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑃰', '𑃹'), ('𑄃', '𑄦'), ('𑄬', '𑄬'), - ('𑄶', '\u{11147}'), + ('𑄶', '𑅇'), ('𑅐', '𑅲'), ('𑅴', '𑅶'), ('𑆂', '𑆵'), ('𑆿', '𑇈'), - ('𑇍', '\u{111ce}'), + ('𑇍', '𑇎'), ('𑇐', '𑇟'), ('𑇡', '𑇴'), ('𑈀', '𑈑'), @@ -5242,6 +5398,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑈲', '𑈳'), ('𑈵', '𑈵'), ('𑈸', '𑈽'), + ('𑈿', '𑉀'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -5269,7 +5426,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑑅', '𑑅'), ('𑑇', '𑑛'), ('𑑝', '𑑝'), - ('𑑟', '\u{11461}'), + ('𑑟', '𑑡'), ('𑒀', '𑒯'), ('𑒱', '𑒲'), ('𑒹', '𑒹'), @@ -5293,27 +5450,27 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑚬', '𑚬'), ('𑚮', '𑚯'), ('𑚶', '𑚶'), - ('𑚸', '𑚸'), + ('𑚸', '𑚹'), ('𑛀', '𑛉'), ('𑜀', '𑜚'), ('𑜠', '𑜡'), ('𑜦', '𑜦'), - ('𑜰', '𑜿'), + ('𑜰', '𑝆'), ('𑠀', '𑠮'), ('𑠸', '𑠸'), ('𑠻', '𑠻'), ('𑢠', '𑣲'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{11931}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193d}', '\u{1193d}'), - ('\u{1193f}', '\u{11942}'), - ('\u{11944}', '\u{11946}'), - ('\u{11950}', '\u{11959}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤯'), + ('𑤱', '𑤵'), + ('𑤷', '𑤸'), + ('𑤽', '𑤽'), + ('𑤿', '𑥂'), + ('𑥄', '𑥆'), + ('𑥐', '𑥙'), ('𑦠', '𑦧'), ('𑦪', '𑧓'), ('𑧜', '𑧟'), @@ -5327,7 +5484,8 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪗', '𑪗'), ('𑪚', '𑪢'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), + ('𑬀', '𑬉'), ('𑰀', '𑰈'), ('𑰊', '𑰯'), ('𑰾', '𑰾'), @@ -5351,18 +5509,26 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑶠', '𑶩'), ('𑻠', '𑻲'), ('𑻵', '𑻸'), - ('\u{11fb0}', '\u{11fb0}'), + ('𑼂', '𑼐'), + ('𑼒', '𑼵'), + ('𑼾', '𑼿'), + ('𑽁', '𑽁'), + ('𑽃', '𑽙'), + ('𑾰', '𑾰'), ('𑿀', '𑿱'), ('𑿿', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿲'), + ('𓀀', '𓐯'), + ('𓑁', '𓑆'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), - ('𖩮', '𖩯'), + ('𖩮', '𖪾'), + ('𖫀', '𖫉'), ('𖫐', '𖫭'), ('𖫵', '𖫵'), ('𖬀', '𖬯'), @@ -5376,12 +5542,17 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𖽐', '𖾇'), ('𖾓', '𖾟'), ('𖿠', '𖿣'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('𛀀', '𛄞'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), @@ -5390,6 +5561,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𛲐', '𛲙'), ('𛲜', '𛲜'), ('𛲟', '𛲟'), + ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'), @@ -5397,9 +5569,10 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𝅪', '𝅭'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), - ('𝆮', '𝇨'), + ('𝆮', '𝇪'), ('𝈀', '𝉁'), ('𝉅', '𝉅'), + ('𝋀', '𝋓'), ('𝋠', '𝋳'), ('𝌀', '𝍖'), ('𝍠', '𝍸'), @@ -5428,13 +5601,23 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𝩭', '𝩴'), ('𝩶', '𝪃'), ('𝪅', '𝪋'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), + ('𞀰', '𞁭'), ('𞄀', '𞄬'), ('𞄷', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅏'), + ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞋰', '𞋹'), ('𞋿', '𞋿'), + ('𞓐', '𞓫'), + ('𞓰', '𞓹'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('𞣇', '𞣏'), ('𞤀', '𞥃'), @@ -5483,45 +5666,45 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('🂱', '🂿'), ('🃁', '🃏'), ('🃑', '🃵'), - ('🄀', '\u{1f1ad}'), + ('🄀', '🆭'), ('🇦', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉐', '🉑'), ('🉠', '🉥'), - ('🌀', '\u{1f6d7}'), - ('🛠', '🛬'), - ('🛰', '\u{1f6fc}'), - ('🜀', '🝳'), - ('🞀', '🟘'), + ('🌀', '🛗'), + ('🛜', '🛬'), + ('🛰', '🛼'), + ('🜀', '🝶'), + ('🝻', '🟙'), ('🟠', '🟫'), + ('🟰', '🟰'), ('🠀', '🠋'), ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('🤀', '\u{1f978}'), - ('🥺', '\u{1f9cb}'), - ('🧍', '🩓'), + ('🢰', '🢱'), + ('🤀', '🩓'), ('🩠', '🩭'), - ('🩰', '\u{1fa74}'), - ('🩸', '🩺'), - ('🪀', '\u{1fa86}'), - ('🪐', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), - ('\u{1fbf0}', '\u{1fbf9}'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('🩰', '🩼'), + ('🪀', '🪈'), + ('🪐', '🪽'), + ('🪿', '🫅'), + ('🫎', '🫛'), + ('🫠', '🫨'), + ('🫰', '🫸'), + ('🬀', '🮒'), + ('🮔', '🯊'), + ('🯰', '🯹'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ @@ -5549,7 +5732,8 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), @@ -5594,6 +5778,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c00}'), ('\u{c04}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -5625,7 +5810,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -5649,7 +5834,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1732}', '\u{1733}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -5658,6 +5843,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{17c9}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', '\u{1922}'), @@ -5673,7 +5859,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1a65}', '\u{1a6c}'), ('\u{1a73}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), ('\u{1b34}', '\u{1b3a}'), ('\u{1b3c}', '\u{1b3c}'), @@ -5695,8 +5881,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{200c}', '\u{200c}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), @@ -5754,12 +5939,17 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '\u{11081}'), ('\u{110b3}', '\u{110b6}'), ('\u{110b9}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{1112b}'), ('\u{1112d}', '\u{11134}'), @@ -5772,6 +5962,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{11234}', '\u{11234}'), ('\u{11236}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}', '\u{112ea}'), ('\u{11300}', '\u{11301}'), @@ -5839,12 +6030,20 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{11d95}', '\u{11d95}'), ('\u{11d97}', '\u{11d97}'), ('\u{11ef3}', '\u{11ef4}'), + ('\u{11f00}', '\u{11f01}'), + ('\u{11f36}', '\u{11f3a}'), + ('\u{11f40}', '\u{11f40}'), + ('\u{11f42}', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d165}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d16e}', '\u{1d172}'), @@ -5863,8 +6062,11 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0020}', '\u{e007f}'), @@ -5887,8 +6089,8 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{eba}', '\u{eba}'), ('\u{f84}', '\u{f84}'), ('\u{1039}', '\u{103a}'), - ('\u{1714}', '\u{1714}'), - ('\u{1734}', '\u{1734}'), + ('\u{1714}', '᜕'), + ('᜴', '᜴'), ('\u{17d2}', '\u{17d2}'), ('\u{1a60}', '\u{1a60}'), ('᭄', '᭄'), @@ -5904,6 +6106,7 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{abed}', '\u{abed}'), ('\u{10a3f}', '\u{10a3f}'), ('\u{11046}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), ('\u{1107f}', '\u{1107f}'), ('\u{110b9}', '\u{110b9}'), ('\u{11133}', '\u{11134}'), @@ -5918,7 +6121,7 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('𑚶', '𑚶'), ('\u{1172b}', '\u{1172b}'), ('\u{11839}', '\u{11839}'), - ('\u{1193d}', '\u{1193e}'), + ('𑤽', '\u{1193e}'), ('\u{119e0}', '\u{119e0}'), ('\u{11a34}', '\u{11a34}'), ('\u{11a47}', '\u{11a47}'), @@ -5926,6 +6129,7 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{11c3f}', '\u{11c3f}'), ('\u{11d44}', '\u{11d45}'), ('\u{11d97}', '\u{11d97}'), + ('𑽁', '\u{11f42}'), ]; pub const HEX_DIGIT: &'static [(char, char)] = &[ @@ -6007,9 +6211,9 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ࠀ', '\u{82d}'), ('ࡀ', '\u{85b}'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), - ('\u{8d3}', '\u{8e1}'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('\u{898}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -6093,11 +6297,12 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), - ('ఽ', 'ౄ'), + ('\u{c3c}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', '\u{c63}'), ('౦', '౯'), ('ಀ', 'ಃ'), @@ -6110,10 +6315,10 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{cc6}', 'ೈ'), ('ೊ', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), ('೦', '೯'), - ('ೱ', 'ೲ'), + ('ೱ', 'ೳ'), ('\u{d00}', 'ഌ'), ('എ', 'ഐ'), ('ഒ', '\u{d44}'), @@ -6146,7 +6351,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ວ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('໐', '໙'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), @@ -6193,9 +6398,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', '\u{1714}'), - ('ᜠ', '\u{1734}'), + ('ᜀ', '᜕'), + ('ᜟ', '᜴'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -6205,7 +6409,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ៜ', '\u{17dd}'), ('០', '៩'), ('\u{180b}', '\u{180d}'), - ('᠐', '᠙'), + ('\u{180f}', '᠙'), ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢪ'), ('ᢰ', 'ᣵ'), @@ -6224,8 +6428,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('᪐', '᪙'), ('ᪧ', 'ᪧ'), ('\u{1ab0}', '\u{1abd}'), - ('\u{1abf}', '\u{1ac0}'), - ('\u{1b00}', 'ᭋ'), + ('\u{1abf}', '\u{1ace}'), + ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '᯳'), @@ -6237,8 +6441,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', 'ᳺ'), - ('ᴀ', '\u{1df9}'), - ('\u{1dfb}', 'ἕ'), + ('ᴀ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), @@ -6278,9 +6481,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⳤ'), + ('Ⰰ', 'ⳤ'), ('Ⳬ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), @@ -6307,11 +6508,10 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ー', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('ㇰ', 'ㇿ'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), - ('ꀀ', 'ꒌ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘫ'), @@ -6320,9 +6520,11 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꠧ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), ('ꢀ', '\u{a8c5}'), @@ -6349,7 +6551,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), - ('ꭜ', '\u{ab69}'), + ('ꭜ', 'ꭩ'), ('ꭰ', 'ꯪ'), ('꯬', '\u{abed}'), ('꯰', '꯹'), @@ -6411,9 +6613,20 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), @@ -6448,31 +6661,33 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), - ('\u{10e80}', '\u{10ea9}'), + ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), - ('\u{10eb0}', '\u{10eb1}'), - ('𐼀', '𐼜'), + ('𐺰', '𐺱'), + ('\u{10efd}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), - ('\u{10fb0}', '\u{10fc4}'), + ('𐽰', '\u{10f85}'), + ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀀', '\u{11046}'), - ('𑁦', '𑁯'), + ('𑁦', '𑁵'), ('\u{1107f}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('𑃐', '𑃨'), ('𑃰', '𑃹'), ('\u{11100}', '\u{11134}'), ('𑄶', '𑄿'), - ('𑅄', '\u{11147}'), + ('𑅄', '𑅇'), ('𑅐', '\u{11173}'), ('𑅶', '𑅶'), ('\u{11180}', '𑇄'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', '𑇚'), + ('𑇎', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), + ('\u{1123e}', '\u{11241}'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -6497,7 +6712,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{11370}', '\u{11374}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), - ('\u{1145e}', '\u{11461}'), + ('\u{1145e}', '𑑡'), ('𑒀', '𑓅'), ('𑓇', '𑓇'), ('𑓐', '𑓙'), @@ -6512,16 +6727,17 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), + ('𑝀', '𑝆'), ('𑠀', '\u{1183a}'), ('𑢠', '𑣩'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤵'), + ('𑤷', '𑤸'), ('\u{1193b}', '\u{11943}'), - ('\u{11950}', '\u{11959}'), + ('𑥐', '𑥙'), ('𑦠', '𑦧'), ('𑦪', '\u{119d7}'), ('\u{119da}', '𑧡'), @@ -6530,7 +6746,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{11a47}', '\u{11a47}'), ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -6552,15 +6768,23 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𑶓', '𑶘'), ('𑶠', '𑶩'), ('𑻠', '𑻶'), - ('\u{11fb0}', '\u{11fb0}'), + ('\u{11f00}', '𑼐'), + ('𑼒', '\u{11f3a}'), + ('𑼾', '\u{11f42}'), + ('𑽐', '𑽙'), + ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('\u{13440}', '\u{13455}'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), + ('𖩰', '𖪾'), + ('𖫀', '𖫉'), ('𖫐', '𖫭'), ('\u{16af0}', '\u{16af4}'), ('𖬀', '\u{16b36}'), @@ -6574,12 +6798,17 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('𛀀', '𛄞'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), @@ -6587,6 +6816,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('𝅭', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -6630,16 +6861,26 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), ('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅎'), + ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), + ('𞓐', '𞓹'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('\u{1e8d0}', '\u{1e8d6}'), ('𞤀', '𞥋'), @@ -6677,14 +6918,15 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), - ('\u{1fbf0}', '\u{1fbf9}'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('🯰', '🯹'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ('\u{e0100}', '\u{e01ef}'), ]; @@ -6738,8 +6980,9 @@ pub const ID_START: &'static [(char, char)] = &[ ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('ࢠ', 'ࣉ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'), ('ॐ', 'ॐ'), @@ -6804,6 +7047,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('ప', 'హ'), ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'), ('ಅ', 'ಌ'), @@ -6812,10 +7056,10 @@ pub const ID_START: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ಽ'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', 'ೡ'), ('ೱ', 'ೲ'), - ('\u{d04}', 'ഌ'), + ('ഄ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ഽ'), @@ -6883,9 +7127,8 @@ pub const ID_START: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', 'ᜑ'), - ('ᜠ', 'ᜱ'), + ('ᜀ', 'ᜑ'), + ('ᜟ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -6905,7 +7148,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('ᨠ', 'ᩔ'), ('ᪧ', 'ᪧ'), ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭋ'), + ('ᭅ', 'ᭌ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), @@ -6955,9 +7198,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⳤ'), + ('Ⰰ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), @@ -6984,11 +7225,10 @@ pub const ID_START: &'static [(char, char)] = &[ ('ー', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('ㇰ', 'ㇿ'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), - ('ꀀ', 'ꒌ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'), @@ -6998,9 +7238,11 @@ pub const ID_START: &'static [(char, char)] = &[ ('ꚠ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꠁ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠢ'), @@ -7037,7 +7279,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), - ('ꭜ', '\u{ab69}'), + ('ꭜ', 'ꭩ'), ('ꭰ', 'ꯢ'), ('가', '힣'), ('ힰ', 'ퟆ'), @@ -7089,9 +7331,20 @@ pub const ID_START: &'static [(char, char)] = &[ ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), @@ -7122,19 +7375,22 @@ pub const ID_START: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('𐺀', '𐺩'), + ('𐺰', '𐺱'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), - ('\u{10fb0}', '\u{10fc4}'), + ('𐽰', '𐾁'), + ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀃', '𑀷'), + ('𑁱', '𑁲'), + ('𑁵', '𑁵'), ('𑂃', '𑂯'), ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅄', '𑅄'), - ('\u{11147}', '\u{11147}'), + ('𑅇', '𑅇'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'), @@ -7143,6 +7399,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'), + ('𑈿', '𑉀'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -7160,7 +7417,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𑍝', '𑍡'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), - ('𑑟', '\u{11461}'), + ('𑑟', '𑑡'), ('𑒀', '𑒯'), ('𑓄', '𑓅'), ('𑓇', '𑓇'), @@ -7171,15 +7428,16 @@ pub const ID_START: &'static [(char, char)] = &[ ('𑚀', '𑚪'), ('𑚸', '𑚸'), ('𑜀', '𑜚'), + ('𑝀', '𑝆'), ('𑠀', '𑠫'), ('𑢠', '𑣟'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤯'), + ('𑤿', '𑤿'), + ('𑥁', '𑥁'), ('𑦠', '𑦧'), ('𑦪', '𑧐'), ('𑧡', '𑧡'), @@ -7190,7 +7448,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𑩐', '𑩐'), ('𑩜', '𑪉'), ('𑪝', '𑪝'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -7204,14 +7462,20 @@ pub const ID_START: &'static [(char, char)] = &[ ('𑵪', '𑶉'), ('𑶘', '𑶘'), ('𑻠', '𑻲'), - ('\u{11fb0}', '\u{11fb0}'), + ('𑼂', '𑼂'), + ('𑼄', '𑼐'), + ('𑼒', '𑼳'), + ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('𓑁', '𓑆'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), + ('𖩰', '𖪾'), ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), @@ -7224,10 +7488,15 @@ pub const ID_START: &'static [(char, char)] = &[ ('𖿠', '𖿡'), ('𖿣', '𖿣'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('𛀀', '𛄞'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), @@ -7264,10 +7533,19 @@ pub const ID_START: &'static [(char, char)] = &[ ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), + ('𞀰', '𞁭'), ('𞄀', '𞄬'), ('𞄷', '𞄽'), ('𞅎', '𞅎'), + ('𞊐', '𞊭'), ('𞋀', '𞋫'), + ('𞓐', '𞓫'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('𞤀', '𞥃'), ('𞥋', '𞥋'), @@ -7304,35 +7582,37 @@ pub const ID_START: &'static [(char, char)] = &[ ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; pub const IDEOGRAPHIC: &'static [(char, char)] = &[ ('〆', '〇'), ('〡', '〩'), ('〸', '〺'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), + ('㐀', '䶿'), + ('一', '鿿'), ('豈', '舘'), ('並', '龎'), ('\u{16fe4}', '\u{16fe4}'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), ('𛅰', '𛋻'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; pub const JOIN_CONTROL: &'static [(char, char)] = &[('\u{200c}', '\u{200d}')]; @@ -7624,7 +7904,7 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('ԯ', 'ԯ'), ('ՠ', 'ֈ'), ('ა', 'ჺ'), - ('ჽ', 'ჿ'), + ('ჼ', 'ჿ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᶿ'), @@ -7787,7 +8067,7 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('ⅰ', 'ⅿ'), ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'), - ('ⰰ', 'ⱞ'), + ('ⰰ', 'ⱟ'), ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), @@ -7955,19 +8235,34 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('ꞻ', 'ꞻ'), ('ꞽ', 'ꞽ'), ('ꞿ', 'ꞿ'), + ('ꟁ', 'ꟁ'), ('ꟃ', 'ꟃ'), - ('\u{a7c8}', '\u{a7c8}'), - ('\u{a7ca}', '\u{a7ca}'), - ('\u{a7f6}', '\u{a7f6}'), + ('ꟈ', 'ꟈ'), + ('ꟊ', 'ꟊ'), + ('ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟕ'), + ('ꟗ', 'ꟗ'), + ('ꟙ', 'ꟙ'), + ('ꟲ', 'ꟴ'), + ('ꟶ', 'ꟶ'), ('ꟸ', 'ꟺ'), ('ꬰ', 'ꭚ'), - ('ꭜ', '\u{ab68}'), + ('ꭜ', 'ꭩ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('a', 'z'), ('𐐨', '𐑏'), ('𐓘', '𐓻'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), + ('𐞀', '𐞀'), + ('𐞃', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐳀', '𐳲'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), @@ -7999,6 +8294,10 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('𝞪', '𝟂'), ('𝟄', '𝟉'), ('𝟋', '𝟋'), + ('𝼀', '𝼉'), + ('𝼋', '𝼞'), + ('𝼥', '𝼪'), + ('𞀰', '𞁭'), ('𞤢', '𞥃'), ]; @@ -8224,7 +8523,7 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('ெ', 'ை'), ('ொ', 'ௌ'), ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', 'ః'), + ('\u{c00}', '\u{c04}'), ('\u{c3e}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4c}'), @@ -8236,6 +8535,7 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('ೊ', '\u{ccc}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), + ('ೳ', 'ೳ'), ('\u{d00}', 'ഃ'), ('\u{d3e}', '\u{d44}'), ('െ', 'ൈ'), @@ -8254,7 +8554,7 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{eb4}', '\u{eb9}'), ('\u{ebb}', '\u{ebc}'), ('\u{ecd}', '\u{ecd}'), - ('\u{f71}', '\u{f81}'), + ('\u{f71}', '\u{f83}'), ('\u{f8d}', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('ါ', '\u{1036}'), @@ -8281,6 +8581,7 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('ᩕ', '\u{1a5e}'), ('ᩡ', '\u{1a74}'), ('\u{1abf}', '\u{1ac0}'), + ('\u{1acc}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), ('\u{1b35}', 'ᭃ'), ('\u{1b80}', 'ᮂ'), @@ -8325,17 +8626,20 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{10eab}', '\u{10eac}'), ('𑀀', '𑀂'), ('\u{11038}', '\u{11045}'), - ('𑂂', '𑂂'), + ('\u{11073}', '\u{11074}'), + ('\u{11080}', '𑂂'), ('𑂰', '𑂸'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{11132}'), ('𑅅', '𑅆'), ('\u{11180}', '𑆂'), ('𑆳', '𑆿'), - ('\u{111ce}', '\u{111cf}'), + ('𑇎', '\u{111cf}'), ('𑈬', '\u{11234}'), ('\u{11237}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112e8}'), ('\u{11300}', '𑌃'), ('\u{1133e}', '𑍄'), @@ -8354,11 +8658,11 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{116ab}', '\u{116b5}'), ('\u{1171d}', '\u{1172a}'), ('𑠬', '𑠸'), - ('\u{11930}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('\u{11930}', '𑤵'), + ('𑤷', '𑤸'), ('\u{1193b}', '\u{1193c}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11942}'), + ('𑥀', '𑥀'), + ('𑥂', '𑥂'), ('𑧑', '\u{119d7}'), ('\u{119da}', '𑧟'), ('𑧤', '𑧤'), @@ -8381,16 +8685,21 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{11d90}', '\u{11d91}'), ('𑶓', '𑶖'), ('\u{11ef3}', '𑻶'), + ('\u{11f00}', '\u{11f01}'), + ('𑼃', '𑼃'), + ('𑼴', '\u{11f3a}'), + ('𑼾', '\u{11f40}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('\u{1bc9e}', '\u{1bc9e}'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e947}', '\u{1e947}'), ('🄰', '🅉'), ('🅐', '🅩'), @@ -8453,6 +8762,7 @@ pub const OTHER_LOWERCASE: &'static [(char, char)] = &[ ('ˠ', 'ˤ'), ('\u{345}', '\u{345}'), ('ͺ', 'ͺ'), + ('ჼ', 'ჼ'), ('ᴬ', 'ᵪ'), ('ᵸ', 'ᵸ'), ('ᶛ', 'ᶿ'), @@ -8464,8 +8774,15 @@ pub const OTHER_LOWERCASE: &'static [(char, char)] = &[ ('ⱼ', 'ⱽ'), ('ꚜ', 'ꚝ'), ('ꝰ', 'ꝰ'), + ('ꟲ', 'ꟴ'), ('ꟸ', 'ꟹ'), ('ꭜ', 'ꭟ'), + ('ꭩ', 'ꭩ'), + ('𐞀', '𐞀'), + ('𐞃', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), + ('𞀰', '𞁭'), ]; pub const OTHER_MATH: &'static [(char, char)] = &[ @@ -8651,13 +8968,14 @@ pub const PREPENDED_CONCATENATION_MARK: &'static [(char, char)] = &[ ('\u{600}', '\u{605}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), ('\u{8e2}', '\u{8e2}'), ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), ]; pub const QUOTATION_MARK: &'static [(char, char)] = &[ - ('\"', '\"'), + ('"', '"'), ('\'', '\''), ('«', '«'), ('»', '»'), @@ -8682,7 +9000,7 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('.', '.'), ('?', '?'), ('։', '։'), - ('؞', '؟'), + ('؝', '؟'), ('۔', '۔'), ('܀', '܂'), ('߹', '߹'), @@ -8701,12 +9019,14 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('᪨', '᪫'), ('᭚', '᭛'), ('᭞', '᭟'), + ('᭽', '᭾'), ('᰻', '᰼'), ('᱾', '᱿'), ('‼', '‽'), ('⁇', '⁉'), ('⸮', '⸮'), ('⸼', '⸼'), + ('⹓', '⹔'), ('。', '。'), ('꓿', '꓿'), ('꘎', '꘏'), @@ -8727,6 +9047,7 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('。', '。'), ('𐩖', '𐩗'), ('𐽕', '𐽙'), + ('𐾆', '𐾉'), ('𑁇', '𑁈'), ('𑂾', '𑃁'), ('𑅁', '𑅃'), @@ -8741,12 +9062,13 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('𑗉', '𑗗'), ('𑙁', '𑙂'), ('𑜼', '𑜾'), - ('\u{11944}', '\u{11944}'), - ('\u{11946}', '\u{11946}'), + ('𑥄', '𑥄'), + ('𑥆', '𑥆'), ('𑩂', '𑩃'), ('𑪛', '𑪜'), ('𑱁', '𑱂'), ('𑻷', '𑻸'), + ('𑽃', '𑽄'), ('𖩮', '𖩯'), ('𖫵', '𖫵'), ('𖬷', '𖬸'), @@ -8788,6 +9110,9 @@ pub const SOFT_DOTTED: &'static [(char, char)] = &[ ('𝘪', '𝘫'), ('𝙞', '𝙟'), ('𝚒', '𝚓'), + ('𝼚', '𝼚'), + ('𞁌', '𞁍'), + ('𞁨', '𞁨'), ]; pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ @@ -8802,7 +9127,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('׃', '׃'), ('،', '،'), ('؛', '؛'), - ('؞', '؟'), + ('؝', '؟'), ('۔', '۔'), ('܀', '܊'), ('܌', '܌'), @@ -8826,6 +9151,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('᪨', '᪫'), ('᭚', '᭛'), ('᭝', '᭟'), + ('᭽', '᭾'), ('᰻', '᰿'), ('᱾', '᱿'), ('‼', '‽'), @@ -8835,6 +9161,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('⹁', '⹁'), ('⹌', '⹌'), ('⹎', '⹏'), + ('⹓', '⹔'), ('、', '。'), ('꓾', '꓿'), ('꘍', '꘏'), @@ -8865,6 +9192,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('𐬺', '𐬿'), ('𐮙', '𐮜'), ('𐽕', '𐽙'), + ('𐾆', '𐾉'), ('𑁇', '𑁍'), ('𑂾', '𑃁'), ('𑅁', '𑅃'), @@ -8874,19 +9202,20 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('𑈸', '𑈼'), ('𑊩', '𑊩'), ('𑑋', '𑑍'), - ('\u{1145a}', '𑑛'), + ('𑑚', '𑑛'), ('𑗂', '𑗅'), ('𑗉', '𑗗'), ('𑙁', '𑙂'), ('𑜼', '𑜾'), - ('\u{11944}', '\u{11944}'), - ('\u{11946}', '\u{11946}'), + ('𑥄', '𑥄'), + ('𑥆', '𑥆'), ('𑩂', '𑩃'), ('𑪛', '𑪜'), ('𑪡', '𑪢'), ('𑱁', '𑱃'), ('𑱱', '𑱱'), ('𑻷', '𑻸'), + ('𑽃', '𑽄'), ('𒑰', '𒑴'), ('𖩮', '𖩯'), ('𖫵', '𖫵'), @@ -8898,8 +9227,8 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ]; pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[ - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), + ('㐀', '䶿'), + ('一', '鿿'), ('﨎', '﨏'), ('﨑', '﨑'), ('﨓', '﨔'), @@ -8907,12 +9236,13 @@ pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[ ('﨡', '﨡'), ('﨣', '﨤'), ('﨧', '﨩'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; pub const UPPERCASE: &'static [(char, char)] = &[ @@ -9349,7 +9679,7 @@ pub const UPPERCASE: &'static [(char, char)] = &[ ('Ⅰ', 'Ⅿ'), ('Ↄ', 'Ↄ'), ('Ⓐ', 'Ⓩ'), - ('Ⰰ', 'Ⱞ'), + ('Ⰰ', 'Ⱟ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'), ('Ⱨ', 'Ⱨ'), @@ -9514,13 +9844,21 @@ pub const UPPERCASE: &'static [(char, char)] = &[ ('Ꞻ', 'Ꞻ'), ('Ꞽ', 'Ꞽ'), ('Ꞿ', 'Ꞿ'), + ('Ꟁ', 'Ꟁ'), ('Ꟃ', 'Ꟃ'), - ('Ꞔ', '\u{a7c7}'), - ('\u{a7c9}', '\u{a7c9}'), - ('\u{a7f5}', '\u{a7f5}'), + ('Ꞔ', 'Ꟈ'), + ('Ꟊ', 'Ꟊ'), + ('Ꟑ', 'Ꟑ'), + ('Ꟗ', 'Ꟗ'), + ('Ꟙ', 'Ꟙ'), + ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), ('𐒰', '𐓓'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), @@ -9563,6 +9901,7 @@ pub const UPPERCASE: &'static [(char, char)] = &[ pub const VARIATION_SELECTOR: &'static [(char, char)] = &[ ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{fe00}', '\u{fe0f}'), ('\u{e0100}', '\u{e01ef}'), ]; @@ -9632,9 +9971,9 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ࠀ', '\u{82d}'), ('ࡀ', '\u{85b}'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), - ('\u{8d3}', '\u{8e1}'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('\u{898}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -9718,11 +10057,12 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), - ('ఽ', 'ౄ'), + ('\u{c3c}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', '\u{c63}'), ('౦', '౯'), ('ಀ', 'ಃ'), @@ -9735,10 +10075,10 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{cc6}', 'ೈ'), ('ೊ', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), ('೦', '೯'), - ('ೱ', 'ೲ'), + ('ೱ', 'ೳ'), ('\u{d00}', 'ഌ'), ('എ', 'ഐ'), ('ഒ', '\u{d44}'), @@ -9771,7 +10111,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ວ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('໐', '໙'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), @@ -9818,9 +10158,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', '\u{1714}'), - ('ᜠ', '\u{1734}'), + ('ᜀ', '᜕'), + ('ᜟ', '᜴'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -9830,7 +10169,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ៜ', '\u{17dd}'), ('០', '៩'), ('\u{180b}', '\u{180d}'), - ('᠐', '᠙'), + ('\u{180f}', '᠙'), ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢪ'), ('ᢰ', 'ᣵ'), @@ -9849,8 +10188,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('᪐', '᪙'), ('ᪧ', 'ᪧ'), ('\u{1ab0}', '\u{1abd}'), - ('\u{1abf}', '\u{1ac0}'), - ('\u{1b00}', 'ᭋ'), + ('\u{1abf}', '\u{1ace}'), + ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '᯳'), @@ -9862,8 +10201,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', 'ᳺ'), - ('ᴀ', '\u{1df9}'), - ('\u{1dfb}', 'ἕ'), + ('ᴀ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), @@ -9903,9 +10241,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⳤ'), + ('Ⰰ', 'ⳤ'), ('Ⳬ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), @@ -9933,11 +10269,10 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ー', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('ㇰ', 'ㇿ'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), - ('ꀀ', 'ꒌ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘫ'), @@ -9946,9 +10281,11 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꠧ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), ('ꢀ', '\u{a8c5}'), @@ -9975,7 +10312,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), - ('ꭜ', '\u{ab69}'), + ('ꭜ', 'ꭩ'), ('ꭰ', 'ꯪ'), ('꯬', '\u{abed}'), ('꯰', '꯹'), @@ -10043,9 +10380,20 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), @@ -10080,31 +10428,33 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), - ('\u{10e80}', '\u{10ea9}'), + ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), - ('\u{10eb0}', '\u{10eb1}'), - ('𐼀', '𐼜'), + ('𐺰', '𐺱'), + ('\u{10efd}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), - ('\u{10fb0}', '\u{10fc4}'), + ('𐽰', '\u{10f85}'), + ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀀', '\u{11046}'), - ('𑁦', '𑁯'), + ('𑁦', '𑁵'), ('\u{1107f}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('𑃐', '𑃨'), ('𑃰', '𑃹'), ('\u{11100}', '\u{11134}'), ('𑄶', '𑄿'), - ('𑅄', '\u{11147}'), + ('𑅄', '𑅇'), ('𑅐', '\u{11173}'), ('𑅶', '𑅶'), ('\u{11180}', '𑇄'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', '𑇚'), + ('𑇎', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), + ('\u{1123e}', '\u{11241}'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -10129,7 +10479,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{11370}', '\u{11374}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), - ('\u{1145e}', '\u{11461}'), + ('\u{1145e}', '𑑡'), ('𑒀', '𑓅'), ('𑓇', '𑓇'), ('𑓐', '𑓙'), @@ -10144,16 +10494,17 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), + ('𑝀', '𑝆'), ('𑠀', '\u{1183a}'), ('𑢠', '𑣩'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤵'), + ('𑤷', '𑤸'), ('\u{1193b}', '\u{11943}'), - ('\u{11950}', '\u{11959}'), + ('𑥐', '𑥙'), ('𑦠', '𑦧'), ('𑦪', '\u{119d7}'), ('\u{119da}', '𑧡'), @@ -10162,7 +10513,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{11a47}', '\u{11a47}'), ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -10184,15 +10535,23 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𑶓', '𑶘'), ('𑶠', '𑶩'), ('𑻠', '𑻶'), - ('\u{11fb0}', '\u{11fb0}'), + ('\u{11f00}', '𑼐'), + ('𑼒', '\u{11f3a}'), + ('𑼾', '\u{11f42}'), + ('𑽐', '𑽙'), + ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('\u{13440}', '\u{13455}'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), + ('𖩰', '𖪾'), + ('𖫀', '𖫉'), ('𖫐', '𖫭'), ('\u{16af0}', '\u{16af4}'), ('𖬀', '\u{16b36}'), @@ -10206,12 +10565,17 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('𛀀', '𛄞'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), @@ -10219,6 +10583,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('𝅭', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -10262,16 +10628,26 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), ('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅎'), + ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), + ('𞓐', '𞓹'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('\u{1e8d0}', '\u{1e8d6}'), ('𞤀', '𞥋'), @@ -10309,14 +10685,15 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), - ('\u{1fbf0}', '\u{1fbf9}'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('🯰', '🯹'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ('\u{e0100}', '\u{e01ef}'), ]; @@ -10370,8 +10747,9 @@ pub const XID_START: &'static [(char, char)] = &[ ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('ࢠ', 'ࣉ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'), ('ॐ', 'ॐ'), @@ -10436,6 +10814,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('ప', 'హ'), ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'), ('ಅ', 'ಌ'), @@ -10444,10 +10823,10 @@ pub const XID_START: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ಽ'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', 'ೡ'), ('ೱ', 'ೲ'), - ('\u{d04}', 'ഌ'), + ('ഄ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ഽ'), @@ -10515,9 +10894,8 @@ pub const XID_START: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', 'ᜑ'), - ('ᜠ', 'ᜱ'), + ('ᜀ', 'ᜑ'), + ('ᜟ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -10537,7 +10915,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('ᨠ', 'ᩔ'), ('ᪧ', 'ᪧ'), ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭋ'), + ('ᭅ', 'ᭌ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), @@ -10587,9 +10965,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⳤ'), + ('Ⰰ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), @@ -10616,11 +10992,10 @@ pub const XID_START: &'static [(char, char)] = &[ ('ー', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('ㇰ', 'ㇿ'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), - ('ꀀ', 'ꒌ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'), @@ -10630,9 +11005,11 @@ pub const XID_START: &'static [(char, char)] = &[ ('ꚠ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꠁ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠢ'), @@ -10669,7 +11046,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), - ('ꭜ', '\u{ab69}'), + ('ꭜ', 'ꭩ'), ('ꭰ', 'ꯢ'), ('가', '힣'), ('ힰ', 'ퟆ'), @@ -10728,9 +11105,20 @@ pub const XID_START: &'static [(char, char)] = &[ ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), @@ -10761,19 +11149,22 @@ pub const XID_START: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('𐺀', '𐺩'), + ('𐺰', '𐺱'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), - ('\u{10fb0}', '\u{10fc4}'), + ('𐽰', '𐾁'), + ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀃', '𑀷'), + ('𑁱', '𑁲'), + ('𑁵', '𑁵'), ('𑂃', '𑂯'), ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅄', '𑅄'), - ('\u{11147}', '\u{11147}'), + ('𑅇', '𑅇'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'), @@ -10782,6 +11173,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'), + ('𑈿', '𑉀'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -10799,7 +11191,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𑍝', '𑍡'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), - ('𑑟', '\u{11461}'), + ('𑑟', '𑑡'), ('𑒀', '𑒯'), ('𑓄', '𑓅'), ('𑓇', '𑓇'), @@ -10810,15 +11202,16 @@ pub const XID_START: &'static [(char, char)] = &[ ('𑚀', '𑚪'), ('𑚸', '𑚸'), ('𑜀', '𑜚'), + ('𑝀', '𑝆'), ('𑠀', '𑠫'), ('𑢠', '𑣟'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤯'), + ('𑤿', '𑤿'), + ('𑥁', '𑥁'), ('𑦠', '𑦧'), ('𑦪', '𑧐'), ('𑧡', '𑧡'), @@ -10829,7 +11222,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𑩐', '𑩐'), ('𑩜', '𑪉'), ('𑪝', '𑪝'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -10843,14 +11236,20 @@ pub const XID_START: &'static [(char, char)] = &[ ('𑵪', '𑶉'), ('𑶘', '𑶘'), ('𑻠', '𑻲'), - ('\u{11fb0}', '\u{11fb0}'), + ('𑼂', '𑼂'), + ('𑼄', '𑼐'), + ('𑼒', '𑼳'), + ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('𓑁', '𓑆'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), + ('𖩰', '𖪾'), ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), @@ -10863,10 +11262,15 @@ pub const XID_START: &'static [(char, char)] = &[ ('𖿠', '𖿡'), ('𖿣', '𖿣'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('𛀀', '𛄞'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), @@ -10903,10 +11307,19 @@ pub const XID_START: &'static [(char, char)] = &[ ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), + ('𞀰', '𞁭'), ('𞄀', '𞄬'), ('𞄷', '𞄽'), ('𞅎', '𞅎'), + ('𞊐', '𞊭'), ('𞋀', '𞋫'), + ('𞓐', '𞓫'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('𞤀', '𞥃'), ('𞥋', '𞥋'), @@ -10943,11 +11356,12 @@ pub const XID_START: &'static [(char, char)] = &[ ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; diff --git a/src/unicode_tables/property_names.rs b/src/unicode_tables/property_names.rs index 6393df2..599a123 100644 --- a/src/unicode_tables/property_names.rs +++ b/src/unicode_tables/property_names.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-names ucd-13.0.0 +// ucd-generate property-names ucd-15.0.0 // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("age", "Age"), diff --git a/src/unicode_tables/property_values.rs b/src/unicode_tables/property_values.rs index c46653a..cb2d32f 100644 --- a/src/unicode_tables/property_values.rs +++ b/src/unicode_tables/property_values.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-values ucd-13.0.0 --include gc,script,scx,age,gcb,wb,sb +// ucd-generate property-values ucd-15.0.0 --include gc,script,scx,age,gcb,wb,sb // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const PROPERTY_VALUES: &'static [( &'static str, @@ -19,6 +19,8 @@ pub const PROPERTY_VALUES: &'static [( ("12.0", "V12_0"), ("12.1", "V12_1"), ("13.0", "V13_0"), + ("14.0", "V14_0"), + ("15.0", "V15_0"), ("2.0", "V2_0"), ("2.1", "V2_1"), ("3.0", "V3_0"), @@ -44,6 +46,8 @@ pub const PROPERTY_VALUES: &'static [( ("v120", "V12_0"), ("v121", "V12_1"), ("v130", "V13_0"), + ("v140", "V14_0"), + ("v150", "V15_0"), ("v20", "V2_0"), ("v21", "V2_1"), ("v30", "V3_0"), @@ -233,9 +237,11 @@ pub const PROPERTY_VALUES: &'static [( ("common", "Common"), ("copt", "Coptic"), ("coptic", "Coptic"), + ("cpmn", "Cypro_Minoan"), ("cprt", "Cypriot"), ("cuneiform", "Cuneiform"), ("cypriot", "Cypriot"), + ("cyprominoan", "Cypro_Minoan"), ("cyrillic", "Cyrillic"), ("cyrl", "Cyrillic"), ("deseret", "Deseret"), @@ -304,6 +310,7 @@ pub const PROPERTY_VALUES: &'static [( ("kannada", "Kannada"), ("katakana", "Katakana"), ("katakanaorhiragana", "Katakana_Or_Hiragana"), + ("kawi", "Kawi"), ("kayahli", "Kayah_Li"), ("khar", "Kharoshthi"), ("kharoshthi", "Kharoshthi"), @@ -368,6 +375,8 @@ pub const PROPERTY_VALUES: &'static [( ("myanmar", "Myanmar"), ("mymr", "Myanmar"), ("nabataean", "Nabataean"), + ("nagm", "Nag_Mundari"), + ("nagmundari", "Nag_Mundari"), ("nand", "Nandinagari"), ("nandinagari", "Nandinagari"), ("narb", "Old_North_Arabian"), @@ -391,6 +400,7 @@ pub const PROPERTY_VALUES: &'static [( ("oldsogdian", "Old_Sogdian"), ("oldsoutharabian", "Old_South_Arabian"), ("oldturkic", "Old_Turkic"), + ("olduyghur", "Old_Uyghur"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"), ("orya", "Oriya"), @@ -398,6 +408,7 @@ pub const PROPERTY_VALUES: &'static [( ("osge", "Osage"), ("osma", "Osmanya"), ("osmanya", "Osmanya"), + ("ougr", "Old_Uyghur"), ("pahawhhmong", "Pahawh_Hmong"), ("palm", "Palmyrene"), ("palmyrene", "Palmyrene"), @@ -462,6 +473,7 @@ pub const PROPERTY_VALUES: &'static [( ("tamil", "Tamil"), ("taml", "Tamil"), ("tang", "Tangut"), + ("tangsa", "Tangsa"), ("tangut", "Tangut"), ("tavt", "Tai_Viet"), ("telu", "Telugu"), @@ -476,11 +488,15 @@ pub const PROPERTY_VALUES: &'static [( ("tifinagh", "Tifinagh"), ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), + ("tnsa", "Tangsa"), + ("toto", "Toto"), ("ugar", "Ugaritic"), ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), ("vai", "Vai"), ("vaii", "Vai"), + ("vith", "Vithkuqi"), + ("vithkuqi", "Vithkuqi"), ("wancho", "Wancho"), ("wara", "Warang_Citi"), ("warangciti", "Warang_Citi"), @@ -550,9 +566,11 @@ pub const PROPERTY_VALUES: &'static [( ("common", "Common"), ("copt", "Coptic"), ("coptic", "Coptic"), + ("cpmn", "Cypro_Minoan"), ("cprt", "Cypriot"), ("cuneiform", "Cuneiform"), ("cypriot", "Cypriot"), + ("cyprominoan", "Cypro_Minoan"), ("cyrillic", "Cyrillic"), ("cyrl", "Cyrillic"), ("deseret", "Deseret"), @@ -621,6 +639,7 @@ pub const PROPERTY_VALUES: &'static [( ("kannada", "Kannada"), ("katakana", "Katakana"), ("katakanaorhiragana", "Katakana_Or_Hiragana"), + ("kawi", "Kawi"), ("kayahli", "Kayah_Li"), ("khar", "Kharoshthi"), ("kharoshthi", "Kharoshthi"), @@ -685,6 +704,8 @@ pub const PROPERTY_VALUES: &'static [( ("myanmar", "Myanmar"), ("mymr", "Myanmar"), ("nabataean", "Nabataean"), + ("nagm", "Nag_Mundari"), + ("nagmundari", "Nag_Mundari"), ("nand", "Nandinagari"), ("nandinagari", "Nandinagari"), ("narb", "Old_North_Arabian"), @@ -708,6 +729,7 @@ pub const PROPERTY_VALUES: &'static [( ("oldsogdian", "Old_Sogdian"), ("oldsoutharabian", "Old_South_Arabian"), ("oldturkic", "Old_Turkic"), + ("olduyghur", "Old_Uyghur"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"), ("orya", "Oriya"), @@ -715,6 +737,7 @@ pub const PROPERTY_VALUES: &'static [( ("osge", "Osage"), ("osma", "Osmanya"), ("osmanya", "Osmanya"), + ("ougr", "Old_Uyghur"), ("pahawhhmong", "Pahawh_Hmong"), ("palm", "Palmyrene"), ("palmyrene", "Palmyrene"), @@ -779,6 +802,7 @@ pub const PROPERTY_VALUES: &'static [( ("tamil", "Tamil"), ("taml", "Tamil"), ("tang", "Tangut"), + ("tangsa", "Tangsa"), ("tangut", "Tangut"), ("tavt", "Tai_Viet"), ("telu", "Telugu"), @@ -793,11 +817,15 @@ pub const PROPERTY_VALUES: &'static [( ("tifinagh", "Tifinagh"), ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), + ("tnsa", "Tangsa"), + ("toto", "Toto"), ("ugar", "Ugaritic"), ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), ("vai", "Vai"), ("vaii", "Vai"), + ("vith", "Vithkuqi"), + ("vithkuqi", "Vithkuqi"), ("wancho", "Wancho"), ("wara", "Warang_Citi"), ("warangciti", "Warang_Citi"), diff --git a/src/unicode_tables/script.rs b/src/unicode_tables/script.rs index cd86cba..cc5c400 100644 --- a/src/unicode_tables/script.rs +++ b/src/unicode_tables/script.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate script ucd-13.0.0 --chars +// ucd-generate script ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Adlam", ADLAM), @@ -35,6 +35,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Coptic", COPTIC), ("Cuneiform", CUNEIFORM), ("Cypriot", CYPRIOT), + ("Cypro_Minoan", CYPRO_MINOAN), ("Cyrillic", CYRILLIC), ("Deseret", DESERET), ("Devanagari", DEVANAGARI), @@ -68,6 +69,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Kaithi", KAITHI), ("Kannada", KANNADA), ("Katakana", KATAKANA), + ("Kawi", KAWI), ("Kayah_Li", KAYAH_LI), ("Kharoshthi", KHAROSHTHI), ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT), @@ -102,6 +104,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Multani", MULTANI), ("Myanmar", MYANMAR), ("Nabataean", NABATAEAN), + ("Nag_Mundari", NAG_MUNDARI), ("Nandinagari", NANDINAGARI), ("New_Tai_Lue", NEW_TAI_LUE), ("Newa", NEWA), @@ -118,6 +121,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Old_Sogdian", OLD_SOGDIAN), ("Old_South_Arabian", OLD_SOUTH_ARABIAN), ("Old_Turkic", OLD_TURKIC), + ("Old_Uyghur", OLD_UYGHUR), ("Oriya", ORIYA), ("Osage", OSAGE), ("Osmanya", OSMANYA), @@ -149,6 +153,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tai_Viet", TAI_VIET), ("Takri", TAKRI), ("Tamil", TAMIL), + ("Tangsa", TANGSA), ("Tangut", TANGUT), ("Telugu", TELUGU), ("Thaana", THAANA), @@ -156,8 +161,10 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tibetan", TIBETAN), ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), + ("Toto", TOTO), ("Ugaritic", UGARITIC), ("Vai", VAI), + ("Vithkuqi", VITHKUQI), ("Wancho", WANCHO), ("Warang_Citi", WARANG_CITI), ("Yezidi", YEZIDI), @@ -169,7 +176,7 @@ pub const ADLAM: &'static [(char, char)] = &[('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')]; pub const AHOM: &'static [(char, char)] = - &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜿')]; + &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑝆')]; pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('𔐀', '𔙆')]; @@ -177,26 +184,27 @@ pub const ARABIC: &'static [(char, char)] = &[ ('\u{600}', '\u{604}'), ('؆', '؋'), ('؍', '\u{61a}'), - ('\u{61c}', '\u{61c}'), - ('؞', '؞'), + ('\u{61c}', '؞'), ('ؠ', 'ؿ'), ('ف', 'ي'), ('\u{656}', 'ٯ'), ('ٱ', '\u{6dc}'), ('۞', 'ۿ'), ('ݐ', 'ݿ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), - ('\u{8d3}', '\u{8e1}'), + ('ࡰ', 'ࢎ'), + ('\u{890}', '\u{891}'), + ('\u{898}', '\u{8e1}'), ('\u{8e3}', '\u{8ff}'), - ('ﭐ', '﯁'), + ('ﭐ', '﯂'), ('ﯓ', 'ﴽ'), - ('ﵐ', 'ﶏ'), + ('﵀', 'ﶏ'), ('ﶒ', 'ﷇ'), - ('ﷰ', '﷽'), + ('﷏', '﷏'), + ('ﷰ', '﷿'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('𐹠', '𐹾'), + ('\u{10efd}', '\u{10eff}'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), @@ -238,7 +246,7 @@ pub const ARMENIAN: &'static [(char, char)] = pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')]; -pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭋ'), ('᭐', '᭼')]; +pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭐', '᭾')]; pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')]; @@ -268,10 +276,10 @@ pub const BHAIKSUKI: &'static [(char, char)] = &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')]; pub const BOPOMOFO: &'static [(char, char)] = - &[('˪', '˫'), ('ㄅ', 'ㄯ'), ('ㆠ', '\u{31bf}')]; + &[('˪', '˫'), ('ㄅ', 'ㄯ'), ('ㆠ', 'ㆿ')]; pub const BRAHMI: &'static [(char, char)] = - &[('𑀀', '𑁍'), ('𑁒', '𑁯'), ('\u{1107f}', '\u{1107f}')]; + &[('𑀀', '𑁍'), ('𑁒', '𑁵'), ('\u{1107f}', '\u{1107f}')]; pub const BRAILLE: &'static [(char, char)] = &[('⠀', '⣿')]; @@ -280,7 +288,7 @@ pub const BUGINESE: &'static [(char, char)] = &[('ᨀ', '\u{1a1b}'), ('᨞', ' pub const BUHID: &'static [(char, char)] = &[('ᝀ', '\u{1753}')]; pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = - &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ')]; + &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'), ('𑪰', '𑪿')]; pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')]; @@ -288,7 +296,7 @@ pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[('𐔰', '𐕣'), ('𐕯', '𐕯')]; pub const CHAKMA: &'static [(char, char)] = - &[('\u{11100}', '\u{11134}'), ('𑄶', '\u{11147}')]; + &[('\u{11100}', '\u{11134}'), ('𑄶', '𑅇')]; pub const CHAM: &'static [(char, char)] = &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')]; @@ -296,10 +304,10 @@ pub const CHAM: &'static [(char, char)] = pub const CHEROKEE: &'static [(char, char)] = &[('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ')]; -pub const CHORASMIAN: &'static [(char, char)] = &[('\u{10fb0}', '\u{10fcb}')]; +pub const CHORASMIAN: &'static [(char, char)] = &[('𐾰', '𐿋')]; pub const COMMON: &'static [(char, char)] = &[ - ('\u{0}', '@'), + ('\0', '@'), ('[', '`'), ('{', '©'), ('«', '¹'), @@ -339,7 +347,7 @@ pub const COMMON: &'static [(char, char)] = &[ ('\u{2066}', '⁰'), ('⁴', '⁾'), ('₀', '₎'), - ('₠', '₿'), + ('₠', '⃀'), ('℀', '℥'), ('℧', '℩'), ('ℬ', 'ℱ'), @@ -351,8 +359,8 @@ pub const COMMON: &'static [(char, char)] = &[ ('①', '⟿'), ('⤀', '⭳'), ('⭶', '⮕'), - ('\u{2b97}', '⯿'), - ('⸀', '\u{2e52}'), + ('⮗', '⯿'), + ('⸀', '⹝'), ('⿰', '⿻'), ('\u{3000}', '〄'), ('〆', '〆'), @@ -375,7 +383,7 @@ pub const COMMON: &'static [(char, char)] = &[ ('꤮', '꤮'), ('ꧏ', 'ꧏ'), ('꭛', '꭛'), - ('\u{ab6a}', '\u{ab6b}'), + ('꭪', '꭫'), ('﴾', '﴿'), ('︐', '︙'), ('︰', '﹒'), @@ -393,18 +401,19 @@ pub const COMMON: &'static [(char, char)] = &[ ('𐄀', '𐄂'), ('𐄇', '𐄳'), ('𐄷', '𐄿'), - ('𐆐', '\u{1019c}'), + ('𐆐', '𐆜'), ('𐇐', '𐇼'), ('𐋡', '𐋻'), - ('𖿢', '𖿣'), ('\u{1bca0}', '\u{1bca3}'), + ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅦'), ('𝅪', '\u{1d17a}'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), - ('𝆮', '𝇨'), + ('𝆮', '𝇪'), + ('𝋀', '𝋓'), ('𝋠', '𝋳'), ('𝌀', '𝍖'), ('𝍠', '𝍸'), @@ -437,39 +446,38 @@ pub const COMMON: &'static [(char, char)] = &[ ('🂱', '🂿'), ('🃁', '🃏'), ('🃑', '🃵'), - ('🄀', '\u{1f1ad}'), + ('🄀', '🆭'), ('🇦', '🇿'), ('🈁', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉐', '🉑'), ('🉠', '🉥'), - ('🌀', '\u{1f6d7}'), - ('🛠', '🛬'), - ('🛰', '\u{1f6fc}'), - ('🜀', '🝳'), - ('🞀', '🟘'), + ('🌀', '🛗'), + ('🛜', '🛬'), + ('🛰', '🛼'), + ('🜀', '🝶'), + ('🝻', '🟙'), ('🟠', '🟫'), + ('🟰', '🟰'), ('🠀', '🠋'), ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('🤀', '\u{1f978}'), - ('🥺', '\u{1f9cb}'), - ('🧍', '🩓'), + ('🢰', '🢱'), + ('🤀', '🩓'), ('🩠', '🩭'), - ('🩰', '\u{1fa74}'), - ('🩸', '🩺'), - ('🪀', '\u{1fa86}'), - ('🪐', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('🩰', '🩼'), + ('🪀', '🪈'), + ('🪐', '🪽'), + ('🪿', '🫅'), + ('🫎', '🫛'), + ('🫠', '🫨'), + ('🫰', '🫸'), + ('🬀', '🮒'), + ('🮔', '🯊'), + ('🯰', '🯹'), ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'), ]; @@ -483,6 +491,8 @@ pub const CUNEIFORM: &'static [(char, char)] = pub const CYPRIOT: &'static [(char, char)] = &[('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐠿')]; +pub const CYPRO_MINOAN: &'static [(char, char)] = &[('𒾐', '𒿲')]; + pub const CYRILLIC: &'static [(char, char)] = &[ ('Ѐ', '\u{484}'), ('\u{487}', 'ԯ'), @@ -492,6 +502,8 @@ pub const CYRILLIC: &'static [(char, char)] = &[ ('\u{2de0}', '\u{2dff}'), ('Ꙁ', '\u{a69f}'), ('\u{fe2e}', '\u{fe2f}'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), ]; pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')]; @@ -501,17 +513,18 @@ pub const DEVANAGARI: &'static [(char, char)] = &[ ('\u{955}', '\u{963}'), ('०', 'ॿ'), ('\u{a8e0}', '\u{a8ff}'), + ('𑬀', '𑬉'), ]; pub const DIVES_AKURU: &'static [(char, char)] = &[ - ('\u{11900}', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193b}', '\u{11946}'), - ('\u{11950}', '\u{11959}'), + ('𑤀', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤵'), + ('𑤷', '𑤸'), + ('\u{1193b}', '𑥆'), + ('𑥐', '𑥙'), ]; pub const DOGRA: &'static [(char, char)] = &[('𑠀', '𑠻')]; @@ -520,7 +533,7 @@ pub const DUPLOYAN: &'static [(char, char)] = &[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '𛲟')]; pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = - &[('𓀀', '𓐮'), ('\u{13430}', '\u{13438}')]; + &[('𓀀', '\u{13455}')]; pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')]; @@ -559,6 +572,10 @@ pub const ETHIOPIC: &'static [(char, char)] = &[ ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ]; pub const GEORGIAN: &'static [(char, char)] = &[ @@ -575,8 +592,7 @@ pub const GEORGIAN: &'static [(char, char)] = &[ ]; pub const GLAGOLITIC: &'static [(char, char)] = &[ - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), + ('Ⰰ', 'ⱟ'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), @@ -696,18 +712,20 @@ pub const HAN: &'static [(char, char)] = &[ ('〇', '〇'), ('〡', '〩'), ('〸', '〻'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), + ('㐀', '䶿'), + ('一', '鿿'), ('豈', '舘'), ('並', '龎'), - ('\u{16ff0}', '\u{16ff1}'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('𖿢', '𖿣'), + ('𖿰', '𖿱'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; pub const HANGUL: &'static [(char, char)] = &[ @@ -730,7 +748,7 @@ pub const HANGUL: &'static [(char, char)] = &[ pub const HANIFI_ROHINGYA: &'static [(char, char)] = &[('𐴀', '\u{10d27}'), ('𐴰', '𐴹')]; -pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '\u{1734}')]; +pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '᜴')]; pub const HATRAN: &'static [(char, char)] = &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')]; @@ -747,8 +765,14 @@ pub const HEBREW: &'static [(char, char)] = &[ ('צּ', 'ﭏ'), ]; -pub const HIRAGANA: &'static [(char, char)] = - &[('ぁ', 'ゖ'), ('ゝ', 'ゟ'), ('𛀁', '𛄞'), ('𛅐', '𛅒'), ('🈀', '🈀')]; +pub const HIRAGANA: &'static [(char, char)] = &[ + ('ぁ', 'ゖ'), + ('ゝ', 'ゟ'), + ('𛀁', '𛄟'), + ('𛄲', '𛄲'), + ('𛅐', '𛅒'), + ('🈀', '🈀'), +]; pub const IMPERIAL_ARAMAIC: &'static [(char, char)] = &[('𐡀', '𐡕'), ('𐡗', '𐡟')]; @@ -759,15 +783,14 @@ pub const INHERITED: &'static [(char, char)] = &[ ('\u{64b}', '\u{655}'), ('\u{670}', '\u{670}'), ('\u{951}', '\u{954}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce0}'), ('\u{1ce2}', '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{200c}', '\u{200d}'), ('\u{20d0}', '\u{20f0}'), ('\u{302a}', '\u{302d}'), @@ -777,6 +800,8 @@ pub const INHERITED: &'static [(char, char)] = &[ ('\u{101fd}', '\u{101fd}'), ('\u{102e0}', '\u{102e0}'), ('\u{1133b}', '\u{1133b}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), @@ -794,7 +819,7 @@ pub const JAVANESE: &'static [(char, char)] = &[('\u{a980}', '꧍'), ('꧐', '꧙'), ('꧞', '꧟')]; pub const KAITHI: &'static [(char, char)] = - &[('\u{11080}', '𑃁'), ('\u{110cd}', '\u{110cd}')]; + &[('\u{11080}', '\u{110c2}'), ('\u{110cd}', '\u{110cd}')]; pub const KANNADA: &'static [(char, char)] = &[ ('ಀ', 'ಌ'), @@ -806,10 +831,10 @@ pub const KANNADA: &'static [(char, char)] = &[ ('\u{cc6}', 'ೈ'), ('ೊ', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), ('೦', '೯'), - ('ೱ', 'ೲ'), + ('ೱ', 'ೳ'), ]; pub const KATAKANA: &'static [(char, char)] = &[ @@ -820,10 +845,18 @@ pub const KATAKANA: &'static [(char, char)] = &[ ('㌀', '㍗'), ('ヲ', 'ッ'), ('ア', 'ン'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), ('𛀀', '𛀀'), + ('𛄠', '𛄢'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ]; +pub const KAWI: &'static [(char, char)] = + &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '𑽙')]; + pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '\u{a92d}'), ('꤯', '꤯')]; pub const KHAROSHTHI: &'static [(char, char)] = &[ @@ -838,12 +871,12 @@ pub const KHAROSHTHI: &'static [(char, char)] = &[ ]; pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] = - &[('\u{16fe4}', '\u{16fe4}'), ('\u{18b00}', '\u{18cd5}')]; + &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕')]; pub const KHMER: &'static [(char, char)] = &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')]; -pub const KHOJKI: &'static [(char, char)] = &[('𑈀', '𑈑'), ('𑈓', '\u{1123e}')]; +pub const KHOJKI: &'static [(char, char)] = &[('𑈀', '𑈑'), ('𑈓', '\u{11241}')]; pub const KHUDAWADI: &'static [(char, char)] = &[('𑊰', '\u{112ea}'), ('𑋰', '𑋹')]; @@ -857,7 +890,7 @@ pub const LAO: &'static [(char, char)] = &[ ('ວ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('໐', '໙'), ('ໜ', 'ໟ'), ]; @@ -886,15 +919,22 @@ pub const LATIN: &'static [(char, char)] = &[ ('Ⅰ', 'ↈ'), ('Ⱡ', 'Ɀ'), ('Ꜣ', 'ꞇ'), - ('Ꞌ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꟿ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꟿ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭤ'), - ('ꭦ', '\u{ab69}'), + ('ꭦ', 'ꭩ'), ('ff', 'st'), ('A', 'Z'), ('a', 'z'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), ]; pub const LEPCHA: &'static [(char, char)] = @@ -921,8 +961,7 @@ pub const LINEAR_B: &'static [(char, char)] = &[ ('𐂀', '𐃺'), ]; -pub const LISU: &'static [(char, char)] = - &[('ꓐ', '꓿'), ('\u{11fb0}', '\u{11fb0}')]; +pub const LISU: &'static [(char, char)] = &[('ꓐ', '꓿'), ('𑾰', '𑾰')]; pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')]; @@ -978,15 +1017,8 @@ pub const MIAO: &'static [(char, char)] = pub const MODI: &'static [(char, char)] = &[('𑘀', '𑙄'), ('𑙐', '𑙙')]; -pub const MONGOLIAN: &'static [(char, char)] = &[ - ('᠀', '᠁'), - ('᠄', '᠄'), - ('᠆', '\u{180e}'), - ('᠐', '᠙'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢪ'), - ('𑙠', '𑙬'), -]; +pub const MONGOLIAN: &'static [(char, char)] = + &[('᠀', '᠁'), ('᠄', '᠄'), ('᠆', '᠙'), ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢪ'), ('𑙠', '𑙬')]; pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')]; @@ -998,13 +1030,15 @@ pub const MYANMAR: &'static [(char, char)] = pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')]; +pub const NAG_MUNDARI: &'static [(char, char)] = &[('𞓐', '𞓹')]; + pub const NANDINAGARI: &'static [(char, char)] = &[('𑦠', '𑦧'), ('𑦪', '\u{119d7}'), ('\u{119da}', '𑧤')]; pub const NEW_TAI_LUE: &'static [(char, char)] = &[('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')]; -pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '\u{11461}')]; +pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '𑑡')]; pub const NKO: &'static [(char, char)] = &[('߀', 'ߺ'), ('\u{7fd}', '߿')]; @@ -1034,6 +1068,8 @@ pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐩠', '𐩿')]; pub const OLD_TURKIC: &'static [(char, char)] = &[('𐰀', '𐱈')]; +pub const OLD_UYGHUR: &'static [(char, char)] = &[('𐽰', '𐾉')]; + pub const ORIYA: &'static [(char, char)] = &[ ('\u{b01}', 'ଃ'), ('ଅ', 'ଌ'), @@ -1118,7 +1154,7 @@ pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('ꠀ', '\u{a82c}')]; pub const SYRIAC: &'static [(char, char)] = &[('܀', '܍'), ('\u{70f}', '\u{74a}'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ')]; -pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', 'ᜌ'), ('ᜎ', '\u{1714}')]; +pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', '᜕'), ('ᜟ', 'ᜟ')]; pub const TAGBANWA: &'static [(char, char)] = &[('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')]; @@ -1135,7 +1171,7 @@ pub const TAI_THAM: &'static [(char, char)] = &[ pub const TAI_VIET: &'static [(char, char)] = &[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')]; -pub const TAKRI: &'static [(char, char)] = &[('𑚀', '𑚸'), ('𑛀', '𑛉')]; +pub const TAKRI: &'static [(char, char)] = &[('𑚀', '𑚹'), ('𑛀', '𑛉')]; pub const TAMIL: &'static [(char, char)] = &[ ('\u{b82}', 'ஃ'), @@ -1158,23 +1194,22 @@ pub const TAMIL: &'static [(char, char)] = &[ ('𑿿', '𑿿'), ]; -pub const TANGUT: &'static [(char, char)] = &[ - ('𖿠', '𖿠'), - ('𗀀', '𘟷'), - ('𘠀', '\u{18aff}'), - ('\u{18d00}', '\u{18d08}'), -]; +pub const TANGSA: &'static [(char, char)] = &[('𖩰', '𖪾'), ('𖫀', '𖫉')]; + +pub const TANGUT: &'static [(char, char)] = + &[('𖿠', '𖿠'), ('𗀀', '𘟷'), ('𘠀', '𘫿'), ('𘴀', '𘴈')]; pub const TELUGU: &'static [(char, char)] = &[ ('\u{c00}', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), - ('ఽ', 'ౄ'), + ('\u{c3c}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', '\u{c63}'), ('౦', '౯'), ('౷', '౿'), @@ -1199,19 +1234,29 @@ pub const TIFINAGH: &'static [(char, char)] = pub const TIRHUTA: &'static [(char, char)] = &[('𑒀', '𑓇'), ('𑓐', '𑓙')]; +pub const TOTO: &'static [(char, char)] = &[('𞊐', '\u{1e2ae}')]; + pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')]; pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')]; +pub const VITHKUQI: &'static [(char, char)] = &[ + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), +]; + pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('𞋿', '𞋿')]; pub const WARANG_CITI: &'static [(char, char)] = &[('𑢠', '𑣲'), ('𑣿', '𑣿')]; -pub const YEZIDI: &'static [(char, char)] = &[ - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eab}', '\u{10ead}'), - ('\u{10eb0}', '\u{10eb1}'), -]; +pub const YEZIDI: &'static [(char, char)] = + &[('𐺀', '𐺩'), ('\u{10eab}', '𐺭'), ('𐺰', '𐺱')]; pub const YI: &'static [(char, char)] = &[('ꀀ', 'ꒌ'), ('꒐', '꓆')]; diff --git a/src/unicode_tables/script_extension.rs b/src/unicode_tables/script_extension.rs index 7fca2af..42625e2 100644 --- a/src/unicode_tables/script_extension.rs +++ b/src/unicode_tables/script_extension.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate script-extension ucd-13.0.0 --chars +// ucd-generate script-extension ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Adlam", ADLAM), @@ -35,6 +35,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Coptic", COPTIC), ("Cuneiform", CUNEIFORM), ("Cypriot", CYPRIOT), + ("Cypro_Minoan", CYPRO_MINOAN), ("Cyrillic", CYRILLIC), ("Deseret", DESERET), ("Devanagari", DEVANAGARI), @@ -68,6 +69,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Kaithi", KAITHI), ("Kannada", KANNADA), ("Katakana", KATAKANA), + ("Kawi", KAWI), ("Kayah_Li", KAYAH_LI), ("Kharoshthi", KHAROSHTHI), ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT), @@ -102,6 +104,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Multani", MULTANI), ("Myanmar", MYANMAR), ("Nabataean", NABATAEAN), + ("Nag_Mundari", NAG_MUNDARI), ("Nandinagari", NANDINAGARI), ("New_Tai_Lue", NEW_TAI_LUE), ("Newa", NEWA), @@ -118,6 +121,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Old_Sogdian", OLD_SOGDIAN), ("Old_South_Arabian", OLD_SOUTH_ARABIAN), ("Old_Turkic", OLD_TURKIC), + ("Old_Uyghur", OLD_UYGHUR), ("Oriya", ORIYA), ("Osage", OSAGE), ("Osmanya", OSMANYA), @@ -149,6 +153,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tai_Viet", TAI_VIET), ("Takri", TAKRI), ("Tamil", TAMIL), + ("Tangsa", TANGSA), ("Tangut", TANGUT), ("Telugu", TELUGU), ("Thaana", THAANA), @@ -156,8 +161,10 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tibetan", TIBETAN), ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), + ("Toto", TOTO), ("Ugaritic", UGARITIC), ("Vai", VAI), + ("Vithkuqi", VITHKUQI), ("Wancho", WANCHO), ("Warang_Citi", WARANG_CITI), ("Yezidi", YEZIDI), @@ -166,32 +173,32 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ]; pub const ADLAM: &'static [(char, char)] = - &[('ـ', 'ـ'), ('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')]; + &[('؟', '؟'), ('ـ', 'ـ'), ('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')]; pub const AHOM: &'static [(char, char)] = - &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜿')]; + &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑝆')]; pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('𔐀', '𔙆')]; pub const ARABIC: &'static [(char, char)] = &[ ('\u{600}', '\u{604}'), - ('؆', '\u{61c}'), - ('؞', '\u{6dc}'), + ('؆', '\u{6dc}'), ('۞', 'ۿ'), ('ݐ', 'ݿ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), - ('\u{8d3}', '\u{8e1}'), + ('ࡰ', 'ࢎ'), + ('\u{890}', '\u{891}'), + ('\u{898}', '\u{8e1}'), ('\u{8e3}', '\u{8ff}'), - ('ﭐ', '﯁'), - ('ﯓ', 'ﴽ'), - ('ﵐ', 'ﶏ'), + ('ﭐ', '﯂'), + ('ﯓ', 'ﶏ'), ('ﶒ', 'ﷇ'), - ('ﷰ', '﷽'), + ('﷏', '﷏'), + ('ﷰ', '﷿'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('\u{102e0}', '𐋻'), ('𐹠', '𐹾'), + ('\u{10efd}', '\u{10eff}'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), @@ -233,7 +240,7 @@ pub const ARMENIAN: &'static [(char, char)] = pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')]; -pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭋ'), ('᭐', '᭼')]; +pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭐', '᭾')]; pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')]; @@ -284,13 +291,13 @@ pub const BOPOMOFO: &'static [(char, char)] = &[ ('〷', '〷'), ('・', '・'), ('ㄅ', 'ㄯ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('﹅', '﹆'), ('。', '・'), ]; pub const BRAHMI: &'static [(char, char)] = - &[('𑀀', '𑁍'), ('𑁒', '𑁯'), ('\u{1107f}', '\u{1107f}')]; + &[('𑀀', '𑁍'), ('𑁒', '𑁵'), ('\u{1107f}', '\u{1107f}')]; pub const BRAILLE: &'static [(char, char)] = &[('⠀', '⣿')]; @@ -300,7 +307,7 @@ pub const BUGINESE: &'static [(char, char)] = pub const BUHID: &'static [(char, char)] = &[('᜵', '᜶'), ('ᝀ', '\u{1753}')]; pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = - &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ')]; + &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'), ('𑪰', '𑪿')]; pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')]; @@ -308,7 +315,7 @@ pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[('𐔰', '𐕣'), ('𐕯', '𐕯')]; pub const CHAKMA: &'static [(char, char)] = - &[('০', '৯'), ('၀', '၉'), ('\u{11100}', '\u{11134}'), ('𑄶', '\u{11147}')]; + &[('০', '৯'), ('၀', '၉'), ('\u{11100}', '\u{11134}'), ('𑄶', '𑅇')]; pub const CHAM: &'static [(char, char)] = &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')]; @@ -316,10 +323,10 @@ pub const CHAM: &'static [(char, char)] = pub const CHEROKEE: &'static [(char, char)] = &[('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ')]; -pub const CHORASMIAN: &'static [(char, char)] = &[('\u{10fb0}', '\u{10fcb}')]; +pub const CHORASMIAN: &'static [(char, char)] = &[('𐾰', '𐿋')]; pub const COMMON: &'static [(char, char)] = &[ - ('\u{0}', '@'), + ('\0', '@'), ('[', '`'), ('{', '©'), ('«', '¹'), @@ -345,7 +352,7 @@ pub const COMMON: &'static [(char, char)] = &[ ('\u{2066}', '⁰'), ('⁴', '⁾'), ('₀', '₎'), - ('₠', '₿'), + ('₠', '⃀'), ('℀', '℥'), ('℧', '℩'), ('ℬ', 'ℱ'), @@ -357,9 +364,9 @@ pub const COMMON: &'static [(char, char)] = &[ ('①', '⟿'), ('⤀', '⭳'), ('⭶', '⮕'), - ('\u{2b97}', '⯿'), + ('⮗', '⯿'), ('⸀', '⹂'), - ('⹄', '\u{2e52}'), + ('⹄', '⹝'), ('⿰', '⿻'), ('\u{3000}', '\u{3000}'), ('〄', '〄'), @@ -377,8 +384,7 @@ pub const COMMON: &'static [(char, char)] = &[ ('꜈', '꜡'), ('ꞈ', '꞊'), ('꭛', '꭛'), - ('\u{ab6a}', '\u{ab6b}'), - ('﴾', '﴿'), + ('꭪', '꭫'), ('︐', '︙'), ('︰', '﹄'), ('﹇', '﹒'), @@ -391,16 +397,17 @@ pub const COMMON: &'static [(char, char)] = &[ ('¢', '₩'), ('│', '○'), ('\u{fff9}', '�'), - ('𐆐', '\u{1019c}'), + ('𐆐', '𐆜'), ('𐇐', '𐇼'), - ('𖿢', '𖿣'), + ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅦'), ('𝅪', '\u{1d17a}'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), - ('𝆮', '𝇨'), + ('𝆮', '𝇪'), + ('𝋀', '𝋓'), ('𝋠', '𝋳'), ('𝌀', '𝍖'), ('𝍲', '𝍸'), @@ -433,38 +440,37 @@ pub const COMMON: &'static [(char, char)] = &[ ('🂱', '🂿'), ('🃁', '🃏'), ('🃑', '🃵'), - ('🄀', '\u{1f1ad}'), + ('🄀', '🆭'), ('🇦', '🇿'), ('🈁', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉠', '🉥'), - ('🌀', '\u{1f6d7}'), - ('🛠', '🛬'), - ('🛰', '\u{1f6fc}'), - ('🜀', '🝳'), - ('🞀', '🟘'), + ('🌀', '🛗'), + ('🛜', '🛬'), + ('🛰', '🛼'), + ('🜀', '🝶'), + ('🝻', '🟙'), ('🟠', '🟫'), + ('🟰', '🟰'), ('🠀', '🠋'), ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('\u{1f8b0}', '\u{1f8b1}'), - ('🤀', '\u{1f978}'), - ('🥺', '\u{1f9cb}'), - ('🧍', '🩓'), + ('🢰', '🢱'), + ('🤀', '🩓'), ('🩠', '🩭'), - ('🩰', '\u{1fa74}'), - ('🩸', '🩺'), - ('🪀', '\u{1fa86}'), - ('🪐', '\u{1faa8}'), - ('\u{1fab0}', '\u{1fab6}'), - ('\u{1fac0}', '\u{1fac2}'), - ('\u{1fad0}', '\u{1fad6}'), - ('\u{1fb00}', '\u{1fb92}'), - ('\u{1fb94}', '\u{1fbca}'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('🩰', '🩼'), + ('🪀', '🪈'), + ('🪐', '🪽'), + ('🪿', '🫅'), + ('🫎', '🫛'), + ('🫠', '🫨'), + ('🫰', '🫸'), + ('🬀', '🮒'), + ('🮔', '🯊'), + ('🯰', '🯹'), ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'), ]; @@ -487,6 +493,8 @@ pub const CYPRIOT: &'static [(char, char)] = &[ ('𐠿', '𐠿'), ]; +pub const CYPRO_MINOAN: &'static [(char, char)] = &[('𐄀', '𐄁'), ('𒾐', '𒿲')]; + pub const CYRILLIC: &'static [(char, char)] = &[ ('Ѐ', 'ԯ'), ('ᲀ', 'ᲈ'), @@ -497,6 +505,8 @@ pub const CYRILLIC: &'static [(char, char)] = &[ ('⹃', '⹃'), ('Ꙁ', '\u{a69f}'), ('\u{fe2e}', '\u{fe2f}'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), ]; pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')]; @@ -509,17 +519,18 @@ pub const DEVANAGARI: &'static [(char, char)] = &[ ('\u{20f0}', '\u{20f0}'), ('꠰', '꠹'), ('\u{a8e0}', '\u{a8ff}'), + ('𑬀', '𑬉'), ]; pub const DIVES_AKURU: &'static [(char, char)] = &[ - ('\u{11900}', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), - ('\u{1193b}', '\u{11946}'), - ('\u{11950}', '\u{11959}'), + ('𑤀', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤵'), + ('𑤷', '𑤸'), + ('\u{1193b}', '𑥆'), + ('𑥐', '𑥙'), ]; pub const DOGRA: &'static [(char, char)] = @@ -529,7 +540,7 @@ pub const DUPLOYAN: &'static [(char, char)] = &[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '\u{1bca3}')]; pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = - &[('𓀀', '𓐮'), ('\u{13430}', '\u{13438}')]; + &[('𓀀', '\u{13455}')]; pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')]; @@ -568,6 +579,10 @@ pub const ETHIOPIC: &'static [(char, char)] = &[ ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ]; pub const GEORGIAN: &'static [(char, char)] = &[ @@ -585,8 +600,7 @@ pub const GEORGIAN: &'static [(char, char)] = &[ pub const GLAGOLITIC: &'static [(char, char)] = &[ ('\u{484}', '\u{484}'), ('\u{487}', '\u{487}'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), + ('Ⰰ', 'ⱟ'), ('⹃', '⹃'), ('\u{a66f}', '\u{a66f}'), ('\u{1e000}', '\u{1e006}'), @@ -739,23 +753,25 @@ pub const HAN: &'static [(char, char)] = &[ ('㍘', '㍰'), ('㍻', '㍿'), ('㏠', '㏾'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), + ('㐀', '䶿'), + ('一', '鿿'), ('꜀', '꜇'), ('豈', '舘'), ('並', '龎'), ('﹅', '﹆'), ('。', '・'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿢', '𖿣'), + ('𖿰', '𖿱'), ('𝍠', '𝍱'), ('🉐', '🉑'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; pub const HANGUL: &'static [(char, char)] = &[ @@ -823,7 +839,8 @@ pub const HIRAGANA: &'static [(char, char)] = &[ ('。', '・'), ('ー', 'ー'), ('\u{ff9e}', '\u{ff9f}'), - ('𛀁', '𛄞'), + ('𛀁', '𛄟'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), ('🈀', '🈀'), ]; @@ -836,7 +853,7 @@ pub const INHERITED: &'static [(char, char)] = &[ ('\u{343}', '\u{344}'), ('\u{346}', '\u{362}'), ('\u{953}', '\u{954}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1dc2}', '\u{1df7}'), ('\u{1df9}', '\u{1df9}'), ('\u{1dfb}', '\u{1dff}'), @@ -845,6 +862,8 @@ pub const INHERITED: &'static [(char, char)] = &[ ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2d}'), ('\u{101fd}', '\u{101fd}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), @@ -861,8 +880,12 @@ pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] = pub const JAVANESE: &'static [(char, char)] = &[('\u{a980}', '꧍'), ('ꧏ', '꧙'), ('꧞', '꧟')]; -pub const KAITHI: &'static [(char, char)] = - &[('०', '९'), ('꠰', '꠹'), ('\u{11080}', '𑃁'), ('\u{110cd}', '\u{110cd}')]; +pub const KAITHI: &'static [(char, char)] = &[ + ('०', '९'), + ('꠰', '꠹'), + ('\u{11080}', '\u{110c2}'), + ('\u{110cd}', '\u{110cd}'), +]; pub const KANNADA: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -876,10 +899,10 @@ pub const KANNADA: &'static [(char, char)] = &[ ('\u{cc6}', 'ೈ'), ('ೊ', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), ('೦', '೯'), - ('ೱ', 'ೲ'), + ('ೱ', 'ೳ'), ('\u{1cd0}', '\u{1cd0}'), ('\u{1cd2}', '\u{1cd2}'), ('\u{1cda}', '\u{1cda}'), @@ -902,10 +925,18 @@ pub const KATAKANA: &'static [(char, char)] = &[ ('㌀', '㍗'), ('﹅', '﹆'), ('。', '\u{ff9f}'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), ('𛀀', '𛀀'), + ('𛄠', '𛄢'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ]; +pub const KAWI: &'static [(char, char)] = + &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '𑽙')]; + pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '꤯')]; pub const KHAROSHTHI: &'static [(char, char)] = &[ @@ -920,13 +951,13 @@ pub const KHAROSHTHI: &'static [(char, char)] = &[ ]; pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] = - &[('\u{16fe4}', '\u{16fe4}'), ('\u{18b00}', '\u{18cd5}')]; + &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕')]; pub const KHMER: &'static [(char, char)] = &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')]; pub const KHOJKI: &'static [(char, char)] = - &[('૦', '૯'), ('꠰', '꠹'), ('𑈀', '𑈑'), ('𑈓', '\u{1123e}')]; + &[('૦', '૯'), ('꠰', '꠹'), ('𑈀', '𑈑'), ('𑈓', '\u{11241}')]; pub const KHUDAWADI: &'static [(char, char)] = &[('।', '॥'), ('꠰', '꠹'), ('𑊰', '\u{112ea}'), ('𑋰', '𑋹')]; @@ -940,7 +971,7 @@ pub const LAO: &'static [(char, char)] = &[ ('ວ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('໐', '໙'), ('ໜ', 'ໟ'), ]; @@ -976,16 +1007,23 @@ pub const LATIN: &'static [(char, char)] = &[ ('Ⱡ', 'Ɀ'), ('꜀', '꜇'), ('Ꜣ', 'ꞇ'), - ('Ꞌ', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꟿ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꟿ'), ('꤮', '꤮'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭤ'), - ('ꭦ', '\u{ab69}'), + ('ꭦ', 'ꭩ'), ('ff', 'st'), ('A', 'Z'), ('a', 'z'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), ]; pub const LEPCHA: &'static [(char, char)] = @@ -1016,8 +1054,7 @@ pub const LINEAR_B: &'static [(char, char)] = &[ ('𐄷', '𐄿'), ]; -pub const LISU: &'static [(char, char)] = - &[('ꓐ', '꓿'), ('\u{11fb0}', '\u{11fb0}')]; +pub const LISU: &'static [(char, char)] = &[('ꓐ', '꓿'), ('𑾰', '𑾰')]; pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')]; @@ -1082,8 +1119,7 @@ pub const MODI: &'static [(char, char)] = &[('꠰', '꠹'), ('𑘀', '𑙄'), ('𑙐', '𑙙')]; pub const MONGOLIAN: &'static [(char, char)] = &[ - ('᠀', '\u{180e}'), - ('᠐', '᠙'), + ('᠀', '᠙'), ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢪ'), ('\u{202f}', '\u{202f}'), @@ -1100,6 +1136,8 @@ pub const MYANMAR: &'static [(char, char)] = pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')]; +pub const NAG_MUNDARI: &'static [(char, char)] = &[('𞓐', '𞓹')]; + pub const NANDINAGARI: &'static [(char, char)] = &[ ('।', '॥'), ('೦', '೯'), @@ -1115,9 +1153,16 @@ pub const NANDINAGARI: &'static [(char, char)] = &[ pub const NEW_TAI_LUE: &'static [(char, char)] = &[('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')]; -pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '\u{11461}')]; +pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '𑑡')]; -pub const NKO: &'static [(char, char)] = &[('߀', 'ߺ'), ('\u{7fd}', '߿')]; +pub const NKO: &'static [(char, char)] = &[ + ('،', '،'), + ('؛', '؛'), + ('؟', '؟'), + ('߀', 'ߺ'), + ('\u{7fd}', '߿'), + ('﴾', '﴿'), +]; pub const NUSHU: &'static [(char, char)] = &[('𖿡', '𖿡'), ('𛅰', '𛋻')]; @@ -1146,6 +1191,9 @@ pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐩠', '𐩿')]; pub const OLD_TURKIC: &'static [(char, char)] = &[('𐰀', '𐱈')]; +pub const OLD_UYGHUR: &'static [(char, char)] = + &[('ـ', 'ـ'), ('𐫲', '𐫲'), ('𐽰', '𐾉')]; + pub const ORIYA: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), ('।', '॥'), @@ -1253,10 +1301,11 @@ pub const SYRIAC: &'static [(char, char)] = &[ ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ'), ('\u{1df8}', '\u{1df8}'), + ('\u{1dfa}', '\u{1dfa}'), ]; pub const TAGALOG: &'static [(char, char)] = - &[('ᜀ', 'ᜌ'), ('ᜎ', '\u{1714}'), ('᜵', '᜶')]; + &[('ᜀ', '᜕'), ('ᜟ', 'ᜟ'), ('᜵', '᜶')]; pub const TAGBANWA: &'static [(char, char)] = &[('᜵', '᜶'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')]; @@ -1275,7 +1324,7 @@ pub const TAI_THAM: &'static [(char, char)] = &[ pub const TAI_VIET: &'static [(char, char)] = &[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')]; pub const TAKRI: &'static [(char, char)] = - &[('।', '॥'), ('꠰', '꠹'), ('𑚀', '𑚸'), ('𑛀', '𑛉')]; + &[('।', '॥'), ('꠰', '꠹'), ('𑚀', '𑚹'), ('𑛀', '𑛉')]; pub const TAMIL: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -1305,12 +1354,10 @@ pub const TAMIL: &'static [(char, char)] = &[ ('𑿿', '𑿿'), ]; -pub const TANGUT: &'static [(char, char)] = &[ - ('𖿠', '𖿠'), - ('𗀀', '𘟷'), - ('𘠀', '\u{18aff}'), - ('\u{18d00}', '\u{18d08}'), -]; +pub const TANGSA: &'static [(char, char)] = &[('𖩰', '𖪾'), ('𖫀', '𖫉')]; + +pub const TANGUT: &'static [(char, char)] = + &[('𖿠', '𖿠'), ('𗀀', '𘟷'), ('𘠀', '𘫿'), ('𘴀', '𘴈')]; pub const TELUGU: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -1319,11 +1366,12 @@ pub const TELUGU: &'static [(char, char)] = &[ ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), - ('ఽ', 'ౄ'), + ('\u{c3c}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', '\u{c63}'), ('౦', '౯'), ('౷', '౿'), @@ -1365,10 +1413,23 @@ pub const TIRHUTA: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ]; +pub const TOTO: &'static [(char, char)] = &[('𞊐', '\u{1e2ae}')]; + pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')]; pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')]; +pub const VITHKUQI: &'static [(char, char)] = &[ + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), +]; + pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('𞋿', '𞋿')]; pub const WARANG_CITI: &'static [(char, char)] = &[('𑢠', '𑣲'), ('𑣿', '𑣿')]; @@ -1378,9 +1439,9 @@ pub const YEZIDI: &'static [(char, char)] = &[ ('؛', '؛'), ('؟', '؟'), ('٠', '٩'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eab}', '\u{10ead}'), - ('\u{10eb0}', '\u{10eb1}'), + ('𐺀', '𐺩'), + ('\u{10eab}', '𐺭'), + ('𐺰', '𐺱'), ]; pub const YI: &'static [(char, char)] = &[ diff --git a/src/unicode_tables/sentence_break.rs b/src/unicode_tables/sentence_break.rs index 67d830f..2434873 100644 --- a/src/unicode_tables/sentence_break.rs +++ b/src/unicode_tables/sentence_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate sentence-break ucd-13.0.0 --chars +// ucd-generate sentence-break ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ATerm", ATERM), @@ -29,7 +29,7 @@ pub const ATERM: &'static [(char, char)] = pub const CR: &'static [(char, char)] = &[('\r', '\r')]; pub const CLOSE: &'static [(char, char)] = &[ - ('\"', '\"'), + ('"', '"'), ('\'', ')'), ('[', '['), (']', ']'), @@ -57,6 +57,7 @@ pub const CLOSE: &'static [(char, char)] = &[ ('⸜', '⸝'), ('⸠', '⸩'), ('⹂', '⹂'), + ('⹕', '⹜'), ('〈', '】'), ('〔', '〛'), ('〝', '〟'), @@ -100,7 +101,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'ः'), ('\u{93a}', '\u{93c}'), ('ा', 'ॏ'), @@ -142,6 +144,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ொ', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -154,6 +157,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ೊ', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), + ('ೳ', 'ೳ'), ('\u{d00}', 'ഃ'), ('\u{d3b}', '\u{d3c}'), ('\u{d3e}', '\u{d44}'), @@ -172,7 +176,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -193,13 +197,14 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1712}', '᜕'), + ('\u{1732}', '᜴'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', 'ᤫ'), @@ -208,7 +213,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ᩕ', '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), ('\u{1b34}', '᭄'), ('\u{1b6b}', '\u{1b73}'), @@ -221,8 +226,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('᳷', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{200c}', '\u{200d}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), @@ -276,11 +280,16 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('𑀀', '𑀂'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '𑂂'), ('𑂰', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{11134}'), ('𑅅', '𑅆'), @@ -288,9 +297,10 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11180}', '𑆂'), ('𑆳', '𑇀'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', '\u{111cf}'), + ('𑇎', '\u{111cf}'), ('𑈬', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112ea}'), ('\u{11300}', '𑌃'), ('\u{1133b}', '\u{1133c}'), @@ -311,11 +321,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{116ab}', '\u{116b7}'), ('\u{1171d}', '\u{1172b}'), ('𑠬', '\u{1183a}'), - ('\u{11930}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('\u{11930}', '𑤵'), + ('𑤷', '𑤸'), ('\u{1193b}', '\u{1193e}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11943}'), + ('𑥀', '𑥀'), + ('𑥂', '\u{11943}'), ('𑧑', '\u{119d7}'), ('\u{119da}', '\u{119e0}'), ('𑧤', '𑧤'), @@ -338,14 +348,22 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11d90}', '\u{11d91}'), ('𑶓', '\u{11d97}'), ('\u{11ef3}', '𑻶'), + ('\u{11f00}', '\u{11f01}'), + ('𑼃', '𑼃'), + ('𑼴', '\u{11f3a}'), + ('𑼾', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('𝅭', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -363,8 +381,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0020}', '\u{e007f}'), @@ -377,6 +398,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{61c}', '\u{61c}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), ('\u{8e2}', '\u{8e2}'), ('\u{180e}', '\u{180e}'), ('\u{200b}', '\u{200b}'), @@ -388,7 +410,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{fff9}', '\u{fffb}'), ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), - ('\u{13430}', '\u{13438}'), + ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), ('\u{e0001}', '\u{e0001}'), @@ -671,6 +693,7 @@ pub const LOWER: &'static [(char, char)] = &[ ('ԭ', 'ԭ'), ('ԯ', 'ԯ'), ('ՠ', 'ֈ'), + ('ჼ', 'ჼ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᶿ'), @@ -833,7 +856,7 @@ pub const LOWER: &'static [(char, char)] = &[ ('ⅰ', 'ⅿ'), ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'), - ('ⰰ', 'ⱞ'), + ('ⰰ', 'ⱟ'), ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), @@ -1001,19 +1024,34 @@ pub const LOWER: &'static [(char, char)] = &[ ('ꞻ', 'ꞻ'), ('ꞽ', 'ꞽ'), ('ꞿ', 'ꞿ'), + ('ꟁ', 'ꟁ'), ('ꟃ', 'ꟃ'), - ('\u{a7c8}', '\u{a7c8}'), - ('\u{a7ca}', '\u{a7ca}'), - ('\u{a7f6}', '\u{a7f6}'), + ('ꟈ', 'ꟈ'), + ('ꟊ', 'ꟊ'), + ('ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟕ'), + ('ꟗ', 'ꟗ'), + ('ꟙ', 'ꟙ'), + ('ꟲ', 'ꟴ'), + ('ꟶ', 'ꟶ'), ('ꟸ', 'ꟺ'), ('ꬰ', 'ꭚ'), - ('ꭜ', '\u{ab68}'), + ('ꭜ', 'ꭩ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('a', 'z'), ('𐐨', '𐑏'), ('𐓘', '𐓻'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), + ('𐞀', '𐞀'), + ('𐞃', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐳀', '𐳲'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), @@ -1045,6 +1083,10 @@ pub const LOWER: &'static [(char, char)] = &[ ('𝞪', '𝟂'), ('𝟄', '𝟉'), ('𝟋', '𝟋'), + ('𝼀', '𝼉'), + ('𝼋', '𝼞'), + ('𝼥', '𝼪'), + ('𞀰', '𞁭'), ('𞤢', '𞥃'), ]; @@ -1100,17 +1142,20 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('𑛀', '𑛉'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), - ('\u{11950}', '\u{11959}'), + ('𑥐', '𑥙'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), + ('𑽐', '𑽙'), ('𖩠', '𖩩'), + ('𖫀', '𖫉'), ('𖭐', '𖭙'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), + ('𞓰', '𞓹'), ('𞥐', '𞥙'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('🯰', '🯹'), ]; pub const OLETTER: &'static [(char, char)] = &[ @@ -1146,8 +1191,9 @@ pub const OLETTER: &'static [(char, char)] = &[ ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('ࢠ', 'ࣉ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'), ('ॐ', 'ॐ'), @@ -1212,6 +1258,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('ప', 'హ'), ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'), ('ಅ', 'ಌ'), @@ -1220,10 +1267,10 @@ pub const OLETTER: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ಽ'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', 'ೡ'), ('ೱ', 'ೲ'), - ('\u{d04}', 'ഌ'), + ('ഄ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ഽ'), @@ -1264,7 +1311,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('ၵ', 'ႁ'), ('ႎ', 'ႎ'), ('ა', 'ჺ'), - ('ჼ', 'ቈ'), + ('ჽ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), @@ -1286,9 +1333,8 @@ pub const OLETTER: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', 'ᜑ'), - ('ᜠ', 'ᜱ'), + ('ᜀ', 'ᜑ'), + ('ᜟ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -1309,7 +1355,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('ᨠ', 'ᩔ'), ('ᪧ', 'ᪧ'), ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭋ'), + ('ᭅ', 'ᭌ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), @@ -1347,11 +1393,10 @@ pub const OLETTER: &'static [(char, char)] = &[ ('ー', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('ㇰ', 'ㇿ'), - ('㐀', '\u{4dbf}'), - ('一', '\u{9ffc}'), - ('ꀀ', 'ꒌ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'), @@ -1399,7 +1444,6 @@ pub const OLETTER: &'static [(char, char)] = &[ ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), - ('\u{ab69}', '\u{ab69}'), ('ꯀ', 'ꯢ'), ('가', '힣'), ('ힰ', 'ퟆ'), @@ -1449,6 +1493,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), + ('𐞁', '𐞂'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), @@ -1477,19 +1522,22 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐴀', '𐴣'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('𐺀', '𐺩'), + ('𐺰', '𐺱'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), - ('\u{10fb0}', '\u{10fc4}'), + ('𐽰', '𐾁'), + ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀃', '𑀷'), + ('𑁱', '𑁲'), + ('𑁵', '𑁵'), ('𑂃', '𑂯'), ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅄', '𑅄'), - ('\u{11147}', '\u{11147}'), + ('𑅇', '𑅇'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'), @@ -1498,6 +1546,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'), + ('𑈿', '𑉀'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -1515,7 +1564,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𑍝', '𑍡'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), - ('𑑟', '\u{11461}'), + ('𑑟', '𑑡'), ('𑒀', '𑒯'), ('𑓄', '𑓅'), ('𑓇', '𑓇'), @@ -1526,14 +1575,15 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𑚀', '𑚪'), ('𑚸', '𑚸'), ('𑜀', '𑜚'), + ('𑝀', '𑝆'), ('𑠀', '𑠫'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤯'), + ('𑤿', '𑤿'), + ('𑥁', '𑥁'), ('𑦠', '𑦧'), ('𑦪', '𑧐'), ('𑧡', '𑧡'), @@ -1544,7 +1594,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𑩐', '𑩐'), ('𑩜', '𑪉'), ('𑪝', '𑪝'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -1558,14 +1608,20 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𑵪', '𑶉'), ('𑶘', '𑶘'), ('𑻠', '𑻲'), - ('\u{11fb0}', '\u{11fb0}'), + ('𑼂', '𑼂'), + ('𑼄', '𑼐'), + ('𑼒', '𑼳'), + ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('𓑁', '𓑆'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), + ('𖩰', '𖪾'), ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), @@ -1577,20 +1633,32 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𖿠', '𖿡'), ('𖿣', '𖿣'), ('𗀀', '𘟷'), - ('𘠀', '\u{18cd5}'), - ('\u{18d00}', '\u{18d08}'), - ('𛀀', '𛄞'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), ('𛅐', '𛅒'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), + ('𝼊', '𝼊'), ('𞄀', '𞄬'), ('𞄷', '𞄽'), ('𞅎', '𞅎'), + ('𞊐', '𞊭'), ('𞋀', '𞋫'), + ('𞓐', '𞓫'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('𞥋', '𞥋'), ('𞸀', '𞸃'), @@ -1626,13 +1694,14 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), - ('𠀀', '\u{2a6dd}'), - ('𪜀', '𫜴'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'), - ('\u{30000}', '\u{3134a}'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), ]; pub const SCONTINUE: &'static [(char, char)] = &[ @@ -1661,7 +1730,7 @@ pub const STERM: &'static [(char, char)] = &[ ('!', '!'), ('?', '?'), ('։', '։'), - ('؞', '؟'), + ('؝', '؟'), ('۔', '۔'), ('܀', '܂'), ('߹', '߹'), @@ -1680,12 +1749,14 @@ pub const STERM: &'static [(char, char)] = &[ ('᪨', '᪫'), ('᭚', '᭛'), ('᭞', '᭟'), + ('᭽', '᭾'), ('᰻', '᰼'), ('᱾', '᱿'), ('‼', '‽'), ('⁇', '⁉'), ('⸮', '⸮'), ('⸼', '⸼'), + ('⹓', '⹔'), ('。', '。'), ('꓿', '꓿'), ('꘎', '꘏'), @@ -1704,6 +1775,7 @@ pub const STERM: &'static [(char, char)] = &[ ('。', '。'), ('𐩖', '𐩗'), ('𐽕', '𐽙'), + ('𐾆', '𐾉'), ('𑁇', '𑁈'), ('𑂾', '𑃁'), ('𑅁', '𑅃'), @@ -1718,12 +1790,13 @@ pub const STERM: &'static [(char, char)] = &[ ('𑗉', '𑗗'), ('𑙁', '𑙂'), ('𑜼', '𑜾'), - ('\u{11944}', '\u{11944}'), - ('\u{11946}', '\u{11946}'), + ('𑥄', '𑥄'), + ('𑥆', '𑥆'), ('𑩂', '𑩃'), ('𑪛', '𑪜'), ('𑱁', '𑱂'), ('𑻷', '𑻸'), + ('𑽃', '𑽄'), ('𖩮', '𖩯'), ('𖫵', '𖫵'), ('𖬷', '𖬸'), @@ -2183,7 +2256,7 @@ pub const UPPER: &'static [(char, char)] = &[ ('Ⅰ', 'Ⅿ'), ('Ↄ', 'Ↄ'), ('Ⓐ', 'Ⓩ'), - ('Ⰰ', 'Ⱞ'), + ('Ⰰ', 'Ⱟ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'), ('Ⱨ', 'Ⱨ'), @@ -2348,13 +2421,21 @@ pub const UPPER: &'static [(char, char)] = &[ ('Ꞻ', 'Ꞻ'), ('Ꞽ', 'Ꞽ'), ('Ꞿ', 'Ꞿ'), + ('Ꟁ', 'Ꟁ'), ('Ꟃ', 'Ꟃ'), - ('Ꞔ', '\u{a7c7}'), - ('\u{a7c9}', '\u{a7c9}'), - ('\u{a7f5}', '\u{a7f5}'), + ('Ꞔ', 'Ꟈ'), + ('Ꟊ', 'Ꟊ'), + ('Ꟑ', 'Ꟑ'), + ('Ꟗ', 'Ꟗ'), + ('Ꟙ', 'Ꟙ'), + ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), ('𐒰', '𐓓'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), diff --git a/src/unicode_tables/word_break.rs b/src/unicode_tables/word_break.rs index bd23e00..c071495 100644 --- a/src/unicode_tables/word_break.rs +++ b/src/unicode_tables/word_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate word-break ucd-13.0.0 --chars +// ucd-generate word-break ucd-15.0.0 --chars // -// Unicode version: 13.0.0. +// Unicode version: 15.0.0. // -// ucd-generate 0.2.8 is available on crates.io. +// ucd-generate 0.2.14 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ALetter", ALETTER), @@ -75,8 +75,9 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), - ('ࢠ', 'ࢴ'), - ('ࢶ', '\u{8c7}'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('ࢠ', 'ࣉ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'), ('ॐ', 'ॐ'), @@ -141,6 +142,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ప', 'హ'), ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'), ('ಅ', 'ಌ'), @@ -149,10 +151,10 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ಽ'), - ('ೞ', 'ೞ'), + ('ೝ', 'ೞ'), ('ೠ', 'ೡ'), ('ೱ', 'ೲ'), - ('\u{d04}', 'ഌ'), + ('ഄ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ഽ'), @@ -197,9 +199,8 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', 'ᜑ'), - ('ᜠ', 'ᜱ'), + ('ᜀ', 'ᜑ'), + ('ᜟ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -211,7 +212,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ᤀ', 'ᤞ'), ('ᨀ', 'ᨖ'), ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭋ'), + ('ᭅ', 'ᭌ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), @@ -263,9 +264,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), ('Ⓐ', 'ⓩ'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⱡ', 'ⳤ'), + ('Ⰰ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), @@ -287,7 +286,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('〻', '〼'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('ㆠ', '\u{31bf}'), + ('ㆠ', 'ㆿ'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), @@ -296,9 +295,11 @@ pub const ALETTER: &'static [(char, char)] = &[ ('Ꙁ', 'ꙮ'), ('ꙿ', 'ꚝ'), ('ꚠ', 'ꛯ'), - ('꜈', 'ꞿ'), - ('Ꟃ', '\u{a7ca}'), - ('\u{a7f5}', 'ꠁ'), + ('꜈', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠢ'), @@ -322,7 +323,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), - ('ꬰ', '\u{ab69}'), + ('ꬰ', 'ꭩ'), ('ꭰ', 'ꯢ'), ('가', '힣'), ('ힰ', 'ퟆ'), @@ -365,9 +366,20 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), @@ -398,19 +410,22 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), - ('\u{10e80}', '\u{10ea9}'), - ('\u{10eb0}', '\u{10eb1}'), + ('𐺀', '𐺩'), + ('𐺰', '𐺱'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), - ('\u{10fb0}', '\u{10fc4}'), + ('𐽰', '𐾁'), + ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀃', '𑀷'), + ('𑁱', '𑁲'), + ('𑁵', '𑁵'), ('𑂃', '𑂯'), ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅄', '𑅄'), - ('\u{11147}', '\u{11147}'), + ('𑅇', '𑅇'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'), @@ -419,6 +434,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'), + ('𑈿', '𑉀'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), @@ -436,7 +452,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𑍝', '𑍡'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), - ('𑑟', '\u{11461}'), + ('𑑟', '𑑡'), ('𑒀', '𑒯'), ('𑓄', '𑓅'), ('𑓇', '𑓇'), @@ -448,13 +464,13 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𑚸', '𑚸'), ('𑠀', '𑠫'), ('𑢠', '𑣟'), - ('𑣿', '\u{11906}'), - ('\u{11909}', '\u{11909}'), - ('\u{1190c}', '\u{11913}'), - ('\u{11915}', '\u{11916}'), - ('\u{11918}', '\u{1192f}'), - ('\u{1193f}', '\u{1193f}'), - ('\u{11941}', '\u{11941}'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤯'), + ('𑤿', '𑤿'), + ('𑥁', '𑥁'), ('𑦠', '𑦧'), ('𑦪', '𑧐'), ('𑧡', '𑧡'), @@ -465,7 +481,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𑩐', '𑩐'), ('𑩜', '𑪉'), ('𑪝', '𑪝'), - ('𑫀', '𑫸'), + ('𑪰', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -479,14 +495,20 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𑵪', '𑶉'), ('𑶘', '𑶘'), ('𑻠', '𑻲'), - ('\u{11fb0}', '\u{11fb0}'), + ('𑼂', '𑼂'), + ('𑼄', '𑼐'), + ('𑼒', '𑼳'), + ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), - ('𓀀', '𓐮'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('𓑁', '𓑆'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), + ('𖩰', '𖪾'), ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), @@ -532,10 +554,19 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), + ('𞀰', '𞁭'), ('𞄀', '𞄬'), ('𞄷', '𞄽'), ('𞅎', '𞅎'), + ('𞊐', '𞊭'), ('𞋀', '𞋫'), + ('𞓐', '𞓫'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('𞤀', '𞥃'), ('𞥋', '𞥋'), @@ -579,7 +610,7 @@ pub const ALETTER: &'static [(char, char)] = &[ pub const CR: &'static [(char, char)] = &[('\r', '\r')]; -pub const DOUBLE_QUOTE: &'static [(char, char)] = &[('\"', '\"')]; +pub const DOUBLE_QUOTE: &'static [(char, char)] = &[('"', '"')]; pub const EXTEND: &'static [(char, char)] = &[ ('\u{300}', '\u{36f}'), @@ -606,7 +637,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{8d3}', '\u{8e1}'), + ('\u{898}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'ः'), ('\u{93a}', '\u{93c}'), ('ा', 'ॏ'), @@ -648,6 +680,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ொ', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c04}'), + ('\u{c3c}', '\u{c3c}'), ('\u{c3e}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), @@ -660,6 +693,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ೊ', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), + ('ೳ', 'ೳ'), ('\u{d00}', 'ഃ'), ('\u{d3b}', '\u{d3c}'), ('\u{d3e}', '\u{d44}'), @@ -678,7 +712,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ecd}'), + ('\u{ec8}', '\u{ece}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), @@ -699,13 +733,14 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1734}'), + ('\u{1712}', '᜕'), + ('\u{1732}', '᜴'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), ('\u{1885}', '\u{1886}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', 'ᤫ'), @@ -714,7 +749,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ᩕ', '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ac0}'), + ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), ('\u{1b34}', '᭄'), ('\u{1b6b}', '\u{1b73}'), @@ -727,8 +762,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'), ('᳷', '\u{1cf9}'), - ('\u{1dc0}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), + ('\u{1dc0}', '\u{1dff}'), ('\u{200c}', '\u{200c}'), ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), @@ -782,11 +816,16 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), ('𑀀', '𑀂'), ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), ('\u{1107f}', '𑂂'), ('𑂰', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), ('\u{11100}', '\u{11102}'), ('\u{11127}', '\u{11134}'), ('𑅅', '𑅆'), @@ -794,9 +833,10 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11180}', '𑆂'), ('𑆳', '𑇀'), ('\u{111c9}', '\u{111cc}'), - ('\u{111ce}', '\u{111cf}'), + ('𑇎', '\u{111cf}'), ('𑈬', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112ea}'), ('\u{11300}', '𑌃'), ('\u{1133b}', '\u{1133c}'), @@ -817,11 +857,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{116ab}', '\u{116b7}'), ('\u{1171d}', '\u{1172b}'), ('𑠬', '\u{1183a}'), - ('\u{11930}', '\u{11935}'), - ('\u{11937}', '\u{11938}'), + ('\u{11930}', '𑤵'), + ('𑤷', '𑤸'), ('\u{1193b}', '\u{1193e}'), - ('\u{11940}', '\u{11940}'), - ('\u{11942}', '\u{11943}'), + ('𑥀', '𑥀'), + ('𑥂', '\u{11943}'), ('𑧑', '\u{119d7}'), ('\u{119da}', '\u{119e0}'), ('𑧤', '𑧤'), @@ -844,14 +884,22 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11d90}', '\u{11d91}'), ('𑶓', '\u{11d97}'), ('\u{11ef3}', '𑻶'), + ('\u{11f00}', '\u{11f01}'), + ('𑼃', '𑼃'), + ('𑼴', '\u{11f3a}'), + ('𑼾', '\u{11f42}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), + ('𖿰', '𖿱'), ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('𝅭', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), @@ -869,8 +917,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('🏻', '🏿'), @@ -894,6 +945,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{61c}', '\u{61c}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), ('\u{8e2}', '\u{8e2}'), ('\u{180e}', '\u{180e}'), ('\u{200e}', '\u{200f}'), @@ -904,7 +956,7 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{fff9}', '\u{fffb}'), ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), - ('\u{13430}', '\u{13438}'), + ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), ('\u{e0001}', '\u{e0001}'), @@ -932,7 +984,12 @@ pub const KATAKANA: &'static [(char, char)] = &[ ('㋐', '㋾'), ('㌀', '㍗'), ('ヲ', 'ン'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), ('𛀀', '𛀀'), + ('𛄠', '𛄢'), + ('𛅕', '𛅕'), ('𛅤', '𛅧'), ]; @@ -1031,17 +1088,20 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('𑛀', '𑛉'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), - ('\u{11950}', '\u{11959}'), + ('𑥐', '𑥙'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), + ('𑽐', '𑽙'), ('𖩠', '𖩩'), + ('𖫀', '𖫉'), ('𖭐', '𖭙'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), + ('𞓰', '𞓹'), ('𞥐', '𞥙'), - ('\u{1fbf0}', '\u{1fbf9}'), + ('🯰', '🯹'), ]; pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')]; diff --git a/src/utf8.rs b/src/utf8.rs index dc05503..b9c8655 100644 --- a/src/utf8.rs +++ b/src/utf8.rs @@ -198,7 +198,7 @@ impl<'a> IntoIterator for &'a Utf8Sequence { type Item = &'a Utf8Range; fn into_iter(self) -> Self::IntoIter { - self.as_slice().into_iter() + self.as_slice().iter() } } @@ -448,7 +448,7 @@ fn max_scalar_value(nbytes: usize) -> u32 { 1 => 0x007F, 2 => 0x07FF, 3 => 0xFFFF, - 4 => 0x10FFFF, + 4 => 0x0010_FFFF, _ => unreachable!("invalid UTF-8 byte sequence size"), } } @@ -492,7 +492,7 @@ mod tests { fn single_codepoint_one_sequence() { // Tests that every range of scalar values that contains a single // scalar value is recognized by one sequence of byte ranges. - for i in 0x0..(0x10FFFF + 1) { + for i in 0x0..=0x0010_FFFF { let c = match char::from_u32(i) { None => continue, Some(c) => c, |