Snap for 8188146 from 6453fc5ed39eb1c8fc72e9d291eb8c7084a05ad7 to tm-frc-ipsec-release

Change-Id: Ia03e3604c435998c774cedd79bdf00c9aef761b4
author: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2022-02-16 01:13:04 +0000
committer: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2022-02-16 01:13:04 +0000
commit: ec9c482f6dca777e0955c304457a80ef915e564f (patch)
tree: 47be3ea825e1446e093c3b760db7d40a4e4e55d6
parent: 0ceec9b4cae6be5020bb678f0172ee13a83c1509 (diff)
parent: 6453fc5ed39eb1c8fc72e9d291eb8c7084a05ad7 (diff)
download: unicode-segmentation-ec9c482f6dca777e0955c304457a80ef915e564f.tar.gz
19 files changed, 88 insertions, 343 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index e74a153..22f2cec 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
 {
   "git": {
-    "sha1": "907d4d0b7e5c6f5e0f815c90a51d28b793d0c7a4"
+    "sha1": "3b75ee19b3c0ddacaeec03be688a7b8766833728"
   }
 }
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
deleted file mode 100644
index 3c13d1b..0000000
--- a/.github/workflows/rust.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Rust
-
-on:
-  push:
-    branches: [ master ]
-  pull_request:
-    branches: [ master ]
-
-env:
-  CARGO_TERM_COLOR: always
-
-jobs:
-  build:
-
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: actions/checkout@v2
-    - name: Build
-      run: cargo build --verbose
-    - name: Run tests
-      run: cargo test --verbose
diff --git a/Android.bp b/Android.bp
index 5e67fb1..da0cf0d 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,5 +1,4 @@
-// This file is generated by cargo2android.py --config cargo2android.json.
-// Do not modify this file as changes will be overridden on upgrade.
+// This file is generated by cargo2android.py --run --device --dependencies.
 
 package {
     default_applicable_licenses: [
@@ -42,10 +41,9 @@ license {
 
 rust_library {
     name: "libunicode_segmentation",
+    // has rustc warnings
     host_supported: true,
     crate_name: "unicode_segmentation",
-    cargo_env_compat: true,
-    cargo_pkg_version: "1.8.0",
     srcs: ["src/lib.rs"],
-    edition: "2018",
+    edition: "2015",
 }
diff --git a/Cargo.toml b/Cargo.toml
index 583df10..0f21309 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,9 +11,8 @@
 # will likely look very different (and much more reasonable)
 
 [package]
-edition = "2018"
 name = "unicode-segmentation"
-version = "1.8.0"
+version = "1.7.1"
 authors = ["kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>"]
 exclude = ["target/*", "Cargo.lock", "scripts/tmp", "benches/texts/*", "*.txt"]
 description = "This crate provides Grapheme Cluster, Word and Sentence boundaries\naccording to Unicode Standard Annex #29 rules.\n"
@@ -27,16 +26,8 @@ repository = "https://github.com/unicode-rs/unicode-segmentation"
 [[bench]]
 name = "graphemes"
 harness = false
-
-[[bench]]
-name = "unicode_words"
-harness = false
-
-[[bench]]
-name = "word_bounds"
-harness = false
-[dev-dependencies.criterion]
-version = "0.3"
+[dev-dependencies.bencher]
+version = "0.1"
 
 [dev-dependencies.quickcheck]
 version = "0.7"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index c1c16e6..3f55167 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,10 +1,9 @@
 [package]
 
 name = "unicode-segmentation"
-version = "1.8.0"
+version = "1.7.1"
 authors = ["kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>"]
 
-edition = "2018"
 homepage = "https://github.com/unicode-rs/unicode-segmentation"
 repository = "https://github.com/unicode-rs/unicode-segmentation"
 documentation = "https://unicode-rs.github.io/unicode-segmentation"
@@ -24,16 +23,8 @@ no_std = [] # This is a no-op, preserved for backward compatibility only.
 
 [dev-dependencies]
 quickcheck = "0.7"
-criterion = "0.3"
+bencher = "0.1"
 
 [[bench]]
 name = "graphemes"
-harness = false
-
-[[bench]]
-name = "unicode_words"
-harness = false
-
-[[bench]]
-name = "word_bounds"
 harness = false
 \ No newline at end of file
diff --git a/METADATA b/METADATA
index 00df7c3..bf983b5 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@ third_party {
   }
   url {
     type: ARCHIVE
-    value: "https://static.crates.io/crates/unicode-segmentation/unicode-segmentation-1.8.0.crate"
+    value: "https://static.crates.io/crates/unicode-segmentation/unicode-segmentation-1.7.1.crate"
   }
-  version: "1.8.0"
+  version: "1.7.1"
   license_type: NOTICE
   last_upgrade_date {
     year: 2021
-    month: 8
-    day: 9
+    month: 1
+    day: 12
   }
 }
diff --git a/README.md b/README.md
index a65c0e2..2f3bdca 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ to your `Cargo.toml`:
 
 ```toml
 [dependencies]
-unicode-segmentation = "1.8.0"
+unicode-segmentation = "1.7.1"
 ```
 
 # Change Log
diff --git a/TEST_MAPPING b/TEST_MAPPING
index c934591..60b40b7 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -1,21 +1,8 @@
-// Generated by update_crate_tests.py for tests that depend on this crate.
+// Generated by cargo2android.py for tests in Android.bp
 {
-  "imports": [
-    {
-      "path": "external/rust/crates/base64"
-    },
-    {
-      "path": "external/rust/crates/heck"
-    }
-  ],
   "presubmit": [
     {
-      "name": "authfs_device_test_src_lib"
-    }
-  ],
-  "presubmit-rust": [
-    {
-      "name": "authfs_device_test_src_lib"
+      "name": "heck_device_test_src_lib"
     }
   ]
 }
diff --git a/benches/graphemes.rs b/benches/graphemes.rs
index 8a7a379..5f14352 100644
--- a/benches/graphemes.rs
+++ b/benches/graphemes.rs
@@ -1,54 +1,55 @@
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use unicode_segmentation;
+#[macro_use]
+extern crate bencher;
+extern crate unicode_segmentation;
 
-use std::fs;
+use bencher::Bencher;
 use unicode_segmentation::UnicodeSegmentation;
+use std::fs;
 
-fn graphemes(c: &mut Criterion, lang: &str, path: &str) {
+fn graphemes(bench: &mut Bencher, path: &str) {
     let text = fs::read_to_string(path).unwrap();
-
-    c.bench_function(&format!("graphemes_{}",lang), |bench| {
-        bench.iter(|| {
-            for g in UnicodeSegmentation::graphemes(black_box(&*text), true) {
-                black_box(g);
-            }
-        })
+    bench.iter(|| {
+        for g in UnicodeSegmentation::graphemes(&*text, true) {
+            bencher::black_box(g);
+        }
     });
+
+    bench.bytes = text.len() as u64;
 }
 
-fn graphemes_arabic(c: &mut Criterion) {
-    graphemes(c, "arabic" ,"benches/texts/arabic.txt");
+fn graphemes_arabic(bench: &mut Bencher) {
+    graphemes(bench, "benches/texts/arabic.txt");
 }
 
-fn graphemes_english(c: &mut Criterion) {
-    graphemes(c, "english" ,"benches/texts/english.txt");
+fn graphemes_english(bench: &mut Bencher) {
+    graphemes(bench, "benches/texts/english.txt");
 }
 
-fn graphemes_hindi(c: &mut Criterion) {
-    graphemes(c, "hindi" ,"benches/texts/hindi.txt");
+fn graphemes_hindi(bench: &mut Bencher) {
+    graphemes(bench, "benches/texts/hindi.txt");
 }
 
-fn graphemes_japanese(c: &mut Criterion) {
-    graphemes(c, "japanese" ,"benches/texts/japanese.txt");
+fn graphemes_japanese(bench: &mut Bencher) {
+    graphemes(bench, "benches/texts/japanese.txt");
 }
 
-fn graphemes_korean(c: &mut Criterion) {
-    graphemes(c, "korean" ,"benches/texts/korean.txt");
+fn graphemes_korean(bench: &mut Bencher) {
+    graphemes(bench, "benches/texts/korean.txt");
 }
 
-fn graphemes_mandarin(c: &mut Criterion) {
-    graphemes(c, "mandarin" ,"benches/texts/mandarin.txt");
+fn graphemes_mandarin(bench: &mut Bencher) {
+    graphemes(bench, "benches/texts/mandarin.txt");
 }
 
-fn graphemes_russian(c: &mut Criterion) {
-    graphemes(c, "russian" ,"benches/texts/russian.txt");
+fn graphemes_russian(bench: &mut Bencher) {
+    graphemes(bench, "benches/texts/russian.txt");
 }
 
-fn graphemes_source_code(c: &mut Criterion) {
-    graphemes(c, "source_code","benches/texts/source_code.txt");
+fn graphemes_source_code(bench: &mut Bencher) {
+    graphemes(bench, "benches/texts/source_code.txt");
 }
 
-criterion_group!(
+benchmark_group!(
     benches,
     graphemes_arabic,
     graphemes_english,
@@ -60,4 +61,4 @@ criterion_group!(
     graphemes_source_code,
 );
 
-criterion_main!(benches);
+benchmark_main!(benches);
diff --git a/benches/unicode_words.rs b/benches/unicode_words.rs
deleted file mode 100644
index 731e325..0000000
--- a/benches/unicode_words.rs
+++ /dev/null
@@ -1,64 +0,0 @@
-#[macro_use]
-extern crate bencher;
-extern crate unicode_segmentation;
-
-use bencher::Bencher;
-use unicode_segmentation::UnicodeSegmentation;
-use std::fs;
-
-fn unicode_words(bench: &mut Bencher, path: &str) {
-    let text = fs::read_to_string(path).unwrap();
-    bench.iter(|| {
-        for w in text.unicode_words() {
-            bencher::black_box(w);
-        }
-    });
-
-    bench.bytes = text.len() as u64;
-}
-
-fn unicode_words_arabic(bench: &mut Bencher) {
-    unicode_words(bench, "benches/texts/arabic.txt");
-}
-
-fn unicode_words_english(bench: &mut Bencher) {
-    unicode_words(bench, "benches/texts/english.txt");
-}
-
-fn unicode_words_hindi(bench: &mut Bencher) {
-    unicode_words(bench, "benches/texts/hindi.txt");
-}
-
-fn unicode_words_japanese(bench: &mut Bencher) {
-    unicode_words(bench, "benches/texts/japanese.txt");
-}
-
-fn unicode_words_korean(bench: &mut Bencher) {
-    unicode_words(bench, "benches/texts/korean.txt");
-}
-
-fn unicode_words_mandarin(bench: &mut Bencher) {
-    unicode_words(bench, "benches/texts/mandarin.txt");
-}
-
-fn unicode_words_russian(bench: &mut Bencher) {
-    unicode_words(bench, "benches/texts/russian.txt");
-}
-
-fn unicode_words_source_code(bench: &mut Bencher) {
-    unicode_words(bench, "benches/texts/source_code.txt");
-}
-
-benchmark_group!(
-    benches,
-    unicode_words_arabic,
-    unicode_words_english,
-    unicode_words_hindi,
-    unicode_words_japanese,
-    unicode_words_korean,
-    unicode_words_mandarin,
-    unicode_words_russian,
-    unicode_words_source_code,
-);
-
-benchmark_main!(benches);
diff --git a/benches/word_bounds.rs b/benches/word_bounds.rs
deleted file mode 100644
index 035f57e..0000000
--- a/benches/word_bounds.rs
+++ /dev/null
@@ -1,64 +0,0 @@
-#[macro_use]
-extern crate bencher;
-extern crate unicode_segmentation;
-
-use bencher::Bencher;
-use unicode_segmentation::UnicodeSegmentation;
-use std::fs;
-
-fn word_bounds(bench: &mut Bencher, path: &str) {
-    let text = fs::read_to_string(path).unwrap();
-    bench.iter(|| {
-        for w in text.split_word_bounds() {
-            bencher::black_box(w);
-        }
-    });
-
-    bench.bytes = text.len() as u64;
-}
-
-fn word_bounds_arabic(bench: &mut Bencher) {
-    word_bounds(bench, "benches/texts/arabic.txt");
-}
-
-fn word_bounds_english(bench: &mut Bencher) {
-    word_bounds(bench, "benches/texts/english.txt");
-}
-
-fn word_bounds_hindi(bench: &mut Bencher) {
-    word_bounds(bench, "benches/texts/hindi.txt");
-}
-
-fn word_bounds_japanese(bench: &mut Bencher) {
-    word_bounds(bench, "benches/texts/japanese.txt");
-}
-
-fn word_bounds_korean(bench: &mut Bencher) {
-    word_bounds(bench, "benches/texts/korean.txt");
-}
-
-fn word_bounds_mandarin(bench: &mut Bencher) {
-    word_bounds(bench, "benches/texts/mandarin.txt");
-}
-
-fn word_bounds_russian(bench: &mut Bencher) {
-    word_bounds(bench, "benches/texts/russian.txt");
-}
-
-fn word_bounds_source_code(bench: &mut Bencher) {
-    word_bounds(bench, "benches/texts/source_code.txt");
-}
-
-benchmark_group!(
-    benches,
-    word_bounds_arabic,
-    word_bounds_english,
-    word_bounds_hindi,
-    word_bounds_japanese,
-    word_bounds_korean,
-    word_bounds_mandarin,
-    word_bounds_russian,
-    word_bounds_source_code,
-);
-
-benchmark_main!(benches);
diff --git a/cargo2android.json b/cargo2android.json
deleted file mode 100644
index bf78496..0000000
--- a/cargo2android.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-  "device": true,
-  "run": true
-}
-\ No newline at end of file
diff --git a/scripts/unicode.py b/scripts/unicode.py
index 16e321d..1841e35 100644
--- a/scripts/unicode.py
+++ b/scripts/unicode.py
@@ -229,7 +229,7 @@ pub mod util {
     #[inline]
     fn is_alphabetic(c: char) -> bool {
         match c {
-            'a' ..= 'z' | 'A' ..= 'Z' => true,
+            'a' ... 'z' | 'A' ... 'Z' => true,
             c if c > '\x7f' => super::derived_property::Alphabetic(c),
             _ => false,
         }
@@ -238,7 +238,7 @@ pub mod util {
     #[inline]
     fn is_numeric(c: char) -> bool {
         match c {
-            '0' ..= '9' => true,
+            '0' ... '9' => true,
             c if c > '\x7f' => super::general_category::N(c),
             _ => false,
         }
@@ -281,6 +281,7 @@ def emit_break_module(f, break_table, break_cats, name):
     f.write("""    }
 
     fn bsearch_range_value_table(c: char, r: &'static [(char, char, %sCat)]) -> (u32, u32, %sCat) {
+        use core;
         use core::cmp::Ordering::{Equal, Less, Greater};
         match r.binary_search_by(|&(lo, hi, _)| {
             if lo <= c && c <= hi { Equal }
diff --git a/src/grapheme.rs b/src/grapheme.rs
index 190b86e..e95d478 100644
--- a/src/grapheme.rs
+++ b/src/grapheme.rs
@@ -10,7 +10,7 @@
 
 use core::cmp;
 
-use crate::tables::grapheme::GraphemeCat;
+use tables::grapheme::GraphemeCat;
 
 /// External iterator for grapheme clusters and byte offsets.
 ///
@@ -73,7 +73,7 @@ impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
 ///
 /// [`graphemes`]: trait.UnicodeSegmentation.html#tymethod.graphemes
 /// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
-#[derive(Clone, Debug)]
+#[derive(Clone)]
 pub struct Graphemes<'a> {
     string: &'a str,
     cursor: GraphemeCursor,
@@ -148,7 +148,7 @@ pub fn new_grapheme_indices<'b>(s: &'b str, is_extended: bool) -> GraphemeIndice
 
 // maybe unify with PairResult?
 // An enum describing information about a potential boundary.
-#[derive(PartialEq, Eq, Clone, Debug)]
+#[derive(PartialEq, Eq, Clone)]
 enum GraphemeState {
     // No information is known.
     Unknown,
@@ -165,7 +165,7 @@ enum GraphemeState {
 }
 
 /// Cursor-based segmenter for grapheme clusters.
-#[derive(Clone, Debug)]
+#[derive(Clone)]
 pub struct GraphemeCursor {
     // Current cursor position.
     offset: usize,
@@ -228,9 +228,8 @@ enum PairResult {
     Emoji,  // a break if preceded by emoji base and (Extend)*
 }
 
-#[inline]
 fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
-    use crate::tables::grapheme::GraphemeCat::*;
+    use tables::grapheme::GraphemeCat::*;
     use self::PairResult::*;
     match (before, after) {
         (GC_CR, GC_LF) => NotBreak,  // GB3
@@ -296,8 +295,8 @@ impl GraphemeCursor {
     }
 
     fn grapheme_category(&mut self, ch: char) -> GraphemeCat {
-        use crate::tables::grapheme as gr;
-        use crate::tables::grapheme::GraphemeCat::*;
+        use tables::grapheme as gr;
+        use tables::grapheme::GraphemeCat::*;
 
         if ch <= '\u{7e}' {
             // Special-case optimization for ascii, except U+007F.  This
@@ -388,7 +387,7 @@ impl GraphemeCursor {
     /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Ok(true));
     /// ```
     pub fn provide_context(&mut self, chunk: &str, chunk_start: usize) {
-        use crate::tables::grapheme as gr;
+        use tables::grapheme as gr;
         assert!(chunk_start + chunk.len() == self.pre_context_offset.unwrap());
         self.pre_context_offset = None;
         if self.is_extended && chunk_start + chunk.len() == self.offset {
@@ -408,7 +407,6 @@ impl GraphemeCursor {
         }
     }
 
-    #[inline]
     fn decide(&mut self, is_break: bool) {
         self.state = if is_break {
             GraphemeState::Break
@@ -417,13 +415,11 @@ impl GraphemeCursor {
         };
     }
 
-    #[inline]
     fn decision(&mut self, is_break: bool) -> Result<bool, GraphemeIncomplete> {
         self.decide(is_break);
         Ok(is_break)
     }
 
-    #[inline]
     fn is_boundary_result(&self) -> Result<bool, GraphemeIncomplete> {
         if self.state == GraphemeState::Break {
             Ok(true)
@@ -436,9 +432,8 @@ impl GraphemeCursor {
         }
     }
 
-    #[inline]
     fn handle_regional(&mut self, chunk: &str, chunk_start: usize) {
-        use crate::tables::grapheme as gr;
+        use tables::grapheme as gr;
         let mut ris_count = self.ris_count.unwrap_or(0);
         for ch in chunk.chars().rev() {
             if self.grapheme_category(ch) != gr::GC_Regional_Indicator {
@@ -457,9 +452,8 @@ impl GraphemeCursor {
         self.state = GraphemeState::Regional;
     }
 
-    #[inline]
     fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) {
-        use crate::tables::grapheme as gr;
+        use tables::grapheme as gr;
         let mut iter = chunk.chars().rev();
         if let Some(ch) = iter.next() {
             if self.grapheme_category(ch) != gr::GC_ZWJ {
@@ -488,7 +482,6 @@ impl GraphemeCursor {
         self.state = GraphemeState::Emoji;
     }
 
-    #[inline]
     /// Determine whether the current cursor location is a grapheme cluster boundary.
     /// Only a part of the string need be supplied. If `chunk_start` is nonzero or
     /// the length of `chunk` is not equal to `len` on creation, then this method
@@ -513,7 +506,7 @@ impl GraphemeCursor {
     /// assert_eq!(cursor.is_boundary(flags, 0), Ok(false));
     /// ```
     pub fn is_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<bool, GraphemeIncomplete> {
-        use crate::tables::grapheme as gr;
+        use tables::grapheme as gr;
         if self.state == GraphemeState::Break {
             return Ok(true)
         }
@@ -570,7 +563,6 @@ impl GraphemeCursor {
         }
     }
 
-    #[inline]
     /// Find the next boundary after the current cursor position. Only a part of
     /// the string need be supplied. If the chunk is incomplete, then this
     /// method might return `GraphemeIncomplete::PreContext` or
diff --git a/src/lib.rs b/src/lib.rs
index 6077bbd..571e33a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -66,7 +66,7 @@ extern crate quickcheck;
 pub use grapheme::{Graphemes, GraphemeIndices};
 pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
 pub use tables::UNICODE_VERSION;
-pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords, UnicodeWordIndices};
+pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords};
 pub use sentence::{USentenceBounds, USentenceBoundIndices, UnicodeSentences};
 
 mod grapheme;
@@ -146,30 +146,6 @@ pub trait UnicodeSegmentation {
     /// ```
     fn unicode_words<'a>(&'a self) -> UnicodeWords<'a>;
 
-    /// Returns an iterator over the words of `self`, separated on
-    /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries), and their
-    /// offsets.
-    ///
-    /// Here, "words" are just those substrings which, after splitting on
-    /// UAX#29 word boundaries, contain any alphanumeric characters. That is, the
-    /// substring must contain at least one character with the
-    /// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
-    /// property, or with
-    /// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// # use self::unicode_segmentation::UnicodeSegmentation;
-    /// let uwis = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
-    /// let uwi1 = uwis.unicode_word_indices().collect::<Vec<(usize, &str)>>();
-    /// let b: &[_] = &[(0, "The"), (4, "quick"), (12, "brown"), (20, "fox"), (24, "can't"),
-    ///                 (30, "jump"), (35, "32.3"), (40, "feet"), (46, "right")];
-    ///
-    /// assert_eq!(&uwi1[..], b);
-    /// ```
-    fn unicode_word_indices<'a>(&'a self) -> UnicodeWordIndices<'a>;
-
     /// Returns an iterator over substrings of `self` separated on
     /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries).
     ///
@@ -274,11 +250,6 @@ impl UnicodeSegmentation for str {
     }
 
     #[inline]
-    fn unicode_word_indices(&self) -> UnicodeWordIndices {
-        word::new_unicode_word_indices(self)
-    }
-
-    #[inline]
     fn split_word_bounds(&self) -> UWordBounds {
         word::new_word_bounds(self)
     }
diff --git a/src/sentence.rs b/src/sentence.rs
index 0a23abd..275da52 100644
--- a/src/sentence.rs
+++ b/src/sentence.rs
@@ -13,7 +13,7 @@ use core::iter::Filter;
 
 // All of the logic for forward iteration over sentences
 mod fwd {
-    use crate::tables::sentence::SentenceCat;
+    use tables::sentence::SentenceCat;
     use core::cmp;
 
     // Describe a parsed part of source string as described in this table:
@@ -111,7 +111,7 @@ mod fwd {
         if parts[idx] == StatePart::ClosePlus { idx -= 1 }
 
         if parts[idx] == StatePart::ATerm {
-            use crate::tables::sentence as se;
+            use tables::sentence as se;
 
             for next_char in ahead.chars() {
                 //( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* Lower
@@ -176,7 +176,7 @@ mod fwd {
 
         #[inline]
         fn next(&mut self) -> Option<usize> {
-            use crate::tables::sentence as se;
+            use tables::sentence as se;
 
             for next_char in self.string[self.pos..].chars() {
                 let position_before = self.pos;
@@ -331,7 +331,7 @@ pub fn new_sentence_bound_indices<'a>(source: &'a str) -> USentenceBoundIndices<
 #[inline]
 pub fn new_unicode_sentences<'b>(s: &'b str) -> UnicodeSentences<'b> {
     use super::UnicodeSegmentation;
-    use crate::tables::util::is_alphanumeric;
+    use tables::util::is_alphanumeric;
 
     fn has_alphanumeric(s: &&str) -> bool { s.chars().any(|c| is_alphanumeric(c)) }
     let has_alphanumeric: fn(&&str) -> bool = has_alphanumeric; // coerce to fn pointer
diff --git a/src/tables.rs b/src/tables.rs
index 6d09ea2..7062e36 100644
--- a/src/tables.rs
+++ b/src/tables.rs
@@ -30,7 +30,7 @@ pub mod util {
     #[inline]
     fn is_alphabetic(c: char) -> bool {
         match c {
-            'a' ..= 'z' | 'A' ..= 'Z' => true,
+            'a' ... 'z' | 'A' ... 'Z' => true,
             c if c > '' => super::derived_property::Alphabetic(c),
             _ => false,
         }
@@ -39,7 +39,7 @@ pub mod util {
     #[inline]
     fn is_numeric(c: char) -> bool {
         match c {
-            '0' ..= '9' => true,
+            '0' ... '9' => true,
             c if c > '' => super::general_category::N(c),
             _ => false,
         }
@@ -352,6 +352,7 @@ pub mod grapheme {
     }
 
     fn bsearch_range_value_table(c: char, r: &'static [(char, char, GraphemeCat)]) -> (u32, u32, GraphemeCat) {
+        use core;
         use core::cmp::Ordering::{Equal, Less, Greater};
         match r.binary_search_by(|&(lo, hi, _)| {
             if lo <= c && c <= hi { Equal }
@@ -1002,6 +1003,7 @@ pub mod word {
     }
 
     fn bsearch_range_value_table(c: char, r: &'static [(char, char, WordCat)]) -> (u32, u32, WordCat) {
+        use core;
         use core::cmp::Ordering::{Equal, Less, Greater};
         match r.binary_search_by(|&(lo, hi, _)| {
             if lo <= c && c <= hi { Equal }
@@ -1477,6 +1479,7 @@ pub mod emoji {
     }
 
     fn bsearch_range_value_table(c: char, r: &'static [(char, char, EmojiCat)]) -> (u32, u32, EmojiCat) {
+        use core;
         use core::cmp::Ordering::{Equal, Less, Greater};
         match r.binary_search_by(|&(lo, hi, _)| {
             if lo <= c && c <= hi { Equal }
@@ -1580,6 +1583,7 @@ pub mod sentence {
     }
 
     fn bsearch_range_value_table(c: char, r: &'static [(char, char, SentenceCat)]) -> (u32, u32, SentenceCat) {
+        use core;
         use core::cmp::Ordering::{Equal, Less, Greater};
         match r.binary_search_by(|&(lo, hi, _)| {
             if lo <= c && c <= hi { Equal }
diff --git a/src/test.rs b/src/test.rs
index ae74c64..75b77c5 100644
--- a/src/test.rs
+++ b/src/test.rs
@@ -14,7 +14,7 @@ use std::prelude::v1::*;
 
 #[test]
 fn test_graphemes() {
-    use crate::testdata::{TEST_SAME, TEST_DIFF};
+    use testdata::{TEST_SAME, TEST_DIFF};
 
     pub const EXTRA_DIFF: &'static [(&'static str,
                                      &'static [&'static str],
@@ -88,7 +88,7 @@ fn test_graphemes() {
 
 #[test]
 fn test_words() {
-    use crate::testdata::TEST_WORD;
+    use testdata::TEST_WORD;
 
     // Unicode's official tests don't really test longer chains of flag emoji
     // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ
@@ -144,7 +144,7 @@ fn test_words() {
 
 #[test]
 fn test_sentences() {
-    use crate::testdata::TEST_SENTENCE;
+    use testdata::TEST_SENTENCE;
 
     for &(s, w) in TEST_SENTENCE.iter() {
         macro_rules! assert_ {
diff --git a/src/word.rs b/src/word.rs
index 5cfde0d..179d122 100644
--- a/src/word.rs
+++ b/src/word.rs
@@ -11,7 +11,7 @@
 use core::cmp;
 use core::iter::Filter;
 
-use crate::tables::word::WordCat;
+use tables::word::WordCat;
 
 /// An iterator over the substrings of a string which, after splitting the string on
 /// [word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries),
@@ -40,34 +40,6 @@ impl<'a> DoubleEndedIterator for UnicodeWords<'a> {
     fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
 }
 
-/// An iterator over the substrings of a string which, after splitting the string on
-/// [word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries),
-/// contain any characters with the
-/// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
-/// property, or with
-/// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
-/// This iterator also provides the byte offsets for each substring.
-///
-/// This struct is created by the [`unicode_word_indices`] method on the [`UnicodeSegmentation`] trait. See
-/// its documentation for more.
-///
-/// [`unicode_word_indices`]: trait.UnicodeSegmentation.html#tymethod.unicode_word_indices
-/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
-pub struct UnicodeWordIndices<'a> {
-    inner: Filter<UWordBoundIndices<'a>, fn(&(usize, &str)) -> bool>,
-}
-
-impl<'a> Iterator for UnicodeWordIndices<'a> {
-    type Item = (usize, &'a str);
-
-    #[inline]
-    fn next(&mut self) -> Option<(usize, &'a str)> { self.inner.next() }
-}
-impl<'a> DoubleEndedIterator for UnicodeWordIndices<'a> {
-    #[inline]
-    fn next_back(&mut self) -> Option<(usize, &'a str)> { self.inner.next_back() }
-}
-
 /// External iterator for a string's
 /// [word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries).
 ///
@@ -170,7 +142,7 @@ enum RegionalState {
 }
 
 fn is_emoji(ch: char) -> bool {
-    use crate::tables::emoji;
+    use tables::emoji;
     emoji::emoji_category(ch).2 == emoji::EmojiCat::EC_Extended_Pictographic
 }
 
@@ -187,7 +159,7 @@ impl<'a> Iterator for UWordBounds<'a> {
     fn next(&mut self) -> Option<&'a str> {
         use self::UWordBoundsState::*;
         use self::FormatExtendType::*;
-        use crate::tables::word as wd;
+        use tables::word as wd;
         if self.string.len() == 0 {
             return None;
         }
@@ -200,13 +172,14 @@ impl<'a> Iterator for UWordBounds<'a> {
         let mut cat = wd::WC_Any;
         let mut savecat = wd::WC_Any;
 
+        // Whether or not the previous category was ZWJ
+        // ZWJs get collapsed, so this handles precedence of WB3c over WB4
+        let mut prev_zwj;
         // If extend/format/zwj were skipped. Handles precedence of WB3d over WB4
         let mut skipped_format_extend = false;
         for (curr, ch) in self.string.char_indices() {
             idx = curr;
-            // Whether or not the previous category was ZWJ
-            // ZWJs get collapsed, so this handles precedence of WB3c over WB4
-            let prev_zwj = cat == wd::WC_ZWJ;
+            prev_zwj = cat == wd::WC_ZWJ;
             // if there's a category cached, grab it
             cat = match self.cat {
                 None => wd::word_category(ch).2,
@@ -413,7 +386,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
     fn next_back(&mut self) -> Option<&'a str> {
         use self::UWordBoundsState::*;
         use self::FormatExtendType::*;
-        use crate::tables::word as wd;
+        use tables::word as wd;
         if self.string.len() == 0 {
             return None;
         }
@@ -665,7 +638,7 @@ impl<'a> UWordBounds<'a> {
 
     #[inline]
     fn get_next_cat(&self, idx: usize) -> Option<WordCat> {
-        use crate::tables::word as wd;
+        use tables::word as wd;
         let nidx = idx + self.string[idx..].chars().next().unwrap().len_utf8();
         if nidx < self.string.len() {
             let nch = self.string[nidx..].chars().next().unwrap();
@@ -677,7 +650,7 @@ impl<'a> UWordBounds<'a> {
 
     #[inline]
     fn get_prev_cat(&self, idx: usize) -> Option<WordCat> {
-        use crate::tables::word as wd;
+        use tables::word as wd;
         if idx > 0 {
             let nch = self.string[..idx].chars().next_back().unwrap();
             Some(wd::word_category(nch).2)
@@ -698,22 +671,12 @@ pub fn new_word_bound_indices<'b>(s: &'b str) -> UWordBoundIndices<'b> {
 }
 
 #[inline]
-fn has_alphanumeric(s: &&str) -> bool {
-    use crate::tables::util::is_alphanumeric;
-
-    s.chars().any(|c| is_alphanumeric(c))
-}
-
-#[inline]
 pub fn new_unicode_words<'b>(s: &'b str) -> UnicodeWords<'b> {
     use super::UnicodeSegmentation;
+    use tables::util::is_alphanumeric;
 
-    UnicodeWords { inner: s.split_word_bounds().filter(has_alphanumeric) }
-}
+    fn has_alphanumeric(s: &&str) -> bool { s.chars().any(|c| is_alphanumeric(c)) }
+    let has_alphanumeric: fn(&&str) -> bool = has_alphanumeric; // coerce to fn pointer
 
-#[inline]
-pub fn new_unicode_word_indices<'b>(s: &'b str) -> UnicodeWordIndices<'b> {
-    use super::UnicodeSegmentation;
-
-    UnicodeWordIndices { inner: s.split_word_bound_indices().filter(|(_, c)| has_alphanumeric(c)) }
+    UnicodeWords { inner: s.split_word_bounds().filter(has_alphanumeric) }
 }
author	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2022-02-16 01:13:04 +0000
committer	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2022-02-16 01:13:04 +0000
commit	ec9c482f6dca777e0955c304457a80ef915e564f (patch)
tree	47be3ea825e1446e093c3b760db7d40a4e4e55d6
parent	0ceec9b4cae6be5020bb678f0172ee13a83c1509 (diff)
parent	6453fc5ed39eb1c8fc72e9d291eb8c7084a05ad7 (diff)
download	unicode-segmentation-ec9c482f6dca777e0955c304457a80ef915e564f.tar.gz