about summary refs log tree commit diff
path: root/src/word_splitters.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/word_splitters.rs')
-rw-r--r-- src/word_splitters.rs 314
1 files changed, 0 insertions, 314 deletions
diff --git a/src/word_splitters.rs b/src/word_splitters.rs
deleted file mode 100644
index 69e246f..0000000
--- a/src/word_splitters.rs
+++ /dev/null
@@ -1,314 +0,0 @@
-//! Word splitting functionality.
-//!
-//! To wrap text into lines, long words sometimes need to be split
-//! across lines. The [`WordSplitter`] enum defines this
-//! functionality.
-
-use crate::core::{display_width, Word};
-
-/// The `WordSplitter` enum describes where words can be split.
-///
-/// If the textwrap crate has been compiled with the `hyphenation`
-/// Cargo feature enabled, you will find a
-/// [`WordSplitter::Hyphenation`] variant. Use it for
-/// language-aware hyphenation:
-///
-/// ```
-/// #[cfg(feature = "hyphenation")] {
-/// use hyphenation::{Language, Load, Standard};
-/// use textwrap::{wrap, Options, WordSplitter};
-///
-/// let text = "Oxidation is the loss of electrons.";
-/// let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
-/// let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary));
-/// assert_eq!(wrap(text, &options), vec!["Oxida-",
-/// "tion is",
-/// "the loss",
-/// "of elec-",
-/// "trons."]);
-/// }
-/// ```
-///
-/// Please see the documentation for the [hyphenation] crate for more
-/// details.
-///
-/// [hyphenation]: https://docs.rs/hyphenation/
-#[derive(Clone)]
-pub enum WordSplitter {
- /// Use this as a [`Options.word_splitter`] to avoid any kind of
- /// hyphenation:
- ///
- /// ```
- /// use textwrap::{wrap, Options, WordSplitter};
- ///
- /// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
- /// assert_eq!(wrap("foo bar-baz", &options),
- /// vec!["foo", "bar-baz"]);
- /// ```
- ///
- /// [`Options.word_splitter`]: super::Options::word_splitter
- NoHyphenation,
-
- /// `HyphenSplitter` is the default `WordSplitter` used by
- /// [`Options::new`](super::Options::new). It will split words on
- /// existing hyphens in the word.
- ///
- /// It will only use hyphens that are surrounded by alphanumeric
- /// characters, which prevents a word like `"--foo-bar"` from
- /// being split into `"--"` and `"foo-bar"`.
- ///
- /// # Examples
- ///
- /// ```
- /// use textwrap::WordSplitter;
- ///
- /// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"),
- /// vec![6]);
- /// ```
- HyphenSplitter,
-
- /// Use a custom function as the word splitter.
- ///
- /// This variant lets you implement a custom word splitter using
- /// your own function.
- ///
- /// # Examples
- ///
- /// ```
- /// use textwrap::WordSplitter;
- ///
- /// fn split_at_underscore(word: &str) -> Vec<usize> {
- /// word.match_indices('_').map(|(idx, _)| idx + 1).collect()
- /// }
- ///
- /// let word_splitter = WordSplitter::Custom(split_at_underscore);
- /// assert_eq!(word_splitter.split_points("a_long_identifier"),
- /// vec![2, 7]);
- /// ```
- Custom(fn(word: &str) -> Vec<usize>),
-
- /// A hyphenation dictionary can be used to do language-specific
- /// hyphenation using patterns from the [hyphenation] crate.
- ///
- /// **Note:** Only available when the `hyphenation` Cargo feature is
- /// enabled.
- ///
- /// [hyphenation]: https://docs.rs/hyphenation/
- #[cfg(feature = "hyphenation")]
- Hyphenation(hyphenation::Standard),
-}
-
-impl std::fmt::Debug for WordSplitter {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- match self {
- WordSplitter::NoHyphenation => f.write_str("NoHyphenation"),
- WordSplitter::HyphenSplitter => f.write_str("HyphenSplitter"),
- WordSplitter::Custom(_) => f.write_str("Custom(...)"),
- #[cfg(feature = "hyphenation")]
- WordSplitter::Hyphenation(dict) => write!(f, "Hyphenation({})", dict.language()),
- }
- }
-}
-
-impl PartialEq<WordSplitter> for WordSplitter {
- fn eq(&self, other: &WordSplitter) -> bool {
- match (self, other) {
- (WordSplitter::NoHyphenation, WordSplitter::NoHyphenation) => true,
- (WordSplitter::HyphenSplitter, WordSplitter::HyphenSplitter) => true,
- #[cfg(feature = "hyphenation")]
- (WordSplitter::Hyphenation(this_dict), WordSplitter::Hyphenation(other_dict)) => {
- this_dict.language() == other_dict.language()
- }
- (_, _) => false,
- }
- }
-}
-
-impl WordSplitter {
- /// Return all possible indices where `word` can be split.
- ///
- /// The indices are in the range `0..word.len()`. They point to
- /// the index _after_ the split point, i.e., after `-` if
- /// splitting on hyphens. This way, `word.split_at(idx)` will
- /// break the word into two well-formed pieces.
- ///
- /// # Examples
- ///
- /// ```
- /// use textwrap::WordSplitter;
- /// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]);
- /// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
- /// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]);
- /// ```
- pub fn split_points(&self, word: &str) -> Vec<usize> {
- match self {
- WordSplitter::NoHyphenation => Vec::new(),
- WordSplitter::HyphenSplitter => {
- let mut splits = Vec::new();
-
- for (idx, _) in word.match_indices('-') {
- // We only use hyphens that are surrounded by alphanumeric
- // characters. This is to avoid splitting on repeated hyphens,
- // such as those found in --foo-bar.
- let prev = word[..idx].chars().next_back();
- let next = word[idx + 1..].chars().next();
-
- if prev.filter(|ch| ch.is_alphanumeric()).is_some()
- && next.filter(|ch| ch.is_alphanumeric()).is_some()
- {
- splits.push(idx + 1); // +1 due to width of '-'.
- }
- }
-
- splits
- }
- WordSplitter::Custom(splitter_func) => splitter_func(word),
- #[cfg(feature = "hyphenation")]
- WordSplitter::Hyphenation(dictionary) => {
- use hyphenation::Hyphenator;
- dictionary.hyphenate(word).breaks
- }
- }
- }
-}
-
-/// Split words into smaller words according to the split points given
-/// by `word_splitter`.
-///
-/// Note that we split all words, regardless of their length. This is
-/// to more cleanly separate the business of splitting (including
-/// automatic hyphenation) from the business of word wrapping.
-pub fn split_words<'a, I>(
- words: I,
- word_splitter: &'a WordSplitter,
-) -> impl Iterator<Item = Word<'a>>
-where
- I: IntoIterator<Item = Word<'a>>,
-{
- words.into_iter().flat_map(move |word| {
- let mut prev = 0;
- let mut split_points = word_splitter.split_points(&word).into_iter();
- std::iter::from_fn(move || {
- if let Some(idx) = split_points.next() {
- let need_hyphen = !word[..idx].ends_with('-');
- let w = Word {
- word: &word.word[prev..idx],
- width: display_width(&word[prev..idx]),
- whitespace: "",
- penalty: if need_hyphen { "-" } else { "" },
- };
- prev = idx;
- return Some(w);
- }
-
- if prev < word.word.len() || prev == 0 {
- let w = Word {
- word: &word.word[prev..],
- width: display_width(&word[prev..]),
- whitespace: word.whitespace,
- penalty: word.penalty,
- };
- prev = word.word.len() + 1;
- return Some(w);
- }
-
- None
- })
- })
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- // Like assert_eq!, but the left expression is an iterator.
- macro_rules! assert_iter_eq {
- ($left:expr, $right:expr) => {
- assert_eq!($left.collect::<Vec<_>>(), $right);
- };
- }
-
- #[test]
- fn split_words_no_words() {
- assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]);
- }
-
- #[test]
- fn split_words_empty_word() {
- assert_iter_eq!(
- split_words(vec![Word::from(" ")], &WordSplitter::HyphenSplitter),
- vec![Word::from(" ")]
- );
- }
-
- #[test]
- fn split_words_single_word() {
- assert_iter_eq!(
- split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter),
- vec![Word::from("foobar")]
- );
- }
-
- #[test]
- fn split_words_hyphen_splitter() {
- assert_iter_eq!(
- split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter),
- vec![Word::from("foo-"), Word::from("bar")]
- );
- }
-
- #[test]
- fn split_words_no_hyphenation() {
- assert_iter_eq!(
- split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation),
- vec![Word::from("foo-bar")]
- );
- }
-
- #[test]
- fn split_words_adds_penalty() {
- let fixed_split_point = |_: &str| vec![3];
-
- assert_iter_eq!(
- split_words(
- vec![Word::from("foobar")].into_iter(),
- &WordSplitter::Custom(fixed_split_point)
- ),
- vec![
- Word {
- word: "foo",
- width: 3,
- whitespace: "",
- penalty: "-"
- },
- Word {
- word: "bar",
- width: 3,
- whitespace: "",
- penalty: ""
- }
- ]
- );
-
- assert_iter_eq!(
- split_words(
- vec![Word::from("fo-bar")].into_iter(),
- &WordSplitter::Custom(fixed_split_point)
- ),
- vec![
- Word {
- word: "fo-",
- width: 3,
- whitespace: "",
- penalty: ""
- },
- Word {
- word: "bar",
- width: 3,
- whitespace: "",
- penalty: ""
- }
- ]
- );
- }
-}