From 6234ec131f8ccd62f29d6484320aa8b106fa17ee Mon Sep 17 00:00:00 2001 From: Joel Galenson Date: Mon, 21 Jun 2021 14:10:04 -0700 Subject: Upgrade rust/crates/textwrap to 0.14.0 Test: make Change-Id: I82ffe13940d8eb5fce82ed3f755ad221d2f0b093 --- src/core.rs | 512 +++--------------------------------------------------------- 1 file changed, 22 insertions(+), 490 deletions(-) (limited to 'src/core.rs') diff --git a/src/core.rs b/src/core.rs index b6f5b46..af02460 100644 --- a/src/core.rs +++ b/src/core.rs @@ -8,20 +8,25 @@ //! something: //! //! 1. Split your input into [`Fragment`]s. These are abstract blocks -//! of text or content which can be wrapped into lines. You can use -//! [`find_words`] to do this for text. +//! of text or content which can be wrapped into lines. See +//! [`WordSeparator`](crate::word_separators::WordSeparator) for +//! how to do this for text. //! //! 2. Potentially split your fragments into smaller pieces. This -//! allows you to implement things like hyphenation. If wrapping -//! text, [`split_words`] can help you do this. +//! allows you to implement things like hyphenation. If you are +//! wrapping text represented as a sequence of [`Word`]s, then you +//! can use [`split_words`](crate::word_splitters::split_words) can +//! help you do this. //! //! 3. Potentially break apart fragments that are still too large to //! fit on a single line. This is implemented in [`break_words`]. //! //! 4. Finally take your fragments and put them into lines. There are -//! two algorithms for this: [`wrap_optimal_fit`] and -//! [`wrap_first_fit`]. The former produces better line breaks, the -//! latter is faster. +//! two algorithms for this in the +//! [`wrap_algorithms`](crate::wrap_algorithms) module: +//! [`wrap_optimal_fit`](crate::wrap_algorithms::wrap_optimal_fit) +//! and [`wrap_first_fit`](crate::wrap_algorithms::wrap_first_fit). +//! The former produces better line breaks, the latter is faster. //! //! 5. Iterate through the slices returned by the wrapping functions //! and construct your lines of output. @@ -30,13 +35,6 @@ //! the functionality here is not sufficient or if you have ideas for //! improving it. We would love to hear from you! -use crate::{Options, WordSplitter}; - -#[cfg(feature = "smawk")] -mod optimal_fit; -#[cfg(feature = "smawk")] -pub use optimal_fit::wrap_optimal_fit; - /// The CSI or “Control Sequence Introducer” introduces an ANSI escape /// sequence. This is typically used for colored text and will be /// ignored when computing the text width. @@ -48,7 +46,7 @@ const ANSI_FINAL_BYTE: std::ops::RangeInclusive = '\x40'..='\x7e'; /// `chars` provide the following characters. The `chars` will be /// modified if `ch` is the start of an ANSI escape sequence. #[inline] -fn skip_ansi_escape_sequence>(ch: char, chars: &mut I) -> bool { +pub(crate) fn skip_ansi_escape_sequence>(ch: char, chars: &mut I) -> bool { if ch == CSI.0 && chars.next() == Some(CSI.1) { // We have found the start of an ANSI escape code, typically // used for colored terminal text. We skip until we find a @@ -175,7 +173,6 @@ fn ch_width(ch: char) -> usize { /// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence /// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters /// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html -#[inline] pub fn display_width(text: &str) -> usize { let mut chars = text.chars(); let mut width = 0; @@ -217,10 +214,14 @@ pub trait Fragment: std::fmt::Debug { /// trailing whitespace, and potentially a penalty item. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct Word<'a> { - word: &'a str, - width: usize, - pub(crate) whitespace: &'a str, - pub(crate) penalty: &'a str, + /// Word content. + pub word: &'a str, + /// Whitespace to insert if the word does not fall at the end of a line. + pub whitespace: &'a str, + /// Penalty string to insert if the word falls at the end of a line. + pub penalty: &'a str, + // Cached width in columns. + pub(crate) width: usize, } impl std::ops::Deref for Word<'_> { @@ -232,7 +233,7 @@ impl std::ops::Deref for Word<'_> { } impl<'a> Word<'a> { - /// Construct a new `Word`. + /// Construct a `Word` from a string. /// /// A trailing stretch of `' '` is automatically taken to be the /// whitespace part of the word. @@ -322,118 +323,6 @@ impl Fragment for Word<'_> { } } -/// Split line into words separated by regions of `' '` characters. -/// -/// # Examples -/// -/// ``` -/// use textwrap::core::{find_words, Fragment, Word}; -/// let words = find_words("Hello World!").collect::>(); -/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]); -/// assert_eq!(words[0].width(), 5); -/// assert_eq!(words[0].whitespace_width(), 1); -/// assert_eq!(words[0].penalty_width(), 0); -/// ``` -pub fn find_words(line: &str) -> impl Iterator { - let mut start = 0; - let mut in_whitespace = false; - let mut char_indices = line.char_indices(); - - std::iter::from_fn(move || { - // for (idx, ch) in char_indices does not work, gives this - // error: - // - // > cannot move out of `char_indices`, a captured variable in - // > an `FnMut` closure - #[allow(clippy::while_let_on_iterator)] - while let Some((idx, ch)) = char_indices.next() { - if in_whitespace && ch != ' ' { - let word = Word::from(&line[start..idx]); - start = idx; - in_whitespace = ch == ' '; - return Some(word); - } - - in_whitespace = ch == ' '; - } - - if start < line.len() { - let word = Word::from(&line[start..]); - start = line.len(); - return Some(word); - } - - None - }) -} - -/// Split words into smaller words according to the split points given -/// by `options`. -/// -/// Note that we split all words, regardless of their length. This is -/// to more cleanly separate the business of splitting (including -/// automatic hyphenation) from the business of word wrapping. -/// -/// # Examples -/// -/// ``` -/// use textwrap::core::{split_words, Word}; -/// use textwrap::{NoHyphenation, Options}; -/// -/// // The default splitter is HyphenSplitter: -/// let options = Options::new(80); -/// assert_eq!( -/// split_words(vec![Word::from("foo-bar")], &options).collect::>(), -/// vec![Word::from("foo-"), Word::from("bar")] -/// ); -/// -/// // The NoHyphenation splitter ignores the '-': -/// let options = Options::new(80).splitter(NoHyphenation); -/// assert_eq!( -/// split_words(vec![Word::from("foo-bar")], &options).collect::>(), -/// vec![Word::from("foo-bar")] -/// ); -/// ``` -pub fn split_words<'a, I, S, Opt>(words: I, options: Opt) -> impl Iterator> -where - I: IntoIterator>, - S: WordSplitter, - Opt: Into>, -{ - let options = options.into(); - - words.into_iter().flat_map(move |word| { - let mut prev = 0; - let mut split_points = options.splitter.split_points(&word).into_iter(); - std::iter::from_fn(move || { - if let Some(idx) = split_points.next() { - let need_hyphen = !word[..idx].ends_with('-'); - let w = Word { - word: &word.word[prev..idx], - width: display_width(&word[prev..idx]), - whitespace: "", - penalty: if need_hyphen { "-" } else { "" }, - }; - prev = idx; - return Some(w); - } - - if prev < word.word.len() || prev == 0 { - let w = Word { - word: &word.word[prev..], - width: display_width(&word[prev..]), - whitespace: word.whitespace, - penalty: word.penalty, - }; - prev = word.word.len() + 1; - return Some(w); - } - - None - }) - }) -} - /// Forcibly break words wider than `line_width` into smaller words. /// /// This simply calls [`Word::break_apart`] on words that are too @@ -454,200 +343,6 @@ where shortened_words } -/// Wrapping algorithms. -/// -/// After a text has been broken into [`Fragment`]s, the one now has -/// to decide how to break the fragments into lines. The simplest -/// algorithm for this is implemented by [`wrap_first_fit`]: it uses -/// no look-ahead and simply adds fragments to the line as long as -/// they fit. However, this can lead to poor line breaks if a large -/// fragment almost-but-not-quite fits on a line. When that happens, -/// the fragment is moved to the next line and it will leave behind a -/// large gap. A more advanced algorithm, implemented by -/// [`wrap_optimal_fit`], will take this into account. The optimal-fit -/// algorithm considers all possible line breaks and will attempt to -/// minimize the gaps left behind by overly short lines. -/// -/// While both algorithms run in linear time, the first-fit algorithm -/// is about 4 times faster than the optimal-fit algorithm. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum WrapAlgorithm { - /// Use an advanced algorithm which considers the entire paragraph - /// to find optimal line breaks. Implemented by - /// [`wrap_optimal_fit`]. - /// - /// **Note:** Only available when the `smawk` Cargo feature is - /// enabled. - #[cfg(feature = "smawk")] - OptimalFit, - /// Use a fast and simple algorithm with no look-ahead to find - /// line breaks. Implemented by [`wrap_first_fit`]. - FirstFit, -} - -/// Wrap abstract fragments into lines with a first-fit algorithm. -/// -/// The `line_widths` map line numbers (starting from 0) to a target -/// line width. This can be used to implement hanging indentation. -/// -/// The fragments must already have been split into the desired -/// widths, this function will not (and cannot) attempt to split them -/// further when arranging them into lines. -/// -/// # First-Fit Algorithm -/// -/// This implements a simple “greedy” algorithm: accumulate fragments -/// one by one and when a fragment no longer fits, start a new line. -/// There is no look-ahead, we simply take first fit of the fragments -/// we find. -/// -/// While fast and predictable, this algorithm can produce poor line -/// breaks when a long fragment is moved to a new line, leaving behind -/// a large gap: -/// -/// ``` -/// use textwrap::core::{find_words, wrap_first_fit, Word}; -/// -/// // Helper to convert wrapped lines to a Vec. -/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec { -/// lines.iter().map(|line| { -/// line.iter().map(|word| &**word).collect::>().join(" ") -/// }).collect::>() -/// } -/// -/// let text = "These few words will unfortunately not wrap nicely."; -/// let words = find_words(text).collect::>(); -/// assert_eq!(lines_to_strings(wrap_first_fit(&words, |_| 15)), -/// vec!["These few words", -/// "will", // <-- short line -/// "unfortunately", -/// "not wrap", -/// "nicely."]); -/// -/// // We can avoid the short line if we look ahead: -/// #[cfg(feature = "smawk")] -/// assert_eq!(lines_to_strings(textwrap::core::wrap_optimal_fit(&words, |_| 15)), -/// vec!["These few", -/// "words will", -/// "unfortunately", -/// "not wrap", -/// "nicely."]); -/// ``` -/// -/// The [`wrap_optimal_fit`] function was used above to get better -/// line breaks. It uses an advanced algorithm which tries to avoid -/// short lines. This function is about 4 times faster than -/// [`wrap_optimal_fit`]. -/// -/// # Examples -/// -/// Imagine you're building a house site and you have a number of -/// tasks you need to execute. Things like pour foundation, complete -/// framing, install plumbing, electric cabling, install insulation. -/// -/// The construction workers can only work during daytime, so they -/// need to pack up everything at night. Because they need to secure -/// their tools and move machines back to the garage, this process -/// takes much more time than the time it would take them to simply -/// switch to another task. -/// -/// You would like to make a list of tasks to execute every day based -/// on your estimates. You can model this with a program like this: -/// -/// ``` -/// use textwrap::core::{wrap_first_fit, Fragment}; -/// -/// #[derive(Debug)] -/// struct Task<'a> { -/// name: &'a str, -/// hours: usize, // Time needed to complete task. -/// sweep: usize, // Time needed for a quick sweep after task during the day. -/// cleanup: usize, // Time needed for full cleanup if day ends with this task. -/// } -/// -/// impl Fragment for Task<'_> { -/// fn width(&self) -> usize { self.hours } -/// fn whitespace_width(&self) -> usize { self.sweep } -/// fn penalty_width(&self) -> usize { self.cleanup } -/// } -/// -/// // The morning tasks -/// let tasks = vec![ -/// Task { name: "Foundation", hours: 4, sweep: 2, cleanup: 3 }, -/// Task { name: "Framing", hours: 3, sweep: 1, cleanup: 2 }, -/// Task { name: "Plumbing", hours: 2, sweep: 2, cleanup: 2 }, -/// Task { name: "Electrical", hours: 2, sweep: 1, cleanup: 2 }, -/// Task { name: "Insulation", hours: 2, sweep: 1, cleanup: 2 }, -/// Task { name: "Drywall", hours: 3, sweep: 1, cleanup: 2 }, -/// Task { name: "Floors", hours: 3, sweep: 1, cleanup: 2 }, -/// Task { name: "Countertops", hours: 1, sweep: 1, cleanup: 2 }, -/// Task { name: "Bathrooms", hours: 2, sweep: 1, cleanup: 2 }, -/// ]; -/// -/// // Fill tasks into days, taking `day_length` into account. The -/// // output shows the hours worked per day along with the names of -/// // the tasks for that day. -/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: usize) -> Vec<(usize, Vec<&'a str>)> { -/// let mut days = Vec::new(); -/// // Assign tasks to days. The assignment is a vector of slices, -/// // with a slice per day. -/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, |i| day_length); -/// for day in assigned_days.iter() { -/// let last = day.last().unwrap(); -/// let work_hours: usize = day.iter().map(|t| t.hours + t.sweep).sum(); -/// let names = day.iter().map(|t| t.name).collect::>(); -/// days.push((work_hours - last.sweep + last.cleanup, names)); -/// } -/// days -/// } -/// -/// // With a single crew working 8 hours a day: -/// assert_eq!( -/// assign_days(&tasks, 8), -/// [ -/// (7, vec!["Foundation"]), -/// (8, vec!["Framing", "Plumbing"]), -/// (7, vec!["Electrical", "Insulation"]), -/// (5, vec!["Drywall"]), -/// (7, vec!["Floors", "Countertops"]), -/// (4, vec!["Bathrooms"]), -/// ] -/// ); -/// -/// // With two crews working in shifts, 16 hours a day: -/// assert_eq!( -/// assign_days(&tasks, 16), -/// [ -/// (14, vec!["Foundation", "Framing", "Plumbing"]), -/// (15, vec!["Electrical", "Insulation", "Drywall", "Floors"]), -/// (6, vec!["Countertops", "Bathrooms"]), -/// ] -/// ); -/// ``` -/// -/// Apologies to anyone who actually knows how to build a house and -/// knows how long each step takes :-) -pub fn wrap_first_fit usize>( - fragments: &[T], - line_widths: F, -) -> Vec<&[T]> { - let mut lines = Vec::new(); - let mut start = 0; - let mut width = 0; - - for (idx, fragment) in fragments.iter().enumerate() { - let line_width = line_widths(lines.len()); - if width + fragment.width() + fragment.penalty_width() > line_width && idx > start { - lines.push(&fragments[start..idx]); - start = idx; - width = 0; - } - width += fragment.width() + fragment.whitespace_width(); - } - lines.push(&fragments[start..]); - lines -} - #[cfg(test)] mod tests { use super::*; @@ -655,13 +350,6 @@ mod tests { #[cfg(feature = "unicode-width")] use unicode_width::UnicodeWidthChar; - // Like assert_eq!, but the left expression is an iterator. - macro_rules! assert_iter_eq { - ($left:expr, $right:expr) => { - assert_eq!($left.collect::>(), $right); - }; - } - #[test] fn skip_ansi_escape_sequence_works() { let blue_text = "\u{1b}[34mHello\u{1b}[0m"; @@ -743,160 +431,4 @@ mod tests { fn display_width_emojis() { assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20); } - - #[test] - fn find_words_empty() { - assert_iter_eq!(find_words(""), vec![]); - } - - #[test] - fn find_words_single_word() { - assert_iter_eq!(find_words("foo"), vec![Word::from("foo")]); - } - - #[test] - fn find_words_two_words() { - assert_iter_eq!( - find_words("foo bar"), - vec![Word::from("foo "), Word::from("bar")] - ); - } - - #[test] - fn find_words_multiple_words() { - assert_iter_eq!( - find_words("foo bar baz"), - vec![Word::from("foo "), Word::from("bar "), Word::from("baz")] - ); - } - - #[test] - fn find_words_whitespace() { - assert_iter_eq!(find_words(" "), vec![Word::from(" ")]); - } - - #[test] - fn find_words_inter_word_whitespace() { - assert_iter_eq!( - find_words("foo bar"), - vec![Word::from("foo "), Word::from("bar")] - ) - } - - #[test] - fn find_words_trailing_whitespace() { - assert_iter_eq!(find_words("foo "), vec![Word::from("foo ")]); - } - - #[test] - fn find_words_leading_whitespace() { - assert_iter_eq!( - find_words(" foo"), - vec![Word::from(" "), Word::from("foo")] - ); - } - - #[test] - fn find_words_multi_column_char() { - assert_iter_eq!( - find_words("\u{1f920}"), // cowboy emoji 🤠 - vec![Word::from("\u{1f920}")] - ); - } - - #[test] - fn find_words_hyphens() { - assert_iter_eq!(find_words("foo-bar"), vec![Word::from("foo-bar")]); - assert_iter_eq!( - find_words("foo- bar"), - vec![Word::from("foo- "), Word::from("bar")] - ); - assert_iter_eq!( - find_words("foo - bar"), - vec![Word::from("foo "), Word::from("- "), Word::from("bar")] - ); - assert_iter_eq!( - find_words("foo -bar"), - vec![Word::from("foo "), Word::from("-bar")] - ); - } - - #[test] - fn split_words_no_words() { - assert_iter_eq!(split_words(vec![], 80), vec![]); - } - - #[test] - fn split_words_empty_word() { - assert_iter_eq!( - split_words(vec![Word::from(" ")], 80), - vec![Word::from(" ")] - ); - } - - #[test] - fn split_words_hyphen_splitter() { - assert_iter_eq!( - split_words(vec![Word::from("foo-bar")], 80), - vec![Word::from("foo-"), Word::from("bar")] - ); - } - - #[test] - fn split_words_short_line() { - // Note that `split_words` does not take the line width into - // account, that is the job of `break_words`. - assert_iter_eq!( - split_words(vec![Word::from("foobar")], 3), - vec![Word::from("foobar")] - ); - } - - #[test] - fn split_words_adds_penalty() { - #[derive(Debug)] - struct FixedSplitPoint; - impl WordSplitter for FixedSplitPoint { - fn split_points(&self, _: &str) -> Vec { - vec![3] - } - } - - let options = Options::new(80).splitter(FixedSplitPoint); - assert_iter_eq!( - split_words(vec![Word::from("foobar")].into_iter(), &options), - vec![ - Word { - word: "foo", - width: 3, - whitespace: "", - penalty: "-" - }, - Word { - word: "bar", - width: 3, - whitespace: "", - penalty: "" - } - ] - ); - - assert_iter_eq!( - split_words(vec![Word::from("fo-bar")].into_iter(), &options), - vec![ - Word { - word: "fo-", - width: 3, - whitespace: "", - penalty: "" - }, - Word { - word: "bar", - width: 3, - whitespace: "", - penalty: "" - } - ] - ); - } } -- cgit v1.2.3