aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoel Galenson <jgalenson@google.com>2021-06-22 08:10:45 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2021-06-22 08:10:45 +0000
commit623cdf09fce31e8c90514f8de590ab37f48ab4a3 (patch)
tree4cd80f9f2486b095410db1ad6bafbbbe78f6d6ee
parentd39a740fbbaf67543751a82c97bb2360d6faaa9e (diff)
parent6234ec131f8ccd62f29d6484320aa8b106fa17ee (diff)
downloadtextwrap-623cdf09fce31e8c90514f8de590ab37f48ab4a3.tar.gz
Upgrade rust/crates/textwrap to 0.14.0 am: 6234ec131f
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/textwrap/+/1742656 Change-Id: Ibe6185b1f30b1d641a79698d4cd42fa5671a3f6b
-rw-r--r--.cargo_vcs_info.json2
-rw-r--r--CHANGELOG.md105
-rw-r--r--Cargo.toml17
-rw-r--r--METADATA8
-rw-r--r--README.md39
-rw-r--r--TEST_MAPPING6
-rw-r--r--src/core.rs512
-rw-r--r--src/indentation.rs84
-rw-r--r--src/lib.rs715
-rw-r--r--src/splitting.rs140
-rw-r--r--src/word_separators.rs406
-rw-r--r--src/word_splitters.rs311
-rw-r--r--src/wrap_algorithms.rs257
-rw-r--r--src/wrap_algorithms/optimal_fit.rs (renamed from src/core/optimal_fit.rs)48
-rw-r--r--tests/traits.rs86
15 files changed, 1796 insertions, 940 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 3237c3c..3d9a69b 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
{
"git": {
- "sha1": "c0a0db1a1460f8923f3cb8d8aa4366ce61237211"
+ "sha1": "65277f5c22aa71fb60d5c53142cadf0c8fcadf28"
}
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3d75e30..534072e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,111 @@
This file lists the most important changes made in each release of
`textwrap`.
+## Version 0.14.0 (2021-06-05)
+
+This is a major feature release which makes Textwrap more configurable
+and flexible. The high-level API of `textwrap::wrap` and
+`textwrap::fill` remains unchanged, but low-level structs have moved
+around.
+
+The biggest change is the introduction of new generic type parameters
+to the `Options` struct. These parameters let you statically
+configure the wrapping algorithm, the word separator, and the word
+splitter. If you previously spelled out the full type for `Options`,
+you now need to take the extra type parameters into account. This
+means that
+
+```rust
+let options: Options<HyphenSplitter> = Options::new(80);
+```
+
+changes to
+
+```rust
+let options: Options<
+ wrap_algorithms::FirstFit,
+ word_separators::AsciiSpace,
+ word_splitters::HyphenSplitter,
+> = Options::new(80);
+```
+
+This is quite a mouthful, so we suggest using type inference where
+possible. You won’t see any change if you call `wrap` directly with a
+width or with an `Options` value constructed on the fly. Please open
+an issue if this causes problems for you!
+
+### New `WordSeparator` Trait
+
+* [#332](https://github.com/mgeisler/textwrap/pull/332): Add
+ `WordSeparator` trait to allow customizing how words are found in a
+ line of text. Until now, Textwrap would always assume that words are
+ separated by ASCII space characters. You can now customize this as
+ needed.
+
+* [#313](https://github.com/mgeisler/textwrap/pull/313): Add support
+ for using the Unicode line breaking algorithm to find words. This is
+ done by adding a second implementation of the new `WordSeparator`
+ trait. The implementation uses the unicode-linebreak crate, which is
+ a new optional dependency.
+
+ With this, Textwrap can be used with East-Asian languages such as
+ Chinese or Japanese where there are no spaces between words.
+ Breaking a long sequence of emojis is another example where line
+ breaks might be wanted even if there is no whitespace to be found.
+ Feedback would be appreciated for this feature.
+
+
+### Indent
+
+* [#353](https://github.com/mgeisler/textwrap/pull/353): Trim trailing
+ whitespace from `prefix` in `indent`.
+
+ Before, empty lines would get no prefix added. Now, empty lines have
+ a trimmed prefix added. This little trick makes `indent` much more
+ useful since you can now safely indent with `"# "` without creating
+ trailing whitespace in the output due to the trailing whitespace in
+ your prefix.
+
+* [#354](https://github.com/mgeisler/textwrap/pull/354): Make `indent`
+ about 20% faster by preallocating the output string.
+
+
+### Documentation
+
+* [#308](https://github.com/mgeisler/textwrap/pull/308): Document
+ handling of leading and trailing whitespace when wrapping text.
+
+### WebAssembly Demo
+
+* [#310](https://github.com/mgeisler/textwrap/pull/310): Thanks to
+ WebAssembly, you can now try out Textwrap directly in your browser.
+ Please try it out: https://mgeisler.github.io/textwrap/.
+
+### New Generic Parameters
+
+* [#331](https://github.com/mgeisler/textwrap/pull/331): Remove outer
+ boxing from `Options`.
+
+* [#357](https://github.com/mgeisler/textwrap/pull/357): Replace
+ `core::WrapAlgorithm` enum with a `wrap_algorithms::WrapAlgorithm`
+ trait. This allows for arbitrary wrapping algorithms to be plugged
+ into the library.
+
+* [#358](https://github.com/mgeisler/textwrap/pull/358): Switch
+ wrapping functions to use a slice for `line_widths`.
+
+* [#368](https://github.com/mgeisler/textwrap/pull/368): Move
+ `WordSeparator` and `WordSplitter` traits to separate modules.
+ Before, Textwrap had several top-level structs such as
+ `NoHyphenation` and `HyphenSplitter`. These implementations of
+ `WordSplitter` now live in a dedicated `word_splitters` module.
+ Similarly, we have a new `word_separators` module for
+ implementations of `WordSeparator`.
+
+* [#369](https://github.com/mgeisler/textwrap/pull/369): Rename
+ `Options::splitter` to `Options::word_splitter` for consistency with
+ the other fields backed by traits.
+
## Version 0.13.4 (2021-02-23)
This release removes `println!` statements which were left behind in
diff --git a/Cargo.toml b/Cargo.toml
index 61abe49..037ae2f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@
[package]
edition = "2018"
name = "textwrap"
-version = "0.13.4"
+version = "0.14.0"
authors = ["Martin Geisler <martin@geisler.net>"]
exclude = [".github/", ".gitignore", "benches/", "examples/", "fuzz/", "images/"]
description = "Powerful library for word wrapping, indenting, and dedenting strings"
@@ -30,8 +30,13 @@ all-features = true
name = "linear"
path = "benches/linear.rs"
harness = false
+
+[[bench]]
+name = "indent"
+path = "benches/indent.rs"
+harness = false
[dependencies.hyphenation]
-version = "0.8"
+version = "0.8.2"
features = ["embed_en-us"]
optional = true
@@ -43,6 +48,10 @@ optional = true
version = "0.1"
optional = true
+[dependencies.unicode-linebreak]
+version = "0.1"
+optional = true
+
[dependencies.unicode-width]
version = "0.1"
optional = true
@@ -50,7 +59,7 @@ optional = true
version = "0.3"
[dev-dependencies.lipsum]
-version = "0.7"
+version = "0.8"
[dev-dependencies.unic-emoji-char]
version = "0.9.0"
@@ -59,6 +68,6 @@ version = "0.9.0"
version = "0.9"
[features]
-default = ["unicode-width", "smawk"]
+default = ["unicode-linebreak", "unicode-width", "smawk"]
[target."cfg(unix)".dev-dependencies.termion]
version = "1.5"
diff --git a/METADATA b/METADATA
index dfad3bc..59cabda 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/textwrap/textwrap-0.13.4.crate"
+ value: "https://static.crates.io/crates/textwrap/textwrap-0.14.0.crate"
}
- version: "0.13.4"
+ version: "0.14.0"
license_type: NOTICE
last_upgrade_date {
year: 2021
- month: 4
- day: 2
+ month: 6
+ day: 21
}
}
diff --git a/README.md b/README.md
index 39093e0..b32924c 100644
--- a/README.md
+++ b/README.md
@@ -7,20 +7,20 @@
Textwrap is a library for wrapping and indenting text. It is most
often used by command-line programs to format dynamic output nicely so
-it looks good in a terminal. However, you can use the library to wrap
-arbitrary things by implementing the `Fragment` trait — an example
-would be wrapping text for PDF files.
+it looks good in a terminal. You can also use Textwrap to wrap text
+set in a proportional font—such as text used to generate PDF files, or
+drawn on an [HTML5 canvas using WebAssembly][wasm-demo].
## Usage
To use the textwrap crate, add this to your `Cargo.toml` file:
```toml
[dependencies]
-textwrap = "0.13"
+textwrap = "0.14"
```
By default, this enables word wrapping with support for Unicode
-strings. Extra features can be enabled with Cargo features — and the
+strings. Extra features can be enabled with Cargo features—and the
Unicode support can be disabled if needed. This allows you to slim down
the library and so you will only pay for the features you actually
use. Please see the [_Cargo Features_ in the crate
@@ -68,14 +68,14 @@ wrapping text.
```
The second line is now shorter and the text is more ragged. The kind
-of wrapping can be configured via `Option::wrap_algorithm`.
+of wrapping can be configured via `Options::wrap_algorithm`.
If you enable the `hyphenation` Cargo feature, you get support for
automatic hyphenation for [about 70 languages][patterns] via
high-quality TeX hyphenation patterns.
Your program must load the hyphenation pattern and configure
-`Options::splitter` to use it:
+`Options::word_splitter` to use it:
```rust
use hyphenation::{Language, Load, Standard};
@@ -83,7 +83,7 @@ use textwrap::Options;
fn main() {
let hyphenator = Standard::from_embedded(Language::EnglishUS).unwrap();
- let options = Options::new(28).splitter(hyphenator);
+ let options = Options::new(28).word_splitter(hyphenator);
let text = "textwrap: an efficient and powerful library for wrapping text.";
println!("{}", fill(text, &options));
}
@@ -112,14 +112,20 @@ procedural macros from the [`textwrap-macros` crate].
The library comes with [a
collection](https://github.com/mgeisler/textwrap/tree/master/examples)
-of small example programs that shows various features. You’re invited
-to clone the repository and try them out for yourself!
+of small example programs that show various features.
-Of special note is the `interactive` example. This is a demo program
-which demonstrates most of the available features: you can enter text
-and adjust the width at which it is wrapped interactively. You can
-also adjust the `Options` used to see the effect of different
-`WordSplitter`s and wrap algorithms.
+If you want to see Textwrap in action right away, then take a look at
+[`examples/wasm/`], which shows how to wrap sans-serif, serif, and
+monospace text. It uses WebAssembly and is automatically deployed to
+https://mgeisler.github.io/textwrap/.
+
+For the command-line examples, you’re invited to clone the repository
+and try them out for yourself! Of special note is
+[`examples/interactive.rs`]. This is a demo program which demonstrates
+most of the available features: you can enter text and adjust the
+width at which it is wrapped interactively. You can also adjust the
+`Options` used to see the effect of different `WordSplitter`s and wrap
+algorithms.
Run the demo with
@@ -142,6 +148,7 @@ Contributions will be accepted under the same license.
[crates-io]: https://crates.io/crates/textwrap
[build-status]: https://github.com/mgeisler/textwrap/actions?query=workflow%3Abuild+branch%3Amaster
[codecov]: https://codecov.io/gh/mgeisler/textwrap
+[wasm-demo]: https://mgeisler.github.io/textwrap/
[`textwrap-macros` crate]: https://crates.io/crates/textwrap-macros
[`hyphenation` example]: https://github.com/mgeisler/textwrap/blob/master/examples/hyphenation.rs
[`termwidth` example]: https://github.com/mgeisler/textwrap/blob/master/examples/termwidth.rs
@@ -149,6 +156,8 @@ Contributions will be accepted under the same license.
[en-us license]: https://github.com/hyphenation/tex-hyphen/blob/master/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-en-us.tex
[bincode]: https://github.com/tapeinosyne/hyphenation/tree/master/dictionaries
[`hyphenation` documentation]: http://docs.rs/hyphenation
+[`examples/wasm/`]: https://github.com/mgeisler/textwrap/tree/master/examples/wasm
+[`examples/interactive.rs`]: https://github.com/mgeisler/textwrap/tree/master/examples/interactive.rs
[api-docs]: https://docs.rs/textwrap/
[CHANGELOG file]: https://github.com/mgeisler/textwrap/blob/master/CHANGELOG.md
[mit]: LICENSE
diff --git a/TEST_MAPPING b/TEST_MAPPING
index a731acb..eca742d 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -8,6 +8,12 @@
"name": "libsqlite3-sys_device_test_src_lib"
},
{
+ "name": "unicode-xid_device_test_src_lib"
+ },
+ {
+ "name": "unicode-xid_device_test_tests_exhaustive_tests"
+ },
+ {
"name": "vpnprofilestore_test"
}
]
diff --git a/src/core.rs b/src/core.rs
index b6f5b46..af02460 100644
--- a/src/core.rs
+++ b/src/core.rs
@@ -8,20 +8,25 @@
//! something:
//!
//! 1. Split your input into [`Fragment`]s. These are abstract blocks
-//! of text or content which can be wrapped into lines. You can use
-//! [`find_words`] to do this for text.
+//! of text or content which can be wrapped into lines. See
+//! [`WordSeparator`](crate::word_separators::WordSeparator) for
+//! how to do this for text.
//!
//! 2. Potentially split your fragments into smaller pieces. This
-//! allows you to implement things like hyphenation. If wrapping
-//! text, [`split_words`] can help you do this.
+//! allows you to implement things like hyphenation. If you are
+//! wrapping text represented as a sequence of [`Word`]s, then
+//! [`split_words`](crate::word_splitters::split_words) can help
+//! you do this.
//!
//! 3. Potentially break apart fragments that are still too large to
//! fit on a single line. This is implemented in [`break_words`].
//!
//! 4. Finally take your fragments and put them into lines. There are
-//! two algorithms for this: [`wrap_optimal_fit`] and
-//! [`wrap_first_fit`]. The former produces better line breaks, the
-//! latter is faster.
+//! two algorithms for this in the
+//! [`wrap_algorithms`](crate::wrap_algorithms) module:
+//! [`wrap_optimal_fit`](crate::wrap_algorithms::wrap_optimal_fit)
+//! and [`wrap_first_fit`](crate::wrap_algorithms::wrap_first_fit).
+//! The former produces better line breaks, the latter is faster.
//!
//! 5. Iterate through the slices returned by the wrapping functions
//! and construct your lines of output.
@@ -30,13 +35,6 @@
//! the functionality here is not sufficient or if you have ideas for
//! improving it. We would love to hear from you!
-use crate::{Options, WordSplitter};
-
-#[cfg(feature = "smawk")]
-mod optimal_fit;
-#[cfg(feature = "smawk")]
-pub use optimal_fit::wrap_optimal_fit;
-
/// The CSI or “Control Sequence Introducer” introduces an ANSI escape
/// sequence. This is typically used for colored text and will be
/// ignored when computing the text width.
@@ -48,7 +46,7 @@ const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';
/// `chars` provide the following characters. The `chars` will be
/// modified if `ch` is the start of an ANSI escape sequence.
#[inline]
-fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
+pub(crate) fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
if ch == CSI.0 && chars.next() == Some(CSI.1) {
// We have found the start of an ANSI escape code, typically
// used for colored terminal text. We skip until we find a
@@ -175,7 +173,6 @@ fn ch_width(ch: char) -> usize {
/// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence
/// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters
/// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html
-#[inline]
pub fn display_width(text: &str) -> usize {
let mut chars = text.chars();
let mut width = 0;
@@ -217,10 +214,14 @@ pub trait Fragment: std::fmt::Debug {
/// trailing whitespace, and potentially a penalty item.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Word<'a> {
- word: &'a str,
- width: usize,
- pub(crate) whitespace: &'a str,
- pub(crate) penalty: &'a str,
+ /// Word content.
+ pub word: &'a str,
+ /// Whitespace to insert if the word does not fall at the end of a line.
+ pub whitespace: &'a str,
+ /// Penalty string to insert if the word falls at the end of a line.
+ pub penalty: &'a str,
+ // Cached width in columns.
+ pub(crate) width: usize,
}
impl std::ops::Deref for Word<'_> {
@@ -232,7 +233,7 @@ impl std::ops::Deref for Word<'_> {
}
impl<'a> Word<'a> {
- /// Construct a new `Word`.
+ /// Construct a `Word` from a string.
///
/// A trailing stretch of `' '` is automatically taken to be the
/// whitespace part of the word.
@@ -322,118 +323,6 @@ impl Fragment for Word<'_> {
}
}
-/// Split line into words separated by regions of `' '` characters.
-///
-/// # Examples
-///
-/// ```
-/// use textwrap::core::{find_words, Fragment, Word};
-/// let words = find_words("Hello World!").collect::<Vec<_>>();
-/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
-/// assert_eq!(words[0].width(), 5);
-/// assert_eq!(words[0].whitespace_width(), 1);
-/// assert_eq!(words[0].penalty_width(), 0);
-/// ```
-pub fn find_words(line: &str) -> impl Iterator<Item = Word> {
- let mut start = 0;
- let mut in_whitespace = false;
- let mut char_indices = line.char_indices();
-
- std::iter::from_fn(move || {
- // for (idx, ch) in char_indices does not work, gives this
- // error:
- //
- // > cannot move out of `char_indices`, a captured variable in
- // > an `FnMut` closure
- #[allow(clippy::while_let_on_iterator)]
- while let Some((idx, ch)) = char_indices.next() {
- if in_whitespace && ch != ' ' {
- let word = Word::from(&line[start..idx]);
- start = idx;
- in_whitespace = ch == ' ';
- return Some(word);
- }
-
- in_whitespace = ch == ' ';
- }
-
- if start < line.len() {
- let word = Word::from(&line[start..]);
- start = line.len();
- return Some(word);
- }
-
- None
- })
-}
-
-/// Split words into smaller words according to the split points given
-/// by `options`.
-///
-/// Note that we split all words, regardless of their length. This is
-/// to more cleanly separate the business of splitting (including
-/// automatic hyphenation) from the business of word wrapping.
-///
-/// # Examples
-///
-/// ```
-/// use textwrap::core::{split_words, Word};
-/// use textwrap::{NoHyphenation, Options};
-///
-/// // The default splitter is HyphenSplitter:
-/// let options = Options::new(80);
-/// assert_eq!(
-/// split_words(vec![Word::from("foo-bar")], &options).collect::<Vec<_>>(),
-/// vec![Word::from("foo-"), Word::from("bar")]
-/// );
-///
-/// // The NoHyphenation splitter ignores the '-':
-/// let options = Options::new(80).splitter(NoHyphenation);
-/// assert_eq!(
-/// split_words(vec![Word::from("foo-bar")], &options).collect::<Vec<_>>(),
-/// vec![Word::from("foo-bar")]
-/// );
-/// ```
-pub fn split_words<'a, I, S, Opt>(words: I, options: Opt) -> impl Iterator<Item = Word<'a>>
-where
- I: IntoIterator<Item = Word<'a>>,
- S: WordSplitter,
- Opt: Into<Options<'a, S>>,
-{
- let options = options.into();
-
- words.into_iter().flat_map(move |word| {
- let mut prev = 0;
- let mut split_points = options.splitter.split_points(&word).into_iter();
- std::iter::from_fn(move || {
- if let Some(idx) = split_points.next() {
- let need_hyphen = !word[..idx].ends_with('-');
- let w = Word {
- word: &word.word[prev..idx],
- width: display_width(&word[prev..idx]),
- whitespace: "",
- penalty: if need_hyphen { "-" } else { "" },
- };
- prev = idx;
- return Some(w);
- }
-
- if prev < word.word.len() || prev == 0 {
- let w = Word {
- word: &word.word[prev..],
- width: display_width(&word[prev..]),
- whitespace: word.whitespace,
- penalty: word.penalty,
- };
- prev = word.word.len() + 1;
- return Some(w);
- }
-
- None
- })
- })
-}
-
/// Forcibly break words wider than `line_width` into smaller words.
///
/// This simply calls [`Word::break_apart`] on words that are too
@@ -454,200 +343,6 @@ where
shortened_words
}
-/// Wrapping algorithms.
-///
-/// After a text has been broken into [`Fragment`]s, the one now has
-/// to decide how to break the fragments into lines. The simplest
-/// algorithm for this is implemented by [`wrap_first_fit`]: it uses
-/// no look-ahead and simply adds fragments to the line as long as
-/// they fit. However, this can lead to poor line breaks if a large
-/// fragment almost-but-not-quite fits on a line. When that happens,
-/// the fragment is moved to the next line and it will leave behind a
-/// large gap. A more advanced algorithm, implemented by
-/// [`wrap_optimal_fit`], will take this into account. The optimal-fit
-/// algorithm considers all possible line breaks and will attempt to
-/// minimize the gaps left behind by overly short lines.
-///
-/// While both algorithms run in linear time, the first-fit algorithm
-/// is about 4 times faster than the optimal-fit algorithm.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum WrapAlgorithm {
- /// Use an advanced algorithm which considers the entire paragraph
- /// to find optimal line breaks. Implemented by
- /// [`wrap_optimal_fit`].
- ///
- /// **Note:** Only available when the `smawk` Cargo feature is
- /// enabled.
- #[cfg(feature = "smawk")]
- OptimalFit,
- /// Use a fast and simple algorithm with no look-ahead to find
- /// line breaks. Implemented by [`wrap_first_fit`].
- FirstFit,
-}
-
-/// Wrap abstract fragments into lines with a first-fit algorithm.
-///
-/// The `line_widths` map line numbers (starting from 0) to a target
-/// line width. This can be used to implement hanging indentation.
-///
-/// The fragments must already have been split into the desired
-/// widths, this function will not (and cannot) attempt to split them
-/// further when arranging them into lines.
-///
-/// # First-Fit Algorithm
-///
-/// This implements a simple “greedy” algorithm: accumulate fragments
-/// one by one and when a fragment no longer fits, start a new line.
-/// There is no look-ahead, we simply take first fit of the fragments
-/// we find.
-///
-/// While fast and predictable, this algorithm can produce poor line
-/// breaks when a long fragment is moved to a new line, leaving behind
-/// a large gap:
-///
-/// ```
-/// use textwrap::core::{find_words, wrap_first_fit, Word};
-///
-/// // Helper to convert wrapped lines to a Vec<String>.
-/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
-/// lines.iter().map(|line| {
-/// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
-/// }).collect::<Vec<_>>()
-/// }
-///
-/// let text = "These few words will unfortunately not wrap nicely.";
-/// let words = find_words(text).collect::<Vec<_>>();
-/// assert_eq!(lines_to_strings(wrap_first_fit(&words, |_| 15)),
-/// vec!["These few words",
-/// "will", // <-- short line
-/// "unfortunately",
-/// "not wrap",
-/// "nicely."]);
-///
-/// // We can avoid the short line if we look ahead:
-/// #[cfg(feature = "smawk")]
-/// assert_eq!(lines_to_strings(textwrap::core::wrap_optimal_fit(&words, |_| 15)),
-/// vec!["These few",
-/// "words will",
-/// "unfortunately",
-/// "not wrap",
-/// "nicely."]);
-/// ```
-///
-/// The [`wrap_optimal_fit`] function was used above to get better
-/// line breaks. It uses an advanced algorithm which tries to avoid
-/// short lines. This function is about 4 times faster than
-/// [`wrap_optimal_fit`].
-///
-/// # Examples
-///
-/// Imagine you're building a house site and you have a number of
-/// tasks you need to execute. Things like pour foundation, complete
-/// framing, install plumbing, electric cabling, install insulation.
-///
-/// The construction workers can only work during daytime, so they
-/// need to pack up everything at night. Because they need to secure
-/// their tools and move machines back to the garage, this process
-/// takes much more time than the time it would take them to simply
-/// switch to another task.
-///
-/// You would like to make a list of tasks to execute every day based
-/// on your estimates. You can model this with a program like this:
-///
-/// ```
-/// use textwrap::core::{wrap_first_fit, Fragment};
-///
-/// #[derive(Debug)]
-/// struct Task<'a> {
-/// name: &'a str,
-/// hours: usize, // Time needed to complete task.
-/// sweep: usize, // Time needed for a quick sweep after task during the day.
-/// cleanup: usize, // Time needed for full cleanup if day ends with this task.
-/// }
-///
-/// impl Fragment for Task<'_> {
-/// fn width(&self) -> usize { self.hours }
-/// fn whitespace_width(&self) -> usize { self.sweep }
-/// fn penalty_width(&self) -> usize { self.cleanup }
-/// }
-///
-/// // The morning tasks
-/// let tasks = vec![
-/// Task { name: "Foundation", hours: 4, sweep: 2, cleanup: 3 },
-/// Task { name: "Framing", hours: 3, sweep: 1, cleanup: 2 },
-/// Task { name: "Plumbing", hours: 2, sweep: 2, cleanup: 2 },
-/// Task { name: "Electrical", hours: 2, sweep: 1, cleanup: 2 },
-/// Task { name: "Insulation", hours: 2, sweep: 1, cleanup: 2 },
-/// Task { name: "Drywall", hours: 3, sweep: 1, cleanup: 2 },
-/// Task { name: "Floors", hours: 3, sweep: 1, cleanup: 2 },
-/// Task { name: "Countertops", hours: 1, sweep: 1, cleanup: 2 },
-/// Task { name: "Bathrooms", hours: 2, sweep: 1, cleanup: 2 },
-/// ];
-///
-/// // Fill tasks into days, taking `day_length` into account. The
-/// // output shows the hours worked per day along with the names of
-/// // the tasks for that day.
-/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: usize) -> Vec<(usize, Vec<&'a str>)> {
-/// let mut days = Vec::new();
-/// // Assign tasks to days. The assignment is a vector of slices,
-/// // with a slice per day.
-/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, |i| day_length);
-/// for day in assigned_days.iter() {
-/// let last = day.last().unwrap();
-/// let work_hours: usize = day.iter().map(|t| t.hours + t.sweep).sum();
-/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
-/// days.push((work_hours - last.sweep + last.cleanup, names));
-/// }
-/// days
-/// }
-///
-/// // With a single crew working 8 hours a day:
-/// assert_eq!(
-/// assign_days(&tasks, 8),
-/// [
-/// (7, vec!["Foundation"]),
-/// (8, vec!["Framing", "Plumbing"]),
-/// (7, vec!["Electrical", "Insulation"]),
-/// (5, vec!["Drywall"]),
-/// (7, vec!["Floors", "Countertops"]),
-/// (4, vec!["Bathrooms"]),
-/// ]
-/// );
-///
-/// // With two crews working in shifts, 16 hours a day:
-/// assert_eq!(
-/// assign_days(&tasks, 16),
-/// [
-/// (14, vec!["Foundation", "Framing", "Plumbing"]),
-/// (15, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
-/// (6, vec!["Countertops", "Bathrooms"]),
-/// ]
-/// );
-/// ```
-///
-/// Apologies to anyone who actually knows how to build a house and
-/// knows how long each step takes :-)
-pub fn wrap_first_fit<T: Fragment, F: Fn(usize) -> usize>(
- fragments: &[T],
- line_widths: F,
-) -> Vec<&[T]> {
- let mut lines = Vec::new();
- let mut start = 0;
- let mut width = 0;
-
- for (idx, fragment) in fragments.iter().enumerate() {
- let line_width = line_widths(lines.len());
- if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
- lines.push(&fragments[start..idx]);
- start = idx;
- width = 0;
- }
- width += fragment.width() + fragment.whitespace_width();
- }
- lines.push(&fragments[start..]);
- lines
-}
-
#[cfg(test)]
mod tests {
use super::*;
@@ -655,13 +350,6 @@ mod tests {
#[cfg(feature = "unicode-width")]
use unicode_width::UnicodeWidthChar;
- // Like assert_eq!, but the left expression is an iterator.
- macro_rules! assert_iter_eq {
- ($left:expr, $right:expr) => {
- assert_eq!($left.collect::<Vec<_>>(), $right);
- };
- }
-
#[test]
fn skip_ansi_escape_sequence_works() {
let blue_text = "\u{1b}[34mHello\u{1b}[0m";
@@ -743,160 +431,4 @@ mod tests {
fn display_width_emojis() {
assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
}
-
- #[test]
- fn find_words_empty() {
- assert_iter_eq!(find_words(""), vec![]);
- }
-
- #[test]
- fn find_words_single_word() {
- assert_iter_eq!(find_words("foo"), vec![Word::from("foo")]);
- }
-
- #[test]
- fn find_words_two_words() {
- assert_iter_eq!(
- find_words("foo bar"),
- vec![Word::from("foo "), Word::from("bar")]
- );
- }
-
- #[test]
- fn find_words_multiple_words() {
- assert_iter_eq!(
- find_words("foo bar baz"),
- vec![Word::from("foo "), Word::from("bar "), Word::from("baz")]
- );
- }
-
- #[test]
- fn find_words_whitespace() {
- assert_iter_eq!(find_words(" "), vec![Word::from(" ")]);
- }
-
- #[test]
- fn find_words_inter_word_whitespace() {
- assert_iter_eq!(
- find_words("foo bar"),
- vec![Word::from("foo "), Word::from("bar")]
- )
- }
-
- #[test]
- fn find_words_trailing_whitespace() {
- assert_iter_eq!(find_words("foo "), vec![Word::from("foo ")]);
- }
-
- #[test]
- fn find_words_leading_whitespace() {
- assert_iter_eq!(
- find_words(" foo"),
- vec![Word::from(" "), Word::from("foo")]
- );
- }
-
- #[test]
- fn find_words_multi_column_char() {
- assert_iter_eq!(
- find_words("\u{1f920}"), // cowboy emoji 🤠
- vec![Word::from("\u{1f920}")]
- );
- }
-
- #[test]
- fn find_words_hyphens() {
- assert_iter_eq!(find_words("foo-bar"), vec![Word::from("foo-bar")]);
- assert_iter_eq!(
- find_words("foo- bar"),
- vec![Word::from("foo- "), Word::from("bar")]
- );
- assert_iter_eq!(
- find_words("foo - bar"),
- vec![Word::from("foo "), Word::from("- "), Word::from("bar")]
- );
- assert_iter_eq!(
- find_words("foo -bar"),
- vec![Word::from("foo "), Word::from("-bar")]
- );
- }
-
- #[test]
- fn split_words_no_words() {
- assert_iter_eq!(split_words(vec![], 80), vec![]);
- }
-
- #[test]
- fn split_words_empty_word() {
- assert_iter_eq!(
- split_words(vec![Word::from(" ")], 80),
- vec![Word::from(" ")]
- );
- }
-
- #[test]
- fn split_words_hyphen_splitter() {
- assert_iter_eq!(
- split_words(vec![Word::from("foo-bar")], 80),
- vec![Word::from("foo-"), Word::from("bar")]
- );
- }
-
- #[test]
- fn split_words_short_line() {
- // Note that `split_words` does not take the line width into
- // account, that is the job of `break_words`.
- assert_iter_eq!(
- split_words(vec![Word::from("foobar")], 3),
- vec![Word::from("foobar")]
- );
- }
-
- #[test]
- fn split_words_adds_penalty() {
- #[derive(Debug)]
- struct FixedSplitPoint;
- impl WordSplitter for FixedSplitPoint {
- fn split_points(&self, _: &str) -> Vec<usize> {
- vec![3]
- }
- }
-
- let options = Options::new(80).splitter(FixedSplitPoint);
- assert_iter_eq!(
- split_words(vec![Word::from("foobar")].into_iter(), &options),
- vec![
- Word {
- word: "foo",
- width: 3,
- whitespace: "",
- penalty: "-"
- },
- Word {
- word: "bar",
- width: 3,
- whitespace: "",
- penalty: ""
- }
- ]
- );
-
- assert_iter_eq!(
- split_words(vec![Word::from("fo-bar")].into_iter(), &options),
- vec![
- Word {
- word: "fo-",
- width: 3,
- whitespace: "",
- penalty: ""
- },
- Word {
- word: "bar",
- width: 3,
- whitespace: "",
- penalty: ""
- }
- ]
- );
- }
}
diff --git a/src/indentation.rs b/src/indentation.rs
index cc2351f..5d90c06 100644
--- a/src/indentation.rs
+++ b/src/indentation.rs
@@ -4,42 +4,45 @@
//! The functions here can be used to uniformly indent or dedent
//! (unindent) word wrapped lines of text.
-/// Add prefix to each non-empty line.
+/// Indent each line by the given prefix.
+///
+/// # Examples
///
/// ```
/// use textwrap::indent;
///
-/// assert_eq!(indent("
-/// Foo
-/// Bar
-/// ", " "), "
-/// Foo
-/// Bar
-/// ");
+/// assert_eq!(indent("First line.\nSecond line.\n", " "),
+/// " First line.\n Second line.\n");
/// ```
///
-/// Lines consisting only of whitespace are kept unchanged:
+/// When indenting, trailing whitespace is stripped from the prefix
+/// on empty lines. With an all-whitespace prefix, this means that
+/// empty lines remain empty afterwards:
///
/// ```
/// use textwrap::indent;
///
-/// assert_eq!(indent("
-/// Foo
+/// assert_eq!(indent("First line.\n\n\nSecond line.\n", " "),
+/// " First line.\n\n\n Second line.\n");
+/// ```
///
-/// Bar
-/// \t
-/// Baz
-/// ", "->"), "
-/// ->Foo
+/// Notice how `"\n\n\n"` remained as `"\n\n\n"`.
///
-/// ->Bar
-/// \t
-/// ->Baz
-/// ");
+/// This feature is useful when you want to indent text and have a
+/// space between your prefix and the text. In this case, you _don't_
+/// want a trailing space on empty lines:
+///
+/// ```
+/// use textwrap::indent;
+///
+/// assert_eq!(indent("foo = 123\n\nprint(foo)\n", "# "),
+/// "# foo = 123\n#\n# print(foo)\n");
/// ```
///
-/// Leading and trailing whitespace on non-empty lines is kept
-/// unchanged:
+/// Notice how `"\n\n"` became `"\n#\n"` instead of `"\n# \n"` which
+/// would have trailing whitespace.
+///
+/// Leading and trailing whitespace coming from the text itself is
+/// kept unchanged:
///
/// ```
/// use textwrap::indent;
@@ -47,18 +50,27 @@
/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo ");
/// ```
pub fn indent(s: &str, prefix: &str) -> String {
- let mut result = String::new();
-
- for (idx, line) in s.split('\n').enumerate() {
+ // We know we'll need more than s.len() bytes for the output, but
+ // without counting '\n' characters (which is somewhat slow), we
+ // don't know exactly how much. However, we can preemptively do
+ // the first doubling of the output size.
+ let mut result = String::with_capacity(2 * s.len());
+ let trimmed_prefix = prefix.trim_end();
+ for (idx, line) in s.split_terminator('\n').enumerate() {
if idx > 0 {
result.push('\n');
}
- if !line.trim().is_empty() {
+ if line.trim().is_empty() {
+ result.push_str(trimmed_prefix);
+ } else {
result.push_str(prefix);
}
result.push_str(line);
}
-
+ if s.ends_with('\n') {
+ // split_terminator will have eaten the final '\n'.
+ result.push('\n');
+ }
result
}
@@ -155,11 +167,11 @@ mod tests {
" baz\n",
].join("");
let expected = [
- "// foo\n",
- "//bar\n",
- "// baz\n",
+ "// foo\n",
+ "// bar\n",
+ "// baz\n",
].join("");
- assert_eq!(indent(&text, "//"), expected);
+ assert_eq!(indent(&text, "// "), expected);
}
#[test]
@@ -172,12 +184,12 @@ mod tests {
" baz",
].join("\n");
let expected = [
- "// foo",
- "//bar",
- "",
- "// baz",
+ "// foo",
+ "// bar",
+ "//",
+ "// baz",
].join("\n");
- assert_eq!(indent(&text, "//"), expected);
+ assert_eq!(indent(&text, "// "), expected);
}
#[test]
diff --git a/src/lib.rs b/src/lib.rs
index ee6d5d8..5a3f4b1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,7 +34,7 @@
//! fn main() {
//! let text = "textwrap: a small library for wrapping text.";
//! let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
-//! let options = Options::new(18).splitter(dictionary);
+//! let options = Options::new(18).word_splitter(dictionary);
//! println!("{}", fill(text, &options));
//! }
//!
@@ -86,12 +86,12 @@
//! into a bullet list:
//!
//! ```
-//! let before = "
+//! let before = "\
//! foo
//! bar
//! baz
//! ";
-//! let after = "
+//! let after = "\
//! * foo
//! * bar
//! * baz
@@ -124,12 +124,23 @@
//! The full dependency graph, where dashed lines indicate optional
//! dependencies, is shown below:
//!
-//! <img src="https://raw.githubusercontent.com/mgeisler/textwrap/master/images/textwrap-0.13.4.svg">
+//! <img src="https://raw.githubusercontent.com/mgeisler/textwrap/master/images/textwrap-0.14.0.svg">
//!
//! ## Default Features
//!
//! These features are enabled by default:
//!
+//! * `unicode-linebreak`: enables finding words using the
+//! [unicode-linebreak] crate, which implements the line breaking
+//! algorithm described in [Unicode Standard Annex
+//! #14](https://www.unicode.org/reports/tr14/).
+//!
+//! This feature can be disabled if you are happy to find words
+//! separated by ASCII space characters only. People wrapping text
+//! with emojis or East-Asian characters will most likely want
+//! to enable this feature. See the
+//! [`word_separators::WordSeparator`] trait for details.
+//!
//! * `unicode-width`: enables correct width computation of non-ASCII
//! characters via the [unicode-width] crate. Without this feature,
//! every [`char`] is 1 column wide, except for emojis which are 2
@@ -142,11 +153,11 @@
//! other ways.
//!
//! * `smawk`: enables linear-time wrapping of the whole paragraph via
-//! the [smawk] crate. See the [`core::wrap_optimal_fit`] function
-//! for details on the optimal-fit algorithm.
+//! the [smawk] crate. See the [`wrap_algorithms::wrap_optimal_fit`]
+//! function for details on the optimal-fit algorithm.
//!
//! This feature can be disabled if you only ever intend to use
-//! [`core::wrap_first_fit`].
+//! [`wrap_algorithms::wrap_first_fit`].
//!
//! ## Optional Features
//!
@@ -157,15 +168,16 @@
//! [`Options::with_termwidth`] constructor for details.
//!
//! * `hyphenation`: enables language-sensitive hyphenation via the
-//! [hyphenation] crate. See the [`WordSplitter`] trait for details.
+//! [hyphenation] crate. See the [`word_splitters::WordSplitter`] trait for details.
//!
+//! [unicode-linebreak]: https://docs.rs/unicode-linebreak/
//! [unicode-width]: https://docs.rs/unicode-width/
//! [smawk]: https://docs.rs/smawk/
//! [textwrap-macros]: https://docs.rs/textwrap-macros/
//! [terminal_size]: https://docs.rs/terminal_size/
//! [hyphenation]: https://docs.rs/hyphenation/
-#![doc(html_root_url = "https://docs.rs/textwrap/0.13.4")]
+#![doc(html_root_url = "https://docs.rs/textwrap/0.14.0")]
#![forbid(unsafe_code)] // See https://github.com/mgeisler/textwrap/issues/210
#![deny(missing_docs)]
#![deny(missing_debug_implementations)]
@@ -177,14 +189,50 @@ mod indentation;
pub use crate::indentation::dedent;
pub use crate::indentation::indent;
-mod splitting;
-pub use crate::splitting::{HyphenSplitter, NoHyphenation, WordSplitter};
+pub mod word_separators;
+pub mod word_splitters;
+pub mod wrap_algorithms;
pub mod core;
+// These private macros let us hide the actual WrapAlgorithm and
+// WordSeparator used in the function signatures below.
+#[cfg(feature = "smawk")]
+macro_rules! DefaultWrapAlgorithm {
+ () => {
+ wrap_algorithms::OptimalFit
+ };
+}
+
+#[cfg(not(feature = "smawk"))]
+macro_rules! DefaultWrapAlgorithm {
+ () => {
+ wrap_algorithms::FirstFit
+ };
+}
+
+#[cfg(feature = "unicode-linebreak")]
+macro_rules! DefaultWordSeparator {
+ () => {
+ word_separators::UnicodeBreakProperties
+ };
+}
+
+#[cfg(not(feature = "unicode-linebreak"))]
+macro_rules! DefaultWordSeparator {
+ () => {
+ word_separators::AsciiSpace
+ };
+}
+
/// Holds settings for wrapping and filling text.
#[derive(Debug, Clone)]
-pub struct Options<'a, S: ?Sized = Box<dyn WordSplitter>> {
+pub struct Options<
+ 'a,
+ WrapAlgo = Box<dyn wrap_algorithms::WrapAlgorithm>,
+ WordSep = Box<dyn word_separators::WordSeparator>,
+ WordSplit = Box<dyn word_splitters::WordSplitter>,
+> {
/// The width in columns at which the text will be wrapped.
pub width: usize,
/// Indentation used for the first line of output. See the
@@ -197,42 +245,64 @@ pub struct Options<'a, S: ?Sized = Box<dyn WordSplitter>> {
/// When set to `false`, some lines may be longer than
/// `self.width`. See the [`Options::break_words`] method.
pub break_words: bool,
- /// Wraping algorithm to use, see [`core::WrapAlgorithm`] for
- /// details.
- pub wrap_algorithm: core::WrapAlgorithm,
+ /// Wrapping algorithm to use, see the implementations of the
+ /// [`wrap_algorithms::WrapAlgorithm`] trait for details.
+ pub wrap_algorithm: WrapAlgo,
+ /// The line breaking algorithm to use, see
+ /// [`word_separators::WordSeparator`] trait for an overview and
+ /// possible implementations.
+ pub word_separator: WordSep,
/// The method for splitting words. This can be used to prohibit
/// splitting words on hyphens, or it can be used to implement
/// language-aware machine hyphenation. Please see the
- /// [`WordSplitter`] trait for details.
- pub splitter: S,
+ /// [`word_splitters::WordSplitter`] trait for details.
+ pub word_splitter: WordSplit,
}
-impl<'a, S: ?Sized> From<&'a Options<'a, S>> for Options<'a, &'a S> {
- fn from(options: &'a Options<'a, S>) -> Self {
+impl<'a, WrapAlgo, WordSep, WordSplit> From<&'a Options<'a, WrapAlgo, WordSep, WordSplit>>
+ for Options<'a, WrapAlgo, WordSep, WordSplit>
+where
+ WrapAlgo: Clone,
+ WordSep: Clone,
+ WordSplit: Clone,
+{
+ fn from(options: &'a Options<'a, WrapAlgo, WordSep, WordSplit>) -> Self {
Self {
width: options.width,
initial_indent: options.initial_indent,
subsequent_indent: options.subsequent_indent,
break_words: options.break_words,
- wrap_algorithm: options.wrap_algorithm,
- splitter: &options.splitter,
+ word_separator: options.word_separator.clone(),
+ wrap_algorithm: options.wrap_algorithm.clone(),
+ word_splitter: options.word_splitter.clone(),
}
}
}
-impl<'a> From<usize> for Options<'a, HyphenSplitter> {
+impl<'a> From<usize>
+ for Options<
+ 'a,
+ DefaultWrapAlgorithm!(),
+ DefaultWordSeparator!(),
+ word_splitters::HyphenSplitter,
+ >
+{
fn from(width: usize) -> Self {
Options::new(width)
}
}
/// Constructors for boxed Options, specifically.
-impl<'a> Options<'a, HyphenSplitter> {
+impl<'a>
+ Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), word_splitters::HyphenSplitter>
+{
/// Creates a new [`Options`] with the specified width and static
- /// dispatch using the [`HyphenSplitter`]. Equivalent to
+ /// dispatch using the [`word_splitters::HyphenSplitter`].
+ /// Equivalent to
///
/// ```
- /// # use textwrap::{Options, HyphenSplitter, WordSplitter};
+ /// # use textwrap::word_splitters::{HyphenSplitter, WordSplitter};
+ /// # use textwrap::Options;
/// # let width = 80;
/// # let actual = Options::new(width);
/// # let expected =
@@ -241,75 +311,77 @@ impl<'a> Options<'a, HyphenSplitter> {
/// initial_indent: "",
/// subsequent_indent: "",
/// break_words: true,
+ /// #[cfg(feature = "unicode-linebreak")]
+ /// word_separator: textwrap::word_separators::UnicodeBreakProperties,
+ /// #[cfg(not(feature = "unicode-linebreak"))]
+ /// word_separator: textwrap::word_separators::AsciiSpace,
/// #[cfg(feature = "smawk")]
- /// wrap_algorithm: textwrap::core::WrapAlgorithm::OptimalFit,
+ /// wrap_algorithm: textwrap::wrap_algorithms::OptimalFit,
/// #[cfg(not(feature = "smawk"))]
- /// wrap_algorithm: textwrap::core::WrapAlgorithm::FirstFit,
- /// splitter: HyphenSplitter,
+ /// wrap_algorithm: textwrap::wrap_algorithms::FirstFit,
+ /// word_splitter: textwrap::word_splitters::HyphenSplitter,
/// }
/// # ;
/// # assert_eq!(actual.width, expected.width);
/// # assert_eq!(actual.initial_indent, expected.initial_indent);
/// # assert_eq!(actual.subsequent_indent, expected.subsequent_indent);
/// # assert_eq!(actual.break_words, expected.break_words);
- /// # assert_eq!(actual.wrap_algorithm, expected.wrap_algorithm);
- /// # let expected_coerced: Options<'static, HyphenSplitter> = expected;
/// ```
///
- /// Note that the default wrap algorithm changes based on the
- /// `smawk` Cargo feature. The best available algorithm is used by
- /// default.
+ /// Note that the default word separator and wrap algorithm
+ /// change based on the available Cargo features. The best
+ /// available algorithm is used by default.
///
- /// Static dispatch mean here, that the splitter is stored as-is
+ /// Static dispatch means here, that the word splitter is stored as-is
/// and the type is known at compile-time. Thus the returned value
- /// is actually a `Options<HyphenSplitter>`.
+ /// is actually an `Options<_, _, HyphenSplitter>`.
///
- /// Dynamic dispatch on the other hand, mean that the splitter is
- /// stored as a trait object for instance in a `Box<dyn
- /// WordSplitter>`. This way the splitter's inner type can be
- /// changed without changing the type of this struct, which then
- /// would be just `Options` as a short cut for `Options<Box<dyn
- /// WordSplitter>>`.
+ /// Dynamic dispatch on the other hand, means that the word
+ /// separator and/or word splitter is stored as a trait object
+ /// such as a `Box<dyn word_splitters::WordSplitter>`. This way
+ /// the word splitter's inner type can be changed without changing
+ /// the type of this struct, which then would be just `Options` as
+ /// a short cut for `Options<Box<dyn wrap_algorithms::WrapAlgorithm>,
+ /// Box<dyn word_separators::WordSeparator>, Box<dyn
+ /// word_splitters::WordSplitter>>`.
///
- /// The value and type of the splitter can be choose from the
- /// start using the [`Options::with_splitter`] constructor or
- /// changed afterwards using the [`Options::splitter`] method.
- /// Whether static or dynamic dispatch is used, depends on whether
- /// these functions are given a boxed [`WordSplitter`] or not.
- /// Take for example:
+ /// The value and type of the word splitter can be chosen from the
+ /// start using the [`Options::with_word_splitter`] constructor or
+ /// changed afterwards using the [`Options::word_splitter`]
+ /// method. Whether static or dynamic dispatch is used, depends on
+ /// whether these functions are given a boxed
+ /// [`word_splitters::WordSplitter`] or not. Take for example:
///
/// ```
- /// use textwrap::{HyphenSplitter, NoHyphenation, Options};
- /// # use textwrap::{WordSplitter};
+ /// use textwrap::Options;
+ /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation};
+ /// # use textwrap::word_splitters::WordSplitter;
+ /// # use textwrap::word_separators::AsciiSpace;
/// # let width = 80;
///
/// // uses HyphenSplitter with static dispatch
- /// // the actual type: Options<HyphenSplitter>
+ /// // the actual type: Options<AsciiSpace, HyphenSplitter>
/// let opt = Options::new(width);
- /// # let opt_coerce: Options<HyphenSplitter> = opt;
///
/// // uses NoHyphenation with static dispatch
- /// // the actual type: Options<NoHyphenation>
- /// let opt = Options::new(width).splitter(NoHyphenation);
- /// # let opt_coerce: Options<NoHyphenation> = opt;
+ /// // the actual type: Options<AsciiSpace, NoHyphenation>
+ /// let opt = Options::new(width).word_splitter(NoHyphenation);
///
/// // uses HyphenSplitter with dynamic dispatch
- /// // the actual type: Options<Box<dyn WordSplitter>>
- /// let opt: Options = Options::new(width).splitter(Box::new(HyphenSplitter));
- /// # let opt_coerce: Options<Box<dyn WordSplitter>> = opt;
+ /// // the actual type: Options<AsciiSpace, Box<dyn word_splitters::WordSplitter>>
+ /// let opt: Options<_, _, _> = Options::new(width).word_splitter(Box::new(HyphenSplitter));
///
/// // uses NoHyphenation with dynamic dispatch
- /// // the actual type: Options<Box<dyn WordSplitter>>
- /// let opt: Options = Options::new(width).splitter(Box::new(NoHyphenation));
- /// # let opt_coerce: Options<Box<dyn WordSplitter>> = opt;
+ /// // the actual type: Options<AsciiSpace, Box<dyn word_splitters::WordSplitter>>
+ /// let opt: Options<_, _, _> = Options::new(width).word_splitter(Box::new(NoHyphenation));
/// ```
///
/// Notice that the last two variables have the same type, despite
/// the different `WordSplitter` in use. Thus dynamic dispatch
- /// allows to change the splitter at run-time without changing the
- /// variables type.
+ /// allows to change the word splitter at run-time without
+ /// changing the variables type.
pub const fn new(width: usize) -> Self {
- Options::with_splitter(width, HyphenSplitter)
+ Options::with_word_splitter(width, word_splitters::HyphenSplitter)
}
/// Creates a new [`Options`] with `width` set to the current
@@ -335,90 +407,97 @@ impl<'a> Options<'a, HyphenSplitter> {
}
}
-impl<'a, S> Options<'a, S> {
+impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), WordSplit> {
/// Creates a new [`Options`] with the specified width and
- /// splitter. Equivalent to
+ /// word splitter. Equivalent to
///
/// ```
- /// # use textwrap::{Options, NoHyphenation, HyphenSplitter};
- /// # const splitter: NoHyphenation = NoHyphenation;
+ /// # use textwrap::Options;
+ /// # use textwrap::word_splitters::{NoHyphenation, HyphenSplitter};
+ /// # const word_splitter: NoHyphenation = NoHyphenation;
/// # const width: usize = 80;
- /// # const actual: Options<'static, NoHyphenation> = Options::with_splitter(width, splitter);
+ /// # let actual = Options::with_word_splitter(width, word_splitter);
/// # let expected =
/// Options {
/// width: width,
/// initial_indent: "",
/// subsequent_indent: "",
/// break_words: true,
+ /// #[cfg(feature = "unicode-linebreak")]
+ /// word_separator: textwrap::word_separators::UnicodeBreakProperties,
+ /// #[cfg(not(feature = "unicode-linebreak"))]
+ /// word_separator: textwrap::word_separators::AsciiSpace,
/// #[cfg(feature = "smawk")]
- /// wrap_algorithm: textwrap::core::WrapAlgorithm::OptimalFit,
+ /// wrap_algorithm: textwrap::wrap_algorithms::OptimalFit,
/// #[cfg(not(feature = "smawk"))]
- /// wrap_algorithm: textwrap::core::WrapAlgorithm::FirstFit,
- /// splitter: splitter,
+ /// wrap_algorithm: textwrap::wrap_algorithms::FirstFit,
+ /// word_splitter: word_splitter,
/// }
/// # ;
/// # assert_eq!(actual.width, expected.width);
/// # assert_eq!(actual.initial_indent, expected.initial_indent);
/// # assert_eq!(actual.subsequent_indent, expected.subsequent_indent);
/// # assert_eq!(actual.break_words, expected.break_words);
- /// # assert_eq!(actual.wrap_algorithm, expected.wrap_algorithm);
- /// # let expected_coerced: Options<'static, NoHyphenation> = expected;
/// ```
///
- /// This constructor allows to specify the splitter to be used. It
- /// is like a short-cut for `Options::new(w).splitter(s)`, but
- /// this function is a `const fn`. The given splitter may be in a
- /// [`Box`], which then can be coerced into a trait object for
- /// dynamic dispatch:
+ /// This constructor allows to specify the word splitter to be
+ /// used. It is like a short-cut for
+ /// `Options::new(w).word_splitter(s)`, but this function is a
+ /// `const fn`. The given word splitter may be in a [`Box`], which
+ /// then can be coerced into a trait object for dynamic dispatch:
///
/// ```
- /// use textwrap::{HyphenSplitter, NoHyphenation, Options};
- /// # use textwrap::{WordSplitter};
+ /// use textwrap::Options;
+ /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter};
/// # const width: usize = 80;
///
/// // This opt contains a boxed trait object as splitter.
/// // The type annotation is important, otherwise it will be not a trait object
- /// let mut opt: Options = Options::with_splitter(width, Box::new(NoHyphenation));
- /// // Its type is actually: `Options<Box<dyn WordSplitter>>`:
- /// let opt_coerced: Options<Box<dyn WordSplitter>> = opt;
+ /// let mut opt: Options<_, _, Box<dyn WordSplitter>>
+ /// = Options::with_word_splitter(width, Box::new(NoHyphenation));
+ /// // Its type is actually: `Options<AsciiSpace, Box<dyn word_splitters::WordSplitter>>`:
+ /// let opt_coerced: Options<_, _, Box<dyn WordSplitter>> = opt;
///
- /// // Thus, it can be overridden with a different splitter.
- /// opt = Options::with_splitter(width, Box::new(HyphenSplitter));
+ /// // Thus, it can be overridden with a different word splitter.
+ /// opt = Options::with_word_splitter(width, Box::new(HyphenSplitter));
/// // Now, containing a `HyphenSplitter` instead.
/// ```
///
- /// Since the splitter is given by value, which determines the
- /// generic type parameter, it can be used to produce both an
+ /// Since the word splitter is given by value, which determines
+ /// the generic type parameter, it can be used to produce both an
/// [`Options`] with static and dynamic dispatch, respectively.
/// While dynamic dispatch allows to change the type of the inner
- /// splitter at run time as seen above, static dispatch especially
- /// can store the splitter directly, without the need for a box.
- /// This in turn allows it to be used in constant and static
- /// context:
+ /// word splitter at run time as seen above, static dispatch
+ /// especially can store the word splitter directly, without the
+ /// need for a box. This in turn allows it to be used in constant
+ /// and static context:
///
/// ```
- /// use textwrap::{HyphenSplitter, Options};
+ /// use textwrap::word_splitters::HyphenSplitter; use textwrap::{ Options};
+ /// use textwrap::word_separators::AsciiSpace;
+ /// use textwrap::wrap_algorithms::FirstFit;
/// # const width: usize = 80;
///
- /// const FOO: Options<HyphenSplitter> = Options::with_splitter(width, HyphenSplitter);
- /// static BAR: Options<HyphenSplitter> = FOO;
+ /// # #[cfg(all(not(feature = "smawk"), not(feature = "unicode-linebreak")))] {
+ /// const FOO: Options<FirstFit, AsciiSpace, HyphenSplitter> =
+ /// Options::with_word_splitter(width, HyphenSplitter);
+ /// static BAR: Options<FirstFit, AsciiSpace, HyphenSplitter> = FOO;
+ /// # }
/// ```
- pub const fn with_splitter(width: usize, splitter: S) -> Self {
+ pub const fn with_word_splitter(width: usize, word_splitter: WordSplit) -> Self {
Options {
width,
initial_indent: "",
subsequent_indent: "",
break_words: true,
- #[cfg(feature = "smawk")]
- wrap_algorithm: core::WrapAlgorithm::OptimalFit,
- #[cfg(not(feature = "smawk"))]
- wrap_algorithm: core::WrapAlgorithm::FirstFit,
- splitter: splitter,
+ word_separator: DefaultWordSeparator!(),
+ wrap_algorithm: DefaultWrapAlgorithm!(),
+ word_splitter: word_splitter,
}
}
}
-impl<'a, S: WordSplitter> Options<'a, S> {
+impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit> {
/// Change [`self.initial_indent`]. The initial indentation is
/// used on the very first line of output.
///
@@ -507,20 +586,50 @@ impl<'a, S: WordSplitter> Options<'a, S> {
}
}
+ /// Change [`self.word_separator`].
+ ///
+ /// See [`word_separators::WordSeparator`] for details on the choices.
+ ///
+ /// [`self.word_separator`]: #structfield.word_separator
+ pub fn word_separator<NewWordSep>(
+ self,
+ word_separator: NewWordSep,
+ ) -> Options<'a, WrapAlgo, NewWordSep, WordSplit> {
+ Options {
+ width: self.width,
+ initial_indent: self.initial_indent,
+ subsequent_indent: self.subsequent_indent,
+ break_words: self.break_words,
+ word_separator: word_separator,
+ wrap_algorithm: self.wrap_algorithm,
+ word_splitter: self.word_splitter,
+ }
+ }
+
/// Change [`self.wrap_algorithm`].
///
- /// See [`core::WrapAlgorithm`] for details on the choices.
+ /// See the [`wrap_algorithms::WrapAlgorithm`] trait for details on
+ /// the choices.
///
/// [`self.wrap_algorithm`]: #structfield.wrap_algorithm
- pub fn wrap_algorithm(self, wrap_algorithm: core::WrapAlgorithm) -> Self {
+ pub fn wrap_algorithm<NewWrapAlgo>(
+ self,
+ wrap_algorithm: NewWrapAlgo,
+ ) -> Options<'a, NewWrapAlgo, WordSep, WordSplit> {
Options {
- wrap_algorithm,
- ..self
+ width: self.width,
+ initial_indent: self.initial_indent,
+ subsequent_indent: self.subsequent_indent,
+ break_words: self.break_words,
+ word_separator: self.word_separator,
+ wrap_algorithm: wrap_algorithm,
+ word_splitter: self.word_splitter,
}
}
- /// Change [`self.splitter`]. The [`WordSplitter`] is used to fit
- /// part of a word into the current line when wrapping text.
+ /// Change [`self.word_splitter`]. The
+ /// [`word_splitters::WordSplitter`] is used to fit part of a word
+ /// into the current line when wrapping text.
///
/// This function may return a different type than `Self`. That is
/// the case when the given `splitter` is of a different type the
@@ -528,29 +637,36 @@ impl<'a, S: WordSplitter> Options<'a, S> {
/// example:
///
/// ```
- /// use textwrap::{HyphenSplitter, NoHyphenation, Options};
- /// // The default type returned by `new` is `Options<HyphenSplitter>`
- /// let opt: Options<HyphenSplitter> = Options::new(80);
- /// // Setting a different splitter changes the type
- /// let opt: Options<NoHyphenation> = opt.splitter(NoHyphenation);
+ /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation};
+ /// use textwrap::Options;
+ /// // The default type returned by `new`:
+ /// let opt: Options<_, _, HyphenSplitter> = Options::new(80);
+ /// // Setting a different word splitter changes the type
+ /// let opt: Options<_, _, NoHyphenation> = opt.word_splitter(NoHyphenation);
/// ```
///
- /// [`self.splitter`]: #structfield.splitter
- pub fn splitter<T>(self, splitter: T) -> Options<'a, T> {
+ /// [`self.word_splitter`]: #structfield.word_splitter
+ pub fn word_splitter<NewWordSplit>(
+ self,
+ word_splitter: NewWordSplit,
+ ) -> Options<'a, WrapAlgo, WordSep, NewWordSplit> {
Options {
width: self.width,
initial_indent: self.initial_indent,
subsequent_indent: self.subsequent_indent,
break_words: self.break_words,
+ word_separator: self.word_separator,
wrap_algorithm: self.wrap_algorithm,
- splitter: splitter,
+ word_splitter,
}
}
}
-/// Return the current terminal width. If the terminal width cannot be
-/// determined (typically because the standard output is not connected
-/// to a terminal), a default width of 80 characters will be used.
+/// Return the current terminal width.
+///
+/// If the terminal width cannot be determined (typically because the
+/// standard output is not connected to a terminal), a default width
+/// of 80 characters will be used.
///
/// # Examples
///
@@ -558,11 +674,12 @@ impl<'a, S: WordSplitter> Options<'a, S> {
/// with a two column margin to the left and the right:
///
/// ```no_run
-/// use textwrap::{termwidth, NoHyphenation, Options};
+/// use textwrap::{termwidth, Options};
+/// use textwrap::word_splitters::NoHyphenation;
///
/// let width = termwidth() - 4; // Two columns on each side.
/// let options = Options::new(width)
-/// .splitter(NoHyphenation)
+/// .word_splitter(NoHyphenation)
/// .initial_indent(" ")
/// .subsequent_indent(" ");
/// ```
@@ -606,10 +723,12 @@ pub fn termwidth() -> usize {
/// "- Memory safety\n without\n garbage\n collection."
/// );
/// ```
-pub fn fill<'a, S, Opt>(text: &str, width_or_options: Opt) -> String
+pub fn fill<'a, WrapAlgo, WordSep, WordSplit, Opt>(text: &str, width_or_options: Opt) -> String
where
- S: WordSplitter,
- Opt: Into<Options<'a, S>>,
+ WrapAlgo: wrap_algorithms::WrapAlgorithm,
+ WordSep: word_separators::WordSeparator,
+ WordSplit: word_splitters::WordSplitter,
+ Opt: Into<Options<'a, WrapAlgo, WordSep, WordSplit>>,
{
// This will avoid reallocation in simple cases (no
// indentation, no hyphenation).
@@ -671,7 +790,12 @@ where
/// assert_eq!(options.initial_indent, "* ");
/// assert_eq!(options.subsequent_indent, " ");
/// ```
-pub fn unfill<'a>(text: &'a str) -> (String, Options<'a, HyphenSplitter>) {
+pub fn unfill(
+ text: &str,
+) -> (
+ String,
+ Options<'_, DefaultWrapAlgorithm!(), DefaultWordSeparator!(), word_splitters::HyphenSplitter>,
+) {
let trimmed = text.trim_end_matches('\n');
let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/'];
@@ -728,20 +852,53 @@ pub fn unfill<'a>(text: &'a str) -> (String, Options<'a, HyphenSplitter>) {
/// ```
/// use textwrap::refill;
///
+/// // Some loosely wrapped text. The "> " prefix is recognized automatically.
/// let text = "\
-/// > Memory safety without
-/// > garbage collection.
+/// > Memory
+/// > safety without garbage
+/// > collection.
/// ";
-/// assert_eq!(refill(text, 15), "\
+///
+/// assert_eq!(refill(text, 20), "\
/// > Memory safety
-/// > without
-/// > garbage
+/// > without garbage
+/// > collection.
+/// ");
+///
+/// assert_eq!(refill(text, 40), "\
+/// > Memory safety without garbage
/// > collection.
/// ");
-pub fn refill<'a, S, Opt>(filled_text: &str, new_width_or_options: Opt) -> String
+///
+/// assert_eq!(refill(text, 60), "\
+/// > Memory safety without garbage collection.
+/// ");
+/// ```
+///
+/// You can also reshape bullet points:
+///
+/// ```
+/// use textwrap::refill;
+///
+/// let text = "\
+/// - This is my
+/// list item.
+/// ";
+///
+/// assert_eq!(refill(text, 20), "\
+/// - This is my list
+/// item.
+/// ");
+/// ```
+pub fn refill<'a, WrapAlgo, WordSep, WordSplit, Opt>(
+ filled_text: &str,
+ new_width_or_options: Opt,
+) -> String
where
- S: WordSplitter,
- Opt: Into<Options<'a, S>>,
+ WrapAlgo: wrap_algorithms::WrapAlgorithm,
+ WordSep: word_separators::WordSeparator,
+ WordSplit: word_splitters::WordSplitter,
+ Opt: Into<Options<'a, WrapAlgo, WordSep, WordSplit>>,
{
let trimmed = filled_text.trim_end_matches('\n');
let (text, options) = unfill(trimmed);
@@ -757,8 +914,9 @@ where
///
/// The result is a vector of lines, each line is of type [`Cow<'_,
/// str>`](Cow), which means that the line will borrow from the input
-/// `&str` if possible. The lines do not have a trailing `'\n'`. Use
-/// the [`fill`] function if you need a [`String`] instead.
+/// `&str` if possible. The lines do not have trailing whitespace,
+/// including a final `'\n'`. Please use the [`fill`] function if you
+/// need a [`String`] instead.
///
/// The easiest way to use this function is to pass an integer for
/// `width_or_options`:
@@ -806,8 +964,7 @@ where
/// narrow column with room for only 10 characters looks like this:
///
/// ```
-/// # use textwrap::{Options, wrap};
-/// # use textwrap::core::WrapAlgorithm::FirstFit;
+/// # use textwrap::{wrap_algorithms::FirstFit, Options, wrap};
/// #
/// # let lines = wrap("To be, or not to be: that is the question",
/// # Options::new(10).wrap_algorithm(FirstFit));
@@ -832,7 +989,7 @@ where
/// ```
/// # #[cfg(feature = "smawk")] {
/// # use textwrap::{Options, wrap};
-/// # use textwrap::core::WrapAlgorithm::OptimalFit;
+/// # use textwrap::wrap_algorithms::OptimalFit;
/// #
/// # let lines = wrap("To be, or not to be: that is the question",
/// # Options::new(10).wrap_algorithm(OptimalFit));
@@ -845,7 +1002,7 @@ where
/// # "); }
/// ```
///
-/// Please see [`core::WrapAlgorithm`] for details.
+/// Please see the [`wrap_algorithms::WrapAlgorithm`] trait for details.
///
/// # Examples
///
@@ -876,10 +1033,61 @@ where
/// ]
/// );
/// ```
-pub fn wrap<'a, S, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>>
+///
+/// ## Leading and Trailing Whitespace
+///
+/// As a rule, leading whitespace (indentation) is preserved and
+/// trailing whitespace is discarded.
+///
+/// In more detail, when wrapping words into lines, words are found
+/// by splitting the input text on space characters. One or more
+/// spaces (shown here as “␣”) are attached to the end of each word:
+///
+/// ```text
+/// "Foo␣␣␣bar␣baz" -> ["Foo␣␣␣", "bar␣", "baz"]
+/// ```
+///
+/// These words are then put into lines. The interword whitespace is
+/// preserved, unless the lines are wrapped so that the `"Foo␣␣␣"`
+/// word falls at the end of a line:
+///
+/// ```
+/// use textwrap::wrap;
+///
+/// assert_eq!(wrap("Foo bar baz", 10), vec!["Foo bar", "baz"]);
+/// assert_eq!(wrap("Foo bar baz", 8), vec!["Foo", "bar baz"]);
+/// ```
+///
+/// Notice how the trailing whitespace is removed in both cases: in the
+/// first example, `"bar␣"` becomes `"bar"` and in the second case
+/// `"Foo␣␣␣"` becomes `"Foo"`.
+///
+/// Leading whitespace is preserved when the following word fits on
+/// the first line. To understand this, consider how words are found
+/// in a text with leading spaces:
+///
+/// ```text
+/// "␣␣foo␣bar" -> ["␣␣", "foo␣", "bar"]
+/// ```
+///
+/// When put into lines, the indentation is preserved if `"foo"` fits
+/// on the first line, otherwise you end up with an empty line:
+///
+/// ```
+/// use textwrap::wrap;
+///
+/// assert_eq!(wrap(" foo bar", 8), vec![" foo", "bar"]);
+/// assert_eq!(wrap(" foo bar", 4), vec!["", "foo", "bar"]);
+/// ```
+pub fn wrap<'a, WrapAlgo, WordSep, WordSplit, Opt>(
+ text: &str,
+ width_or_options: Opt,
+) -> Vec<Cow<'_, str>>
where
- S: WordSplitter,
- Opt: Into<Options<'a, S>>,
+ WrapAlgo: wrap_algorithms::WrapAlgorithm,
+ WordSep: word_separators::WordSeparator,
+ WordSplit: word_splitters::WordSplitter,
+ Opt: Into<Options<'a, WrapAlgo, WordSep, WordSplit>>,
{
let options = width_or_options.into();
@@ -892,8 +1100,8 @@ where
let mut lines = Vec::new();
for line in text.split('\n') {
- let words = core::find_words(line);
- let split_words = core::split_words(words, &options);
+ let words = options.word_separator.find_words(line);
+ let split_words = word_splitters::split_words(words, &options.word_splitter);
let broken_words = if options.break_words {
let mut broken_words = core::break_words(split_words, subsequent_width);
if !options.initial_indent.is_empty() {
@@ -909,13 +1117,8 @@ where
split_words.collect::<Vec<_>>()
};
- #[rustfmt::skip]
- let line_lengths = |i| if i == 0 { initial_width } else { subsequent_width };
- let wrapped_words = match options.wrap_algorithm {
- #[cfg(feature = "smawk")]
- core::WrapAlgorithm::OptimalFit => core::wrap_optimal_fit(&broken_words, line_lengths),
- core::WrapAlgorithm::FirstFit => core::wrap_first_fit(&broken_words, line_lengths),
- };
+ let line_widths = [initial_width, subsequent_width];
+ let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths);
let mut idx = 0;
for words in wrapped_words {
@@ -969,7 +1172,7 @@ where
/// Wrap text into columns with a given total width.
///
-/// The `left_gap`, `mid_gap` and `right_gap` arguments specify the
+/// The `left_gap`, `middle_gap` and `right_gap` arguments specify the
/// strings to insert before, between, and after the columns. The
/// total width of all columns and all gaps is specified using the
/// `total_width_or_options` argument. This argument can simply be an
@@ -1024,17 +1227,19 @@ where
/// "| example text, | columns. | shorter than |",
/// "| which is | Notice how | the others. |",
/// "| wrapped into | the final | |"]);
-pub fn wrap_columns<'a, S, Opt>(
+pub fn wrap_columns<'a, WrapAlgo, WordSep, WordSplit, Opt>(
text: &str,
columns: usize,
total_width_or_options: Opt,
left_gap: &str,
- mid_gap: &str,
+ middle_gap: &str,
right_gap: &str,
) -> Vec<String>
where
- S: WordSplitter,
- Opt: Into<Options<'a, S>>,
+ WrapAlgo: wrap_algorithms::WrapAlgorithm,
+ WordSep: word_separators::WordSeparator,
+ WordSplit: word_splitters::WordSplitter,
+ Opt: Into<Options<'a, WrapAlgo, WordSep, WordSplit>>,
{
assert!(columns > 0);
@@ -1044,7 +1249,7 @@ where
.width
.saturating_sub(core::display_width(left_gap))
.saturating_sub(core::display_width(right_gap))
- .saturating_sub(core::display_width(mid_gap) * (columns - 1));
+ .saturating_sub(core::display_width(middle_gap) * (columns - 1));
let column_width = std::cmp::max(inner_width / columns, 1);
options.width = column_width;
@@ -1068,7 +1273,7 @@ where
if column_no == columns - 1 {
line.push_str(&last_column_padding);
} else {
- line.push_str(mid_gap);
+ line.push_str(middle_gap);
}
}
line.push_str(right_gap);
@@ -1086,24 +1291,28 @@ where
///
/// Since we can only replace existing whitespace in the input with
/// `'\n'`, we cannot do hyphenation nor can we split words longer
-/// than the line width. Indentation is also ruled out. In other
-/// words, `fill_inplace(width)` behaves as if you had called [`fill`]
-/// with these options:
+/// than the line width. We also need to use `AsciiSpace` as the word
+/// separator since we need `' '` characters between words in order to
+/// replace some of them with a `'\n'`. Indentation is also ruled out.
+/// In other words, `fill_inplace(width)` behaves as if you had called
+/// [`fill`] with these options:
///
/// ```
-/// # use textwrap::{Options, NoHyphenation};
+/// # use textwrap::{core, Options};
+/// # use textwrap::{word_separators, word_splitters, wrap_algorithms};
/// # let width = 80;
/// Options {
/// width: width,
/// initial_indent: "",
/// subsequent_indent: "",
/// break_words: false,
-/// wrap_algorithm: textwrap::core::WrapAlgorithm::FirstFit,
-/// splitter: NoHyphenation,
+/// word_separator: word_separators::AsciiSpace,
+/// wrap_algorithm: wrap_algorithms::FirstFit,
+/// word_splitter: word_splitters::NoHyphenation,
/// };
/// ```
///
-/// The wrap algorithm is [`core::WrapAlgorithm::FirstFit`] since this
+/// The wrap algorithm is [`wrap_algorithms::FirstFit`] since this
/// is the fastest algorithm — and the main reason to use
/// `fill_inplace` is to get the string broken into newlines as fast
/// as possible.
@@ -1129,12 +1338,15 @@ where
/// benchmark](https://github.com/mgeisler/textwrap/blob/master/benches/linear.rs)
/// for details.
pub fn fill_inplace(text: &mut String, width: usize) {
+ use word_separators::WordSeparator;
let mut indices = Vec::new();
let mut offset = 0;
for line in text.split('\n') {
- let words = core::find_words(line).collect::<Vec<_>>();
- let wrapped_words = core::wrap_first_fit(&words, |_| width);
+ let words = word_separators::AsciiSpace
+ .find_words(line)
+ .collect::<Vec<_>>();
+ let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[width]);
let mut line_offset = offset;
for words in &wrapped_words[..wrapped_words.len() - 1] {
@@ -1164,6 +1376,9 @@ pub fn fill_inplace(text: &mut String, width: usize) {
#[cfg(test)]
mod tests {
use super::*;
+ use crate::word_splitters::WordSplitter;
+ use crate::{word_splitters, wrap_algorithms};
+
#[cfg(feature = "hyphenation")]
use hyphenation::{Language, Load, Standard};
@@ -1177,8 +1392,8 @@ mod tests {
assert_eq!(opt_usize.subsequent_indent, opt_options.subsequent_indent);
assert_eq!(opt_usize.break_words, opt_options.break_words);
assert_eq!(
- opt_usize.splitter.split_points("hello-world"),
- opt_options.splitter.split_points("hello-world")
+ opt_usize.word_splitter.split_points("hello-world"),
+ opt_options.word_splitter.split_points("hello-world")
);
}
@@ -1197,7 +1412,7 @@ mod tests {
assert_eq!(
wrap(
"To be, or not to be, that is the question.",
- Options::new(10).wrap_algorithm(core::WrapAlgorithm::FirstFit)
+ Options::new(10).wrap_algorithm(wrap_algorithms::FirstFit)
),
vec!["To be, or", "not to be,", "that is", "the", "question."]
);
@@ -1229,6 +1444,15 @@ mod tests {
}
#[test]
+ fn leading_whitespace_empty_first_line() {
+ // If there is no space for the first word, the first line
+ // will be empty. This is because the string is split into
+ // words like [" ", "foobar ", "baz"], which puts "foobar " on
+ // the second line. We never output trailing whitespace
+ assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]);
+ }
+
+ #[test]
fn trailing_whitespace() {
// Whitespace is only significant inside a line. After a line
// gets too long and is broken, the first word starts in
@@ -1250,7 +1474,8 @@ mod tests {
fn issue_129() {
// The dash is an em-dash which takes up four bytes. We used
// to panic since we tried to index into the character.
- assert_eq!(wrap("x – x", 1), vec!["x", "–", "x"]);
+ let options = Options::new(1).word_separator(word_separators::AsciiSpace);
+ assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]);
}
#[test]
@@ -1258,9 +1483,23 @@ mod tests {
fn wide_character_handling() {
assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);
assert_eq!(
- wrap("Hello, World!", 15),
+ wrap(
+ "Hello, World!",
+ Options::new(15).word_separator(word_separators::AsciiSpace)
+ ),
vec!["Hello,", "World!"]
);
+
+ // Wide characters are allowed to break if the
+ // unicode-linebreak feature is enabled.
+ #[cfg(feature = "unicode-linebreak")]
+ assert_eq!(
+ wrap(
+ "Hello, World!",
+ Options::new(15).word_separator(word_separators::UnicodeBreakProperties)
+ ),
+ vec!["Hello, W", "orld!"]
+ );
}
#[test]
@@ -1389,25 +1628,27 @@ mod tests {
#[test]
fn simple_hyphens_static() {
- let options = Options::new(8).splitter(HyphenSplitter);
+ let options = Options::new(8).word_splitter(word_splitters::HyphenSplitter);
assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]);
}
#[test]
fn simple_hyphens_dynamic() {
- let options: Options = Options::new(8).splitter(Box::new(HyphenSplitter));
+ let options: Options<_, _> =
+ Options::new(8).word_splitter(Box::new(word_splitters::HyphenSplitter));
assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]);
}
#[test]
fn no_hyphenation_static() {
- let options = Options::new(8).splitter(NoHyphenation);
+ let options = Options::new(8).word_splitter(word_splitters::NoHyphenation);
assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]);
}
#[test]
fn no_hyphenation_dynamic() {
- let options: Options = Options::new(8).splitter(Box::new(NoHyphenation));
+ let options: Options<_, _> =
+ Options::new(8).word_splitter(Box::new(word_splitters::NoHyphenation));
assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]);
}
@@ -1421,7 +1662,7 @@ mod tests {
vec!["Internatio", "nalization"]
);
- let options = Options::new(10).splitter(dictionary);
+ let options = Options::new(10).word_splitter(dictionary);
assert_eq!(
wrap("Internationalization", &options),
vec!["Interna-", "tionaliza-", "tion"]
@@ -1432,13 +1673,14 @@ mod tests {
#[cfg(feature = "hyphenation")]
fn auto_hyphenation_double_hyphenation_dynamic() {
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let mut options: Options = Options::new(10).splitter(Box::new(HyphenSplitter));
+ let mut options: Options<_, _, Box<dyn word_splitters::WordSplitter>> =
+ Options::new(10).word_splitter(Box::new(word_splitters::HyphenSplitter));
assert_eq!(
wrap("Internationalization", &options),
vec!["Internatio", "nalization"]
);
- options = Options::new(10).splitter(Box::new(dictionary));
+ options = Options::new(10).word_splitter(Box::new(dictionary));
assert_eq!(
wrap("Internationalization", &options),
vec!["Interna-", "tionaliza-", "tion"]
@@ -1455,7 +1697,7 @@ mod tests {
vec!["participat", "ion is", "the key to", "success"]
);
- let options = Options::new(10).splitter(dictionary);
+ let options = Options::new(10).word_splitter(dictionary);
assert_eq!(
wrap("participation is the key to success", &options),
vec!["partici-", "pation is", "the key to", "success"]
@@ -1468,7 +1710,7 @@ mod tests {
 // Test that hyphenation takes the width of the whitespace
// into account.
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let options = Options::new(15).splitter(dictionary);
+ let options = Options::new(15).word_splitter(dictionary);
assert_eq!(
wrap("garbage collection", &options),
vec!["garbage col-", "lection"]
@@ -1482,7 +1724,7 @@ mod tests {
// line is borrowed.
use std::borrow::Cow::{Borrowed, Owned};
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let options = Options::new(10).splitter(dictionary);
+ let options = Options::new(10).word_splitter(dictionary);
let lines = wrap("Internationalization", &options);
if let Borrowed(s) = lines[0] {
assert!(false, "should not have been borrowed: {:?}", s);
@@ -1505,7 +1747,7 @@ mod tests {
vec!["over-", "caffinated"]
);
- let options = options.splitter(dictionary);
+ let options = options.word_splitter(dictionary);
assert_eq!(
wrap("over-caffinated", &options),
vec!["over-", "caffi-", "nated"]
@@ -1521,7 +1763,8 @@ mod tests {
fn break_words_wide_characters() {
// Even the poor man's version of `ch_width` counts these
// characters as wide.
- assert_eq!(wrap("Hello", 5), vec!["He", "ll", "o"]);
+ let options = Options::new(5).word_separator(word_separators::AsciiSpace);
+ assert_eq!(wrap("Hello", options), vec!["He", "ll", "o"]);
}
#[test]
@@ -1558,14 +1801,14 @@ mod tests {
assert_eq!(
fill(
"1 3 5 7\n1 3 5 7",
- Options::new(7).wrap_algorithm(core::WrapAlgorithm::FirstFit)
+ Options::new(7).wrap_algorithm(wrap_algorithms::FirstFit)
),
"1 3 5 7\n1 3 5 7"
);
assert_eq!(
fill(
"1 3 5 7\n1 3 5 7",
- Options::new(5).wrap_algorithm(core::WrapAlgorithm::FirstFit)
+ Options::new(5).wrap_algorithm(wrap_algorithms::FirstFit)
),
"1 3 5\n7\n1 3 5\n7"
);
@@ -1608,8 +1851,14 @@ mod tests {
}
#[test]
+ #[cfg(not(feature = "smawk"))]
+ #[cfg(not(feature = "unicode-linebreak"))]
fn cloning_works() {
- static OPT: Options<HyphenSplitter> = Options::with_splitter(80, HyphenSplitter);
+ static OPT: Options<
+ wrap_algorithms::FirstFit,
+ word_separators::AsciiSpace,
+ word_splitters::HyphenSplitter,
+ > = Options::with_word_splitter(80, word_splitters::HyphenSplitter);
#[allow(clippy::clone_on_copy)]
let opt = OPT.clone();
assert_eq!(opt.width, 80);
@@ -1751,74 +2000,60 @@ mod tests {
}
#[test]
- fn trait_object() {
- let opt_a: Options<NoHyphenation> = Options::with_splitter(20, NoHyphenation);
- let opt_b: Options<HyphenSplitter> = 10.into();
-
- let mut dyn_opt: &Options<dyn WordSplitter> = &opt_a;
- assert_eq!(wrap("foo bar-baz", dyn_opt), vec!["foo bar-baz"]);
-
- // Just assign a totally different option
- dyn_opt = &opt_b;
- assert_eq!(wrap("foo bar-baz", dyn_opt), vec!["foo bar-", "baz"]);
- }
-
- #[test]
fn trait_object_vec() {
- // Create a vector of referenced trait-objects
- let mut vector: Vec<&Options<dyn WordSplitter>> = Vec::new();
+ // Create a vector of Options containing trait-objects.
+ let mut vector: Vec<
+ Options<
+ _,
+ Box<dyn word_separators::WordSeparator>,
+ Box<dyn word_splitters::WordSplitter>,
+ >,
+ > = Vec::new();
// Expected result from each options
let mut results = Vec::new();
- let opt_usize: Options<_> = 10.into();
- vector.push(&opt_usize);
+ let opt_full_type: Options<
+ _,
+ Box<dyn word_separators::WordSeparator>,
+ Box<dyn word_splitters::WordSplitter>,
+ > =
+ Options::new(10)
+ .word_splitter(Box::new(word_splitters::HyphenSplitter)
+ as Box<dyn word_splitters::WordSplitter>)
+ .word_separator(Box::new(word_separators::AsciiSpace)
+ as Box<dyn word_separators::WordSeparator>);
+ vector.push(opt_full_type);
results.push(vec!["over-", "caffinated"]);
- #[cfg(feature = "hyphenation")]
- let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- #[cfg(feature = "hyphenation")]
- let opt_hyp = Options::new(8).splitter(dictionary);
- #[cfg(feature = "hyphenation")]
- vector.push(&opt_hyp);
- #[cfg(feature = "hyphenation")]
- results.push(vec!["over-", "caffi-", "nated"]);
-
- // Actually: Options<Box<dyn WordSplitter>>
- let opt_box: Options = Options::new(10)
- .break_words(false)
- .splitter(Box::new(NoHyphenation));
- vector.push(&opt_box);
+ // Actually: Options<Box<AsciiSpace>, Box<dyn word_splitters::WordSplitter>>
+ let opt_abbreviated_type =
+ Options::new(10)
+ .break_words(false)
+ .word_splitter(Box::new(word_splitters::NoHyphenation)
+ as Box<dyn word_splitters::WordSplitter>)
+ .word_separator(Box::new(word_separators::AsciiSpace)
+ as Box<dyn word_separators::WordSeparator>);
+ vector.push(opt_abbreviated_type);
results.push(vec!["over-caffinated"]);
+ #[cfg(feature = "hyphenation")]
+ {
+ let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+ let opt_hyp = Options::new(8)
+ .word_splitter(Box::new(dictionary) as Box<dyn word_splitters::WordSplitter>)
+ .word_separator(Box::new(word_separators::AsciiSpace)
+ as Box<dyn word_separators::WordSeparator>);
+ vector.push(opt_hyp);
+ results.push(vec!["over-", "caffi-", "nated"]);
+ }
+
// Test each entry
for (opt, expected) in vector.into_iter().zip(results) {
- assert_eq!(
- // Just all the totally different options
- wrap("over-caffinated", opt),
- expected
- );
+ assert_eq!(wrap("over-caffinated", opt), expected);
}
}
#[test]
- fn outer_boxing() {
- let mut wrapper: Box<Options<dyn WordSplitter>> = Box::new(Options::new(80));
-
- // We must first deref the Box into a trait object and pass it by-reference
- assert_eq!(wrap("foo bar baz", &*wrapper), vec!["foo bar baz"]);
-
- // Replace the `Options` with a `usize`
- wrapper = Box::new(Options::from(5));
-
- // Deref per-se works as well, it already returns a reference
- use std::ops::Deref;
- assert_eq!(
- wrap("foo bar baz", wrapper.deref()),
- vec!["foo", "bar", "baz"]
- );
- }
-
- #[test]
fn wrap_columns_empty_text() {
assert_eq!(wrap_columns("", 1, 10, "| ", "", " |"), vec!["| |"]);
}
diff --git a/src/splitting.rs b/src/splitting.rs
deleted file mode 100644
index e92b188..0000000
--- a/src/splitting.rs
+++ /dev/null
@@ -1,140 +0,0 @@
-//! Word splitting functionality.
-//!
-//! To wrap text into lines, long words sometimes need to be split
-//! across lines. The [`WordSplitter`] trait defines this
-//! functionality. [`HyphenSplitter`] is the default implementation of
-//! this treat: it will simply split words on existing hyphens.
-
-/// The `WordSplitter` trait describes where words can be split.
-///
-/// If the textwrap crate has been compiled with the `hyphenation`
-/// Cargo feature enabled, you will find an implementation of
-/// `WordSplitter` by the `hyphenation::Standard` struct. Use this
-/// struct for language-aware hyphenation:
-///
-/// ```
-/// #[cfg(feature = "hyphenation")]
-/// {
-/// use hyphenation::{Language, Load, Standard};
-/// use textwrap::{wrap, Options};
-///
-/// let text = "Oxidation is the loss of electrons.";
-/// let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
-/// let options = Options::new(8).splitter(dictionary);
-/// assert_eq!(wrap(text, &options), vec!["Oxida-",
-/// "tion is",
-/// "the loss",
-/// "of elec-",
-/// "trons."]);
-/// }
-/// ```
-///
-/// Please see the documentation for the [hyphenation] crate for more
-/// details.
-///
-/// [hyphenation]: https://docs.rs/hyphenation/
-pub trait WordSplitter: std::fmt::Debug {
- /// Return all possible indices where `word` can be split.
- ///
- /// The indices returned must be in range `0..word.len()`. They
- /// should point to the index _after_ the split point, i.e., after
- /// `-` if splitting on hyphens. This way, `word.split_at(idx)`
- /// will break the word into two well-formed pieces.
- ///
- /// # Examples
- ///
- /// ```
- /// use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter};
- /// assert_eq!(NoHyphenation.split_points("cannot-be-split"), vec![]);
- /// assert_eq!(HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
- /// ```
- fn split_points(&self, word: &str) -> Vec<usize>;
-}
-
-impl<S: WordSplitter + ?Sized> WordSplitter for Box<S> {
- fn split_points(&self, word: &str) -> Vec<usize> {
- use std::ops::Deref;
- self.deref().split_points(word)
- }
-}
-
-impl<T: ?Sized + WordSplitter> WordSplitter for &T {
- fn split_points(&self, word: &str) -> Vec<usize> {
- (*self).split_points(word)
- }
-}
-
-/// Use this as a [`Options.splitter`] to avoid any kind of
-/// hyphenation:
-///
-/// ```
-/// use textwrap::{wrap, NoHyphenation, Options};
-///
-/// let options = Options::new(8).splitter(NoHyphenation);
-/// assert_eq!(wrap("foo bar-baz", &options),
-/// vec!["foo", "bar-baz"]);
-/// ```
-///
-/// [`Options.splitter`]: super::Options::splitter
-#[derive(Clone, Copy, Debug)]
-pub struct NoHyphenation;
-
-/// `NoHyphenation` implements `WordSplitter` by not splitting the
-/// word at all.
-impl WordSplitter for NoHyphenation {
- fn split_points(&self, _: &str) -> Vec<usize> {
- Vec::new()
- }
-}
-
-/// Simple and default way to split words: splitting on existing
-/// hyphens only.
-///
-/// You probably don't need to use this type since it's already used
-/// by default by [`Options::new`](super::Options::new).
-#[derive(Clone, Copy, Debug)]
-pub struct HyphenSplitter;
-
-/// `HyphenSplitter` is the default `WordSplitter` used by
-/// [`Options::new`](super::Options::new). It will split words on any
-/// existing hyphens in the word.
-///
-/// It will only use hyphens that are surrounded by alphanumeric
-/// characters, which prevents a word like `"--foo-bar"` from being
-/// split into `"--"` and `"foo-bar"`.
-impl WordSplitter for HyphenSplitter {
- fn split_points(&self, word: &str) -> Vec<usize> {
- let mut splits = Vec::new();
-
- for (idx, _) in word.match_indices('-') {
- // We only use hyphens that are surrounded by alphanumeric
- // characters. This is to avoid splitting on repeated hyphens,
- // such as those found in --foo-bar.
- let prev = word[..idx].chars().next_back();
- let next = word[idx + 1..].chars().next();
-
- if prev.filter(|ch| ch.is_alphanumeric()).is_some()
- && next.filter(|ch| ch.is_alphanumeric()).is_some()
- {
- splits.push(idx + 1); // +1 due to width of '-'.
- }
- }
-
- splits
- }
-}
-
-/// A hyphenation dictionary can be used to do language-specific
-/// hyphenation using patterns from the [hyphenation] crate.
-///
-/// **Note:** Only available when the `hyphenation` Cargo feature is
-/// enabled.
-///
-/// [hyphenation]: https://docs.rs/hyphenation/
-#[cfg(feature = "hyphenation")]
-impl WordSplitter for hyphenation::Standard {
- fn split_points(&self, word: &str) -> Vec<usize> {
- use hyphenation::Hyphenator;
- self.hyphenate(word).breaks
- }
-}
diff --git a/src/word_separators.rs b/src/word_separators.rs
new file mode 100644
index 0000000..cb1b8a9
--- /dev/null
+++ b/src/word_separators.rs
@@ -0,0 +1,406 @@
+//! Functionality for finding words.
+//!
+//! In order to wrap text, we need to know where the legal break
+//! points are, i.e., where the words of the text are. This means that
+//! we need to define what a "word" is.
+//!
+//! A simple approach is to simply split the text on whitespace, but
+//! this does not work for East-Asian languages such as Chinese or
+//! Japanese where there are no spaces between words. Breaking a long
+//! sequence of emojis is another example where line breaks might be
+//! wanted even if there is no whitespace to be found.
+//!
+//! The [`WordSeparator`] trait is responsible for determining where
+//! the words are in a line of text. Please refer to the trait and
+//! the structs which implement it for more information.
+
+#[cfg(feature = "unicode-linebreak")]
+use crate::core::skip_ansi_escape_sequence;
+use crate::core::Word;
+
+/// Describes where words occur in a line of text.
+///
+/// The simplest approach is to say that words are separated by one or
+/// more ASCII spaces (`' '`). This works for Western languages
+/// without emojis. A more complex approach is to use the Unicode line
+/// breaking algorithm, which finds break points in non-ASCII text.
+///
+/// The line breaks occur between words, please see the
+/// [`WordSplitter`](crate::word_splitters::WordSplitter) trait for
+/// options of how to handle hyphenation of individual words.
+///
+/// # Examples
+///
+/// ```
+/// use textwrap::core::Word;
+/// use textwrap::word_separators::{WordSeparator, AsciiSpace};
+///
+/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
+/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
+/// ```
+pub trait WordSeparator: WordSeparatorClone + std::fmt::Debug {
+ // This trait should really return impl Iterator<Item = Word>, but
+ // this isn't possible until Rust supports higher-kinded types:
+ // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md
+ /// Find all words in `line`.
+ fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a>;
+}
+
+// The internal `WordSeparatorClone` trait allows us to implement
+// `Clone` for `Box<dyn WordSeparator>`. This is used in the
+// `From<&Options<'_, WrapAlgo, WordSep, WordSplit>> for Options<'a,
+// WrapAlgo, WordSep, WordSplit>` implementation.
+#[doc(hidden)]
+pub trait WordSeparatorClone {
+ fn clone_box(&self) -> Box<dyn WordSeparator>;
+}
+
+impl<T: WordSeparator + Clone + 'static> WordSeparatorClone for T {
+ fn clone_box(&self) -> Box<dyn WordSeparator> {
+ Box::new(self.clone())
+ }
+}
+
+impl Clone for Box<dyn WordSeparator> {
+ fn clone(&self) -> Box<dyn WordSeparator> {
+ use std::ops::Deref;
+ self.deref().clone_box()
+ }
+}
+
+impl WordSeparator for Box<dyn WordSeparator> {
+ fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+ use std::ops::Deref;
+ self.deref().find_words(line)
+ }
+}
+
+/// Find words by splitting on regions of `' '` characters.
+#[derive(Clone, Copy, Debug, Default)]
+pub struct AsciiSpace;
+
+/// Split `line` into words separated by regions of `' '` characters.
+///
+/// # Examples
+///
+/// ```
+/// use textwrap::core::Word;
+/// use textwrap::word_separators::{AsciiSpace, WordSeparator};
+///
+/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
+/// assert_eq!(words, vec![Word::from("Hello "),
+/// Word::from("World!")]);
+/// ```
+impl WordSeparator for AsciiSpace {
+    /// Yield the words of `line`, where each word keeps the run of
+    /// `' '` characters that follows it.
+    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+        let mut word_start = 0;
+        let mut prev_was_space = false;
+        let mut chars = line.char_indices();
+
+        Box::new(std::iter::from_fn(move || {
+            // `chars` is owned by this closure and must keep its
+            // position between calls, so it is advanced with
+            // `while let` instead of being moved into a `for` loop.
+            #[allow(clippy::while_let_on_iterator)]
+            while let Some((pos, ch)) = chars.next() {
+                let is_space = ch == ' ';
+                // A new word begins at the first non-space character
+                // after a run of spaces.
+                let at_boundary = prev_was_space && !is_space;
+                prev_was_space = is_space;
+
+                if at_boundary {
+                    let word = Word::from(&line[word_start..pos]);
+                    word_start = pos;
+                    return Some(word);
+                }
+            }
+
+            // Input exhausted: emit whatever is left exactly once.
+            if word_start >= line.len() {
+                return None;
+            }
+            let tail = Word::from(&line[word_start..]);
+            word_start = line.len();
+            Some(tail)
+        }))
+    }
+}
+
+/// Find words using the Unicode line breaking algorithm.
+#[cfg(feature = "unicode-linebreak")]
+#[derive(Clone, Copy, Debug, Default)]
+pub struct UnicodeBreakProperties;
+
+/// Split `line` into words using Unicode break properties.
+///
+/// This word separator uses the Unicode line breaking algorithm
+/// described in [Unicode Standard Annex
+/// #14](https://www.unicode.org/reports/tr14/) to find legal places
+/// to break lines. There is a small difference in that the U+002D
+/// (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break:
+/// to allow a line break at a hyphen, use the
+/// [`HyphenSplitter`](crate::word_splitters::HyphenSplitter). Soft
+/// hyphens are not currently supported.
+///
+/// # Examples
+///
+/// Unlike [`AsciiSpace`], the Unicode line breaking algorithm will
+/// find line break opportunities between some characters with no
+/// intervening whitespace:
+///
+/// ```
+/// #[cfg(feature = "unicode-linebreak")] {
+/// use textwrap::word_separators::{WordSeparator, UnicodeBreakProperties};
+/// use textwrap::core::Word;
+///
+/// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(),
+/// vec![Word::from("Emojis: "),
+/// Word::from("😂"),
+/// Word::from("😍")]);
+///
+/// assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::<Vec<_>>(),
+/// vec![Word::from("CJK: "),
+/// Word::from("你"),
+/// Word::from("好")]);
+/// }
+/// ```
+///
+/// A U+2060 (Word Joiner) character can be inserted if you want to
+/// manually override the defaults and keep the characters together:
+///
+/// ```
+/// #[cfg(feature = "unicode-linebreak")] {
+/// use textwrap::word_separators::{UnicodeBreakProperties, WordSeparator};
+/// use textwrap::core::Word;
+///
+/// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(),
+/// vec![Word::from("Emojis: "),
+/// Word::from("😂\u{2060}😍")]);
+/// }
+/// ```
+///
+/// The Unicode line breaking algorithm will also automatically
+/// suppress line breaks around certain punctuation characters:
+///
+/// ```
+/// #[cfg(feature = "unicode-linebreak")] {
+/// use textwrap::word_separators::{UnicodeBreakProperties, WordSeparator};
+/// use textwrap::core::Word;
+///
+/// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(),
+/// vec![Word::from("[ foo ] "),
+/// Word::from("bar !")]);
+/// }
+/// ```
+#[cfg(feature = "unicode-linebreak")]
+impl WordSeparator for UnicodeBreakProperties {
+    /// Find the words in `line` using the Unicode line breaking
+    /// algorithm.
+    ///
+    /// Break opportunities are computed on a copy of `line` with the
+    /// ANSI escape sequences removed. The returned words are slices of
+    /// the original `line`, so escape sequences stay part of the words.
+    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
+        // Construct an iterator over (original index, stripped index)
+        // tuples. We find the Unicode linebreaks on a stripped string,
+        // but we need the original indices so we can form words based on
+        // the original string.
+        let mut last_stripped_idx = 0;
+        let mut char_indices = line.char_indices();
+        let mut idx_map = std::iter::from_fn(move || match char_indices.next() {
+            Some((orig_idx, ch)) => {
+                let stripped_idx = last_stripped_idx;
+                if !skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
+                    last_stripped_idx += ch.len_utf8();
+                }
+                Some((orig_idx, stripped_idx))
+            }
+            None => None,
+        });
+
+        let stripped = strip_ansi_escape_sequences(line);
+        let mut opportunities = unicode_linebreak::linebreaks(&stripped)
+            .filter(|(idx, _)| {
+                // `idx` is a byte offset into `stripped`, so the
+                // character before the break must be looked up there.
+                // Slicing `line` with it would inspect the wrong
+                // character, and could panic on a non-character
+                // boundary, whenever `line` contains ANSI escape
+                // sequences.
+                #[allow(clippy::match_like_matches_macro)]
+                match &stripped[..*idx].chars().next_back() {
+                    // We suppress breaks at ‘-’ since we want to control
+                    // this via the WordSplitter.
+                    Some('-') => false,
+                    // Soft hyphens are currently not supported since we
+                    // require all `Word` fragments to be continuous in
+                    // the input string.
+                    Some(SHY) => false,
+                    // Other breaks should be fine!
+                    _ => true,
+                }
+            })
+            .collect::<Vec<_>>()
+            .into_iter();
+
+        // Remove final break opportunity, we will add it below using
+        // &line[start..]; This ensures that we correctly include a
+        // trailing ANSI escape sequence.
+        opportunities.next_back();
+
+        let mut start = 0;
+        Box::new(std::iter::from_fn(move || {
+            #[allow(clippy::while_let_on_iterator)]
+            while let Some((idx, _)) = opportunities.next() {
+                // Translate the stripped offset back to an offset in
+                // the original line. Both sequences are increasing, so
+                // `idx_map` only ever needs to move forward.
+                if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx)
+                {
+                    let word = Word::from(&line[start..orig_idx]);
+                    start = orig_idx;
+                    return Some(word);
+                }
+            }
+
+            // Emit the remainder of the line exactly once.
+            if start < line.len() {
+                let word = Word::from(&line[start..]);
+                start = line.len();
+                return Some(word);
+            }
+
+            None
+        }))
+    }
+}
+
+/// Soft hyphen, also known as a “shy hyphen”. Should show up as ‘-’
+/// if a line is broken at this point, and otherwise be invisible.
+/// Textwrap does not currently support breaking words at soft
+/// hyphens.
+#[cfg(feature = "unicode-linebreak")]
+const SHY: char = '\u{00ad}';
+
+// Return a copy of `text` with every ANSI escape sequence removed.
+#[cfg(feature = "unicode-linebreak")]
+fn strip_ansi_escape_sequences(text: &str) -> String {
+    let mut stripped = String::with_capacity(text.len());
+
+    let mut remaining = text.chars();
+    while let Some(ch) = remaining.next() {
+        // `skip_ansi_escape_sequence` gets the iterator itself and
+        // reports whether `ch` should be dropped; presumably it also
+        // advances `remaining` past the rest of the sequence.
+        if !skip_ansi_escape_sequence(ch, &mut remaining) {
+            stripped.push(ch);
+        }
+    }
+
+    stripped
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Like assert_eq!, but the left expression is an iterator.
+ macro_rules! assert_iter_eq {
+ ($left:expr, $right:expr) => {
+ assert_eq!($left.collect::<Vec<_>>(), $right);
+ };
+ }
+
+ #[test]
+ fn ascii_space_empty() {
+ assert_iter_eq!(AsciiSpace.find_words(""), vec![]);
+ }
+
+ #[test]
+ fn ascii_space_single_word() {
+ assert_iter_eq!(AsciiSpace.find_words("foo"), vec![Word::from("foo")]);
+ }
+
+ #[test]
+ fn ascii_space_two_words() {
+ assert_iter_eq!(
+ AsciiSpace.find_words("foo bar"),
+ vec![Word::from("foo "), Word::from("bar")]
+ );
+ }
+
+ #[test]
+ fn ascii_space_multiple_words() {
+ assert_iter_eq!(
+ AsciiSpace.find_words("foo bar baz"),
+ vec![Word::from("foo "), Word::from("bar "), Word::from("baz")]
+ );
+ }
+
+ #[test]
+ fn ascii_space_only_whitespace() {
+ assert_iter_eq!(AsciiSpace.find_words(" "), vec![Word::from(" ")]);
+ }
+
+ #[test]
+ fn ascii_space_inter_word_whitespace() {
+ assert_iter_eq!(
+ AsciiSpace.find_words("foo bar"),
+ vec![Word::from("foo "), Word::from("bar")]
+ )
+ }
+
+ #[test]
+ fn ascii_space_trailing_whitespace() {
+ assert_iter_eq!(AsciiSpace.find_words("foo "), vec![Word::from("foo ")]);
+ }
+
+ #[test]
+ fn ascii_space_leading_whitespace() {
+ assert_iter_eq!(
+ AsciiSpace.find_words(" foo"),
+ vec![Word::from(" "), Word::from("foo")]
+ );
+ }
+
+ #[test]
+ fn ascii_space_multi_column_char() {
+ assert_iter_eq!(
+ AsciiSpace.find_words("\u{1f920}"), // cowboy emoji 🤠
+ vec![Word::from("\u{1f920}")]
+ );
+ }
+
+ #[test]
+ fn ascii_space_hyphens() {
+ assert_iter_eq!(
+ AsciiSpace.find_words("foo-bar"),
+ vec![Word::from("foo-bar")]
+ );
+ assert_iter_eq!(
+ AsciiSpace.find_words("foo- bar"),
+ vec![Word::from("foo- "), Word::from("bar")]
+ );
+ assert_iter_eq!(
+ AsciiSpace.find_words("foo - bar"),
+ vec![Word::from("foo "), Word::from("- "), Word::from("bar")]
+ );
+ assert_iter_eq!(
+ AsciiSpace.find_words("foo -bar"),
+ vec![Word::from("foo "), Word::from("-bar")]
+ );
+ }
+
+ #[test]
+ #[cfg(unix)]
+ fn ascii_space_colored_text() {
+ use termion::color::{Blue, Fg, Green, Reset};
+
+ let green_hello = format!("{}Hello{} ", Fg(Green), Fg(Reset));
+ let blue_world = format!("{}World!{}", Fg(Blue), Fg(Reset));
+ assert_iter_eq!(
+ AsciiSpace.find_words(&format!("{}{}", green_hello, blue_world)),
+ vec![Word::from(&green_hello), Word::from(&blue_world)]
+ );
+
+ #[cfg(feature = "unicode-linebreak")]
+ assert_iter_eq!(
+ UnicodeBreakProperties.find_words(&format!("{}{}", green_hello, blue_world)),
+ vec![Word::from(&green_hello), Word::from(&blue_world)]
+ );
+ }
+
+ #[test]
+ fn ascii_space_color_inside_word() {
+ let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz";
+ assert_iter_eq!(AsciiSpace.find_words(&text), vec![Word::from(text)]);
+
+ #[cfg(feature = "unicode-linebreak")]
+ assert_iter_eq!(
+ UnicodeBreakProperties.find_words(&text),
+ vec![Word::from(text)]
+ );
+ }
+}
diff --git a/src/word_splitters.rs b/src/word_splitters.rs
new file mode 100644
index 0000000..f4d94c7
--- /dev/null
+++ b/src/word_splitters.rs
@@ -0,0 +1,311 @@
+//! Word splitting functionality.
+//!
+//! To wrap text into lines, long words sometimes need to be split
+//! across lines. The [`WordSplitter`] trait defines this
+//! functionality. [`HyphenSplitter`] is the default implementation of
+//! this trait: it will simply split words on existing hyphens.
+
+use std::ops::Deref;
+
+use crate::core::{display_width, Word};
+
+/// The `WordSplitter` trait describes where words can be split.
+///
+/// If the textwrap crate has been compiled with the `hyphenation`
+/// Cargo feature enabled, you will find an implementation of
+/// `WordSplitter` by the `hyphenation::Standard` struct. Use this
+/// struct for language-aware hyphenation:
+///
+/// ```
+/// #[cfg(feature = "hyphenation")]
+/// {
+/// use hyphenation::{Language, Load, Standard};
+/// use textwrap::{wrap, Options};
+///
+/// let text = "Oxidation is the loss of electrons.";
+/// let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+/// let options = Options::new(8).word_splitter(dictionary);
+/// assert_eq!(wrap(text, &options), vec!["Oxida-",
+/// "tion is",
+/// "the loss",
+/// "of elec-",
+/// "trons."]);
+/// }
+/// ```
+///
+/// Please see the documentation for the [hyphenation] crate for more
+/// details.
+///
+/// [hyphenation]: https://docs.rs/hyphenation/
+pub trait WordSplitter: WordSplitterClone + std::fmt::Debug {
+ /// Return all possible indices where `word` can be split.
+ ///
+ /// The indices returned must be in range `0..word.len()`. They
+ /// should point to the index _after_ the split point, i.e., after
+ /// `-` if splitting on hyphens. This way, `word.split_at(idx)`
+ /// will break the word into two well-formed pieces.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter};
+ /// assert_eq!(NoHyphenation.split_points("cannot-be-split"), vec![]);
+ /// assert_eq!(HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
+ /// ```
+ fn split_points(&self, word: &str) -> Vec<usize>;
+}
+
+// The internal `WordSplitterClone` trait allows us to implement
+// `Clone` for `Box<dyn WordSplitter>`. This is used in the
+// `From<&Options<'_, WrapAlgo, WordSep, WordSplit>> for Options<'a,
+// WrapAlgo, WordSep, WordSplit>` implementation.
+#[doc(hidden)]
+pub trait WordSplitterClone {
+ fn clone_box(&self) -> Box<dyn WordSplitter>;
+}
+
+impl<T: WordSplitter + Clone + 'static> WordSplitterClone for T {
+ fn clone_box(&self) -> Box<dyn WordSplitter> {
+ Box::new(self.clone())
+ }
+}
+
+impl Clone for Box<dyn WordSplitter> {
+ fn clone(&self) -> Box<dyn WordSplitter> {
+ self.deref().clone_box()
+ }
+}
+
+impl WordSplitter for Box<dyn WordSplitter> {
+ fn split_points(&self, word: &str) -> Vec<usize> {
+ self.deref().split_points(word)
+ }
+}
+
+/// Use this as a [`Options.word_splitter`] to avoid any kind of
+/// hyphenation:
+///
+/// ```
+/// use textwrap::{wrap, Options};
+/// use textwrap::word_splitters::NoHyphenation;
+///
+/// let options = Options::new(8).word_splitter(NoHyphenation);
+/// assert_eq!(wrap("foo bar-baz", &options),
+/// vec!["foo", "bar-baz"]);
+/// ```
+///
+/// [`Options.word_splitter`]: super::Options::word_splitter
+#[derive(Clone, Copy, Debug)]
+pub struct NoHyphenation;
+
+/// `NoHyphenation` implements `WordSplitter` by not splitting the
+/// word at all.
+impl WordSplitter for NoHyphenation {
+ fn split_points(&self, _: &str) -> Vec<usize> {
+ Vec::new()
+ }
+}
+
+/// Simple and default way to split words: splitting on existing
+/// hyphens only.
+///
+/// You probably don't need to use this type since it's already used
+/// by default by [`Options::new`](super::Options::new).
+#[derive(Clone, Copy, Debug)]
+pub struct HyphenSplitter;
+
+/// `HyphenSplitter` is the default `WordSplitter` used by
+/// [`Options::new`](super::Options::new). It will split words on any
+/// existing hyphens in the word.
+///
+/// It will only use hyphens that are surrounded by alphanumeric
+/// characters, which prevents a word like `"--foo-bar"` from being
+/// split into `"--"` and `"foo-bar"`.
+impl WordSplitter for HyphenSplitter {
+ fn split_points(&self, word: &str) -> Vec<usize> {
+ let mut splits = Vec::new();
+
+ for (idx, _) in word.match_indices('-') {
+ // We only use hyphens that are surrounded by alphanumeric
+ // characters. This is to avoid splitting on repeated hyphens,
+ // such as those found in --foo-bar.
+ let prev = word[..idx].chars().next_back();
+ let next = word[idx + 1..].chars().next();
+
+ if prev.filter(|ch| ch.is_alphanumeric()).is_some()
+ && next.filter(|ch| ch.is_alphanumeric()).is_some()
+ {
+ splits.push(idx + 1); // +1 due to width of '-'.
+ }
+ }
+
+ splits
+ }
+}
+
+/// A hyphenation dictionary can be used to do language-specific
+/// hyphenation using patterns from the [hyphenation] crate.
+///
+/// **Note:** Only available when the `hyphenation` Cargo feature is
+/// enabled.
+///
+/// [hyphenation]: https://docs.rs/hyphenation/
+#[cfg(feature = "hyphenation")]
+impl WordSplitter for hyphenation::Standard {
+ fn split_points(&self, word: &str) -> Vec<usize> {
+ use hyphenation::Hyphenator;
+ self.hyphenate(word).breaks
+ }
+}
+
+/// Split words into smaller words according to the split points given
+/// by `word_splitter`.
+///
+/// Note that we split all words, regardless of their length. This is
+/// to more cleanly separate the business of splitting (including
+/// automatic hyphenation) from the business of word wrapping.
+///
+/// # Examples
+///
+/// ```
+/// use textwrap::core::Word;
+/// use textwrap::word_splitters::{split_words, NoHyphenation, HyphenSplitter};
+///
+/// assert_eq!(
+/// split_words(vec![Word::from("foo-bar")], &HyphenSplitter).collect::<Vec<_>>(),
+/// vec![Word::from("foo-"), Word::from("bar")]
+/// );
+///
+/// // The NoHyphenation splitter ignores the '-':
+/// assert_eq!(
+/// split_words(vec![Word::from("foo-bar")], &NoHyphenation).collect::<Vec<_>>(),
+/// vec![Word::from("foo-bar")]
+/// );
+/// ```
+pub fn split_words<'a, I, WordSplit>(
+ words: I,
+ word_splitter: &'a WordSplit,
+) -> impl Iterator<Item = Word<'a>>
+where
+ I: IntoIterator<Item = Word<'a>>,
+ WordSplit: WordSplitter,
+{
+ words.into_iter().flat_map(move |word| {
+ let mut prev = 0;
+ let mut split_points = word_splitter.split_points(&word).into_iter();
+ std::iter::from_fn(move || {
+ if let Some(idx) = split_points.next() {
+ let need_hyphen = !word[..idx].ends_with('-');
+ let w = Word {
+ word: &word.word[prev..idx],
+ width: display_width(&word[prev..idx]),
+ whitespace: "",
+ penalty: if need_hyphen { "-" } else { "" },
+ };
+ prev = idx;
+ return Some(w);
+ }
+
+ if prev < word.word.len() || prev == 0 {
+ let w = Word {
+ word: &word.word[prev..],
+ width: display_width(&word[prev..]),
+ whitespace: word.whitespace,
+ penalty: word.penalty,
+ };
+ prev = word.word.len() + 1;
+ return Some(w);
+ }
+
+ None
+ })
+ })
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Like assert_eq!, but the left expression is an iterator.
+ macro_rules! assert_iter_eq {
+ ($left:expr, $right:expr) => {
+ assert_eq!($left.collect::<Vec<_>>(), $right);
+ };
+ }
+
+ #[test]
+ fn split_words_no_words() {
+ assert_iter_eq!(split_words(vec![], &HyphenSplitter), vec![]);
+ }
+
+ #[test]
+ fn split_words_empty_word() {
+ assert_iter_eq!(
+ split_words(vec![Word::from(" ")], &HyphenSplitter),
+ vec![Word::from(" ")]
+ );
+ }
+
+ #[test]
+ fn split_words_single_word() {
+ assert_iter_eq!(
+ split_words(vec![Word::from("foobar")], &HyphenSplitter),
+ vec![Word::from("foobar")]
+ );
+ }
+
+ #[test]
+ fn split_words_hyphen_splitter() {
+ assert_iter_eq!(
+ split_words(vec![Word::from("foo-bar")], &HyphenSplitter),
+ vec![Word::from("foo-"), Word::from("bar")]
+ );
+ }
+
+ #[test]
+ fn split_words_adds_penalty() {
+ #[derive(Clone, Debug)]
+ struct FixedSplitPoint;
+ impl WordSplitter for FixedSplitPoint {
+ fn split_points(&self, _: &str) -> Vec<usize> {
+ vec![3]
+ }
+ }
+
+ assert_iter_eq!(
+ split_words(vec![Word::from("foobar")].into_iter(), &FixedSplitPoint),
+ vec![
+ Word {
+ word: "foo",
+ width: 3,
+ whitespace: "",
+ penalty: "-"
+ },
+ Word {
+ word: "bar",
+ width: 3,
+ whitespace: "",
+ penalty: ""
+ }
+ ]
+ );
+
+ assert_iter_eq!(
+ split_words(vec![Word::from("fo-bar")].into_iter(), &FixedSplitPoint),
+ vec![
+ Word {
+ word: "fo-",
+ width: 3,
+ whitespace: "",
+ penalty: ""
+ },
+ Word {
+ word: "bar",
+ width: 3,
+ whitespace: "",
+ penalty: ""
+ }
+ ]
+ );
+ }
+}
diff --git a/src/wrap_algorithms.rs b/src/wrap_algorithms.rs
new file mode 100644
index 0000000..368ef2a
--- /dev/null
+++ b/src/wrap_algorithms.rs
@@ -0,0 +1,257 @@
+//! Word wrapping algorithms.
+//!
+//! After a text has been broken into words (or [`Fragment`]s), one
+//! now has to decide how to break the fragments into lines. The
+//! simplest algorithm for this is implemented by [`wrap_first_fit`]:
+//! it uses no look-ahead and simply adds fragments to the line as
+//! long as they fit. However, this can lead to poor line breaks if a
+//! large fragment almost-but-not-quite fits on a line. When that
+//! happens, the fragment is moved to the next line and it will leave
+//! behind a large gap. A more advanced algorithm, implemented by
+//! [`wrap_optimal_fit`], will take this into account. The optimal-fit
+//! algorithm considers all possible line breaks and will attempt to
+//! minimize the gaps left behind by overly short lines.
+//!
+//! While both algorithms run in linear time, the first-fit algorithm
+//! is about 4 times faster than the optimal-fit algorithm.
+
+#[cfg(feature = "smawk")]
+mod optimal_fit;
+#[cfg(feature = "smawk")]
+pub use optimal_fit::{wrap_optimal_fit, OptimalFit};
+
+use crate::core::{Fragment, Word};
+
+/// Describes how to wrap words into lines.
+///
+/// The simplest approach is to wrap words one word at a time. This is
+/// implemented by [`FirstFit`]. If the `smawk` Cargo feature is
+/// enabled, a more complex algorithm is available, implemented by
+/// [`OptimalFit`], which will look at an entire paragraph at a time
+/// in order to find optimal line breaks.
+pub trait WrapAlgorithm: WrapAlgorithmClone + std::fmt::Debug {
+ /// Wrap words according to line widths.
+ ///
+ /// The `line_widths` slice gives the target line width for each
+ /// line (the last slice element is repeated as necessary). This
+ /// can be used to implement hanging indentation.
+ ///
+ /// Please see the implementors of the trait for examples.
+ fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>;
+}
+
+// The internal `WrapAlgorithmClone` trait allows us to implement
+// `Clone` for `Box<dyn WrapAlgorithm>`. This is used in the
+// `From<&Options<'_, WrapAlgo, WordSep, WordSplit>> for Options<'a,
+// WrapAlgo, WordSep, WordSplit>` implementation.
+#[doc(hidden)]
+pub trait WrapAlgorithmClone {
+ fn clone_box(&self) -> Box<dyn WrapAlgorithm>;
+}
+
+impl<T: WrapAlgorithm + Clone + 'static> WrapAlgorithmClone for T {
+ fn clone_box(&self) -> Box<dyn WrapAlgorithm> {
+ Box::new(self.clone())
+ }
+}
+
+impl Clone for Box<dyn WrapAlgorithm> {
+ fn clone(&self) -> Box<dyn WrapAlgorithm> {
+ use std::ops::Deref;
+ self.deref().clone_box()
+ }
+}
+
+impl WrapAlgorithm for Box<dyn WrapAlgorithm> {
+ fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> {
+ use std::ops::Deref;
+ self.deref().wrap(words, line_widths)
+ }
+}
+
+/// Wrap words using a fast and simple algorithm.
+///
+/// This algorithm uses no look-ahead when finding line breaks.
+/// Implemented by [`wrap_first_fit`], please see that function for
+/// details and examples.
+#[derive(Clone, Copy, Debug, Default)]
+pub struct FirstFit;
+
+impl WrapAlgorithm for FirstFit {
+ #[inline]
+ fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> {
+ wrap_first_fit(words, line_widths)
+ }
+}
+
+/// Wrap abstract fragments into lines with a first-fit algorithm.
+///
+/// The `line_widths` slice gives the target line width for each line
+/// (the last slice element is repeated as necessary). This can be
+/// used to implement hanging indentation.
+///
+/// The fragments must already have been split into the desired
+/// widths, this function will not (and cannot) attempt to split them
+/// further when arranging them into lines.
+///
+/// # First-Fit Algorithm
+///
+/// This implements a simple “greedy” algorithm: accumulate fragments
+/// one by one and when a fragment no longer fits, start a new line.
+/// There is no look-ahead; we simply take the first fit of the fragments
+/// we find.
+///
+/// While fast and predictable, this algorithm can produce poor line
+/// breaks when a long fragment is moved to a new line, leaving behind
+/// a large gap:
+///
+/// ```
+/// use textwrap::core::Word;
+/// use textwrap::wrap_algorithms;
+/// use textwrap::word_separators::{AsciiSpace, WordSeparator};
+///
+/// // Helper to convert wrapped lines to a Vec<String>.
+/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
+/// lines.iter().map(|line| {
+/// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
+/// }).collect::<Vec<_>>()
+/// }
+///
+/// let text = "These few words will unfortunately not wrap nicely.";
+/// let words = AsciiSpace.find_words(text).collect::<Vec<_>>();
+/// assert_eq!(lines_to_strings(wrap_algorithms::wrap_first_fit(&words, &[15])),
+/// vec!["These few words",
+/// "will", // <-- short line
+/// "unfortunately",
+/// "not wrap",
+/// "nicely."]);
+///
+/// // We can avoid the short line if we look ahead:
+/// #[cfg(feature = "smawk")]
+/// assert_eq!(lines_to_strings(wrap_algorithms::wrap_optimal_fit(&words, &[15])),
+/// vec!["These few",
+/// "words will",
+/// "unfortunately",
+/// "not wrap",
+/// "nicely."]);
+/// ```
+///
+/// The [`wrap_optimal_fit`] function was used above to get better
+/// line breaks. It uses an advanced algorithm which tries to avoid
+/// short lines. By comparison, first-fit is about 4 times faster than
+/// [`wrap_optimal_fit`].
+///
+/// # Examples
+///
+/// Imagine you're building a house site and you have a number of
+/// tasks you need to execute. Things like pour foundation, complete
+/// framing, install plumbing, electric cabling, install insulation.
+///
+/// The construction workers can only work during daytime, so they
+/// need to pack up everything at night. Because they need to secure
+/// their tools and move machines back to the garage, this process
+/// takes much more time than the time it would take them to simply
+/// switch to another task.
+///
+/// You would like to make a list of tasks to execute every day based
+/// on your estimates. You can model this with a program like this:
+///
+/// ```
+/// use textwrap::wrap_algorithms::wrap_first_fit;
+/// use textwrap::core::{Fragment, Word};
+///
+/// #[derive(Debug)]
+/// struct Task<'a> {
+/// name: &'a str,
+/// hours: usize, // Time needed to complete task.
+/// sweep: usize, // Time needed for a quick sweep after task during the day.
+/// cleanup: usize, // Time needed for full cleanup if day ends with this task.
+/// }
+///
+/// impl Fragment for Task<'_> {
+/// fn width(&self) -> usize { self.hours }
+/// fn whitespace_width(&self) -> usize { self.sweep }
+/// fn penalty_width(&self) -> usize { self.cleanup }
+/// }
+///
+/// // The morning tasks
+/// let tasks = vec![
+/// Task { name: "Foundation", hours: 4, sweep: 2, cleanup: 3 },
+/// Task { name: "Framing", hours: 3, sweep: 1, cleanup: 2 },
+/// Task { name: "Plumbing", hours: 2, sweep: 2, cleanup: 2 },
+/// Task { name: "Electrical", hours: 2, sweep: 1, cleanup: 2 },
+/// Task { name: "Insulation", hours: 2, sweep: 1, cleanup: 2 },
+/// Task { name: "Drywall", hours: 3, sweep: 1, cleanup: 2 },
+/// Task { name: "Floors", hours: 3, sweep: 1, cleanup: 2 },
+/// Task { name: "Countertops", hours: 1, sweep: 1, cleanup: 2 },
+/// Task { name: "Bathrooms", hours: 2, sweep: 1, cleanup: 2 },
+/// ];
+///
+/// // Fill tasks into days, taking `day_length` into account. The
+/// // output shows the hours worked per day along with the names of
+/// // the tasks for that day.
+/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: usize) -> Vec<(usize, Vec<&'a str>)> {
+/// let mut days = Vec::new();
+/// // Assign tasks to days. The assignment is a vector of slices,
+/// // with a slice per day.
+/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
+/// for day in assigned_days.iter() {
+/// let last = day.last().unwrap();
+/// let work_hours: usize = day.iter().map(|t| t.hours + t.sweep).sum();
+/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
+/// days.push((work_hours - last.sweep + last.cleanup, names));
+/// }
+/// days
+/// }
+///
+/// // With a single crew working 8 hours a day:
+/// assert_eq!(
+/// assign_days(&tasks, 8),
+/// [
+/// (7, vec!["Foundation"]),
+/// (8, vec!["Framing", "Plumbing"]),
+/// (7, vec!["Electrical", "Insulation"]),
+/// (5, vec!["Drywall"]),
+/// (7, vec!["Floors", "Countertops"]),
+/// (4, vec!["Bathrooms"]),
+/// ]
+/// );
+///
+/// // With two crews working in shifts, 16 hours a day:
+/// assert_eq!(
+/// assign_days(&tasks, 16),
+/// [
+/// (14, vec!["Foundation", "Framing", "Plumbing"]),
+/// (15, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
+/// (6, vec!["Countertops", "Bathrooms"]),
+/// ]
+/// );
+/// ```
+///
+/// Apologies to anyone who actually knows how to build a house and
+/// knows how long each step takes :-)
+pub fn wrap_first_fit<'a, 'b, T: Fragment>(
+ fragments: &'a [T],
+ line_widths: &'b [usize],
+) -> Vec<&'a [T]> {
+ // The final line width is used for all remaining lines.
+ let default_line_width = line_widths.last().copied().unwrap_or(0);
+ let mut lines = Vec::new();
+ let mut start = 0;
+ let mut width = 0;
+
+ for (idx, fragment) in fragments.iter().enumerate() {
+ let line_width = line_widths
+ .get(lines.len())
+ .copied()
+ .unwrap_or(default_line_width);
+ if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
+ lines.push(&fragments[start..idx]);
+ start = idx;
+ width = 0;
+ }
+ width += fragment.width() + fragment.whitespace_width();
+ }
+ lines.push(&fragments[start..]);
+ lines
+}
diff --git a/src/core/optimal_fit.rs b/src/wrap_algorithms/optimal_fit.rs
index c18b974..95ecf1f 100644
--- a/src/core/optimal_fit.rs
+++ b/src/wrap_algorithms/optimal_fit.rs
@@ -1,6 +1,26 @@
-use crate::core::Fragment;
use std::cell::RefCell;
+use crate::core::{Fragment, Word};
+use crate::wrap_algorithms::WrapAlgorithm;
+
+/// Wrap words using an advanced algorithm with look-ahead.
+///
+/// This wrapping algorithm considers the entire paragraph to find
+/// optimal line breaks. Implemented by [`wrap_optimal_fit`], please
+/// see that function for details and examples.
+///
+/// **Note:** Only available when the `smawk` Cargo feature is
+/// enabled.
+#[derive(Clone, Copy, Debug, Default)]
+pub struct OptimalFit;
+
+impl WrapAlgorithm for OptimalFit {
+ #[inline]
+ fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> {
+ wrap_optimal_fit(words, line_widths)
+ }
+}
+
/// Cache for line numbers. This is necessary to avoid a O(n**2)
/// behavior when computing line numbers in [`wrap_optimal_fit`].
struct LineNumbers {
@@ -39,27 +59,28 @@ const NLINE_PENALTY: i32 = 1000;
/// overflow the line by 1 character in extreme cases:
///
/// ```
-/// use textwrap::core::{wrap_optimal_fit, Word};
+/// use textwrap::wrap_algorithms::wrap_optimal_fit;
+/// use textwrap::core::Word;
///
/// let short = "foo ";
/// let long = "x".repeat(50);
/// let fragments = vec![Word::from(short), Word::from(&long)];
///
/// // Perfect fit, both words are on a single line with no overflow.
-/// let wrapped = wrap_optimal_fit(&fragments, |_| short.len() + long.len());
+/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len()]);
/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
///
/// // The words no longer fit, yet we get a single line back. While
/// // the cost of overflow (`1 * 2500`) is the same as the cost of the
/// // gap (`50 * 50 = 2500`), the tie is broken by `NLINE_PENALTY`
/// // which makes it cheaper to overflow than to use two lines.
-/// let wrapped = wrap_optimal_fit(&fragments, |_| short.len() + long.len() - 1);
+/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 1]);
/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
///
/// // The cost of overflow would be 2 * 2500, whereas the cost of the
/// // gap is only `49 * 49 + NLINE_PENALTY = 2401 + 1000 = 3401`. We
/// // therefore get two lines.
-/// let wrapped = wrap_optimal_fit(&fragments, |_| short.len() + long.len() - 2);
+/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 2]);
/// assert_eq!(wrapped, vec![&[Word::from(short)],
/// &[Word::from(&long)]]);
/// ```
@@ -81,8 +102,9 @@ const HYPHEN_PENALTY: i32 = 25;
/// Wrap abstract fragments into lines with an optimal-fit algorithm.
///
-/// The `line_widths` map line numbers (starting from 0) to a target
-/// line width. This can be used to implement hanging indentation.
+/// The `line_widths` slice gives the target line width for each line
+/// (the last slice element is repeated as necessary). This can be
+/// used to implement hanging indentation.
///
/// The fragments must already have been split into the desired
/// widths, this function will not (and cannot) attempt to split them
@@ -153,10 +175,12 @@ const HYPHEN_PENALTY: i32 = 25;
///
/// **Note:** Only available when the `smawk` Cargo feature is
/// enabled.
-pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
+pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
fragments: &'a [T],
- line_widths: F,
+ line_widths: &'b [usize],
) -> Vec<&'a [T]> {
+ // The final line width is used for all remaining lines.
+ let default_line_width = line_widths.last().copied().unwrap_or(0);
let mut widths = Vec::with_capacity(fragments.len() + 1);
let mut width = 0;
widths.push(width);
@@ -170,7 +194,11 @@ pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
let minima = smawk::online_column_minima(0, widths.len(), |minima, i, j| {
// Line number for fragment `i`.
let line_number = line_numbers.get(i, &minima);
- let target_width = std::cmp::max(1, line_widths(line_number));
+ let line_width = line_widths
+ .get(line_number)
+ .copied()
+ .unwrap_or(default_line_width);
+ let target_width = std::cmp::max(1, line_width);
// Compute the width of a line spanning fragments[i..j] in
// constant time. We need to adjust widths[j] by subtracting
diff --git a/tests/traits.rs b/tests/traits.rs
new file mode 100644
index 0000000..cd0d73c
--- /dev/null
+++ b/tests/traits.rs
@@ -0,0 +1,86 @@
+use textwrap::word_separators::{AsciiSpace, WordSeparator};
+use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter};
+use textwrap::wrap_algorithms::{FirstFit, WrapAlgorithm};
+use textwrap::Options;
+
+/// Cleaned up type name.
+fn type_name<T: ?Sized>(_val: &T) -> String {
+ std::any::type_name::<T>().replace("alloc::boxed::Box", "Box")
+}
+
+#[test]
+#[cfg(not(feature = "smawk"))]
+#[cfg(not(feature = "unicode-linebreak"))]
+fn static_hyphensplitter() {
+ // Inferring the full type.
+ let options = Options::new(10);
+ assert_eq!(
+ type_name(&options),
+ format!(
+ "textwrap::Options<{}, {}, {}>",
+ "textwrap::wrap_algorithms::FirstFit",
+ "textwrap::word_separators::AsciiSpace",
+ "textwrap::word_splitters::HyphenSplitter"
+ )
+ );
+
+ // Inferring part of the type.
+ let options: Options<_, _, HyphenSplitter> = Options::new(10);
+ assert_eq!(
+ type_name(&options),
+ format!(
+ "textwrap::Options<{}, {}, {}>",
+ "textwrap::wrap_algorithms::FirstFit",
+ "textwrap::word_separators::AsciiSpace",
+ "textwrap::word_splitters::HyphenSplitter"
+ )
+ );
+
+ // Explicitly making all parameters inferred.
+ let options: Options<_, _, _> = Options::new(10);
+ assert_eq!(
+ type_name(&options),
+ format!(
+ "textwrap::Options<{}, {}, {}>",
+ "textwrap::wrap_algorithms::FirstFit",
+ "textwrap::word_separators::AsciiSpace",
+ "textwrap::word_splitters::HyphenSplitter"
+ )
+ );
+}
+
+#[test]
+fn box_static_nohyphenation() {
+ // Inferred static type.
+ let options = Options::new(10)
+ .wrap_algorithm(Box::new(FirstFit))
+ .word_splitter(Box::new(NoHyphenation))
+ .word_separator(Box::new(AsciiSpace));
+ assert_eq!(
+ type_name(&options),
+ format!(
+ "textwrap::Options<{}, {}, {}>",
+ "Box<textwrap::wrap_algorithms::FirstFit>",
+ "Box<textwrap::word_separators::AsciiSpace>",
+ "Box<textwrap::word_splitters::NoHyphenation>"
+ )
+ );
+}
+
+#[test]
+fn box_dyn_wordsplitter() {
+ // Inferred dynamic type due to default type parameter.
+ let options = Options::new(10)
+ .wrap_algorithm(Box::new(FirstFit) as Box<dyn WrapAlgorithm>)
+ .word_splitter(Box::new(HyphenSplitter) as Box<dyn WordSplitter>)
+ .word_separator(Box::new(AsciiSpace) as Box<dyn WordSeparator>);
+ assert_eq!(
+ type_name(&options),
+ format!(
+ "textwrap::Options<{}, {}, {}>",
+ "Box<dyn textwrap::wrap_algorithms::WrapAlgorithm>",
+ "Box<dyn textwrap::word_separators::WordSeparator>",
+ "Box<dyn textwrap::word_splitters::WordSplitter>"
+ )
+ );
+}