aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThiébaud Weksteen <tweek@google.com>2021-02-22 18:12:09 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2021-02-22 18:12:09 +0000
commit382b3288f1929910982663b3c3cebc99e30cff9e (patch)
tree96256807df9b158be38b92fc46b59521ec90d492
parent40b242c1327400fbe94f7613e6619843f4f83bdb (diff)
parent4b1f9eeaa187570bb346ee070c959220d0590b13 (diff)
downloadtextwrap-382b3288f1929910982663b3c3cebc99e30cff9e.tar.gz
Update to 0.13.3 am: 1d19096d00 am: 4b1f9eeaa1
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/textwrap/+/1598753 MUST ONLY BE SUBMITTED BY AUTOMERGER Change-Id: Id48fda3a0924f03512bf0208649857a2ad4ee599
-rw-r--r--.appveyor.yml15
-rw-r--r--.cargo_vcs_info.json2
-rw-r--r--.circleci/config.yml18
-rw-r--r--.codecov.yml13
-rw-r--r--.travis.yml15
-rw-r--r--Android.bp26
-rw-r--r--CHANGELOG.md227
-rw-r--r--Cargo.toml38
-rw-r--r--METADATA12
-rw-r--r--README.md214
-rw-r--r--TEST_MAPPING8
-rw-r--r--benches/linear.rs114
-rw-r--r--examples/hyphenation.rs17
-rw-r--r--examples/layout.rs34
-rw-r--r--examples/termwidth.rs37
-rw-r--r--src/core.rs896
-rw-r--r--src/core/optimal_fit.rs228
-rw-r--r--src/indentation.rs233
-rw-r--r--src/lib.rs2014
-rw-r--r--src/splitting.rs170
-rw-r--r--tests/indent.rs88
-rw-r--r--tests/version-numbers.rs7
22 files changed, 3170 insertions, 1256 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
deleted file mode 100644
index 10bc961..0000000
--- a/.appveyor.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-environment:
- matrix:
- - TOOLCHAIN: stable
- - TOOLCHAIN: nightly
-
-install:
- - ps: Start-FileDownload 'https://static.rust-lang.org/rustup/dist/i686-pc-windows-gnu/rustup-init.exe'
- - rustup-init.exe -y --profile minimal --default-toolchain %TOOLCHAIN%
- - set PATH=%PATH%;%USERPROFILE%\.cargo\bin
-
-build_script:
- - cargo build --all-features
-
-test_script:
- - cargo test --all-features
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index fdd0ea1..3e4b610 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
{
"git": {
- "sha1": "b2247874f041dd601dd3ee28c3dcdfa912da2646"
+ "sha1": "ad143f1be460a4bab07a6ad5d0f408c1cbb50ac7"
}
}
diff --git a/.circleci/config.yml b/.circleci/config.yml
deleted file mode 100644
index b3d23b1..0000000
--- a/.circleci/config.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-version: 2
-jobs:
- build:
- machine: true
- steps:
- - checkout
- - run:
- name: Pull xd009642/tarpaulin
- command: docker pull xd009642/tarpaulin
- - run:
- name: Generate coverage report
- command: >-
- docker run --security-opt seccomp=unconfined
- -v $PWD:/volume xd009642/tarpaulin
- cargo tarpaulin --out Xml --all-features
- - run:
- name: Upload to codecov.io
- command: bash <(curl -s https://codecov.io/bash) -Z -f cobertura.xml
diff --git a/.codecov.yml b/.codecov.yml
deleted file mode 100644
index a9b0bd2..0000000
--- a/.codecov.yml
+++ /dev/null
@@ -1,13 +0,0 @@
-codecov:
- # Do not wait for these CI providers since they will not upload any
- # coverage reports.
- ci:
- - !appveyor
- - !travis
-
-coverage:
- status:
- project:
- default:
- # Allow a 5% drop in overall project coverage on a PR.
- threshold: 5%
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 71b1f02..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-language: rust
-
-rust:
- - stable
- - nightly
-
-install:
- - cargo build --all-features --tests --examples
- - >
- if [[ $TRAVIS_RUST_VERSION == "nightly" ]]; then
- cargo build --all-features --benches
- fi
-
-script:
- - cargo test --all-features
diff --git a/Android.bp b/Android.bp
index a258f60..f879002 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,21 +1,5 @@
-// This file is generated by cargo2android.py --run --dependencies --device.
-
-package {
- default_applicable_licenses: ["external_rust_crates_textwrap_license"],
-}
-
-// Added automatically by a large-scale-change
-// See: http://go/android-license-faq
-license {
- name: "external_rust_crates_textwrap_license",
- visibility: [":__subpackages__"],
- license_kinds: [
- "SPDX-license-identifier-MIT",
- ],
- license_text: [
- "LICENSE",
- ],
-}
+// This file is generated by cargo2android.py --run --dependencies --device --features=.
+// Do not modify this file as changes will be overridden on upgrade.
rust_library {
name: "libtextwrap",
@@ -23,10 +7,4 @@ rust_library {
crate_name: "textwrap",
srcs: ["src/lib.rs"],
edition: "2018",
- rustlibs: [
- "libunicode_width",
- ],
}
-
-// dependent_library ["feature_list"]
-// unicode-width-0.1.8 "default"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6527784..05d22af 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,207 @@
This file lists the most important changes made in each release of
`textwrap`.
-## Version 0.12.1 — July 3rd, 2020
+## Version 0.13.3 (2021-02-20)
+
+This release contains a bugfix for `indent` and improved handling of
+emojis. We’ve also added a new function for formatting text in columns
+and functions for reformatting already wrapped text.
+
+* [#276](https://github.com/mgeisler/textwrap/pull/276): Extend
+ `core::display_width` to handle emojis when the unicode-width Cargo
+ feature is disabled.
+* [#279](https://github.com/mgeisler/textwrap/pull/279): Make `indent`
+ preserve existing newlines in the input string.
+* [#281](https://github.com/mgeisler/textwrap/pull/281): Ensure all
+ `Options` fields have examples.
+* [#282](https://github.com/mgeisler/textwrap/pull/282): Add a
+ `wrap_columns` function.
+* [#294](https://github.com/mgeisler/textwrap/pull/294): Add new
+ `unfill` and `refill` functions.
+
+## Version 0.13.2 (2020-12-30)
+
+This release primarily makes all dependencies optional. This makes it
+possible to slim down textwrap as needed.
+
+* [#254](https://github.com/mgeisler/textwrap/pull/254): `impl
+ WordSplitter` for `Box<T> where T: WordSplitter`.
+* [#255](https://github.com/mgeisler/textwrap/pull/255): Use command
+ line arguments as initial text in interactive example.
+* [#256](https://github.com/mgeisler/textwrap/pull/256): Introduce
+ fuzz tests for `wrap_optimal_fit` and `wrap_first_fit`.
+* [#260](https://github.com/mgeisler/textwrap/pull/260): Make the
+ unicode-width dependency optional.
+* [#261](https://github.com/mgeisler/textwrap/pull/261): Make the
+ smawk dependency optional.
+
+## Version 0.13.1 (2020-12-10)
+
+This is a bugfix release which fixes a regression in 0.13.0. The bug
+meant that colored text was wrapped incorrectly.
+
+* [#245](https://github.com/mgeisler/textwrap/pull/245): Support
+ deleting a word with Ctrl-Backspace in the interactive demo.
+* [#246](https://github.com/mgeisler/textwrap/pull/246): Show build
+ type (debug/release) in interactive demo.
+* [#249](https://github.com/mgeisler/textwrap/pull/249): Correctly
+ compute width while skipping over ANSI escape sequences.
+
+## Version 0.13.0 (2020-12-05)
+
+This is a major release which rewrites the core logic, adds many new
+features, and fixes a couple of bugs. Most programs which use
+`textwrap` stay the same; incompatibilities and upgrade notes are
+given below.
+
+Clone the repository and run the following to explore the new features
+in an interactive demo (Linux only):
+
+```sh
+$ cargo run --example interactive --all-features
+```
+
+### Bug Fixes
+
+#### Rewritten core wrapping algorithm
+
+* [#221](https://github.com/mgeisler/textwrap/pull/221): Reformulate
+ wrapping in terms of words with whitespace and penalties.
+
+The core wrapping algorithm has been completely rewritten. This fixed
+bugs and simplified the code, while also making it possible to use
+`textwrap` outside the context of the terminal.
+
+As part of this, trailing whitespace is now discarded consistently
+from wrapped lines. Before we would inconsistently remove whitespace
+at the end of wrapped lines, except for the last. Leading whitespace
+is still preserved.
+
+### New Features
+
+#### Optimal-fit wrapping
+
+* [#234](https://github.com/mgeisler/textwrap/pull/234): Introduce
+ wrapping using an optimal-fit algorithm.
+
+This release adds support for a new wrapping algorithm which finds a
+globally optimal set of line breaks, taking certain penalties into
+account. As an example, the old algorithm would produce
+
+ "To be, or"
+ "not to be:"
+ "that is"
+ "the"
+ "question"
+
+Notice how the fourth line with “the” is very short. The new algorithm
+shortens the previous lines slightly to produce fewer short lines:
+
+ "To be,"
+ "or not to"
+ "be: that"
+ "is the"
+ "question"
+
+Use the new `textwrap::core::WrapAlgorithm` enum to select between the
+new and old algorithm. By default, the new algorithm is used.
+
+The optimal-fit algorithm is inspired by the line breaking algorithm
+used in TeX, described in the 1981 article [_Breaking Paragraphs into
+Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf) by
+Knuth and Plass.
+
+#### In-place wrapping
+
+* [#226](https://github.com/mgeisler/textwrap/pull/226): Add a
+ `fill_inplace` function.
+
+When the text you want to fill is already a temporary `String`, you
+can now mutate it in-place with `fill_inplace`:
+
+```rust
+let mut greeting = format!("Greetings {}, welcome to the game! You have {} lives left.",
+ player.name, player.lives);
+fill_inplace(&mut greeting, line_width);
+```
+
+This is faster than calling `fill` and it will reuse the memory
+already allocated for the string.
+
+### Changed Features
+
+#### `Wrapper` is replaced with `Options`
+
+* [#213](https://github.com/mgeisler/textwrap/pull/213): Simplify API
+ with only top-level functions.
+* [#215](https://github.com/mgeisler/textwrap/pull/215): Reintroducing
+ the type parameter on `Options` (previously known as `Wrapper`).
+* [#219](https://github.com/mgeisler/textwrap/pull/219): Allow using
+ trait objects with `fill` & `wrap`.
+* [#227](https://github.com/mgeisler/textwrap/pull/227): Replace
+ `WrapOptions` with `Into<Options>`.
+
+The `Wrapper` struct held the options (line width, indentation, etc)
+for wrapping text. It was also the entry point for actually wrapping
+the text via its methods such as `wrap`, `wrap_iter`,
+`into_wrap_iter`, and `fill` methods.
+
+The struct has been replaced by a simpler `Options` struct which only
+holds options. The `Wrapper` methods are gone, their job has been
+taken over by the top-level `wrap` and `fill` functions. The signatures
+of these functions have changed from
+
+```rust
+fn fill(s: &str, width: usize) -> String;
+
+fn wrap(s: &str, width: usize) -> Vec<Cow<'_, str>>;
+```
+
+to the more general
+
+```rust
+fn fill<'a, S, Opt>(text: &str, options: Opt) -> String
+where
+ S: WordSplitter,
+ Opt: Into<Options<'a, S>>;
+
+fn wrap<'a, S, Opt>(text: &str, options: Opt) -> Vec<Cow<'_, str>>
+where
+ S: WordSplitter,
+ Opt: Into<Options<'a, S>>;
+```
+
+The `Into<Options<'a, S>>` bound allows you to pass a `usize` (which
+is interpreted as the line width) *and* a full `Options` object. This
+allows the new functions to work like the old, plus you can now fully
+customize the behavior of the wrapping via `Options` when needed.
+
+Code that calls `textwrap::wrap` or `textwrap::fill` can remain
+unchanged. Code that calls into `Wrapper::wrap` or `Wrapper::fill`
+will need to be updated. This is a mechanical change; please see
+[#213](https://github.com/mgeisler/textwrap/pull/213) for examples.
+
+Thanks to @CryptJar and @Koxiat for their support in the PRs above!
+
+### Removed Features
+
+* The `wrap_iter` and `into_wrap_iter` methods are gone. This means
+ that lazy iteration is no longer supported: you always get all
+ wrapped lines back as a `Vec`. This was done to simplify the code
+ and to support the optimal-fit algorithm.
+
+ The first-fit algorithm could still be implemented in an incremental
+ fashion. Please let us know if this is important to you.
+
+### Other Changes
+
+* [#206](https://github.com/mgeisler/textwrap/pull/206): Change
+ `Wrapper.splitter` from `T: WordSplitter` to `Box<dyn
+ WordSplitter>`.
+* [#216](https://github.com/mgeisler/textwrap/pull/216): Forbid the
+ use of unsafe code.
+
+## Version 0.12.1 (2020-07-03)
This is a bugfix release.
@@ -13,7 +213,7 @@ This is a bugfix release.
broken and would cause extra whitespace to be inserted when words
were longer than the line width.
-## Version 0.12.0 — June 26th, 2020
+## Version 0.12.0 (2020-06-26)
The code has been updated to the [Rust 2018 edition][rust-2018] and
each new release of `textwrap` will only support the latest stable
@@ -31,7 +231,7 @@ US-English. This slims down the dependency.
* Fixed [#158][issue-158]: Unintended wrapping when using external splitter.
* Fixed [#177][issue-177]: Update examples to the 2018 edition.
-## Version 0.11.0 — December 9th, 2018
+## Version 0.11.0 (2018-12-09)
Due to our dependencies bumping their minimum supported version of
Rust, the minimum version of Rust we test against is now 1.22.0.
@@ -40,7 +240,7 @@ Rust, the minimum version of Rust we test against is now 1.22.0.
trailing newlines. Thanks @bbqsrc!
* Fixed [#151][issue-151]: Release of version with hyphenation 0.7.
-## Version 0.10.0 — April 28th, 2018
+## Version 0.10.0 (2018-04-28)
Due to our dependencies bumping their minimum supported version of
Rust, the minimum version of Rust we test against is now 1.17.0.
@@ -50,7 +250,7 @@ Rust, the minimum version of Rust we test against is now 1.17.0.
* Fixed [#122][issue-122]: Take newlines into account when wrapping.
* Fixed [#129][issue-129]: Panic on string with em-dash.
-## Version 0.9.0 — October 5th, 2017
+## Version 0.9.0 (2017-10-05)
The dependency on `term_size` is now optional, and by default this
feature is not enabled. This is a *breaking change* for users of
@@ -63,7 +263,7 @@ Added a regression test for the case where `width` is set to
* Fixed [#101][issue-101]: Make `term_size` an optional dependency.
-## Version 0.8.0 — September 4th, 2017
+## Version 0.8.0 (2017-09-04)
The `Wrapper` stuct is now generic over the type of word splitter
being used. This means less boxing and a nicer API. The
@@ -78,7 +278,7 @@ if you will be iterating over the wrapped lines one by one.
@hcpl!
* Fixed [#81][issue-81]: Set `html_root_url`.
-## Version 0.7.0 — July 20th, 2017
+## Version 0.7.0 (2017-07-20)
Version 0.7.0 changes the return type of `Wrapper::wrap` from
`Vec<String>` to `Vec<Cow<'a, str>>`. This means that the output lines
@@ -96,7 +296,7 @@ important for you so we can provide a work around.
* Fixed [#58][issue-58]: Add a "fast_wrap" function.
* Fixed [#61][issue-61]: Documentation errors.
-## Version 0.6.0 — May 22nd, 2017
+## Version 0.6.0 (2017-05-22)
Version 0.6.0 adds builder methods to `Wrapper` for easy one-line
initialization and configuration:
@@ -110,7 +310,7 @@ words, not even at existing hyphens.
* Fixed [#28][issue-28]: Support not squeezing whitespace.
-## Version 0.5.0 — May 15th, 2017
+## Version 0.5.0 (2017-05-15)
Version 0.5.0 has *breaking API changes*. However, this only affects
code using the hyphenation feature. The feature is now optional, so
@@ -133,22 +333,22 @@ Other changes include optimizations, so version 0.5.0 is roughly
* Fixed [#36][issue-36]: Support building without `hyphenation`.
* Fixed [#39][issue-39]: Respect non-breaking spaces.
-## Version 0.4.0 — January 24th, 2017
+## Version 0.4.0 (2017-01-24)
Documented complexities and tested these via `cargo bench`.
* Fixed [#13][issue-13]: Immediatedly add word if it fits.
* Fixed [#14][issue-14]: Avoid splitting on initial hyphens.
-## Version 0.3.0 — January 7th, 2017
+## Version 0.3.0 (2017-01-07)
Added support for automatic hyphenation.
-## Version 0.2.0 — December 28th, 2016
+## Version 0.2.0 (2016-12-28)
Introduced `Wrapper` struct. Added support for wrapping on hyphens.
-## Version 0.1.0 — December 17th, 2016
+## Version 0.1.0 (2016-12-17)
First public release with support for wrapping strings on whitespace.
@@ -176,5 +376,6 @@ First public release with support for wrapping strings on whitespace.
[issue-141]: https://github.com/mgeisler/textwrap/issues/141
[issue-151]: https://github.com/mgeisler/textwrap/issues/151
[issue-158]: https://github.com/mgeisler/textwrap/issues/158
+[issue-176]: https://github.com/mgeisler/textwrap/issues/176
[issue-177]: https://github.com/mgeisler/textwrap/issues/177
[issue-193]: https://github.com/mgeisler/textwrap/issues/193
diff --git a/Cargo.toml b/Cargo.toml
index 0a908ee..b49a99e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,9 +13,10 @@
[package]
edition = "2018"
name = "textwrap"
-version = "0.12.1"
+version = "0.13.3"
authors = ["Martin Geisler <martin@geisler.net>"]
-description = "Textwrap is a library for word wrapping, indenting, and dedenting\nstrings.\n\nYou can use it to format strings (such as help and error messages) for\ndisplay in commandline applications. It is designed to be efficient\nand handle Unicode characters correctly.\n"
+exclude = [".github/", ".gitignore", "benches/", "examples/", "fuzz/", "images/"]
+description = "Powerful library for word wrapping, indenting, and dedenting strings"
documentation = "https://docs.rs/textwrap/"
readme = "README.md"
keywords = ["text", "formatting", "wrap", "typesetting", "hyphenation"]
@@ -24,33 +25,40 @@ license = "MIT"
repository = "https://github.com/mgeisler/textwrap"
[package.metadata.docs.rs]
all-features = true
+
+[[bench]]
+name = "linear"
+path = "benches/linear.rs"
+harness = false
[dependencies.hyphenation]
version = "0.8"
features = ["embed_en-us"]
optional = true
+[dependencies.smawk]
+version = "0.3"
+optional = true
+
[dependencies.terminal_size]
version = "0.1"
optional = true
[dependencies.unicode-width]
version = "0.1"
-[dev-dependencies.lipsum]
-version = "0.6"
+optional = true
+[dev-dependencies.criterion]
+version = "0.3"
-[dev-dependencies.rand]
-version = "0.6"
+[dev-dependencies.lipsum]
+version = "0.7"
-[dev-dependencies.rand_xorshift]
-version = "0.1"
+[dev-dependencies.unic-emoji-char]
+version = "0.9.0"
[dev-dependencies.version-sync]
version = "0.9"
-[badges.appveyor]
-repository = "mgeisler/textwrap"
-
-[badges.codecov]
-repository = "mgeisler/textwrap"
-[badges.travis-ci]
-repository = "mgeisler/textwrap"
+[features]
+default = ["unicode-width", "smawk"]
+[target."cfg(unix)".dev-dependencies.termion]
+version = "1.5"
diff --git a/METADATA b/METADATA
index 7798419..08130cc 100644
--- a/METADATA
+++ b/METADATA
@@ -1,5 +1,5 @@
name: "textwrap"
-description: "Textwrap is a library for word wrapping, indenting, and dedenting strings. You can use it to format strings (such as help and error messages) for display in commandline applications. It is designed to be efficient and handle Unicode characters correctly."
+description: "Powerful library for word wrapping, indenting, and dedenting strings"
third_party {
url {
type: HOMEPAGE
@@ -7,13 +7,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/textwrap/textwrap-0.12.1.crate"
+ value: "https://static.crates.io/crates/textwrap/textwrap-0.13.3.crate"
}
- version: "0.12.1"
+ version: "0.13.3"
license_type: NOTICE
last_upgrade_date {
- year: 2020
- month: 8
- day: 27
+ year: 2021
+ month: 2
+ day: 22
}
}
diff --git a/README.md b/README.md
index 20673a9..39093e0 100644
--- a/README.md
+++ b/README.md
@@ -1,201 +1,133 @@
# Textwrap
-[![](https://travis-ci.org/mgeisler/textwrap.svg?branch=master)][travis-ci]
-[![](https://ci.appveyor.com/api/projects/status/github/mgeisler/textwrap?branch=master&svg=true)][appveyor]
+[![](https://github.com/mgeisler/textwrap/workflows/build/badge.svg)][build-status]
[![](https://codecov.io/gh/mgeisler/textwrap/branch/master/graph/badge.svg)][codecov]
[![](https://img.shields.io/crates/v/textwrap.svg)][crates-io]
[![](https://docs.rs/textwrap/badge.svg)][api-docs]
-Textwrap is a small Rust crate for word wrapping text. You can use it
-to format strings for display in commandline applications. The crate
-name and interface is inspired by
-the [Python textwrap module][py-textwrap].
+Textwrap is a library for wrapping and indenting text. It is most
+often used by command-line programs to format dynamic output nicely so
+it looks good in a terminal. However, you can use the library to wrap
+arbitrary things by implementing the `Fragment` trait — an example
+would be wrapping text for PDF files.
## Usage
-To use `textwrap`, add this to your `Cargo.toml` file:
+To use the textwrap crate, add this to your `Cargo.toml` file:
```toml
[dependencies]
-textwrap = "0.12"
+textwrap = "0.13"
```
-This gives you the text wrapping without of the optional features
-listed next.
+By default, this enables word wrapping with support for Unicode
+strings. Extra features can be enabled with Cargo features — and the
+Unicode support can be disabled if needed. This allows you to slim down
+the library so you will only pay for the features you actually
+use. Please see the [_Cargo Features_ in the crate
+documentation](https://docs.rs/textwrap/#cargo-features) for a full
+list of the available features.
-### `hyphenation`
+## Documentation
-If you would like to have automatic language-sensitive hyphenation,
-enable the `hyphenation` feature:
+**[API documentation][api-docs]**
-```toml
-[dependencies]
-textwrap = { version = "0.12", features = ["hyphenation"] }
-```
+## Getting Started
-This gives you hyphenation support for US English. Please see the
-[`hyphenation` example] for an executable demo. Read the Getting
-Started section below to see how to load the hyphenation patterns for
-other languages.
+Word wrapping is easy using the `fill` function:
-### `terminal_size`
+```rust
+fn main() {
+ let text = "textwrap: an efficient and powerful library for wrapping text.";
+ println!("{}", textwrap::fill(text, 28));
+}
+```
-To conveniently wrap text at the current terminal width, enable the
-`terminal_size` feature:
+The output is wrapped within 28 columns:
-```toml
-[dependencies]
-textwrap = { version = "0.12", features = ["terminal_size"] }
+```
+textwrap: an efficient
+and powerful library for
+wrapping text.
```
-Please see the [`termwidth` example] for how to use this feature.
-
-## Documentation
+Sharp-eyed readers will notice that the first line is 22 columns wide.
+So why is the word “and” put in the second line when there is space
+for it in the first line?
-**[API documentation][api-docs]**
+The explanation is that textwrap does not just wrap text one line at a
+time. Instead, it uses an optimal-fit algorithm which looks ahead and
+chooses line breaks which minimize the gaps left at ends of lines.
-## Getting Started
+Without look-ahead, the first line would be longer and the text would
+look like this:
-Word wrapping single strings is easy using the `fill` function:
-```rust
-fn main() {
- let text = "textwrap: a small library for wrapping text.";
- println!("{}", textwrap::fill(text, 18));
-}
```
-The output is
-```
-textwrap: a small
-library for
+textwrap: an efficient and
+powerful library for
wrapping text.
```
-If you enable the `hyphenation` feature, you get support for automatic
-hyphenation for [about 70 languages][patterns] via high-quality TeX
-hyphenation patterns.
+The second line is now shorter and the text is more ragged. The kind
+of wrapping can be configured via `Options::wrap_algorithm`.
+
+If you enable the `hyphenation` Cargo feature, you get support for
+automatic hyphenation for [about 70 languages][patterns] via
+high-quality TeX hyphenation patterns.
-Your program must load the hyphenation pattern and call
-`Wrapper::with_splitter` to use it:
+Your program must load the hyphenation pattern and configure
+`Options::splitter` to use it:
```rust
use hyphenation::{Language, Load, Standard};
-use textwrap::Wrapper;
+use textwrap::Options;
fn main() {
let hyphenator = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = Wrapper::with_splitter(18, hyphenator);
- let text = "textwrap: a small library for wrapping text.";
- println!("{}", wrapper.fill(text))
+ let options = Options::new(28).splitter(hyphenator);
+ let text = "textwrap: an efficient and powerful library for wrapping text.";
+ println!("{}", textwrap::fill(text, &options));
}
```
The output now looks like this:
```
-textwrap: a small
-library for wrap-
+textwrap: an efficient and
+powerful library for wrap-
ping text.
```
The US-English hyphenation patterns are embedded when you enable the
`hyphenation` feature. They are licensed under a [permissive
-license][en-us license] and take up about 88 KB of space in your
-application. If you need hyphenation for other languages, you need to
-download a [precompiled `.bincode` file][bincode] and load it
-yourself. Please see the [`hyphenation` documentation] for details.
+license][en-us license] and take up about 88 KB in your binary. If you
+need hyphenation for other languages, you need to download a
+[precompiled `.bincode` file][bincode] and load it yourself. Please
+see the [`hyphenation` documentation] for details.
## Wrapping Strings at Compile Time
If your strings are known at compile time, please take a look at the
procedural macros from the [`textwrap-macros` crate].
-
## Examples
-The library comes with some small example programs that shows various
-features.
-
-### Layout Example
-
-The `layout` example shows how a fixed example string is wrapped at
-different widths. Run the example with:
-
-```shell
-$ cargo run --features hyphenation --example layout
-```
-
-The program will use the following string:
+The library comes with [a
+collection](https://github.com/mgeisler/textwrap/tree/master/examples)
+of small example programs that show various features. You’re invited
+to clone the repository and try them out for yourself!
-> Memory safety without garbage collection. Concurrency without data
-> races. Zero-cost abstractions.
+Of special note is the `interactive` example. This is a demo program
+which demonstrates most of the available features: you can enter text
+and adjust the width at which it is wrapped interactively. You can
+also adjust the `Options` used to see the effect of different
+`WordSplitter`s and wrap algorithms.
-The string is wrapped at all widths between 15 and 60 columns. With
-narrow columns the output looks like this:
+Run the demo with
+```sh
+$ cargo run --example interactive
```
-.--- Width: 15 ---.
-| Memory safety |
-| without garbage |
-| collection. |
-| Concurrency |
-| without data |
-| races. Zero- |
-| cost abstrac- |
-| tions. |
-.--- Width: 16 ----.
-| Memory safety |
-| without garbage |
-| collection. Con- |
-| currency without |
-| data races. Ze- |
-| ro-cost abstrac- |
-| tions. |
-```
-
-Later, longer lines are used and the output now looks like this:
-
-```
-.-------------------- Width: 49 --------------------.
-| Memory safety without garbage collection. Concur- |
-| rency without data races. Zero-cost abstractions. |
-.---------------------- Width: 53 ----------------------.
-| Memory safety without garbage collection. Concurrency |
-| without data races. Zero-cost abstractions. |
-.------------------------- Width: 59 -------------------------.
-| Memory safety without garbage collection. Concurrency with- |
-| out data races. Zero-cost abstractions. |
-```
-
-Notice how words are split at hyphens (such as "zero-cost") but also
-how words are hyphenated using automatic/machine hyphenation.
-### Terminal Width Example
-
-The `termwidth` example simply shows how the width can be set
-automatically to the current terminal width. Run it with this command:
-
-```
-$ cargo run --example termwidth
-```
-
-If you run it in a narrow terminal, you'll see output like this:
-```
-Formatted in within 60 columns:
-----
-Memory safety without garbage collection. Concurrency
-without data races. Zero-cost abstractions.
-----
-```
-
-If `stdout` is not connected to the terminal, the program will use a
-default of 80 columns for the width:
-
-```
-$ cargo run --example termwidth | cat
-Formatted in within 80 columns:
-----
-Memory safety without garbage collection. Concurrency without data races. Zero-
-cost abstractions.
-----
-```
+The demo needs a Linux terminal to function.
## Release History
@@ -208,10 +140,8 @@ Textwrap can be distributed according to the [MIT license][mit].
Contributions will be accepted under the same license.
[crates-io]: https://crates.io/crates/textwrap
-[travis-ci]: https://travis-ci.org/mgeisler/textwrap
-[appveyor]: https://ci.appveyor.com/project/mgeisler/textwrap
+[build-status]: https://github.com/mgeisler/textwrap/actions?query=workflow%3Abuild+branch%3Amaster
[codecov]: https://codecov.io/gh/mgeisler/textwrap
-[py-textwrap]: https://docs.python.org/library/textwrap
[`textwrap-macros` crate]: https://crates.io/crates/textwrap-macros
[`hyphenation` example]: https://github.com/mgeisler/textwrap/blob/master/examples/hyphenation.rs
[`termwidth` example]: https://github.com/mgeisler/textwrap/blob/master/examples/termwidth.rs
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 6716814..f1cd786 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -1,7 +1,13 @@
-// Generated by cargo2android.py for tests in Android.bp
+// Generated by update_crate_tests.py for tests that depend on this crate.
{
"presubmit": [
{
+ "name": "authfs_device_test_src_lib"
+ },
+ {
+ "name": "keystore2_test"
+ },
+ {
"name": "libsqlite3-sys_device_test_src_lib"
}
]
diff --git a/benches/linear.rs b/benches/linear.rs
deleted file mode 100644
index 7215afa..0000000
--- a/benches/linear.rs
+++ /dev/null
@@ -1,114 +0,0 @@
-#![feature(test)]
-
-// The benchmarks here verify that the complexity grows as O(*n*)
-// where *n* is the number of characters in the text to be wrapped.
-
-extern crate test;
-
-#[cfg(feature = "hyphenation")]
-use hyphenation::{Language, Load, Standard};
-use lipsum::MarkovChain;
-use rand::SeedableRng;
-use rand_xorshift::XorShiftRng;
-use test::Bencher;
-
-const LINE_LENGTH: usize = 60;
-
-/// Generate a lorem ipsum text with the given number of characters.
-fn lorem_ipsum(length: usize) -> String {
- // The average word length in the lorem ipsum text is somewhere
- // between 6 and 7. So we conservatively divide by 5 to have a
- // long enough text that we can truncate below.
- let rng = XorShiftRng::seed_from_u64(0);
- let mut chain = MarkovChain::new_with_rng(rng);
- chain.learn(lipsum::LOREM_IPSUM);
- chain.learn(lipsum::LIBER_PRIMUS);
-
- let mut text = chain.generate_from(length / 5, ("Lorem", "ipsum"));
- text.truncate(length);
- text
-}
-
-#[bench]
-fn fill_100(b: &mut Bencher) {
- let text = &lorem_ipsum(100);
- b.iter(|| textwrap::fill(text, LINE_LENGTH))
-}
-
-#[bench]
-fn fill_200(b: &mut Bencher) {
- let text = &lorem_ipsum(200);
- b.iter(|| textwrap::fill(text, LINE_LENGTH))
-}
-
-#[bench]
-fn fill_400(b: &mut Bencher) {
- let text = &lorem_ipsum(400);
- b.iter(|| textwrap::fill(text, LINE_LENGTH))
-}
-
-#[bench]
-fn fill_800(b: &mut Bencher) {
- let text = &lorem_ipsum(800);
- b.iter(|| textwrap::fill(text, LINE_LENGTH))
-}
-
-#[bench]
-fn wrap_100(b: &mut Bencher) {
- let text = &lorem_ipsum(100);
- b.iter(|| textwrap::wrap(text, LINE_LENGTH))
-}
-
-#[bench]
-fn wrap_200(b: &mut Bencher) {
- let text = &lorem_ipsum(200);
- b.iter(|| textwrap::wrap(text, LINE_LENGTH))
-}
-
-#[bench]
-fn wrap_400(b: &mut Bencher) {
- let text = &lorem_ipsum(400);
- b.iter(|| textwrap::wrap(text, LINE_LENGTH))
-}
-
-#[bench]
-fn wrap_800(b: &mut Bencher) {
- let text = &lorem_ipsum(800);
- b.iter(|| textwrap::wrap(text, LINE_LENGTH))
-}
-
-#[bench]
-#[cfg(feature = "hyphenation")]
-fn hyphenation_fill_100(b: &mut Bencher) {
- let text = &lorem_ipsum(100);
- let dictionary = Standard::from_embedded(Language::Latin).unwrap();
- let wrapper = textwrap::Wrapper::with_splitter(LINE_LENGTH, dictionary);
- b.iter(|| wrapper.fill(text))
-}
-
-#[bench]
-#[cfg(feature = "hyphenation")]
-fn hyphenation_fill_200(b: &mut Bencher) {
- let text = &lorem_ipsum(200);
- let dictionary = Standard::from_embedded(Language::Latin).unwrap();
- let wrapper = textwrap::Wrapper::with_splitter(LINE_LENGTH, dictionary);
- b.iter(|| wrapper.fill(text))
-}
-
-#[bench]
-#[cfg(feature = "hyphenation")]
-fn hyphenation_fill_400(b: &mut Bencher) {
- let text = &lorem_ipsum(400);
- let dictionary = Standard::from_embedded(Language::Latin).unwrap();
- let wrapper = textwrap::Wrapper::with_splitter(LINE_LENGTH, dictionary);
- b.iter(|| wrapper.fill(text))
-}
-
-#[bench]
-#[cfg(feature = "hyphenation")]
-fn hyphenation_fill_800(b: &mut Bencher) {
- let text = &lorem_ipsum(800);
- let dictionary = Standard::from_embedded(Language::Latin).unwrap();
- let wrapper = textwrap::Wrapper::with_splitter(LINE_LENGTH, dictionary);
- b.iter(|| wrapper.fill(text))
-}
diff --git a/examples/hyphenation.rs b/examples/hyphenation.rs
deleted file mode 100644
index aa72fbe..0000000
--- a/examples/hyphenation.rs
+++ /dev/null
@@ -1,17 +0,0 @@
-#[cfg(feature = "hyphenation")]
-use hyphenation::{Language, Load, Standard};
-
-#[cfg(not(feature = "hyphenation"))]
-fn main() {
- println!("Please run this example as");
- println!();
- println!(" cargo run --example hyphenation --features hyphenation");
-}
-
-#[cfg(feature = "hyphenation")]
-fn main() {
- let text = "textwrap: a small library for wrapping text.";
- let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = textwrap::Wrapper::with_splitter(18, dictionary);
- println!("{}", wrapper.fill(text));
-}
diff --git a/examples/layout.rs b/examples/layout.rs
deleted file mode 100644
index ae818a2..0000000
--- a/examples/layout.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-#[cfg(feature = "hyphenation")]
-use hyphenation::{Language, Load};
-use textwrap::Wrapper;
-
-#[cfg(not(feature = "hyphenation"))]
-fn new_wrapper<'a>() -> Wrapper<'a, textwrap::HyphenSplitter> {
- Wrapper::new(0)
-}
-
-#[cfg(feature = "hyphenation")]
-fn new_wrapper<'a>() -> Wrapper<'a, hyphenation::Standard> {
- let dictionary = hyphenation::Standard::from_embedded(Language::EnglishUS).unwrap();
- Wrapper::with_splitter(0, dictionary)
-}
-
-fn main() {
- let example = "Memory safety without garbage collection. \
- Concurrency without data races. \
- Zero-cost abstractions.";
- let mut prev_lines = vec![];
- let mut wrapper = new_wrapper();
- for width in 15..60 {
- wrapper.width = width;
- let lines = wrapper.wrap(example);
- if lines != prev_lines {
- let title = format!(" Width: {} ", width);
- println!(".{:-^1$}.", title, width + 2);
- for line in &lines {
- println!("| {:1$} |", line, width);
- }
- prev_lines = lines;
- }
- }
-}
diff --git a/examples/termwidth.rs b/examples/termwidth.rs
deleted file mode 100644
index bd4070b..0000000
--- a/examples/termwidth.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-#[cfg(feature = "hyphenation")]
-use hyphenation::{Language, Load, Standard};
-#[cfg(feature = "terminal_size")]
-use textwrap::Wrapper;
-
-#[cfg(not(feature = "terminal_size"))]
-fn main() {
- println!("Please enable the terminal_size feature to run this example.");
-}
-
-#[cfg(feature = "terminal_size")]
-fn main() {
- #[cfg(not(feature = "hyphenation"))]
- fn new_wrapper<'a>() -> (&'static str, Wrapper<'a, textwrap::HyphenSplitter>) {
- ("without hyphenation", Wrapper::with_termwidth())
- }
-
- #[cfg(feature = "hyphenation")]
- fn new_wrapper<'a>() -> (&'static str, Wrapper<'a, Standard>) {
- let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- (
- "with hyphenation",
- Wrapper::with_splitter(textwrap::termwidth(), dictionary),
- )
- }
-
- let example = "Memory safety without garbage collection. \
- Concurrency without data races. \
- Zero-cost abstractions.";
- // Create a new Wrapper -- automatically set the width to the
- // current terminal width.
- let (msg, wrapper) = new_wrapper();
- println!("Formatted {} in {} columns:", msg, wrapper.width);
- println!("----");
- println!("{}", wrapper.fill(example));
- println!("----");
-}
diff --git a/src/core.rs b/src/core.rs
new file mode 100644
index 0000000..dc00454
--- /dev/null
+++ b/src/core.rs
@@ -0,0 +1,896 @@
+//! Building blocks for advanced wrapping functionality.
+//!
+//! The functions and structs in this module can be used to implement
+//! advanced wrapping functionality when the [`wrap`](super::wrap) and
+//! [`fill`](super::fill) functions don't do what you want.
+//!
+//! In general, you want to follow these steps when wrapping
+//! something:
+//!
+//! 1. Split your input into [`Fragment`]s. These are abstract blocks
+//! of text or content which can be wrapped into lines. You can use
+//! [`find_words`] to do this for text.
+//!
+//! 2. Potentially split your fragments into smaller pieces. This
+//! allows you to implement things like hyphenation. If wrapping
+//! text, [`split_words`] can help you do this.
+//!
+//! 3. Potentially break apart fragments that are still too large to
+//! fit on a single line. This is implemented in [`break_words`].
+//!
+//! 4. Finally take your fragments and put them into lines. There are
+//! two algorithms for this: [`wrap_optimal_fit`] and
+//! [`wrap_first_fit`]. The former produces better line breaks, the
+//! latter is faster.
+//!
+//! 5. Iterate through the slices returned by the wrapping functions
+//! and construct your lines of output.
+//!
+//! Please [open an issue](https://github.com/mgeisler/textwrap/) if
+//! the functionality here is not sufficient or if you have ideas for
+//! improving it. We would love to hear from you!
+
+use crate::{Options, WordSplitter};
+
+#[cfg(feature = "smawk")]
+mod optimal_fit;
+#[cfg(feature = "smawk")]
+pub use optimal_fit::wrap_optimal_fit;
+
+/// The CSI or “Control Sequence Introducer” introduces an ANSI escape
+/// sequence. This is typically used for colored text and will be
+/// ignored when computing the text width.
+const CSI: (char, char) = ('\x1b', '[');
+/// The final bytes of an ANSI escape sequence must be in this range.
+const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';
+
+/// Skip ANSI escape sequences. The `ch` is the current `char`, the
+/// `chars` provide the following characters. The `chars` will be
+/// modified if `ch` is the start of an ANSI escape sequence.
+#[inline]
+fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
+ if ch == CSI.0 && chars.next() == Some(CSI.1) {
+ // We have found the start of an ANSI escape code, typically
+ // used for colored terminal text. We skip until we find a
+ // "final byte" in the range 0x40–0x7E.
+ for ch in chars {
+ if ANSI_FINAL_BYTE.contains(&ch) {
+ return true;
+ }
+ }
+ }
+ false
+}
+
+#[cfg(feature = "unicode-width")]
+#[inline]
+fn ch_width(ch: char) -> usize {
+ unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0)
+}
+
+/// First character which [`ch_width`] will classify as double-width.
+/// Please see [`display_width`].
+#[cfg(not(feature = "unicode-width"))]
+const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}';
+
+#[cfg(not(feature = "unicode-width"))]
+#[inline]
+fn ch_width(ch: char) -> usize {
+ if ch < DOUBLE_WIDTH_CUTOFF {
+ 1
+ } else {
+ 2
+ }
+}
+
+/// Compute the display width of `text` while skipping over ANSI
+/// escape sequences.
+///
+/// # Examples
+///
+/// ```
+/// use textwrap::core::display_width;
+///
+/// assert_eq!(display_width("Café Plain"), 10);
+/// assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
+/// ```
+///
+/// **Note:** When the `unicode-width` Cargo feature is disabled, the
+/// width of a `char` is determined by a crude approximation which
+/// simply counts chars below U+1100 as 1 column wide, and all other
+/// characters as 2 columns wide. With the feature enabled, this function
+/// will correctly deal with [combining characters] in their
+/// decomposed form (see [Unicode equivalence]).
+///
+/// An example of a decomposed character is “é”, which can be
+/// decomposed into: “e” followed by a combining acute accent: “◌́”.
+/// Without the `unicode-width` Cargo feature, every `char` below
+/// U+1100 has a width of 1. This includes the combining accent:
+///
+/// ```
+/// use textwrap::core::display_width;
+///
+/// assert_eq!(display_width("Cafe Plain"), 10);
+/// #[cfg(feature = "unicode-width")]
+/// assert_eq!(display_width("Cafe\u{301} Plain"), 10);
+/// #[cfg(not(feature = "unicode-width"))]
+/// assert_eq!(display_width("Cafe\u{301} Plain"), 11);
+/// ```
+///
+/// ## Emojis and CJK Characters
+///
+/// Characters such as emojis and [CJK characters] used in the
+/// Chinese, Japanese, and Korean languages are seen as double-width,
+/// even if the `unicode-width` feature is disabled:
+///
+/// ```
+/// use textwrap::core::display_width;
+///
+/// assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
+/// assert_eq!(display_width("你好"), 4); // “Nǐ hǎo” or “Hello” in Chinese
+/// ```
+///
+/// # Limitations
+///
+/// The displayed width of a string cannot always be computed from the
+/// string alone. This is because the width depends on the rendering
+/// engine used. This is particularly visible with [emoji modifier
+/// sequences] where a base emoji is modified with, e.g., skin tone or
+/// hair color modifiers. It is up to the rendering engine to detect
+/// this and to produce a suitable emoji.
+///
+/// A simple example is “❤️”, which consists of “❤” (U+2764: Black
+/// Heart Symbol) followed by U+FE0F (Variation Selector-16). By
+/// itself, “❤” is a black heart, but if you follow it with the
+/// variation selector, you may get a wider red heart.
+///
+/// A more complex example would be “👨‍🦰” which should depict a man
+/// with red hair. Here the computed width is too large — and the
+/// width differs depending on the use of the `unicode-width` feature:
+///
+/// ```
+/// use textwrap::core::display_width;
+///
+/// assert_eq!("👨‍🦰".chars().collect::<Vec<char>>(), ['\u{1f468}', '\u{200d}', '\u{1f9b0}']);
+/// #[cfg(feature = "unicode-width")]
+/// assert_eq!(display_width("👨‍🦰"), 4);
+/// #[cfg(not(feature = "unicode-width"))]
+/// assert_eq!(display_width("👨‍🦰"), 6);
+/// ```
+///
+/// This happens because the grapheme consists of three code points:
+/// “👨” (U+1F468: Man), Zero Width Joiner (U+200D), and “🦰”
+/// (U+1F9B0: Red Hair). You can see them above in the test. With
+/// `unicode-width` enabled, the ZWJ is correctly seen as having zero
+/// width; without it, the ZWJ is counted as a double-width character.
+///
+/// ## Terminal Support
+///
+/// Modern browsers typically do a great job at combining characters
+/// as shown above, but terminals often struggle more. As an example,
+/// Gnome Terminal version 3.38.1, shows “❤️” as a big red heart, but
+/// shows "👨‍🦰" as “👨🦰”.
+///
+/// [combining characters]: https://en.wikipedia.org/wiki/Combining_character
+/// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence
+/// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters
+/// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html
+#[inline]
+pub fn display_width(text: &str) -> usize {
+ let mut chars = text.chars();
+ let mut width = 0;
+ while let Some(ch) = chars.next() {
+ if skip_ansi_escape_sequence(ch, &mut chars) {
+ continue;
+ }
+ width += ch_width(ch);
+ }
+ width
+}
+
+/// A (text) fragment denotes the unit which we wrap into lines.
+///
+/// Fragments represent an abstract _word_ plus the _whitespace_
+/// following the word. In case the word falls at the end of the line,
+/// the whitespace is dropped and a so-called _penalty_ is inserted
+/// instead (typically `"-"` if the word was hyphenated).
+///
+/// For wrapping purposes, the precise content of the word, the
+/// whitespace, and the penalty is irrelevant. All we need to know is
+/// the displayed width of each part, which this trait provides.
+pub trait Fragment: std::fmt::Debug {
+ /// Displayed width of word represented by this fragment.
+ fn width(&self) -> usize;
+
+ /// Displayed width of the whitespace that must follow the word
+ /// when the word is not at the end of a line.
+ fn whitespace_width(&self) -> usize;
+
+ /// Displayed width of the penalty that must be inserted if the
+ /// word falls at the end of a line.
+ fn penalty_width(&self) -> usize;
+}
+
+/// A piece of wrappable text, including any trailing whitespace.
+///
+/// A `Word` is an example of a [`Fragment`], so it has a width,
+/// trailing whitespace, and potentially a penalty item.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub struct Word<'a> {
+ word: &'a str,
+ width: usize,
+ pub(crate) whitespace: &'a str,
+ pub(crate) penalty: &'a str,
+}
+
+impl std::ops::Deref for Word<'_> {
+ type Target = str;
+
+ fn deref(&self) -> &Self::Target {
+ self.word
+ }
+}
+
+impl<'a> Word<'a> {
+ /// Construct a new `Word`.
+ ///
+ /// A trailing stretch of `' '` is automatically taken to be the
+ /// whitespace part of the word.
+ pub fn from(word: &str) -> Word<'_> {
+ let trimmed = word.trim_end_matches(' ');
+ Word {
+ word: trimmed,
+ width: display_width(&trimmed),
+ whitespace: &word[trimmed.len()..],
+ penalty: "",
+ }
+ }
+
+ /// Break this word into smaller words with a width of at most
+ /// `line_width`. The whitespace and penalty from this `Word` are
+ /// added to the last piece.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use textwrap::core::Word;
+ /// assert_eq!(
+ /// Word::from("Hello! ").break_apart(3).collect::<Vec<_>>(),
+ /// vec![Word::from("Hel"), Word::from("lo! ")]
+ /// );
+ /// ```
+ pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b {
+ let mut char_indices = self.word.char_indices();
+ let mut offset = 0;
+ let mut width = 0;
+
+ std::iter::from_fn(move || {
+ while let Some((idx, ch)) = char_indices.next() {
+ if skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
+ continue;
+ }
+
+ if width > 0 && width + ch_width(ch) > line_width {
+ let word = Word {
+ word: &self.word[offset..idx],
+ width: width,
+ whitespace: "",
+ penalty: "",
+ };
+ offset = idx;
+ width = ch_width(ch);
+ return Some(word);
+ }
+
+ width += ch_width(ch);
+ }
+
+ if offset < self.word.len() {
+ let word = Word {
+ word: &self.word[offset..],
+ width: width,
+ whitespace: self.whitespace,
+ penalty: self.penalty,
+ };
+ offset = self.word.len();
+ return Some(word);
+ }
+
+ None
+ })
+ }
+}
+
+impl Fragment for Word<'_> {
+ #[inline]
+ fn width(&self) -> usize {
+ self.width
+ }
+
+ // We assume the whitespace consists of ' ' only. This allows us to
+ // compute the display width in constant time.
+ #[inline]
+ fn whitespace_width(&self) -> usize {
+ self.whitespace.len()
+ }
+
+ // We assume the penalty is `""` or `"-"`. This allows us to
+ // compute the display width in constant time.
+ #[inline]
+ fn penalty_width(&self) -> usize {
+ self.penalty.len()
+ }
+}
+
+/// Split line into words separated by regions of `' '` characters.
+///
+/// # Examples
+///
+/// ```
+/// use textwrap::core::{find_words, Fragment, Word};
+/// let words = find_words("Hello World!").collect::<Vec<_>>();
+/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
+/// assert_eq!(words[0].width(), 5);
+/// assert_eq!(words[0].whitespace_width(), 1);
+/// assert_eq!(words[0].penalty_width(), 0);
+/// ```
+pub fn find_words(line: &str) -> impl Iterator<Item = Word> {
+ let mut start = 0;
+ let mut in_whitespace = false;
+ let mut char_indices = line.char_indices();
+
+ std::iter::from_fn(move || {
+ // for (idx, ch) in char_indices does not work, gives this
+ // error:
+ //
+ // > cannot move out of `char_indices`, a captured variable in
+ // > an `FnMut` closure
+ #[allow(clippy::while_let_on_iterator)]
+ while let Some((idx, ch)) = char_indices.next() {
+ if in_whitespace && ch != ' ' {
+ let word = Word::from(&line[start..idx]);
+ start = idx;
+ in_whitespace = ch == ' ';
+ return Some(word);
+ }
+
+ in_whitespace = ch == ' ';
+ }
+
+ if start < line.len() {
+ let word = Word::from(&line[start..]);
+ start = line.len();
+ return Some(word);
+ }
+
+ None
+ })
+}
+
+/// Split words into smaller words according to the split points given
+/// by `options`.
+///
+/// Note that we split all words, regardless of their length. This is
+/// to more cleanly separate the business of splitting (including
+/// automatic hyphenation) from the business of word wrapping.
+///
+/// # Examples
+///
+/// ```
+/// use textwrap::core::{split_words, Word};
+/// use textwrap::{NoHyphenation, Options};
+///
+/// // The default splitter is HyphenSplitter:
+/// let options = Options::new(80);
+/// assert_eq!(
+/// split_words(vec![Word::from("foo-bar")], &options).collect::<Vec<_>>(),
+/// vec![Word::from("foo-"), Word::from("bar")]
+/// );
+///
+/// // The NoHyphenation splitter ignores the '-':
+/// let options = Options::new(80).splitter(NoHyphenation);
+/// assert_eq!(
+/// split_words(vec![Word::from("foo-bar")], &options).collect::<Vec<_>>(),
+/// vec![Word::from("foo-bar")]
+/// );
+/// ```
+pub fn split_words<'a, I, S, Opt>(words: I, options: Opt) -> impl Iterator<Item = Word<'a>>
+where
+ I: IntoIterator<Item = Word<'a>>,
+ S: WordSplitter,
+ Opt: Into<Options<'a, S>>,
+{
+ let options = options.into();
+
+ words.into_iter().flat_map(move |word| {
+ let mut prev = 0;
+ let mut split_points = options.splitter.split_points(&word).into_iter();
+ std::iter::from_fn(move || {
+ if let Some(idx) = split_points.next() {
+ let need_hyphen = !word[..idx].ends_with('-');
+ let w = Word {
+ word: &word.word[prev..idx],
+ width: display_width(&word[prev..idx]),
+ whitespace: "",
+ penalty: if need_hyphen { "-" } else { "" },
+ };
+ prev = idx;
+ return Some(w);
+ }
+
+ if prev < word.word.len() || prev == 0 {
+ let w = Word {
+ word: &word.word[prev..],
+ width: display_width(&word[prev..]),
+ whitespace: word.whitespace,
+ penalty: word.penalty,
+ };
+ prev = word.word.len() + 1;
+ return Some(w);
+ }
+
+ None
+ })
+ })
+}
+
+/// Forcibly break words wider than `line_width` into smaller words.
+///
+/// This simply calls [`Word::break_apart`] on words that are too
+/// wide. This means that no extra `'-'` is inserted, the word is
+/// simply broken into smaller pieces.
+pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>>
+where
+ I: IntoIterator<Item = Word<'a>>,
+{
+ let mut shortened_words = Vec::new();
+ for word in words {
+ if word.width() > line_width {
+ shortened_words.extend(word.break_apart(line_width));
+ } else {
+ shortened_words.push(word);
+ }
+ }
+ shortened_words
+}
+
+/// Wrapping algorithms.
+///
+/// After a text has been broken into [`Fragment`]s, one now has
+/// to decide how to break the fragments into lines. The simplest
+/// algorithm for this is implemented by [`wrap_first_fit`]: it uses
+/// no look-ahead and simply adds fragments to the line as long as
+/// they fit. However, this can lead to poor line breaks if a large
+/// fragment almost-but-not-quite fits on a line. When that happens,
+/// the fragment is moved to the next line and it will leave behind a
+/// large gap. A more advanced algorithm, implemented by
+/// [`wrap_optimal_fit`], will take this into account. The optimal-fit
+/// algorithm considers all possible line breaks and will attempt to
+/// minimize the gaps left behind by overly short lines.
+///
+/// While both algorithms run in linear time, the first-fit algorithm
+/// is about 4 times faster than the optimal-fit algorithm.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum WrapAlgorithm {
+ /// Use an advanced algorithm which considers the entire paragraph
+ /// to find optimal line breaks. Implemented by
+ /// [`wrap_optimal_fit`].
+ ///
+ /// **Note:** Only available when the `smawk` Cargo feature is
+ /// enabled.
+ #[cfg(feature = "smawk")]
+ OptimalFit,
+ /// Use a fast and simple algorithm with no look-ahead to find
+ /// line breaks. Implemented by [`wrap_first_fit`].
+ FirstFit,
+}
+
+/// Wrap abstract fragments into lines with a first-fit algorithm.
+///
+/// The `line_widths` map line numbers (starting from 0) to a target
+/// line width. This can be used to implement hanging indentation.
+///
+/// The fragments must already have been split into the desired
+/// widths, this function will not (and cannot) attempt to split them
+/// further when arranging them into lines.
+///
+/// # First-Fit Algorithm
+///
+/// This implements a simple “greedy” algorithm: accumulate fragments
+/// one by one and when a fragment no longer fits, start a new line.
+/// There is no look-ahead, we simply take the first fit of the fragments
+/// we find.
+///
+/// While fast and predictable, this algorithm can produce poor line
+/// breaks when a long fragment is moved to a new line, leaving behind
+/// a large gap:
+///
+/// ```
+/// use textwrap::core::{find_words, wrap_first_fit, Word};
+///
+/// // Helper to convert wrapped lines to a Vec<String>.
+/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
+/// lines.iter().map(|line| {
+/// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
+/// }).collect::<Vec<_>>()
+/// }
+///
+/// let text = "These few words will unfortunately not wrap nicely.";
+/// let words = find_words(text).collect::<Vec<_>>();
+/// assert_eq!(lines_to_strings(wrap_first_fit(&words, |_| 15)),
+/// vec!["These few words",
+/// "will", // <-- short line
+/// "unfortunately",
+/// "not wrap",
+/// "nicely."]);
+///
+/// // We can avoid the short line if we look ahead:
+/// #[cfg(feature = "smawk")]
+/// assert_eq!(lines_to_strings(textwrap::core::wrap_optimal_fit(&words, |_| 15)),
+/// vec!["These few",
+/// "words will",
+/// "unfortunately",
+/// "not wrap",
+/// "nicely."]);
+/// ```
+///
+/// The [`wrap_optimal_fit`] function was used above to get better
+/// line breaks. It uses an advanced algorithm which tries to avoid
+/// short lines. This function is about 4 times faster than
+/// [`wrap_optimal_fit`].
+///
+/// # Examples
+///
+/// Imagine you're building a house site and you have a number of
+/// tasks you need to execute. Things like pour foundation, complete
+/// framing, install plumbing, electric cabling, install insulation.
+///
+/// The construction workers can only work during daytime, so they
+/// need to pack up everything at night. Because they need to secure
+/// their tools and move machines back to the garage, this process
+/// takes much more time than the time it would take them to simply
+/// switch to another task.
+///
+/// You would like to make a list of tasks to execute every day based
+/// on your estimates. You can model this with a program like this:
+///
+/// ```
+/// use textwrap::core::{wrap_first_fit, Fragment};
+///
+/// #[derive(Debug)]
+/// struct Task<'a> {
+/// name: &'a str,
+/// hours: usize, // Time needed to complete task.
+/// sweep: usize, // Time needed for a quick sweep after task during the day.
+/// cleanup: usize, // Time needed to cleanup after task at end of day.
+/// }
+///
+/// impl Fragment for Task<'_> {
+/// fn width(&self) -> usize { self.hours }
+/// fn whitespace_width(&self) -> usize { self.sweep }
+/// fn penalty_width(&self) -> usize { self.cleanup }
+/// }
+///
+/// // The morning tasks
+/// let tasks = vec![
+/// Task { name: "Foundation", hours: 4, sweep: 2, cleanup: 3 },
+/// Task { name: "Framing", hours: 3, sweep: 1, cleanup: 2 },
+/// Task { name: "Plumbing", hours: 2, sweep: 2, cleanup: 2 },
+/// Task { name: "Electrical", hours: 2, sweep: 1, cleanup: 2 },
+/// Task { name: "Insulation", hours: 2, sweep: 1, cleanup: 2 },
+/// Task { name: "Drywall", hours: 3, sweep: 1, cleanup: 2 },
+/// Task { name: "Floors", hours: 3, sweep: 1, cleanup: 2 },
+/// Task { name: "Countertops", hours: 1, sweep: 1, cleanup: 2 },
+/// Task { name: "Bathrooms", hours: 2, sweep: 1, cleanup: 2 },
+/// ];
+///
+/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: usize) -> Vec<(usize, Vec<&'a str>)> {
+/// let mut days = Vec::new();
+/// for day in wrap_first_fit(&tasks, |i| day_length) {
+/// let last = day.last().unwrap();
+/// let work_hours: usize = day.iter().map(|t| t.hours + t.sweep).sum();
+/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
+/// days.push((work_hours - last.sweep + last.cleanup, names));
+/// }
+/// days
+/// }
+///
+/// // With a single crew working 8 hours a day:
+/// assert_eq!(
+/// assign_days(&tasks, 8),
+/// [
+/// (7, vec!["Foundation"]),
+/// (8, vec!["Framing", "Plumbing"]),
+/// (7, vec!["Electrical", "Insulation"]),
+/// (5, vec!["Drywall"]),
+/// (7, vec!["Floors", "Countertops"]),
+/// (4, vec!["Bathrooms"]),
+/// ]
+/// );
+///
+/// // With two crews working in shifts, 16 hours a day:
+/// assert_eq!(
+/// assign_days(&tasks, 16),
+/// [
+/// (14, vec!["Foundation", "Framing", "Plumbing"]),
+/// (15, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
+/// (6, vec!["Countertops", "Bathrooms"]),
+/// ]
+/// );
+/// ```
+///
+/// Apologies to anyone who actually knows how to build a house and
+/// knows how long each step takes :-)
+pub fn wrap_first_fit<T: Fragment, F: Fn(usize) -> usize>(
+ fragments: &[T],
+ line_widths: F,
+) -> Vec<&[T]> {
+ let mut lines = Vec::new();
+ let mut start = 0;
+ let mut width = 0;
+
+ for (idx, fragment) in fragments.iter().enumerate() {
+ let line_width = line_widths(lines.len());
+ if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
+ lines.push(&fragments[start..idx]);
+ start = idx;
+ width = 0;
+ }
+ width += fragment.width() + fragment.whitespace_width();
+ }
+ lines.push(&fragments[start..]);
+ lines
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[cfg(feature = "unicode-width")]
+ use unicode_width::UnicodeWidthChar;
+
+ // Like assert_eq!, but the left expression is an iterator.
+ macro_rules! assert_iter_eq {
+ ($left:expr, $right:expr) => {
+ assert_eq!($left.collect::<Vec<_>>(), $right);
+ };
+ }
+
+ #[test]
+ fn skip_ansi_escape_sequence_works() {
+ let blue_text = "\u{1b}[34mHello\u{1b}[0m";
+ let mut chars = blue_text.chars();
+ let ch = chars.next().unwrap();
+ assert!(skip_ansi_escape_sequence(ch, &mut chars));
+ assert_eq!(chars.next(), Some('H'));
+ }
+
+ #[test]
+ fn emojis_have_correct_width() {
+ use unic_emoji_char::is_emoji;
+
+ // Emojis in the Basic Latin (ASCII) and Latin-1 Supplement
+ // blocks all have a width of 1 column. This includes
+ // characters such as '#' and '©'.
+ for ch in '\u{1}'..'\u{FF}' {
+ if is_emoji(ch) {
+ let desc = format!("{:?} U+{:04X}", ch, ch as u32);
+
+ #[cfg(feature = "unicode-width")]
+ assert_eq!(ch.width().unwrap(), 1, "char: {}", desc);
+
+ #[cfg(not(feature = "unicode-width"))]
+ assert_eq!(ch_width(ch), 1, "char: {}", desc);
+ }
+ }
+
+ // Emojis in the remaining blocks of the Basic Multilingual
+ // Plane (BMP), in the Supplementary Multilingual Plane (SMP),
+ // and in the Supplementary Ideographic Plane (SIP), are all 1
+ // or 2 columns wide when unicode-width is used, and always 2
+ // columns wide otherwise. This includes all of our favorite
+ // emojis such as 😊.
+ for ch in '\u{FF}'..'\u{2FFFF}' {
+ if is_emoji(ch) {
+ let desc = format!("{:?} U+{:04X}", ch, ch as u32);
+
+ #[cfg(feature = "unicode-width")]
+ assert!(ch.width().unwrap() <= 2, "char: {}", desc);
+
+ #[cfg(not(feature = "unicode-width"))]
+ assert_eq!(ch_width(ch), 2, "char: {}", desc);
+ }
+ }
+
+ // The remaining planes contain almost no assigned code points
+ // and thus also no emojis.
+ }
+
+ #[test]
+ fn display_width_works() {
+ assert_eq!("Café Plain".len(), 11); // “é” is two bytes
+ assert_eq!(display_width("Café Plain"), 10);
+ assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
+ }
+
+ #[test]
+ fn display_width_narrow_emojis() {
+ #[cfg(feature = "unicode-width")]
+ assert_eq!(display_width("⁉"), 1);
+
+ // The ⁉ character is above DOUBLE_WIDTH_CUTOFF.
+ #[cfg(not(feature = "unicode-width"))]
+ assert_eq!(display_width("⁉"), 2);
+ }
+
+ #[test]
+ fn display_width_narrow_emojis_variant_selector() {
+ #[cfg(feature = "unicode-width")]
+ assert_eq!(display_width("⁉\u{fe0f}"), 1);
+
+ // The variant selector-16 is also counted.
+ #[cfg(not(feature = "unicode-width"))]
+ assert_eq!(display_width("⁉\u{fe0f}"), 4);
+ }
+
+ #[test]
+ fn display_width_emojis() {
+ assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
+ }
+
+ #[test]
+ fn find_words_empty() {
+ assert_iter_eq!(find_words(""), vec![]);
+ }
+
+ #[test]
+ fn find_words_single_word() {
+ assert_iter_eq!(find_words("foo"), vec![Word::from("foo")]);
+ }
+
+ #[test]
+ fn find_words_two_words() {
+ assert_iter_eq!(
+ find_words("foo bar"),
+ vec![Word::from("foo "), Word::from("bar")]
+ );
+ }
+
+ #[test]
+ fn find_words_multiple_words() {
+ assert_iter_eq!(
+ find_words("foo bar baz"),
+ vec![Word::from("foo "), Word::from("bar "), Word::from("baz")]
+ );
+ }
+
+ #[test]
+ fn find_words_whitespace() {
+ assert_iter_eq!(find_words(" "), vec![Word::from(" ")]);
+ }
+
+ #[test]
+ fn find_words_inter_word_whitespace() {
+ assert_iter_eq!(
+ find_words("foo bar"),
+ vec![Word::from("foo "), Word::from("bar")]
+ )
+ }
+
+ #[test]
+ fn find_words_trailing_whitespace() {
+ assert_iter_eq!(find_words("foo "), vec![Word::from("foo ")]);
+ }
+
+ #[test]
+ fn find_words_leading_whitespace() {
+ assert_iter_eq!(
+ find_words(" foo"),
+ vec![Word::from(" "), Word::from("foo")]
+ );
+ }
+
+ #[test]
+ fn find_words_multi_column_char() {
+ assert_iter_eq!(
+ find_words("\u{1f920}"), // cowboy emoji 🤠
+ vec![Word::from("\u{1f920}")]
+ );
+ }
+
+ #[test]
+ fn find_words_hyphens() {
+ assert_iter_eq!(find_words("foo-bar"), vec![Word::from("foo-bar")]);
+ assert_iter_eq!(
+ find_words("foo- bar"),
+ vec![Word::from("foo- "), Word::from("bar")]
+ );
+ assert_iter_eq!(
+ find_words("foo - bar"),
+ vec![Word::from("foo "), Word::from("- "), Word::from("bar")]
+ );
+ assert_iter_eq!(
+ find_words("foo -bar"),
+ vec![Word::from("foo "), Word::from("-bar")]
+ );
+ }
+
+ #[test]
+ fn split_words_no_words() {
+ assert_iter_eq!(split_words(vec![], 80), vec![]);
+ }
+
+ #[test]
+ fn split_words_empty_word() {
+ assert_iter_eq!(
+ split_words(vec![Word::from(" ")], 80),
+ vec![Word::from(" ")]
+ );
+ }
+
+ #[test]
+ fn split_words_hyphen_splitter() {
+ assert_iter_eq!(
+ split_words(vec![Word::from("foo-bar")], 80),
+ vec![Word::from("foo-"), Word::from("bar")]
+ );
+ }
+
+ #[test]
+ fn split_words_short_line() {
+ // Note that `split_words` does not take the line width into
+ // account, that is the job of `break_words`.
+ assert_iter_eq!(
+ split_words(vec![Word::from("foobar")], 3),
+ vec![Word::from("foobar")]
+ );
+ }
+
+ #[test]
+ fn split_words_adds_penalty() {
+ #[derive(Debug)]
+ struct FixedSplitPoint;
+ impl WordSplitter for FixedSplitPoint {
+ fn split_points(&self, _: &str) -> Vec<usize> {
+ vec![3]
+ }
+ }
+
+ let options = Options::new(80).splitter(FixedSplitPoint);
+ assert_iter_eq!(
+ split_words(vec![Word::from("foobar")].into_iter(), &options),
+ vec![
+ Word {
+ word: "foo",
+ width: 3,
+ whitespace: "",
+ penalty: "-"
+ },
+ Word {
+ word: "bar",
+ width: 3,
+ whitespace: "",
+ penalty: ""
+ }
+ ]
+ );
+
+ assert_iter_eq!(
+ split_words(vec![Word::from("fo-bar")].into_iter(), &options),
+ vec![
+ Word {
+ word: "fo-",
+ width: 3,
+ whitespace: "",
+ penalty: ""
+ },
+ Word {
+ word: "bar",
+ width: 3,
+ whitespace: "",
+ penalty: ""
+ }
+ ]
+ );
+ }
+}
diff --git a/src/core/optimal_fit.rs b/src/core/optimal_fit.rs
new file mode 100644
index 0000000..c18b974
--- /dev/null
+++ b/src/core/optimal_fit.rs
@@ -0,0 +1,228 @@
+use crate::core::Fragment;
+use std::cell::RefCell;
+
+/// Cache for line numbers. This is necessary to avoid an O(n**2)
+/// behavior when computing line numbers in [`wrap_optimal_fit`].
+struct LineNumbers {
+ line_numbers: RefCell<Vec<usize>>,
+}
+
+impl LineNumbers {
+ fn new(size: usize) -> Self {
+ let mut line_numbers = Vec::with_capacity(size);
+ line_numbers.push(0);
+ LineNumbers {
+ line_numbers: RefCell::new(line_numbers),
+ }
+ }
+
+ fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
+ while self.line_numbers.borrow_mut().len() < i + 1 {
+ let pos = self.line_numbers.borrow().len();
+ let line_number = 1 + self.get(minima[pos].0, &minima);
+ self.line_numbers.borrow_mut().push(line_number);
+ }
+
+ self.line_numbers.borrow()[i]
+ }
+}
+
+/// Per-line penalty. This is added for every line, which makes it
+/// expensive to output more lines than the minimum required.
+const NLINE_PENALTY: i32 = 1000;
+
+/// Per-character cost for lines that overflow the target line width.
+///
+/// With a value of 50², every single character costs as much as
+/// leaving a gap of 50 characters behind. This is because we assign
+/// a cost of `gap * gap` to a short line. This means that we can
+/// overflow the line by 1 character in extreme cases:
+///
+/// ```
+/// use textwrap::core::{wrap_optimal_fit, Word};
+///
+/// let short = "foo ";
+/// let long = "x".repeat(50);
+/// let fragments = vec![Word::from(short), Word::from(&long)];
+///
+/// // Perfect fit, both words are on a single line with no overflow.
+/// let wrapped = wrap_optimal_fit(&fragments, |_| short.len() + long.len());
+/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
+///
+/// // The words no longer fit, yet we get a single line back. While
+/// // the cost of overflow (`1 * 2500`) is the same as the cost of the
+/// // gap (`50 * 50 = 2500`), the tie is broken by `NLINE_PENALTY`
+/// // which makes it cheaper to overflow than to use two lines.
+/// let wrapped = wrap_optimal_fit(&fragments, |_| short.len() + long.len() - 1);
+/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
+///
+/// // The cost of overflow would be 2 * 2500, whereas the cost of the
+/// // gap is only `49 * 49 + NLINE_PENALTY = 2401 + 1000 = 3401`. We
+/// // therefore get two lines.
+/// let wrapped = wrap_optimal_fit(&fragments, |_| short.len() + long.len() - 2);
+/// assert_eq!(wrapped, vec![&[Word::from(short)],
+/// &[Word::from(&long)]]);
+/// ```
+///
+/// This only happens if the overflowing word is 50 characters long
+/// _and_ if it happens to overflow the line by exactly one character.
+/// If it overflows by more than one character, the overflow penalty
+/// will quickly outgrow the cost of the gap, as seen above.
+const OVERFLOW_PENALTY: i32 = 50 * 50;
+
+/// The last line is short if it is less than 1/4 of the target width.
+const SHORT_LINE_FRACTION: usize = 4;
+
+/// Penalize a short last line.
+const SHORT_LAST_LINE_PENALTY: i32 = 25;
+
+/// Penalty for lines ending with a hyphen.
+const HYPHEN_PENALTY: i32 = 25;
+
+/// Wrap abstract fragments into lines with an optimal-fit algorithm.
+///
+/// The `line_widths` function maps line numbers (starting from 0) to a
+/// target line width. This can be used to implement hanging indentation.
+///
+/// The fragments must already have been split into the desired
+/// widths, this function will not (and cannot) attempt to split them
+/// further when arranging them into lines.
+///
+/// # Optimal-Fit Algorithm
+///
+/// The algorithm considers all possible break points and picks the
+/// breaks which minimize the gaps at the end of each line. More
+/// precisely, the algorithm assigns a cost or penalty to each break
+/// point, determined by `cost = gap * gap` where `gap = target_width -
+/// line_width`. Shorter lines are thus penalized more heavily since
+/// they leave behind a larger gap.
+///
+/// We can illustrate this with the text “To be, or not to be: that is
+/// the question”. We will be wrapping it in a narrow column with room
+/// for only 10 characters. The [greedy
+/// algorithm](super::wrap_first_fit) will produce these lines, each
+/// annotated with the corresponding penalty:
+///
+/// ```text
+/// "To be, or" 1² = 1
+/// "not to be:" 0² = 0
+/// "that is" 3² = 9
+/// "the" 7² = 49
+/// "question" 2² = 4
+/// ```
+///
+/// We see that line four with “the” leaves a gap of 7 columns, which
+/// gives it a penalty of 49. The sum of the penalties is 63.
+///
+/// There are 10 words, which means that there are `2_u32.pow(9)` or
+/// 512 different ways to typeset it. We can compute
+/// the sum of the penalties for each possible line break and search
+/// for the one with the lowest sum:
+///
+/// ```text
+/// "To be," 4² = 16
+/// "or not to" 1² = 1
+/// "be: that" 2² = 4
+/// "is the" 4² = 16
+/// "question" 2² = 4
+/// ```
+///
+/// The sum of the penalties is 41, which is better than what the
+/// greedy algorithm produced.
+///
+/// Searching through all possible combinations would normally be
+/// prohibitively slow. However, it turns out that the problem can be
+/// formulated as the task of finding column minima in a cost matrix.
+/// This matrix has a special form (totally monotone) which lets us
+/// use a [linear-time algorithm called
+/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
+/// points.
+///
+/// This means that the time complexity remains O(_n_) where _n_ is
+/// the number of words. Compared to
+/// [`wrap_first_fit`](super::wrap_first_fit), this function is about
+/// 4 times slower.
+///
+/// The optimization of per-line costs over the entire paragraph is
+/// inspired by the line breaking algorithm used in TeX, as described
+/// in the 1981 article [_Breaking Paragraphs into
+/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)
+/// by Knuth and Plass. The implementation here is based on [Python
+/// code by David
+/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
+///
+/// **Note:** Only available when the `smawk` Cargo feature is
+/// enabled.
+pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
+ fragments: &'a [T],
+ line_widths: F,
+) -> Vec<&'a [T]> {
+ let mut widths = Vec::with_capacity(fragments.len() + 1);
+ let mut width = 0;
+ widths.push(width);
+ for fragment in fragments {
+ width += fragment.width() + fragment.whitespace_width();
+ widths.push(width);
+ }
+
+ let line_numbers = LineNumbers::new(fragments.len());
+
+ let minima = smawk::online_column_minima(0, widths.len(), |minima, i, j| {
+ // Line number for fragment `i`.
+ let line_number = line_numbers.get(i, &minima);
+ let target_width = std::cmp::max(1, line_widths(line_number));
+
+ // Compute the width of a line spanning fragments[i..j] in
+ // constant time. We need to adjust widths[j] by subtracting
+ // the whitespace of fragments[j - 1] and then add the penalty.
+ let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
+ + fragments[j - 1].penalty_width();
+
+ // We compute the cost of the line containing fragments[i..j]. We
+ // start with minima[i].1, which is the optimal cost for
+ // breaking before fragments[i].
+ //
+ // First, every extra line costs NLINE_PENALTY.
+ let mut cost = minima[i].1 + NLINE_PENALTY;
+
+ // Next, we add a penalty depending on the line length.
+ if line_width > target_width {
+ // Lines that overflow get a hefty penalty.
+ let overflow = (line_width - target_width) as i32;
+ cost += overflow * OVERFLOW_PENALTY;
+ } else if j < fragments.len() {
+ // Other lines (except for the last line) get a milder
+ // penalty which depends on the size of the gap.
+ let gap = (target_width - line_width) as i32;
+ cost += gap * gap;
+ } else if i + 1 == j && line_width < target_width / SHORT_LINE_FRACTION {
+ // The last line can have any size gap, but we do add a
+ // penalty if the line is very short (typically because it
+ // contains just a single word).
+ cost += SHORT_LAST_LINE_PENALTY;
+ }
+
+ // Finally, we discourage hyphens.
+ if fragments[j - 1].penalty_width() > 0 {
+ // TODO: this should use a penalty value from the fragment
+ // instead.
+ cost += HYPHEN_PENALTY;
+ }
+
+ cost
+ });
+
+ let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
+ let mut pos = fragments.len();
+ loop {
+ let prev = minima[pos].0;
+ lines.push(&fragments[prev..pos]);
+ pos = prev;
+ if pos == 0 {
+ break;
+ }
+ }
+
+ lines.reverse();
+ lines
+}
diff --git a/src/indentation.rs b/src/indentation.rs
index ca19249..cc2351f 100644
--- a/src/indentation.rs
+++ b/src/indentation.rs
@@ -18,8 +18,7 @@
/// ");
/// ```
///
-/// Empty lines (lines consisting only of whitespace) are not indented
-/// and the whitespace is replaced by a single newline (`\n`):
+/// Lines consisting only of whitespace are kept unchanged:
///
/// ```
/// use textwrap::indent;
@@ -34,7 +33,7 @@
/// ->Foo
///
/// ->Bar
-///
+/// \t
/// ->Baz
/// ");
/// ```
@@ -45,17 +44,21 @@
/// ```
/// use textwrap::indent;
///
-/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo \n");
+/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo ");
/// ```
pub fn indent(s: &str, prefix: &str) -> String {
let mut result = String::new();
- for line in s.lines() {
- if line.chars().any(|c| !c.is_whitespace()) {
+
+ for (idx, line) in s.split('\n').enumerate() {
+ if idx > 0 {
+ result.push('\n');
+ }
+ if !line.trim().is_empty() {
result.push_str(prefix);
- result.push_str(line);
}
- result.push('\n');
+ result.push_str(line);
}
+
result
}
@@ -138,12 +141,6 @@ pub fn dedent(s: &str) -> String {
mod tests {
use super::*;
- /// Add newlines. Ensures that the final line in the vector also
- /// has a newline.
- fn add_nl(lines: &[&str]) -> String {
- lines.join("\n") + "\n"
- }
-
#[test]
fn indent_empty() {
assert_eq!(indent("\n", " "), "\n");
@@ -152,27 +149,35 @@ mod tests {
#[test]
#[rustfmt::skip]
fn indent_nonempty() {
- let x = vec![" foo",
- "bar",
- " baz"];
- let y = vec!["// foo",
- "//bar",
- "// baz"];
- assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
+ let text = [
+ " foo\n",
+ "bar\n",
+ " baz\n",
+ ].join("");
+ let expected = [
+ "// foo\n",
+ "//bar\n",
+ "// baz\n",
+ ].join("");
+ assert_eq!(indent(&text, "//"), expected);
}
#[test]
#[rustfmt::skip]
fn indent_empty_line() {
- let x = vec![" foo",
- "bar",
- "",
- " baz"];
- let y = vec!["// foo",
- "//bar",
- "",
- "// baz"];
- assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
+ let text = [
+ " foo",
+ "bar",
+ "",
+ " baz",
+ ].join("\n");
+ let expected = [
+ "// foo",
+ "//bar",
+ "",
+ "// baz",
+ ].join("\n");
+ assert_eq!(indent(&text, "//"), expected);
}
#[test]
@@ -183,112 +188,148 @@ mod tests {
#[test]
#[rustfmt::skip]
fn dedent_multi_line() {
- let x = vec![" foo",
- " bar",
- " baz"];
- let y = vec![" foo",
- "bar",
- " baz"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ let x = [
+ " foo",
+ " bar",
+ " baz",
+ ].join("\n");
+ let y = [
+ " foo",
+ "bar",
+ " baz"
+ ].join("\n");
+ assert_eq!(dedent(&x), y);
}
#[test]
#[rustfmt::skip]
fn dedent_empty_line() {
- let x = vec![" foo",
- " bar",
- " ",
- " baz"];
- let y = vec![" foo",
- "bar",
- "",
- " baz"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ let x = [
+ " foo",
+ " bar",
+ " ",
+ " baz"
+ ].join("\n");
+ let y = [
+ " foo",
+ "bar",
+ "",
+ " baz"
+ ].join("\n");
+ assert_eq!(dedent(&x), y);
}
#[test]
#[rustfmt::skip]
fn dedent_blank_line() {
- let x = vec![" foo",
- "",
- " bar",
- " foo",
- " bar",
- " baz"];
- let y = vec!["foo",
- "",
- " bar",
- " foo",
- " bar",
- " baz"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ let x = [
+ " foo",
+ "",
+ " bar",
+ " foo",
+ " bar",
+ " baz",
+ ].join("\n");
+ let y = [
+ "foo",
+ "",
+ " bar",
+ " foo",
+ " bar",
+ " baz",
+ ].join("\n");
+ assert_eq!(dedent(&x), y);
}
#[test]
#[rustfmt::skip]
fn dedent_whitespace_line() {
- let x = vec![" foo",
- " ",
- " bar",
- " foo",
- " bar",
- " baz"];
- let y = vec!["foo",
- "",
- " bar",
- " foo",
- " bar",
- " baz"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ let x = [
+ " foo",
+ " ",
+ " bar",
+ " foo",
+ " bar",
+ " baz",
+ ].join("\n");
+ let y = [
+ "foo",
+ "",
+ " bar",
+ " foo",
+ " bar",
+ " baz",
+ ].join("\n");
+ assert_eq!(dedent(&x), y);
}
#[test]
#[rustfmt::skip]
fn dedent_mixed_whitespace() {
- let x = vec!["\tfoo",
- " bar"];
- let y = vec!["\tfoo",
- " bar"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ let x = [
+ "\tfoo",
+ " bar",
+ ].join("\n");
+ let y = [
+ "\tfoo",
+ " bar",
+ ].join("\n");
+ assert_eq!(dedent(&x), y);
}
#[test]
#[rustfmt::skip]
fn dedent_tabbed_whitespace() {
- let x = vec!["\t\tfoo",
- "\t\t\tbar"];
- let y = vec!["foo",
- "\tbar"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ let x = [
+ "\t\tfoo",
+ "\t\t\tbar",
+ ].join("\n");
+ let y = [
+ "foo",
+ "\tbar",
+ ].join("\n");
+ assert_eq!(dedent(&x), y);
}
#[test]
#[rustfmt::skip]
fn dedent_mixed_tabbed_whitespace() {
- let x = vec!["\t \tfoo",
- "\t \t\tbar"];
- let y = vec!["foo",
- "\tbar"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ let x = [
+ "\t \tfoo",
+ "\t \t\tbar",
+ ].join("\n");
+ let y = [
+ "foo",
+ "\tbar",
+ ].join("\n");
+ assert_eq!(dedent(&x), y);
}
#[test]
#[rustfmt::skip]
fn dedent_mixed_tabbed_whitespace2() {
- let x = vec!["\t \tfoo",
- "\t \tbar"];
- let y = vec!["\tfoo",
- " \tbar"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ let x = [
+ "\t \tfoo",
+ "\t \tbar",
+ ].join("\n");
+ let y = [
+ "\tfoo",
+ " \tbar",
+ ].join("\n");
+ assert_eq!(dedent(&x), y);
}
#[test]
#[rustfmt::skip]
fn dedent_preserve_no_terminating_newline() {
- let x = vec![" foo",
- " bar"].join("\n");
- let y = vec!["foo",
- " bar"].join("\n");
+ let x = [
+ " foo",
+ " bar",
+ ].join("\n");
+ let y = [
+ "foo",
+ " bar",
+ ].join("\n");
assert_eq!(dedent(&x), y);
}
}
diff --git a/src/lib.rs b/src/lib.rs
index 7bd3928..1781b63 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,10 @@
-//! `textwrap` provides functions for word wrapping and filling text.
+//! The textwrap library provides functions for word wrapping and
+//! indenting text.
//!
-//! Wrapping text can be very useful in commandline programs where you
-//! want to format dynamic output nicely so it looks good in a
+//! # Wrapping Text
+//!
+//! Wrapping text can be very useful in command-line programs where
+//! you want to format dynamic output nicely so it looks good in a
//! terminal. A quick example:
//!
//! ```no_run
@@ -19,20 +22,20 @@
//! wrapping text.
//! ```
//!
-//! If you enable the `hyphenation` feature, you can get automatic
-//! hyphenation for a number of languages:
+//! If you enable the `hyphenation` Cargo feature, you can get
+//! automatic hyphenation for a number of languages:
//!
//! ```no_run
//! # #[cfg(feature = "hyphenation")]
//! use hyphenation::{Language, Load, Standard};
-//! use textwrap::Wrapper;
+//! use textwrap::{fill, Options};
//!
//! # #[cfg(feature = "hyphenation")]
//! fn main() {
//! let text = "textwrap: a small library for wrapping text.";
//! let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
-//! let wrapper = Wrapper::with_splitter(18, dictionary);
-//! println!("{}", wrapper.fill(text));
+//! let options = Options::new(18).splitter(dictionary);
+//! println!("{}", fill(text, &options));
//! }
//!
//! # #[cfg(not(feature = "hyphenation"))]
@@ -47,16 +50,20 @@
//! ping text.
//! ```
//!
-//! # Wrapping Strings at Compile Time
+//! See also the [`unfill`] and [`refill`] functions which allow you to
+//! manipulate already wrapped text.
+//!
+//! ## Wrapping Strings at Compile Time
//!
//! If your strings are known at compile time, please take a look at
-//! the procedural macros from the [`textwrap-macros` crate].
+//! the procedural macros from the [textwrap-macros] crate.
//!
-//! # Displayed Width vs Byte Size
+//! ## Displayed Width vs Byte Size
//!
//! To word wrap text, one must know the width of each word so one can
-//! know when to break lines. This library measures the width of text
-//! using the [displayed width][unicode-width], not the size in bytes.
+//! know when to break lines. This library will by default measure the
+//! width of text using the _displayed width_, not the size in bytes.
+//! The `unicode-width` Cargo feature controls this.
//!
//! This is important for non-ASCII text. ASCII characters such as `a`
//! and `!` are simple and take up one column each. This means that
@@ -67,47 +74,104 @@
//!
//! This is why we take care to use the displayed width instead of the
//! byte count when computing line lengths. All functions in this
-//! library handle Unicode characters like this.
+//! library handle Unicode characters like this when the
+//! `unicode-width` Cargo feature is enabled (it is enabled by
+//! default).
+//!
+//! # Indentation and Dedentation
+//!
+//! The textwrap library also offers functions for adding a prefix to
+//! every line of a string and for removing leading whitespace. As an
+//! example, the [`indent`] function allows you to turn lines of text
+//! into a bullet list:
+//!
+//! ```
+//! let before = "
+//! foo
+//! bar
+//! baz
+//! ";
+//! let after = "
+//! * foo
+//! * bar
+//! * baz
+//! ";
+//! assert_eq!(textwrap::indent(before, "* "), after);
+//! ```
+//!
+//! Removing leading whitespace is done with [`dedent`]:
+//!
+//! ```
+//! let before = "
+//! Some
+//! indented
+//! text
+//! ";
+//! let after = "
+//! Some
+//! indented
+//! text
+//! ";
+//! assert_eq!(textwrap::dedent(before), after);
+//! ```
//!
//! # Cargo Features
//!
-//! The library has two optional features:
+//! The textwrap library can be slimmed down as needed via a number of
+//! Cargo features. This means you only pay for the features you
+//! actually use.
//!
-//! * `terminal_size`: enables automatic detection of the terminal
-//! width via the [terminal_size][] crate. See the
-//! [`Wrapper::with_termwidth`] constructor for details.
+//! The full dependency graph, where dashed lines indicate optional
+//! dependencies, is shown below:
+//!
+//! <img src="https://raw.githubusercontent.com/mgeisler/textwrap/master/images/textwrap-0.13.3.svg">
+//!
+//! ## Default Features
//!
-//! * `hyphenation`: enables language-sentive hyphenation via the
-//! [hyphenation][] crate. See the [`WordSplitter`] trait for
+//! These features are enabled by default:
+//!
+//! * `unicode-width`: enables correct width computation of non-ASCII
+//! characters via the [unicode-width] crate. Without this feature,
+//! every [`char`] is 1 column wide, except for emojis which are 2
+//! columns wide. See the [`core::display_width`] function for
//! details.
//!
-//! [`textwrap-macros` crate]: https://crates.io/crates/textwrap-macros
+//! This feature can be disabled if you only need to wrap ASCII
+//! text, or if the functions in [`core`] are used directly with
+//! [`core::Fragment`]s for which the widths have been computed in
+//! other ways.
+//!
+//! * `smawk`: enables linear-time wrapping of the whole paragraph via
+//! the [smawk] crate. See the [`core::wrap_optimal_fit`] function
+//! for details on the optimal-fit algorithm.
+//!
+//! This feature can be disabled if you only ever intend to use
+//! [`core::wrap_first_fit`].
+//!
+//! ## Optional Features
+//!
+//! These Cargo features enable new functionality:
+//!
+//! * `terminal_size`: enables automatic detection of the terminal
+//! width via the [terminal_size] crate. See the
+//! [`Options::with_termwidth`] constructor for details.
+//!
+//! * `hyphenation`: enables language-sensitive hyphenation via the
+//! [hyphenation] crate. See the [`WordSplitter`] trait for details.
+//!
//! [unicode-width]: https://docs.rs/unicode-width/
-//! [terminal_size]: https://crates.io/crates/terminal_size
-//! [hyphenation]: https://crates.io/crates/hyphenation
-//! [`Wrapper::with_termwidth`]: struct.Wrapper.html#method.with_termwidth
-//! [`WordSplitter`]: trait.WordSplitter.html
+//! [smawk]: https://docs.rs/smawk/
+//! [textwrap-macros]: https://docs.rs/textwrap-macros/
+//! [terminal_size]: https://docs.rs/terminal_size/
+//! [hyphenation]: https://docs.rs/hyphenation/
-#![doc(html_root_url = "https://docs.rs/textwrap/0.12.1")]
+#![doc(html_root_url = "https://docs.rs/textwrap/0.13.3")]
+#![forbid(unsafe_code)] // See https://github.com/mgeisler/textwrap/issues/210
#![deny(missing_docs)]
#![deny(missing_debug_implementations)]
#![allow(clippy::redundant_field_names)]
use std::borrow::Cow;
-use std::str::CharIndices;
-
-use unicode_width::UnicodeWidthChar;
-use unicode_width::UnicodeWidthStr;
-
-/// A non-breaking space.
-const NBSP: char = '\u{a0}';
-
-/// The CSI or "Control Sequence Introducer" introduces an ANSI escape
-/// sequence. This is typically used for colored text and will be
-/// ignored when computing the text width.
-const CSI: (char, char) = ('\u{1b}', '[');
-/// The final bytes of an ANSI escape sequence must be in this range.
-const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';
mod indentation;
pub use crate::indentation::dedent;
@@ -116,88 +180,245 @@ pub use crate::indentation::indent;
mod splitting;
pub use crate::splitting::{HyphenSplitter, NoHyphenation, WordSplitter};
-/// A Wrapper holds settings for wrapping and filling text. Use it
-/// when the convenience [`wrap_iter`], [`wrap`] and [`fill`] functions
-/// are not flexible enough.
-///
-/// [`wrap_iter`]: fn.wrap_iter.html
-/// [`wrap`]: fn.wrap.html
-/// [`fill`]: fn.fill.html
-///
-/// The algorithm used by the `WrapIter` iterator (returned from the
-/// `wrap_iter` method) works by doing successive partial scans over
-/// words in the input string (where each single scan yields a single
-/// line) so that the overall time and memory complexity is O(*n*) where
-/// *n* is the length of the input string.
-#[derive(Clone, Debug)]
-pub struct Wrapper<'a, S: WordSplitter> {
+pub mod core;
+
+/// Holds settings for wrapping and filling text.
+#[derive(Debug, Clone)]
+pub struct Options<'a, S: ?Sized = Box<dyn WordSplitter>> {
/// The width in columns at which the text will be wrapped.
pub width: usize,
- /// Indentation used for the first line of output.
+ /// Indentation used for the first line of output. See the
+ /// [`Options::initial_indent`] method.
pub initial_indent: &'a str,
- /// Indentation used for subsequent lines of output.
+ /// Indentation used for subsequent lines of output. See the
+ /// [`Options::subsequent_indent`] method.
pub subsequent_indent: &'a str,
/// Allow long words to be broken if they cannot fit on a line.
/// When set to `false`, some lines may be longer than
- /// `self.width`.
+ /// `self.width`. See the [`Options::break_words`] method.
pub break_words: bool,
- /// The method for splitting words. If the `hyphenation` feature
- /// is enabled, you can use a `hyphenation::Standard` dictionary
- /// here to get language-aware hyphenation.
+ /// Wrapping algorithm to use, see [`core::WrapAlgorithm`] for
+ /// details.
+ pub wrap_algorithm: core::WrapAlgorithm,
+ /// The method for splitting words. This can be used to prohibit
+ /// splitting words on hyphens, or it can be used to implement
+ /// language-aware machine hyphenation. Please see the
+ /// [`WordSplitter`] trait for details.
pub splitter: S,
}
-impl<'a> Wrapper<'a, HyphenSplitter> {
- /// Create a new Wrapper for wrapping at the specified width. By
- /// default, we allow words longer than `width` to be broken. A
- /// [`HyphenSplitter`] will be used by default for splitting
- /// words. See the [`WordSplitter`] trait for other options.
+impl<'a, S: ?Sized> From<&'a Options<'a, S>> for Options<'a, &'a S> {
+ fn from(options: &'a Options<'a, S>) -> Self {
+ Self {
+ width: options.width,
+ initial_indent: options.initial_indent,
+ subsequent_indent: options.subsequent_indent,
+ break_words: options.break_words,
+ wrap_algorithm: options.wrap_algorithm,
+ splitter: &options.splitter,
+ }
+ }
+}
+
+impl<'a> From<usize> for Options<'a, HyphenSplitter> {
+ fn from(width: usize) -> Self {
+ Options::new(width)
+ }
+}
+
+/// Constructors for [`Options`] using the default [`HyphenSplitter`] (static dispatch).
+impl<'a> Options<'a, HyphenSplitter> {
+ /// Creates a new [`Options`] with the specified width and static
+ /// dispatch using the [`HyphenSplitter`]. Equivalent to
+ ///
+ /// ```
+ /// # use textwrap::{Options, HyphenSplitter, WordSplitter};
+ /// # let width = 80;
+ /// # let actual = Options::new(width);
+ /// # let expected =
+ /// Options {
+ /// width: width,
+ /// initial_indent: "",
+ /// subsequent_indent: "",
+ /// break_words: true,
+ /// #[cfg(feature = "smawk")]
+ /// wrap_algorithm: textwrap::core::WrapAlgorithm::OptimalFit,
+ /// #[cfg(not(feature = "smawk"))]
+ /// wrap_algorithm: textwrap::core::WrapAlgorithm::FirstFit,
+ /// splitter: HyphenSplitter,
+ /// }
+ /// # ;
+ /// # assert_eq!(actual.width, expected.width);
+ /// # assert_eq!(actual.initial_indent, expected.initial_indent);
+ /// # assert_eq!(actual.subsequent_indent, expected.subsequent_indent);
+ /// # assert_eq!(actual.break_words, expected.break_words);
+ /// # assert_eq!(actual.wrap_algorithm, expected.wrap_algorithm);
+ /// # let expected_coerced: Options<'static, HyphenSplitter> = expected;
+ /// ```
+ ///
+ /// Note that the default wrap algorithm changes based on the
+ /// `smawk` Cargo feature. The best available algorithm is used by
+ /// default.
+ ///
+ /// Static dispatch means here that the splitter is stored as-is
+ /// and the type is known at compile-time. Thus the returned value
+ /// is actually an `Options<HyphenSplitter>`.
+ ///
+ /// Dynamic dispatch, on the other hand, means that the splitter is
+ /// stored as a trait object for instance in a `Box<dyn
+ /// WordSplitter>`. This way the splitter's inner type can be
+ /// changed without changing the type of this struct, which then
+ /// would be just `Options` as a short cut for `Options<Box<dyn
+ /// WordSplitter>>`.
+ ///
+ /// The value and type of the splitter can be chosen from the
+ /// start using the [`Options::with_splitter`] constructor or
+ /// changed afterwards using the [`Options::splitter`] method.
+ /// Whether static or dynamic dispatch is used, depends on whether
+ /// these functions are given a boxed [`WordSplitter`] or not.
+ /// Take for example:
+ ///
+ /// ```
+ /// use textwrap::{HyphenSplitter, NoHyphenation, Options};
+ /// # use textwrap::{WordSplitter};
+ /// # let width = 80;
+ ///
+ /// // uses HyphenSplitter with static dispatch
+ /// // the actual type: Options<HyphenSplitter>
+ /// let opt = Options::new(width);
+ /// # let opt_coerce: Options<HyphenSplitter> = opt;
+ ///
+ /// // uses NoHyphenation with static dispatch
+ /// // the actual type: Options<NoHyphenation>
+ /// let opt = Options::new(width).splitter(NoHyphenation);
+ /// # let opt_coerce: Options<NoHyphenation> = opt;
+ ///
+ /// // uses HyphenSplitter with dynamic dispatch
+ /// // the actual type: Options<Box<dyn WordSplitter>>
+ /// let opt: Options = Options::new(width).splitter(Box::new(HyphenSplitter));
+ /// # let opt_coerce: Options<Box<dyn WordSplitter>> = opt;
///
- /// [`HyphenSplitter`]: struct.HyphenSplitter.html
- /// [`WordSplitter`]: trait.WordSplitter.html
- pub fn new(width: usize) -> Wrapper<'a, HyphenSplitter> {
- Wrapper::with_splitter(width, HyphenSplitter)
+ /// // uses NoHyphenation with dynamic dispatch
+ /// // the actual type: Options<Box<dyn WordSplitter>>
+ /// let opt: Options = Options::new(width).splitter(Box::new(NoHyphenation));
+ /// # let opt_coerce: Options<Box<dyn WordSplitter>> = opt;
+ /// ```
+ ///
+ /// Notice that the last two variables have the same type, despite
+ /// the different `WordSplitter` in use. Thus dynamic dispatch
+ /// allows changing the splitter at run-time without changing the
+ /// variable's type.
+ pub const fn new(width: usize) -> Self {
+ Options::with_splitter(width, HyphenSplitter)
}
- /// Create a new Wrapper for wrapping text at the current terminal
- /// width. If the terminal width cannot be determined (typically
- /// because the standard input and output is not connected to a
- /// terminal), a width of 80 characters will be used. Other
- /// settings use the same defaults as `Wrapper::new`.
+ /// Creates a new [`Options`] with `width` set to the current
+ /// terminal width. If the terminal width cannot be determined
+ /// (typically because the standard input and output is not
+ /// connected to a terminal), a width of 80 characters will be
+ /// used. Other settings use the same defaults as
+ /// [`Options::new`].
///
/// Equivalent to:
///
/// ```no_run
- /// # #![allow(unused_variables)]
- /// use textwrap::{Wrapper, termwidth};
+ /// use textwrap::{termwidth, Options};
///
- /// let wrapper = Wrapper::new(termwidth());
+ /// let options = Options::new(termwidth());
/// ```
///
/// **Note:** Only available when the `terminal_size` feature is
/// enabled.
#[cfg(feature = "terminal_size")]
- pub fn with_termwidth() -> Wrapper<'a, HyphenSplitter> {
- Wrapper::new(termwidth())
+ pub fn with_termwidth() -> Self {
+ Self::new(termwidth())
}
}
-impl<'a, S: WordSplitter> Wrapper<'a, S> {
- /// Use the given [`WordSplitter`] to create a new Wrapper for
- /// wrapping at the specified width. By default, we allow words
- /// longer than `width` to be broken.
+impl<'a, S> Options<'a, S> {
+ /// Creates a new [`Options`] with the specified width and
+ /// splitter. Equivalent to
+ ///
+ /// ```
+ /// # use textwrap::{Options, NoHyphenation, HyphenSplitter};
+ /// # const splitter: NoHyphenation = NoHyphenation;
+ /// # const width: usize = 80;
+ /// # const actual: Options<'static, NoHyphenation> = Options::with_splitter(width, splitter);
+ /// # let expected =
+ /// Options {
+ /// width: width,
+ /// initial_indent: "",
+ /// subsequent_indent: "",
+ /// break_words: true,
+ /// #[cfg(feature = "smawk")]
+ /// wrap_algorithm: textwrap::core::WrapAlgorithm::OptimalFit,
+ /// #[cfg(not(feature = "smawk"))]
+ /// wrap_algorithm: textwrap::core::WrapAlgorithm::FirstFit,
+ /// splitter: splitter,
+ /// }
+ /// # ;
+ /// # assert_eq!(actual.width, expected.width);
+ /// # assert_eq!(actual.initial_indent, expected.initial_indent);
+ /// # assert_eq!(actual.subsequent_indent, expected.subsequent_indent);
+ /// # assert_eq!(actual.break_words, expected.break_words);
+ /// # assert_eq!(actual.wrap_algorithm, expected.wrap_algorithm);
+ /// # let expected_coerced: Options<'static, NoHyphenation> = expected;
+ /// ```
+ ///
+ /// This constructor allows you to specify the splitter to be used. It
+ /// is like a short-cut for `Options::new(w).splitter(s)`, but
+ /// this function is a `const fn`. The given splitter may be in a
+ /// [`Box`], which then can be coerced into a trait object for
+ /// dynamic dispatch:
+ ///
+ /// ```
+ /// use textwrap::{HyphenSplitter, NoHyphenation, Options};
+ /// # use textwrap::{WordSplitter};
+ /// # const width: usize = 80;
+ ///
+ /// // This opt contains a boxed trait object as splitter.
+ /// // The type annotation is important, otherwise it will not be a trait object
+ /// let mut opt: Options = Options::with_splitter(width, Box::new(NoHyphenation));
+ /// // Its type is actually: `Options<Box<dyn WordSplitter>>`:
+ /// let opt_coerced: Options<Box<dyn WordSplitter>> = opt;
+ ///
+ /// // Thus, it can be overridden with a different splitter.
+ /// opt = Options::with_splitter(width, Box::new(HyphenSplitter));
+ /// // Now, containing a `HyphenSplitter` instead.
+ /// ```
+ ///
+ /// Since the splitter is given by value, which determines the
+ /// generic type parameter, it can be used to produce both an
+ /// [`Options`] with static and dynamic dispatch, respectively.
+ /// While dynamic dispatch allows changing the type of the inner
+ /// splitter at run time as seen above, static dispatch especially
+ /// can store the splitter directly, without the need for a box.
+ /// This in turn allows it to be used in constant and static
+ /// context:
+ ///
+ /// ```
+ /// use textwrap::{HyphenSplitter, Options};
+ /// # const width: usize = 80;
///
- /// [`WordSplitter`]: trait.WordSplitter.html
- pub fn with_splitter(width: usize, splitter: S) -> Wrapper<'a, S> {
- Wrapper {
- width: width,
+ /// const FOO: Options<HyphenSplitter> = Options::with_splitter(width, HyphenSplitter);
+ /// static BAR: Options<HyphenSplitter> = FOO;
+ /// ```
+ pub const fn with_splitter(width: usize, splitter: S) -> Self {
+ Options {
+ width,
initial_indent: "",
subsequent_indent: "",
break_words: true,
+ #[cfg(feature = "smawk")]
+ wrap_algorithm: core::WrapAlgorithm::OptimalFit,
+ #[cfg(not(feature = "smawk"))]
+ wrap_algorithm: core::WrapAlgorithm::FirstFit,
splitter: splitter,
}
}
+}
+impl<'a, S: WordSplitter> Options<'a, S> {
/// Change [`self.initial_indent`]. The initial indentation is
/// used on the very first line of output.
///
@@ -206,16 +427,18 @@ impl<'a, S: WordSplitter> Wrapper<'a, S> {
/// Classic paragraph indentation can be achieved by specifying an
/// initial indentation and wrapping each paragraph by itself:
///
- /// ```no_run
- /// # #![allow(unused_variables)]
- /// use textwrap::Wrapper;
+ /// ```
+ /// use textwrap::{Options, wrap};
///
- /// let wrapper = Wrapper::new(15).initial_indent(" ");
+ /// let options = Options::new(16).initial_indent(" ");
+ /// assert_eq!(wrap("This is a little example.", options),
+ /// vec![" This is a",
+ /// "little example."]);
/// ```
///
/// [`self.initial_indent`]: #structfield.initial_indent
- pub fn initial_indent(self, indent: &'a str) -> Wrapper<'a, S> {
- Wrapper {
+ pub fn initial_indent(self, indent: &'a str) -> Self {
+ Options {
initial_indent: indent,
..self
}
@@ -229,18 +452,29 @@ impl<'a, S: WordSplitter> Wrapper<'a, S> {
/// Combining initial and subsequent indentation lets you format a
/// single paragraph as a bullet list:
///
- /// ```no_run
- /// # #![allow(unused_variables)]
- /// use textwrap::Wrapper;
+ /// ```
+ /// use textwrap::{Options, wrap};
///
- /// let wrapper = Wrapper::new(15)
+ /// let options = Options::new(12)
/// .initial_indent("* ")
/// .subsequent_indent(" ");
+ /// #[cfg(feature = "smawk")]
+ /// assert_eq!(wrap("This is a little example.", options),
+ /// vec!["* This is",
+ /// " a little",
+ /// " example."]);
+ ///
+ /// // Without the `smawk` feature, the wrapping is a little different:
+ /// #[cfg(not(feature = "smawk"))]
+ /// assert_eq!(wrap("This is a little example.", options),
+ /// vec!["* This is a",
+ /// " little",
+ /// " example."]);
/// ```
///
/// [`self.subsequent_indent`]: #structfield.subsequent_indent
- pub fn subsequent_indent(self, indent: &'a str) -> Wrapper<'a, S> {
- Wrapper {
+ pub fn subsequent_indent(self, indent: &'a str) -> Self {
+ Options {
subsequent_indent: indent,
..self
}
@@ -250,524 +484,687 @@ impl<'a, S: WordSplitter> Wrapper<'a, S> {
/// than `self.width` can be broken, or if they will be left
/// sticking out into the right margin.
///
- /// [`self.break_words`]: #structfield.break_words
- pub fn break_words(self, setting: bool) -> Wrapper<'a, S> {
- Wrapper {
- break_words: setting,
- ..self
- }
- }
-
- /// Fill a line of text at `self.width` characters.
- ///
- /// The result is a string with newlines between each line. Use
- /// the `wrap` method if you need access to the individual lines.
- ///
- /// # Complexities
- ///
- /// This method simply joins the lines produced by `wrap_iter`. As
- /// such, it inherits the O(*n*) time and memory complexity where
- /// *n* is the input string length.
- ///
- /// # Examples
- ///
- /// ```
- /// use textwrap::Wrapper;
- ///
- /// let wrapper = Wrapper::new(15);
- /// assert_eq!(wrapper.fill("Memory safety without garbage collection."),
- /// "Memory safety\nwithout garbage\ncollection.");
- /// ```
- pub fn fill(&self, s: &str) -> String {
- // This will avoid reallocation in simple cases (no
- // indentation, no hyphenation).
- let mut result = String::with_capacity(s.len());
-
- for (i, line) in self.wrap_iter(s).enumerate() {
- if i > 0 {
- result.push('\n');
- }
- result.push_str(&line);
- }
-
- result
- }
-
- /// Wrap a line of text at `self.width` characters.
- ///
- /// # Complexities
- ///
- /// This method simply collects the lines produced by `wrap_iter`.
- /// As such, it inherits the O(*n*) overall time and memory
- /// complexity where *n* is the input string length.
- ///
/// # Examples
///
/// ```
- /// use textwrap::Wrapper;
- ///
- /// let wrap15 = Wrapper::new(15);
- /// assert_eq!(wrap15.wrap("Concurrency without data races."),
- /// vec!["Concurrency",
- /// "without data",
- /// "races."]);
- ///
- /// let wrap20 = Wrapper::new(20);
- /// assert_eq!(wrap20.wrap("Concurrency without data races."),
- /// vec!["Concurrency without",
- /// "data races."]);
- /// ```
- ///
- /// Notice that newlines in the input are preserved. This means
- /// that they force a line break, regardless of how long the
- /// current line is:
+ /// use textwrap::{wrap, Options};
///
+ /// let options = Options::new(4).break_words(true);
+ /// assert_eq!(wrap("This is a little example.", options),
+ /// vec!["This",
+ /// "is a",
+ /// "litt",
+ /// "le",
+ /// "exam",
+ /// "ple."]);
/// ```
- /// use textwrap::Wrapper;
///
- /// let wrapper = Wrapper::new(40);
- /// assert_eq!(wrapper.wrap("First line.\nSecond line."),
- /// vec!["First line.", "Second line."]);
- /// ```
- ///
- pub fn wrap(&self, s: &'a str) -> Vec<Cow<'a, str>> {
- self.wrap_iter(s).collect::<Vec<_>>()
+ /// [`self.break_words`]: #structfield.break_words
+ pub fn break_words(self, setting: bool) -> Self {
+ Options {
+ break_words: setting,
+ ..self
+ }
}
- /// Lazily wrap a line of text at `self.width` characters.
- ///
- /// The [`WordSplitter`] stored in [`self.splitter`] is used
- /// whenever when a word is too large to fit on the current line.
- /// By changing the field, different hyphenation strategies can be
- /// implemented.
- ///
- /// # Complexities
- ///
- /// This method returns a [`WrapIter`] iterator which borrows this
- /// `Wrapper`. The algorithm used has a linear complexity, so
- /// getting the next line from the iterator will take O(*w*) time,
- /// where *w* is the wrapping width. Fully processing the iterator
- /// will take O(*n*) time for an input string of length *n*.
- ///
- /// When no indentation is used, each line returned is a slice of
- /// the input string and the memory overhead is thus constant.
- /// Otherwise new memory is allocated for each line returned.
- ///
- /// # Examples
- ///
- /// ```
- /// use std::borrow::Cow::Borrowed;
- /// use textwrap::Wrapper;
+ /// Change [`self.wrap_algorithm`].
///
- /// let wrap20 = Wrapper::new(20);
- /// let mut wrap20_iter = wrap20.wrap_iter("Zero-cost abstractions.");
- /// assert_eq!(wrap20_iter.next(), Some(Borrowed("Zero-cost")));
- /// assert_eq!(wrap20_iter.next(), Some(Borrowed("abstractions.")));
- /// assert_eq!(wrap20_iter.next(), None);
+ /// See [`core::WrapAlgorithm`] for details on the choices.
///
- /// let wrap25 = Wrapper::new(25);
- /// let mut wrap25_iter = wrap25.wrap_iter("Zero-cost abstractions.");
- /// assert_eq!(wrap25_iter.next(), Some(Borrowed("Zero-cost abstractions.")));
- /// assert_eq!(wrap25_iter.next(), None);
- /// ```
- ///
- /// [`self.splitter`]: #structfield.splitter
- /// [`WordSplitter`]: trait.WordSplitter.html
- /// [`WrapIter`]: struct.WrapIter.html
- pub fn wrap_iter<'w>(&'w self, s: &'a str) -> WrapIter<'w, 'a, S> {
- WrapIter {
- wrapper: self,
- inner: WrapIterImpl::new(self, s),
+ /// [`self.wrap_algorithm`]: #structfield.wrap_algorithm
+ pub fn wrap_algorithm(self, wrap_algorithm: core::WrapAlgorithm) -> Self {
+ Options {
+ wrap_algorithm,
+ ..self
}
}
- /// Lazily wrap a line of text at `self.width` characters.
+ /// Change [`self.splitter`]. The [`WordSplitter`] is used to fit
+ /// part of a word into the current line when wrapping text.
///
- /// The [`WordSplitter`] stored in [`self.splitter`] is used
- /// whenever when a word is too large to fit on the current line.
- /// By changing the field, different hyphenation strategies can be
- /// implemented.
- ///
- /// # Complexities
- ///
- /// This method consumes the `Wrapper` and returns a
- /// [`IntoWrapIter`] iterator. Fully processing the iterator has
- /// the same O(*n*) time complexity as [`wrap_iter`], where *n* is
- /// the length of the input string.
- ///
- /// # Examples
+ /// This function may return a different type than `Self`. That is
+ /// the case when the given `splitter` is of a different type than
+ /// the currently stored one in the `splitter` field. Take for
+ /// example:
///
/// ```
- /// use std::borrow::Cow::Borrowed;
- /// use textwrap::Wrapper;
- ///
- /// let wrap20 = Wrapper::new(20);
- /// let mut wrap20_iter = wrap20.into_wrap_iter("Zero-cost abstractions.");
- /// assert_eq!(wrap20_iter.next(), Some(Borrowed("Zero-cost")));
- /// assert_eq!(wrap20_iter.next(), Some(Borrowed("abstractions.")));
- /// assert_eq!(wrap20_iter.next(), None);
+ /// use textwrap::{HyphenSplitter, NoHyphenation, Options};
+ /// // The default type returned by `new` is `Options<HyphenSplitter>`
+ /// let opt: Options<HyphenSplitter> = Options::new(80);
+ /// // Setting a different splitter changes the type
+ /// let opt: Options<NoHyphenation> = opt.splitter(NoHyphenation);
/// ```
///
/// [`self.splitter`]: #structfield.splitter
- /// [`WordSplitter`]: trait.WordSplitter.html
- /// [`IntoWrapIter`]: struct.IntoWrapIter.html
- /// [`wrap_iter`]: #method.wrap_iter
- pub fn into_wrap_iter(self, s: &'a str) -> IntoWrapIter<'a, S> {
- let inner = WrapIterImpl::new(&self, s);
-
- IntoWrapIter {
- wrapper: self,
- inner: inner,
+ pub fn splitter<T>(self, splitter: T) -> Options<'a, T> {
+ Options {
+ width: self.width,
+ initial_indent: self.initial_indent,
+ subsequent_indent: self.subsequent_indent,
+ break_words: self.break_words,
+ wrap_algorithm: self.wrap_algorithm,
+ splitter: splitter,
}
}
}
-/// An iterator over the lines of the input string which owns a
-/// `Wrapper`. An instance of `IntoWrapIter` is typically obtained
-/// through either [`wrap_iter`] or [`Wrapper::into_wrap_iter`].
+/// Return the current terminal width. If the terminal width cannot be
+/// determined (typically because the standard output is not connected
+/// to a terminal), a default width of 80 characters will be used.
///
-/// Each call of `.next()` method yields a line wrapped in `Some` if the
-/// input hasn't been fully processed yet. Otherwise it returns `None`.
+/// # Examples
///
-/// [`wrap_iter`]: fn.wrap_iter.html
-/// [`Wrapper::into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter
-#[derive(Debug)]
-pub struct IntoWrapIter<'a, S: WordSplitter> {
- wrapper: Wrapper<'a, S>,
- inner: WrapIterImpl<'a>,
-}
-
-impl<'a, S: WordSplitter> Iterator for IntoWrapIter<'a, S> {
- type Item = Cow<'a, str>;
-
- fn next(&mut self) -> Option<Cow<'a, str>> {
- self.inner.next(&self.wrapper)
- }
-}
-
-/// An iterator over the lines of the input string which borrows a
-/// `Wrapper`. An instance of `WrapIter` is typically obtained
-/// through the [`Wrapper::wrap_iter`] method.
+/// Create an [`Options`] for wrapping at the current terminal width
+/// with a two column margin to the left and the right:
+///
+/// ```no_run
+/// use textwrap::{termwidth, NoHyphenation, Options};
///
-/// Each call of `.next()` method yields a line wrapped in `Some` if the
-/// input hasn't been fully processed yet. Otherwise it returns `None`.
+/// let width = termwidth() - 4; // Two columns on each side.
+/// let options = Options::new(width)
+/// .splitter(NoHyphenation)
+/// .initial_indent(" ")
+/// .subsequent_indent(" ");
+/// ```
///
-/// [`Wrapper::wrap_iter`]: struct.Wrapper.html#method.wrap_iter
-#[derive(Debug)]
-pub struct WrapIter<'w, 'a: 'w, S: WordSplitter> {
- wrapper: &'w Wrapper<'a, S>,
- inner: WrapIterImpl<'a>,
+/// **Note:** Only available when the `terminal_size` Cargo feature is
+/// enabled.
+#[cfg(feature = "terminal_size")]
+pub fn termwidth() -> usize {
+ terminal_size::terminal_size().map_or(80, |(terminal_size::Width(w), _)| w.into())
}
-impl<'w, 'a: 'w, S: WordSplitter> Iterator for WrapIter<'w, 'a, S> {
- type Item = Cow<'a, str>;
-
- fn next(&mut self) -> Option<Cow<'a, str>> {
- self.inner.next(self.wrapper)
+/// Fill a line of text at a given width.
+///
+/// The result is a [`String`], complete with newlines between each
+/// line. Use the [`wrap`] function if you need access to the
+/// individual lines.
+///
+/// The easiest way to use this function is to pass an integer for
+/// `width_or_options`:
+///
+/// ```
+/// use textwrap::fill;
+///
+/// assert_eq!(
+/// fill("Memory safety without garbage collection.", 15),
+/// "Memory safety\nwithout garbage\ncollection."
+/// );
+/// ```
+///
+/// If you need to customize the wrapping, you can pass an [`Options`]
+/// instead of a `usize`:
+///
+/// ```
+/// use textwrap::{fill, Options};
+///
+/// let options = Options::new(15)
+/// .initial_indent("- ")
+/// .subsequent_indent(" ");
+/// assert_eq!(
+/// fill("Memory safety without garbage collection.", &options),
+/// "- Memory safety\n without\n garbage\n collection."
+/// );
+/// ```
+pub fn fill<'a, S, Opt>(text: &str, width_or_options: Opt) -> String
+where
+ S: WordSplitter,
+ Opt: Into<Options<'a, S>>,
+{
+ // This will avoid reallocation in simple cases (no
+ // indentation, no hyphenation).
+ let mut result = String::with_capacity(text.len());
+
+ for (i, line) in wrap(text, width_or_options).iter().enumerate() {
+ if i > 0 {
+ result.push('\n');
+ }
+ result.push_str(&line);
}
-}
-/// Like `char::is_whitespace`, but non-breaking spaces don't count.
-#[inline]
-fn is_whitespace(ch: char) -> bool {
- ch.is_whitespace() && ch != NBSP
+ result
}
-/// Common implementation details for `WrapIter` and `IntoWrapIter`.
-#[derive(Debug)]
-struct WrapIterImpl<'a> {
- // String to wrap.
- source: &'a str,
- // CharIndices iterator over self.source.
- char_indices: CharIndices<'a>,
- // Byte index where the current line starts.
- start: usize,
- // Byte index of the last place where the string can be split.
- split: usize,
- // Size in bytes of the character at self.source[self.split].
- split_len: usize,
- // Width of self.source[self.start..idx].
- line_width: usize,
- // Width of self.source[self.start..self.split].
- line_width_at_split: usize,
- // Tracking runs of whitespace characters.
- in_whitespace: bool,
- // Has iterator finished producing elements?
- finished: bool,
-}
-
-impl<'a> WrapIterImpl<'a> {
- fn new<S: WordSplitter>(wrapper: &Wrapper<'a, S>, s: &'a str) -> WrapIterImpl<'a> {
- WrapIterImpl {
- source: s,
- char_indices: s.char_indices(),
- start: 0,
- split: 0,
- split_len: 0,
- line_width: wrapper.initial_indent.width(),
- line_width_at_split: wrapper.initial_indent.width(),
- in_whitespace: false,
- finished: false,
+/// Unpack a paragraph of already-wrapped text.
+///
+/// This function attempts to recover the original text from a single
+/// paragraph of text produced by the [`fill`] function. This means
+/// that it turns
+///
+/// ```text
+/// textwrap: a small
+/// library for
+/// wrapping text.
+/// ```
+///
+/// back into
+///
+/// ```text
+/// textwrap: a small library for wrapping text.
+/// ```
+///
+/// In addition, it will recognize a common prefix among the lines.
+/// The prefix of the first line is returned in
+/// [`Options::initial_indent`] and the prefix (if any) of the
+/// other lines is returned in [`Options::subsequent_indent`].
+///
+/// In addition to `' '`, the prefixes can consist of characters used
+/// for unordered lists (`'-'`, `'+'`, and `'*'`) and block quotes
+/// (`'>'`) in Markdown as well as characters often used for inline
+/// comments (`'#'` and `'/'`).
+///
+/// The text must come from a single wrapped paragraph. This means
+/// that there can be no `"\n\n"` within the text.
+///
+/// # Examples
+///
+/// ```
+/// use textwrap::unfill;
+///
+/// let (text, options) = unfill("\
+/// * This is an
+/// example of
+/// a list item.
+/// ");
+///
+/// assert_eq!(text, "This is an example of a list item.\n");
+/// assert_eq!(options.initial_indent, "* ");
+/// assert_eq!(options.subsequent_indent, " ");
+/// ```
+pub fn unfill<'a>(text: &'a str) -> (String, Options<'a, HyphenSplitter>) {
+ let trimmed = text.trim_end_matches('\n');
+ let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/'];
+
+ let mut options = Options::new(0);
+ for (idx, line) in trimmed.split('\n').enumerate() {
+ options.width = std::cmp::max(options.width, core::display_width(line));
+ let without_prefix = line.trim_start_matches(prefix_chars);
+ let prefix = &line[..line.len() - without_prefix.len()];
+
+ println!("line: {:?} -> prefix: {:?}", line, prefix);
+
+ if idx == 0 {
+ options.initial_indent = prefix;
+ } else if idx == 1 {
+ options.subsequent_indent = prefix;
+ } else if idx > 1 {
+ for ((idx, x), y) in prefix.char_indices().zip(options.subsequent_indent.chars()) {
+ if x != y {
+ options.subsequent_indent = &prefix[..idx];
+ break;
+ }
+ }
+ if prefix.len() < options.subsequent_indent.len() {
+ options.subsequent_indent = prefix;
+ }
}
}
- fn create_result_line<S: WordSplitter>(&self, wrapper: &Wrapper<'a, S>) -> Cow<'a, str> {
- if self.start == 0 {
- Cow::from(wrapper.initial_indent)
+ let mut unfilled = String::with_capacity(text.len());
+ for (idx, line) in trimmed.split('\n').enumerate() {
+ if idx == 0 {
+ unfilled.push_str(&line[options.initial_indent.len()..]);
} else {
- Cow::from(wrapper.subsequent_indent)
+ unfilled.push(' ');
+ unfilled.push_str(&line[options.subsequent_indent.len()..]);
}
}
- fn next<S: WordSplitter>(&mut self, wrapper: &Wrapper<'a, S>) -> Option<Cow<'a, str>> {
- if self.finished {
- return None;
- }
-
- while let Some((idx, ch)) = self.char_indices.next() {
- if ch == CSI.0 && self.char_indices.next().map(|(_, ch)| ch) == Some(CSI.1) {
- // We have found the start of an ANSI escape code,
- // typically used for colored text. We ignore all
- // characters until we find a "final byte" in the
- // range 0x40–0x7E.
- while let Some((_, ch)) = self.char_indices.next() {
- if ANSI_FINAL_BYTE.contains(&ch) {
- break;
- }
- }
- // Done with the escape sequence, we continue with
- // next character in the outer loop.
- continue;
- }
-
- let char_width = ch.width().unwrap_or(0);
- let char_len = ch.len_utf8();
- if ch == '\n' {
- self.split = idx;
- self.split_len = char_len;
- self.line_width_at_split = self.line_width;
- self.in_whitespace = false;
-
- // If this is not the final line, return the current line. Otherwise,
- // we will return the line with its line break after exiting the loop
- if self.split + self.split_len < self.source.len() {
- let mut line = self.create_result_line(wrapper);
- line += &self.source[self.start..self.split];
-
- self.start = self.split + self.split_len;
- self.line_width = wrapper.subsequent_indent.width();
+ println!("pushing trailing newlines: {:?}", &text[trimmed.len()..]);
+ unfilled.push_str(&text[trimmed.len()..]);
- return Some(line);
- }
- } else if is_whitespace(ch) {
- // Extend the previous split or create a new one.
- if self.in_whitespace {
- self.split_len += char_len;
- } else {
- self.split = idx;
- self.split_len = char_len;
- }
- self.line_width_at_split = self.line_width + char_width;
- self.in_whitespace = true;
- } else if self.line_width + char_width > wrapper.width {
- // There is no room for this character on the current
- // line. Try to split the final word.
- self.in_whitespace = false;
- let remaining_text = &self.source[self.split + self.split_len..];
- let final_word = match remaining_text.find(is_whitespace) {
- Some(i) => &remaining_text[..i],
- None => remaining_text,
- };
-
- let mut hyphen = "";
- let splits = wrapper.splitter.split(final_word);
- for &(head, hyp, _) in splits.iter().rev() {
- if self.line_width_at_split + head.width() + hyp.width() <= wrapper.width {
- // We can fit head into the current line.
- // Advance the split point by the width of the
- // whitespace and the head length.
- self.split += self.split_len + head.len();
- // The new `split_len` is equal to the stretch
- // of whitespace following the split.
- self.split_len = remaining_text[head.len()..]
- .char_indices()
- .skip_while(|(_, ch)| is_whitespace(*ch))
- .next()
- .map_or(0, |(idx, _)| idx);
- self.line_width_at_split += head.width() + hyp.width();
- hyphen = hyp;
- break;
- }
- }
-
- if self.start >= self.split {
- // The word is too big to fit on a single line.
- if wrapper.break_words {
- // Break work at current index.
- self.split = idx;
- self.split_len = 0;
- self.line_width_at_split = self.line_width;
- } else {
- // Add smallest split.
- self.split += self.split_len + splits[0].0.len();
- // The new `split_len` is equal to the stretch
- // of whitespace following the smallest split.
- self.split_len = remaining_text[splits[0].0.len()..]
- .char_indices()
- .skip_while(|(_, ch)| is_whitespace(*ch))
- .next()
- .map_or(0, |(idx, _)| idx);
- self.line_width_at_split = self.line_width;
- }
- }
-
- if self.start < self.split {
- let mut line = self.create_result_line(wrapper);
- line += &self.source[self.start..self.split];
- line += hyphen;
-
- self.start = self.split + self.split_len;
- self.line_width += wrapper.subsequent_indent.width();
- self.line_width -= self.line_width_at_split;
- self.line_width += char_width;
- self.line_width_at_split = wrapper.subsequent_indent.width();
+ println!("unfilled: {:?}", unfilled);
- return Some(line);
- }
- } else {
- self.in_whitespace = false;
- }
- self.line_width += char_width;
- }
-
- self.finished = true;
-
- // Add final line.
- if self.start < self.source.len() {
- let mut line = self.create_result_line(wrapper);
- line += &self.source[self.start..];
- return Some(line);
- }
-
- None
- }
+ (unfilled, options)
}
-/// Return the current terminal width. If the terminal width cannot be
-/// determined (typically because the standard output is not connected
-/// to a terminal), a default width of 80 characters will be used.
+/// Refill a paragraph of wrapped text with a new width.
///
-/// # Examples
+/// This function will first use the [`unfill`] function to remove
+/// newlines from the text. Afterwards the text is filled again using
+/// the [`fill`] function.
///
-/// Create a `Wrapper` for the current terminal with a two column
-/// margin:
+/// The `new_width_or_options` argument specifies the new width and can
+/// specify other options as well — except for
+/// [`Options::initial_indent`] and [`Options::subsequent_indent`],
+/// which are deduced from `filled_text`.
///
-/// ```no_run
-/// # #![allow(unused_variables)]
-/// use textwrap::{Wrapper, NoHyphenation, termwidth};
+/// # Examples
///
-/// let width = termwidth() - 4; // Two columns on each side.
-/// let wrapper = Wrapper::with_splitter(width, NoHyphenation)
-/// .initial_indent(" ")
-/// .subsequent_indent(" ");
/// ```
+/// use textwrap::refill;
///
-/// **Note:** Only available when the `terminal_size` feature is
-/// enabled.
-#[cfg(feature = "terminal_size")]
-pub fn termwidth() -> usize {
- terminal_size::terminal_size().map_or(80, |(terminal_size::Width(w), _)| w.into())
+/// let text = "\
+/// > Memory safety without
+/// > garbage collection.
+/// ";
+/// assert_eq!(refill(text, 15), "\
+/// > Memory safety
+/// > without
+/// > garbage
+/// > collection.
+/// ");
+pub fn refill<'a, S, Opt>(filled_text: &str, new_width_or_options: Opt) -> String
+where
+ S: WordSplitter,
+ Opt: Into<Options<'a, S>>,
+{
+ let trimmed = filled_text.trim_end_matches('\n');
+ let (text, options) = unfill(trimmed);
+ let mut new_options = new_width_or_options.into();
+ new_options.initial_indent = options.initial_indent;
+ new_options.subsequent_indent = options.subsequent_indent;
+ let mut refilled = fill(&text, new_options);
+ refilled.push_str(&filled_text[trimmed.len()..]);
+ refilled
}
-/// Fill a line of text at `width` characters.
+/// Wrap a line of text at a given width.
+///
+/// The result is a vector of lines, each line is of type [`Cow<'_,
+/// str>`](Cow), which means that the line will borrow from the input
+/// `&str` if possible. The lines do not have a trailing `'\n'`. Use
+/// the [`fill`] function if you need a [`String`] instead.
///
-/// The result is a string with newlines between each line. Use
-/// [`wrap`] if you need access to the individual lines or
-/// [`wrap_iter`] for its iterator counterpart.
+/// The easiest way to use this function is to pass an integer for
+/// `width_or_options`:
///
/// ```
-/// use textwrap::fill;
+/// use textwrap::wrap;
///
-/// assert_eq!(fill("Memory safety without garbage collection.", 15),
-/// "Memory safety\nwithout garbage\ncollection.");
+/// let lines = wrap("Memory safety without garbage collection.", 15);
+/// assert_eq!(lines, &[
+/// "Memory safety",
+/// "without garbage",
+/// "collection.",
+/// ]);
/// ```
///
-/// This function creates a Wrapper on the fly with default settings.
-/// If you need to set a language corpus for automatic hyphenation, or
-/// need to fill many strings, then it is suggested to create a Wrapper
-/// and call its [`fill` method].
+/// If you need to customize the wrapping, you can pass an [`Options`]
+/// instead of a `usize`:
///
-/// [`wrap`]: fn.wrap.html
-/// [`wrap_iter`]: fn.wrap_iter.html
-/// [`fill` method]: struct.Wrapper.html#method.fill
-pub fn fill(s: &str, width: usize) -> String {
- Wrapper::new(width).fill(s)
-}
-
-/// Wrap a line of text at `width` characters.
+/// ```
+/// use textwrap::{wrap, Options};
///
-/// This function creates a Wrapper on the fly with default settings.
-/// If you need to set a language corpus for automatic hyphenation, or
-/// need to wrap many strings, then it is suggested to create a Wrapper
-/// and call its [`wrap` method].
+/// let options = Options::new(15)
+/// .initial_indent("- ")
+/// .subsequent_indent(" ");
+/// let lines = wrap("Memory safety without garbage collection.", &options);
+/// assert_eq!(lines, &[
+/// "- Memory safety",
+/// " without",
+/// " garbage",
+/// " collection.",
+/// ]);
+/// ```
///
-/// The result is a vector of strings. Use [`wrap_iter`] if you need an
-/// iterator version.
+/// # Optimal-Fit Wrapping
///
-/// # Examples
+/// By default, `wrap` will try to ensure an even right margin by
+/// finding breaks which avoid short lines. We call this an
+/// “optimal-fit algorithm” since the line breaks are computed by
+/// considering all possible line breaks. The alternative is a
+/// “first-fit algorithm” which simply accumulates words until they no
+/// longer fit on the line.
+///
+/// As an example, using the first-fit algorithm to wrap the famous
+/// Hamlet quote “To be, or not to be: that is the question” in a
+/// narrow column with room for only 10 characters looks like this:
///
/// ```
-/// use textwrap::wrap;
+/// # use textwrap::{Options, wrap};
+/// # use textwrap::core::WrapAlgorithm::FirstFit;
+/// #
+/// # let lines = wrap("To be, or not to be: that is the question",
+/// # Options::new(10).wrap_algorithm(FirstFit));
+/// # assert_eq!(lines.join("\n") + "\n", "\
+/// To be, or
+/// not to be:
+/// that is
+/// the
+/// question
+/// # ");
+/// ```
///
-/// assert_eq!(wrap("Concurrency without data races.", 15),
-/// vec!["Concurrency",
-/// "without data",
-/// "races."]);
+/// Notice how the second to last line is quite narrow because
+/// “question” was too large to fit? The greedy first-fit algorithm
+/// doesn’t look ahead, so it has no other option than to put
+/// “question” onto its own line.
+///
+/// With the optimal-fit wrapping algorithm, the previous lines are
+/// shortened slightly in order to make the word “is” go into the
+/// second last line:
+///
+/// ```
+/// # #[cfg(feature = "smawk")] {
+/// # use textwrap::{Options, wrap};
+/// # use textwrap::core::WrapAlgorithm::OptimalFit;
+/// #
+/// # let lines = wrap("To be, or not to be: that is the question",
+/// # Options::new(10).wrap_algorithm(OptimalFit));
+/// # assert_eq!(lines.join("\n") + "\n", "\
+/// To be,
+/// or not to
+/// be: that
+/// is the
+/// question
+/// # "); }
+/// ```
+///
+/// Please see [`core::WrapAlgorithm`] for details.
+///
+/// # Examples
+///
+/// The returned vector contains lines of type `Cow<'_, str>`. If
+/// possible, the wrapped lines will borrow from the input string. As
+/// an example, with a hanging indentation, the first line can borrow
+/// from the input, but the subsequent lines become owned strings:
///
-/// assert_eq!(wrap("Concurrency without data races.", 20),
-/// vec!["Concurrency without",
-/// "data races."]);
/// ```
+/// use std::borrow::Cow::{Borrowed, Owned};
+/// use textwrap::{wrap, Options};
///
-/// [`wrap_iter`]: fn.wrap_iter.html
-/// [`wrap` method]: struct.Wrapper.html#method.wrap
-pub fn wrap(s: &str, width: usize) -> Vec<Cow<'_, str>> {
- Wrapper::new(width).wrap(s)
+/// let options = Options::new(15).subsequent_indent("....");
+/// let lines = wrap("Wrapping text all day long.", &options);
+/// let annotated = lines
+/// .iter()
+/// .map(|line| match line {
+/// Borrowed(text) => format!("[Borrowed] {}", text),
+/// Owned(text) => format!("[Owned] {}", text),
+/// })
+/// .collect::<Vec<_>>();
+/// assert_eq!(
+/// annotated,
+/// &[
+/// "[Borrowed] Wrapping text",
+/// "[Owned] ....all day",
+/// "[Owned] ....long.",
+/// ]
+/// );
+/// ```
+pub fn wrap<'a, S, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>>
+where
+ S: WordSplitter,
+ Opt: Into<Options<'a, S>>,
+{
+ let options = width_or_options.into();
+
+ let initial_width = options
+ .width
+ .saturating_sub(core::display_width(options.initial_indent));
+ let subsequent_width = options
+ .width
+ .saturating_sub(core::display_width(options.subsequent_indent));
+
+ let mut lines = Vec::new();
+ for line in text.split('\n') {
+ let words = core::find_words(line);
+ let split_words = core::split_words(words, &options);
+ let broken_words = if options.break_words {
+ let mut broken_words = core::break_words(split_words, subsequent_width);
+ if !options.initial_indent.is_empty() {
+ // Without this, the first word will always go into
+ // the first line. However, since we break words based
+ // on the _second_ line width, it can be wrong to
+ // unconditionally put the first word onto the first
+ // line. An empty zero-width word fixes this.
+ broken_words.insert(0, core::Word::from(""));
+ }
+ broken_words
+ } else {
+ split_words.collect::<Vec<_>>()
+ };
+
+ #[rustfmt::skip]
+ let line_lengths = |i| if i == 0 { initial_width } else { subsequent_width };
+ let wrapped_words = match options.wrap_algorithm {
+ #[cfg(feature = "smawk")]
+ core::WrapAlgorithm::OptimalFit => core::wrap_optimal_fit(&broken_words, line_lengths),
+ core::WrapAlgorithm::FirstFit => core::wrap_first_fit(&broken_words, line_lengths),
+ };
+
+ let mut idx = 0;
+ for words in wrapped_words {
+ let last_word = match words.last() {
+ None => {
+ lines.push(Cow::from(""));
+ continue;
+ }
+ Some(word) => word,
+ };
+
+ // We assume here that all words are contiguous in `line`.
+ // That is, the sum of their lengths should add up to the
+ // length of `line`.
+ let len = words
+ .iter()
+ .map(|word| word.len() + word.whitespace.len())
+ .sum::<usize>()
+ - last_word.whitespace.len();
+
+ // The result is owned if we have indentation, otherwise
+ // we can simply borrow an empty string.
+ let mut result = if lines.is_empty() && !options.initial_indent.is_empty() {
+ Cow::Owned(options.initial_indent.to_owned())
+ } else if !lines.is_empty() && !options.subsequent_indent.is_empty() {
+ Cow::Owned(options.subsequent_indent.to_owned())
+ } else {
+ // We can use an empty string here since string
+ // concatenation for `Cow` preserves a borrowed value
+ // when either side is empty.
+ Cow::from("")
+ };
+
+ result += &line[idx..idx + len];
+
+ if !last_word.penalty.is_empty() {
+ result.to_mut().push_str(&last_word.penalty);
+ }
+
+ lines.push(result);
+
+ // Advance by the length of `result`, plus the length of
+ // `last_word.whitespace` -- even if we had a penalty, we
+ // need to skip over the whitespace.
+ idx += len + last_word.whitespace.len();
+ }
+ }
+
+ lines
}
-/// Lazily wrap a line of text at `width` characters.
+/// Wrap text into columns with a given total width.
+///
+/// The `left_gap`, `mid_gap` and `right_gap` arguments specify the
+/// strings to insert before, between, and after the columns. The
+/// total width of all columns and all gaps is specified using the
+/// `total_width_or_options` argument. This argument can simply be an
+/// integer if you want to use default settings when wrapping, or it
+/// can be an [`Options`] value if you want to customize the wrapping.
+///
+/// If the columns are narrow, it is recommended to set
+/// [`Options::break_words`] to `true` to prevent words from
+/// protruding into the margins.
+///
+/// The per-column width is computed like this:
+///
+/// ```
+/// # let (left_gap, middle_gap, right_gap) = ("", "", "");
+/// # let columns = 2;
+/// # let options = textwrap::Options::new(80);
+/// let inner_width = options.width
+/// - textwrap::core::display_width(left_gap)
+/// - textwrap::core::display_width(right_gap)
+/// - textwrap::core::display_width(middle_gap) * (columns - 1);
+/// let column_width = inner_width / columns;
+/// ```
+///
+/// The `text` is wrapped using [`wrap`] and the given `options`
+/// argument, but the width is overwritten to the computed
+/// `column_width`.
///
-/// This function creates a Wrapper on the fly with default settings.
-/// It then calls the [`into_wrap_iter`] method. Hence, the return
-/// value is an [`IntoWrapIter`], not a [`WrapIter`] as the function
-/// name would otherwise suggest.
+/// # Panics
///
-/// If you need to set a language corpus for automatic hyphenation, or
-/// need to wrap many strings, then it is suggested to create a Wrapper
-/// and call its [`wrap_iter`] or [`into_wrap_iter`] methods.
+/// Panics if `columns` is zero.
///
/// # Examples
///
/// ```
-/// use std::borrow::Cow::Borrowed;
-/// use textwrap::wrap_iter;
+/// use textwrap::wrap_columns;
///
-/// let mut wrap20_iter = wrap_iter("Zero-cost abstractions.", 20);
-/// assert_eq!(wrap20_iter.next(), Some(Borrowed("Zero-cost")));
-/// assert_eq!(wrap20_iter.next(), Some(Borrowed("abstractions.")));
-/// assert_eq!(wrap20_iter.next(), None);
+/// let text = "\
+/// This is an example text, which is wrapped into three columns. \
+/// Notice how the final column can be shorter than the others.";
+///
+/// #[cfg(feature = "smawk")]
+/// assert_eq!(wrap_columns(text, 3, 50, "| ", " | ", " |"),
+/// vec!["| This is | into three | column can be |",
+/// "| an example | columns. | shorter than |",
+/// "| text, which | Notice how | the others. |",
+/// "| is wrapped | the final | |"]);
+///
+/// // Without the `smawk` feature, the middle column is a little more uneven:
+/// #[cfg(not(feature = "smawk"))]
+/// assert_eq!(wrap_columns(text, 3, 50, "| ", " | ", " |"),
+/// vec!["| This is an | three | column can be |",
+/// "| example text, | columns. | shorter than |",
+/// "| which is | Notice how | the others. |",
+/// "| wrapped into | the final | |"]);
+/// ```
+pub fn wrap_columns<'a, S, Opt>(
+    text: &str,
+    columns: usize,
+    total_width_or_options: Opt,
+    left_gap: &str,
+    mid_gap: &str,
+    right_gap: &str,
+) -> Vec<String>
+where
+    S: WordSplitter,
+    Opt: Into<Options<'a, S>>,
+{
+    assert!(columns > 0);
+
+    let mut options = total_width_or_options.into();
+
+    // Width left for the columns themselves once the gaps have been
+    // subtracted. The subtractions saturate so that gaps wider than
+    // the total width give zero instead of underflowing.
+    let inner_width = options
+        .width
+        .saturating_sub(core::display_width(left_gap))
+        .saturating_sub(core::display_width(right_gap))
+        .saturating_sub(core::display_width(mid_gap) * (columns - 1));
+
+    // Every column is at least one cell wide, even when the gaps
+    // have used up all the available space.
+    let column_width = std::cmp::max(inner_width / columns, 1);
+    options.width = column_width;
+
+    // Cells left over after handing `column_width` cells to each
+    // column are absorbed by the last column. This must be computed
+    // as a difference: `inner_width % column_width` is only the same
+    // number when the leftover is smaller than `column_width`.
+    let last_column_padding = " ".repeat(inner_width.saturating_sub(column_width * columns));
+
+    let wrapped_lines = wrap(text, options);
+    // Number of wrapped lines divided by `columns`, rounded up.
+    let lines_per_column =
+        wrapped_lines.len() / columns + usize::from(wrapped_lines.len() % columns > 0);
+
+    let mut lines = Vec::with_capacity(lines_per_column);
+    for line_no in 0..lines_per_column {
+        let mut line = String::from(left_gap);
+        for column_no in 0..columns {
+            // The wrapped lines are laid out column by column: the
+            // first `lines_per_column` lines form the first column.
+            match wrapped_lines.get(line_no + column_no * lines_per_column) {
+                Some(column_line) => {
+                    line.push_str(column_line);
+                    // A wrapped line can be wider than the column,
+                    // e.g., an unbreakable word when `break_words`
+                    // is `false`. Saturate so that such a line gets
+                    // no padding instead of underflowing.
+                    let padding = column_width.saturating_sub(core::display_width(column_line));
+                    line.push_str(&" ".repeat(padding));
+                }
+                // The final column can be shorter than the others:
+                // fill the missing cell with blanks.
+                None => line.push_str(&" ".repeat(column_width)),
+            }
+            if column_no == columns - 1 {
+                line.push_str(&last_column_padding);
+            } else {
+                line.push_str(mid_gap);
+            }
+        }
+        line.push_str(right_gap);
+        lines.push(line);
+    }
+
+    lines
+}
+
+/// Fill `text` in-place without reallocating the input string.
+///
+/// This function works by modifying the input string: some `' '`
+/// characters will be replaced by `'\n'` characters. The rest of the
+/// text remains untouched.
+///
+/// Since we can only replace existing whitespace in the input with
+/// `'\n'`, we cannot do hyphenation nor can we split words longer
+/// than the line width. Indentation is also ruled out. In other
+/// words, `fill_inplace(width)` behaves as if you had called [`fill`]
+/// with these options:
///
-/// let mut wrap25_iter = wrap_iter("Zero-cost abstractions.", 25);
-/// assert_eq!(wrap25_iter.next(), Some(Borrowed("Zero-cost abstractions.")));
-/// assert_eq!(wrap25_iter.next(), None);
/// ```
+/// # use textwrap::{Options, NoHyphenation};
+/// # let width = 80;
+/// Options {
+/// width: width,
+/// initial_indent: "",
+/// subsequent_indent: "",
+/// break_words: false,
+/// wrap_algorithm: textwrap::core::WrapAlgorithm::FirstFit,
+/// splitter: NoHyphenation,
+/// };
+/// ```
+///
+/// The wrap algorithm is [`core::WrapAlgorithm::FirstFit`] since this
+/// is the fastest algorithm — and the main reason to use
+/// `fill_inplace` is to get the string broken into newlines as fast
+/// as possible.
+///
+/// A last difference is that (unlike [`fill`]) `fill_inplace` can
+/// leave trailing whitespace on lines. This is because we wrap by
+/// inserting a `'\n'` at the final whitespace in the input string:
+///
+/// ```
+/// let mut text = String::from("Hello  World!");
+/// textwrap::fill_inplace(&mut text, 10);
+/// assert_eq!(text, "Hello \nWorld!");
+/// ```
+///
+/// If we didn't do this, the word `World!` would end up being
+/// indented. You can avoid this if you make sure that your input text
+/// has no double spaces.
///
-/// [`wrap_iter`]: struct.Wrapper.html#method.wrap_iter
-/// [`into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter
-/// [`IntoWrapIter`]: struct.IntoWrapIter.html
-/// [`WrapIter`]: struct.WrapIter.html
-pub fn wrap_iter(s: &str, width: usize) -> IntoWrapIter<'_, HyphenSplitter> {
- Wrapper::new(width).into_wrap_iter(s)
+/// # Performance
+///
+/// In benchmarks, `fill_inplace` is about twice as fast as [`fill`].
+/// Please see the [`linear`
+/// benchmark](https://github.com/mgeisler/textwrap/blob/master/benches/linear.rs)
+/// for details.
+pub fn fill_inplace(text: &mut String, width: usize) {
+    // Byte positions in `text` which will be overwritten with '\n'.
+    let mut break_positions = Vec::new();
+
+    // Byte offset of the current input line within `text`.
+    let mut line_start = 0;
+    for line in text.split('\n') {
+        let words: Vec<_> = core::find_words(line).collect();
+        let wrapped = core::wrap_first_fit(&words, |_| width);
+
+        // Every wrapped line except the last one ends with a break.
+        let mut cursor = line_start;
+        for chunk in &wrapped[..wrapped.len() - 1] {
+            let chunk_len: usize = chunk.iter().map(|w| w.len() + w.whitespace.len()).sum();
+            cursor += chunk_len;
+            // `cursor` is now past the whitespace trailing the chunk,
+            // so step back one byte to land on its final ' '.
+            break_positions.push(cursor - 1);
+        }
+
+        // Skip the line itself and the '\n' consumed by `split`.
+        line_start += line.len() + 1;
+    }
+
+    // Take the buffer out of `text`, patch the bytes, and put the
+    // same buffer back.
+    let mut bytes = std::mem::take(text).into_bytes();
+    for pos in break_positions {
+        bytes[pos] = b'\n';
+    }
+    *text = String::from_utf8(bytes).unwrap();
}
#[cfg(test)]
@@ -777,17 +1174,43 @@ mod tests {
use hyphenation::{Language, Load, Standard};
#[test]
+ fn options_agree_with_usize() {
+ let opt_usize = Options::from(42_usize);
+ let opt_options = Options::new(42);
+
+ assert_eq!(opt_usize.width, opt_options.width);
+ assert_eq!(opt_usize.initial_indent, opt_options.initial_indent);
+ assert_eq!(opt_usize.subsequent_indent, opt_options.subsequent_indent);
+ assert_eq!(opt_usize.break_words, opt_options.break_words);
+ assert_eq!(
+ opt_usize.splitter.split_points("hello-world"),
+ opt_options.splitter.split_points("hello-world")
+ );
+ }
+
+ #[test]
fn no_wrap() {
assert_eq!(wrap("foo", 10), vec!["foo"]);
}
#[test]
- fn simple() {
+ fn wrap_simple() {
assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]);
}
#[test]
- fn multi_word_on_line() {
+ fn to_be_or_not() {
+ assert_eq!(
+ wrap(
+ "To be, or not to be, that is the question.",
+ Options::new(10).wrap_algorithm(core::WrapAlgorithm::FirstFit)
+ ),
+ vec!["To be, or", "not to be,", "that is", "the", "question."]
+ );
+ }
+
+ #[test]
+ fn multiple_words_on_first_line() {
assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]);
}
@@ -813,21 +1236,10 @@ mod tests {
#[test]
fn trailing_whitespace() {
- assert_eq!(wrap("foo bar ", 6), vec!["foo", "bar "]);
- }
-
- #[test]
- fn interior_whitespace() {
- assert_eq!(wrap("foo: bar baz", 10), vec!["foo: bar", "baz"]);
- }
-
- #[test]
- fn extra_whitespace_start_of_line() {
// Whitespace is only significant inside a line. After a line
// gets too long and is broken, the first word starts in
- // column zero and is not indented. The line before might end
- // up with trailing whitespace.
- assert_eq!(wrap("foo bar", 5), vec!["foo", "bar"]);
+ // column zero and is not indented.
+ assert_eq!(wrap("foo bar baz ", 5), vec!["foo", "bar", "baz"]);
}
#[test]
@@ -848,6 +1260,7 @@ mod tests {
}
#[test]
+ #[cfg(feature = "unicode-width")]
fn wide_character_handling() {
assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);
assert_eq!(
@@ -857,27 +1270,54 @@ mod tests {
}
#[test]
- fn empty_input_not_indented() {
- let wrapper = Wrapper::new(10).initial_indent("!!!");
- assert_eq!(wrapper.fill(""), "");
+ fn empty_line_is_indented() {
+ // Previously, indentation was not applied to empty lines.
+ // However, this is somewhat inconsistent and undesirable if
+ // the indentation is something like a border ("| ") which you
+ // want to apply to all lines, empty or not.
+ let options = Options::new(10).initial_indent("!!!");
+ assert_eq!(fill("", &options), "!!!");
}
#[test]
fn indent_single_line() {
- let wrapper = Wrapper::new(10).initial_indent(">>>"); // No trailing space
- assert_eq!(wrapper.fill("foo"), ">>>foo");
+ let options = Options::new(10).initial_indent(">>>"); // No trailing space
+ assert_eq!(fill("foo", &options), ">>>foo");
+ }
+
+ #[test]
+ #[cfg(feature = "unicode-width")]
+ fn indent_first_emoji() {
+ let options = Options::new(10).initial_indent("👉👉");
+ assert_eq!(
+ wrap("x x x x x x x x x x x x x", &options),
+ vec!["👉👉x x x", "x x x x x", "x x x x x"]
+ );
}
#[test]
fn indent_multiple_lines() {
- let wrapper = Wrapper::new(6).initial_indent("* ").subsequent_indent(" ");
- assert_eq!(wrapper.wrap("foo bar baz"), vec!["* foo", " bar", " baz"]);
+ let options = Options::new(6).initial_indent("* ").subsequent_indent(" ");
+ assert_eq!(
+ wrap("foo bar baz", &options),
+ vec!["* foo", " bar", " baz"]
+ );
}
#[test]
fn indent_break_words() {
- let wrapper = Wrapper::new(5).initial_indent("* ").subsequent_indent(" ");
- assert_eq!(wrapper.wrap("foobarbaz"), vec!["* foo", " bar", " baz"]);
+ let options = Options::new(5).initial_indent("* ").subsequent_indent(" ");
+ assert_eq!(wrap("foobarbaz", &options), vec!["* foo", " bar", " baz"]);
+ }
+
+ #[test]
+ fn initial_indent_break_words() {
+ // This is a corner-case showing how the long word is broken
+ // according to the width of the subsequent lines. The first
+ // fragment of the word no longer fits on the first line,
+ // which ends up being pure indentation.
+ let options = Options::new(5).initial_indent("-->");
+ assert_eq!(wrap("foobarbaz", &options), vec!["-->", "fooba", "rbaz"]);
}
#[test]
@@ -887,8 +1327,8 @@ mod tests {
#[test]
fn trailing_hyphen() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(wrapper.wrap("foobar-"), vec!["foobar-"]);
+ let options = Options::new(5).break_words(false);
+ assert_eq!(wrap("foobar-", &options), vec!["foobar-"]);
}
#[test]
@@ -898,17 +1338,17 @@ mod tests {
#[test]
fn hyphens_flag() {
- let wrapper = Wrapper::new(5).break_words(false);
+ let options = Options::new(5).break_words(false);
assert_eq!(
- wrapper.wrap("The --foo-bar flag."),
+ wrap("The --foo-bar flag.", &options),
vec!["The", "--foo-", "bar", "flag."]
);
}
#[test]
fn repeated_hyphens() {
- let wrapper = Wrapper::new(4).break_words(false);
- assert_eq!(wrapper.wrap("foo--bar"), vec!["foo--bar"]);
+ let options = Options::new(4).break_words(false);
+ assert_eq!(wrap("foo--bar", &options), vec!["foo--bar"]);
}
#[test]
@@ -918,8 +1358,8 @@ mod tests {
#[test]
fn hyphens_non_alphanumeric() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(wrapper.wrap("foo(-)bar"), vec!["foo(-)bar"]);
+ let options = Options::new(5).break_words(false);
+ assert_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]);
}
#[test]
@@ -929,49 +1369,84 @@ mod tests {
#[test]
fn forced_split() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(wrapper.wrap("foobar-baz"), vec!["foobar-", "baz"]);
+ let options = Options::new(5).break_words(false);
+ assert_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]);
}
#[test]
fn multiple_unbroken_words_issue_193() {
- let wrapper = Wrapper::new(3).break_words(false);
+ let options = Options::new(3).break_words(false);
assert_eq!(
- wrapper.wrap("small large tiny"),
+ wrap("small large tiny", &options),
vec!["small", "large", "tiny"]
);
assert_eq!(
- wrapper.wrap("small large tiny"),
+ wrap("small large tiny", &options),
vec!["small", "large", "tiny"]
);
}
#[test]
fn very_narrow_lines_issue_193() {
- let wrapper = Wrapper::new(1).break_words(false);
- assert_eq!(wrapper.wrap("fooo x y"), vec!["fooo", "x", "y"]);
- assert_eq!(wrapper.wrap("fooo x y"), vec!["fooo", "x", "y"]);
+ let options = Options::new(1).break_words(false);
+ assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]);
+ assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]);
+ }
+
+ #[test]
+ fn simple_hyphens_static() {
+ let options = Options::new(8).splitter(HyphenSplitter);
+ assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]);
}
#[test]
- fn no_hyphenation() {
- let wrapper = Wrapper::with_splitter(8, NoHyphenation);
- assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
+ fn simple_hyphens_dynamic() {
+ let options: Options = Options::new(8).splitter(Box::new(HyphenSplitter));
+ assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]);
+ }
+
+ #[test]
+ fn no_hyphenation_static() {
+ let options = Options::new(8).splitter(NoHyphenation);
+ assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]);
+ }
+
+ #[test]
+ fn no_hyphenation_dynamic() {
+ let options: Options = Options::new(8).splitter(Box::new(NoHyphenation));
+ assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]);
+ }
+
+ #[test]
+ #[cfg(feature = "hyphenation")]
+ fn auto_hyphenation_double_hyphenation_static() {
+ let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+ let options = Options::new(10);
+ assert_eq!(
+ wrap("Internationalization", &options),
+ vec!["Internatio", "nalization"]
+ );
+
+ let options = Options::new(10).splitter(dictionary);
+ assert_eq!(
+ wrap("Internationalization", &options),
+ vec!["Interna-", "tionaliza-", "tion"]
+ );
}
#[test]
#[cfg(feature = "hyphenation")]
- fn auto_hyphenation() {
+ fn auto_hyphenation_double_hyphenation_dynamic() {
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = Wrapper::new(10);
+ let mut options: Options = Options::new(10).splitter(Box::new(HyphenSplitter));
assert_eq!(
- wrapper.wrap("Internationalization"),
+ wrap("Internationalization", &options),
vec!["Internatio", "nalization"]
);
- let wrapper = Wrapper::with_splitter(10, dictionary);
+ options = Options::new(10).splitter(Box::new(dictionary));
assert_eq!(
- wrapper.wrap("Internationalization"),
+ wrap("Internationalization", &options),
vec!["Interna-", "tionaliza-", "tion"]
);
}
@@ -980,16 +1455,16 @@ mod tests {
#[cfg(feature = "hyphenation")]
fn auto_hyphenation_issue_158() {
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = Wrapper::new(10);
+ let options = Options::new(10);
assert_eq!(
- wrapper.wrap("participation is the key to success"),
- vec!["participat", "ion is the", "key to", "success"]
+ wrap("participation is the key to success", &options),
+ vec!["participat", "ion is", "the key to", "success"]
);
- let wrapper = Wrapper::with_splitter(10, dictionary);
+ let options = Options::new(10).splitter(dictionary);
assert_eq!(
- wrapper.wrap("participation is the key to success"),
- vec!["participa-", "tion is the", "key to", "success"]
+ wrap("participation is the key to success", &options),
+ vec!["partici-", "pation is", "the key to", "success"]
);
}
@@ -999,9 +1474,9 @@ mod tests {
// Test that hyphenation takes the width of the whitespace
// into account.
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = Wrapper::with_splitter(15, dictionary);
+ let options = Options::new(15).splitter(dictionary);
assert_eq!(
- wrapper.wrap("garbage collection"),
+ wrap("garbage collection", &options),
vec!["garbage col-", "lection"]
);
}
@@ -1013,8 +1488,8 @@ mod tests {
// line is borrowed.
use std::borrow::Cow::{Borrowed, Owned};
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = Wrapper::with_splitter(10, dictionary);
- let lines = wrapper.wrap("Internationalization");
+ let options = Options::new(10).splitter(dictionary);
+ let lines = wrap("Internationalization", &options);
if let Borrowed(s) = lines[0] {
assert!(false, "should not have been borrowed: {:?}", s);
}
@@ -1030,12 +1505,15 @@ mod tests {
#[cfg(feature = "hyphenation")]
fn auto_hyphenation_with_hyphen() {
let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = Wrapper::new(8).break_words(false);
- assert_eq!(wrapper.wrap("over-caffinated"), vec!["over-", "caffinated"]);
+ let options = Options::new(8).break_words(false);
+ assert_eq!(
+ wrap("over-caffinated", &options),
+ vec!["over-", "caffinated"]
+ );
- let wrapper = Wrapper::with_splitter(8, dictionary).break_words(false);
+ let options = options.splitter(dictionary);
assert_eq!(
- wrapper.wrap("over-caffinated"),
+ wrap("over-caffinated", &options),
vec!["over-", "caffi-", "nated"]
);
}
@@ -1047,6 +1525,8 @@ mod tests {
#[test]
fn break_words_wide_characters() {
+ // Even the poor man's version of `ch_width` counts these
+ // characters as wide.
assert_eq!(wrap("Hello", 5), vec!["He", "ll", "o"]);
}
@@ -1056,33 +1536,64 @@ mod tests {
}
#[test]
+ fn break_long_first_word() {
+ assert_eq!(wrap("testx y", 4), vec!["test", "x y"]);
+ }
+
+ #[test]
fn break_words_line_breaks() {
assert_eq!(fill("ab\ncdefghijkl", 5), "ab\ncdefg\nhijkl");
assert_eq!(fill("abcdefgh\nijkl", 5), "abcde\nfgh\nijkl");
}
#[test]
+ fn break_words_empty_lines() {
+ assert_eq!(
+ fill("foo\nbar", &Options::new(2).break_words(false)),
+ "foo\nbar"
+ );
+ }
+
+ #[test]
fn preserve_line_breaks() {
- assert_eq!(fill("test\n", 11), "test\n");
- assert_eq!(fill("test\n\na\n\n", 11), "test\n\na\n\n");
- assert_eq!(fill("1 3 5 7\n1 3 5 7", 7), "1 3 5 7\n1 3 5 7");
+ assert_eq!(fill("", 80), "");
+ assert_eq!(fill("\n", 80), "\n");
+ assert_eq!(fill("\n\n\n", 80), "\n\n\n");
+ assert_eq!(fill("test\n", 80), "test\n");
+ assert_eq!(fill("test\n\na\n\n", 80), "test\n\na\n\n");
+ assert_eq!(
+ fill(
+ "1 3 5 7\n1 3 5 7",
+ Options::new(7).wrap_algorithm(core::WrapAlgorithm::FirstFit)
+ ),
+ "1 3 5 7\n1 3 5 7"
+ );
+ assert_eq!(
+ fill(
+ "1 3 5 7\n1 3 5 7",
+ Options::new(5).wrap_algorithm(core::WrapAlgorithm::FirstFit)
+ ),
+ "1 3 5\n7\n1 3 5\n7"
+ );
}
#[test]
- fn wrap_preserve_line_breaks() {
- assert_eq!(fill("1 3 5 7\n1 3 5 7", 5), "1 3 5\n7\n1 3 5\n7");
+ fn preserve_line_breaks_with_whitespace() {
+ assert_eq!(fill(" ", 80), "");
+ assert_eq!(fill(" \n ", 80), "\n");
+ assert_eq!(fill(" \n \n \n ", 80), "\n\n\n");
}
#[test]
fn non_breaking_space() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(wrapper.fill("foo bar baz"), "foo bar baz");
+ let options = Options::new(5).break_words(false);
+ assert_eq!(fill("foo bar baz", &options), "foo bar baz");
}
#[test]
fn non_breaking_hyphen() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(wrapper.fill("foo‑bar‑baz"), "foo‑bar‑baz");
+ let options = Options::new(5).break_words(false);
+ assert_eq!(fill("foo‑bar‑baz", &options), "foo‑bar‑baz");
}
#[test]
@@ -1101,4 +1612,289 @@ mod tests {
String::from(green_hello) + "\n" + &blue_world
);
}
+
+ #[test]
+ fn cloning_works() {
+ static OPT: Options<HyphenSplitter> = Options::with_splitter(80, HyphenSplitter);
+ #[allow(clippy::clone_on_copy)]
+ let opt = OPT.clone();
+ assert_eq!(opt.width, 80);
+ }
+
+ #[test]
+ fn fill_inplace_empty() {
+ let mut text = String::from("");
+ fill_inplace(&mut text, 80);
+ assert_eq!(text, "");
+ }
+
+ #[test]
+ fn fill_inplace_simple() {
+ let mut text = String::from("foo bar baz");
+ fill_inplace(&mut text, 10);
+ assert_eq!(text, "foo bar\nbaz");
+ }
+
+ #[test]
+ fn fill_inplace_multiple_lines() {
+ let mut text = String::from("Some text to wrap over multiple lines");
+ fill_inplace(&mut text, 12);
+ assert_eq!(text, "Some text to\nwrap over\nmultiple\nlines");
+ }
+
+ #[test]
+ fn fill_inplace_long_word() {
+ let mut text = String::from("Internationalization is hard");
+ fill_inplace(&mut text, 10);
+ assert_eq!(text, "Internationalization\nis hard");
+ }
+
+ #[test]
+ fn fill_inplace_no_hyphen_splitting() {
+ let mut text = String::from("A well-chosen example");
+ fill_inplace(&mut text, 10);
+ assert_eq!(text, "A\nwell-chosen\nexample");
+ }
+
+ #[test]
+ fn fill_inplace_newlines() {
+ let mut text = String::from("foo bar\n\nbaz\n\n\n");
+ fill_inplace(&mut text, 10);
+ assert_eq!(text, "foo bar\n\nbaz\n\n\n");
+ }
+
+ #[test]
+ fn fill_inplace_newlines_reset_line_width() {
+ let mut text = String::from("1 3 5\n1 3 5 7 9\n1 3 5 7 9 1 3");
+ fill_inplace(&mut text, 10);
+ assert_eq!(text, "1 3 5\n1 3 5 7 9\n1 3 5 7 9\n1 3");
+ }
+
+ #[test]
+ fn fill_inplace_leading_whitespace() {
+ let mut text = String::from(" foo bar baz");
+ fill_inplace(&mut text, 10);
+ assert_eq!(text, " foo bar\nbaz");
+ }
+
+ #[test]
+ fn fill_inplace_trailing_whitespace() {
+ let mut text = String::from("foo bar baz ");
+ fill_inplace(&mut text, 10);
+ assert_eq!(text, "foo bar\nbaz ");
+ }
+
+ #[test]
+ fn fill_inplace_interior_whitespace() {
+ // To avoid an unwanted indentation of "baz", it is important
+ // to replace the final ' ' with '\n'.
+ let mut text = String::from("foo bar baz");
+ fill_inplace(&mut text, 10);
+ assert_eq!(text, "foo bar \nbaz");
+ }
+
+ #[test]
+ fn unfill_simple() {
+ let (text, options) = unfill("foo\nbar");
+ assert_eq!(text, "foo bar");
+ assert_eq!(options.width, 3);
+ }
+
+ #[test]
+ fn unfill_trailing_newlines() {
+ let (text, options) = unfill("foo\nbar\n\n\n");
+ assert_eq!(text, "foo bar\n\n\n");
+ assert_eq!(options.width, 3);
+ }
+
+ #[test]
+ fn unfill_initial_indent() {
+ let (text, options) = unfill(" foo\nbar\nbaz");
+ assert_eq!(text, "foo bar baz");
+ assert_eq!(options.width, 5);
+ assert_eq!(options.initial_indent, " ");
+ }
+
+ #[test]
+ fn unfill_differing_indents() {
+ let (text, options) = unfill(" foo\n bar\n baz");
+ assert_eq!(text, "foo bar baz");
+ assert_eq!(options.width, 7);
+ assert_eq!(options.initial_indent, " ");
+ assert_eq!(options.subsequent_indent, " ");
+ }
+
+ #[test]
+ fn unfill_list_item() {
+ let (text, options) = unfill("* foo\n bar\n baz");
+ assert_eq!(text, "foo bar baz");
+ assert_eq!(options.width, 5);
+ assert_eq!(options.initial_indent, "* ");
+ assert_eq!(options.subsequent_indent, " ");
+ }
+
+ #[test]
+ fn unfill_multiple_char_prefix() {
+ let (text, options) = unfill(" // foo bar\n // baz\n // quux");
+ assert_eq!(text, "foo bar baz quux");
+ assert_eq!(options.width, 14);
+ assert_eq!(options.initial_indent, " // ");
+ assert_eq!(options.subsequent_indent, " // ");
+ }
+
+ #[test]
+ fn unfill_block_quote() {
+ let (text, options) = unfill("> foo\n> bar\n> baz");
+ assert_eq!(text, "foo bar baz");
+ assert_eq!(options.width, 5);
+ assert_eq!(options.initial_indent, "> ");
+ assert_eq!(options.subsequent_indent, "> ");
+ }
+
+ #[test]
+ fn unfill_whitespace() {
+ assert_eq!(unfill("foo bar").0, "foo bar");
+ }
+
+ #[test]
+ fn trait_object() {
+ let opt_a: Options<NoHyphenation> = Options::with_splitter(20, NoHyphenation);
+ let opt_b: Options<HyphenSplitter> = 10.into();
+
+ let mut dyn_opt: &Options<dyn WordSplitter> = &opt_a;
+ assert_eq!(wrap("foo bar-baz", dyn_opt), vec!["foo bar-baz"]);
+
+ // Just assign a totally different option
+ dyn_opt = &opt_b;
+ assert_eq!(wrap("foo bar-baz", dyn_opt), vec!["foo bar-", "baz"]);
+ }
+
+ #[test]
+ fn trait_object_vec() {
+ // Create a vector of referenced trait-objects
+ let mut vector: Vec<&Options<dyn WordSplitter>> = Vec::new();
+ // Expected result from each options
+ let mut results = Vec::new();
+
+ let opt_usize: Options<_> = 10.into();
+ vector.push(&opt_usize);
+ results.push(vec!["over-", "caffinated"]);
+
+ #[cfg(feature = "hyphenation")]
+ let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+ #[cfg(feature = "hyphenation")]
+ let opt_hyp = Options::new(8).splitter(dictionary);
+ #[cfg(feature = "hyphenation")]
+ vector.push(&opt_hyp);
+ #[cfg(feature = "hyphenation")]
+ results.push(vec!["over-", "caffi-", "nated"]);
+
+ // Actually: Options<Box<dyn WordSplitter>>
+ let opt_box: Options = Options::new(10)
+ .break_words(false)
+ .splitter(Box::new(NoHyphenation));
+ vector.push(&opt_box);
+ results.push(vec!["over-caffinated"]);
+
+ // Test each entry
+ for (opt, expected) in vector.into_iter().zip(results) {
+ assert_eq!(
+ // Just all the totally different options
+ wrap("over-caffinated", opt),
+ expected
+ );
+ }
+ }
+
+ #[test]
+ fn outer_boxing() {
+ let mut wrapper: Box<Options<dyn WordSplitter>> = Box::new(Options::new(80));
+
+ // We must first deref the Box into a trait object and pass it by-reference
+ assert_eq!(wrap("foo bar baz", &*wrapper), vec!["foo bar baz"]);
+
+ // Replace the `Options` with a `usize`
+ wrapper = Box::new(Options::from(5));
+
+ // Deref per-se works as well, it already returns a reference
+ use std::ops::Deref;
+ assert_eq!(
+ wrap("foo bar baz", wrapper.deref()),
+ vec!["foo", "bar", "baz"]
+ );
+ }
+
+ #[test]
+ fn wrap_columns_empty_text() {
+ assert_eq!(wrap_columns("", 1, 10, "| ", "", " |"), vec!["| |"]);
+ }
+
+ #[test]
+ fn wrap_columns_single_column() {
+ assert_eq!(
+ wrap_columns("Foo", 3, 30, "| ", " | ", " |"),
+ vec!["| Foo | | |"]
+ );
+ }
+
+ #[test]
+ fn wrap_columns_uneven_columns() {
+ // The gaps take up a total of 5 columns, so the columns are
+ // (21 - 5)/4 = 4 columns wide:
+ assert_eq!(
+ wrap_columns("Foo Bar Baz Quux", 4, 21, "|", "|", "|"),
+ vec!["|Foo |Bar |Baz |Quux|"]
+ );
+ // As the total width increases, the last column absorbs the
+ // excess width:
+ assert_eq!(
+ wrap_columns("Foo Bar Baz Quux", 4, 24, "|", "|", "|"),
+ vec!["|Foo |Bar |Baz |Quux |"]
+ );
+ // Finally, when the width is 25, the columns can be resized
+ // to a width of (25 - 5)/4 = 5 columns:
+ assert_eq!(
+ wrap_columns("Foo Bar Baz Quux", 4, 25, "|", "|", "|"),
+ vec!["|Foo |Bar |Baz |Quux |"]
+ );
+ }
+
+ #[test]
+ #[cfg(feature = "unicode-width")]
+ fn wrap_columns_with_emojis() {
+ assert_eq!(
+ wrap_columns(
+ "Words and a few emojis 😍 wrapped in ⓶ columns",
+ 2,
+ 30,
+ "✨ ",
+ " ⚽ ",
+ " 👀"
+ ),
+ vec![
+ "✨ Words ⚽ wrapped in 👀",
+ "✨ and a few ⚽ ⓶ columns 👀",
+ "✨ emojis 😍 ⚽ 👀"
+ ]
+ );
+ }
+
+ #[test]
+ fn wrap_columns_big_gaps() {
+ // The column width shrinks to 1 because the gaps take up all
+ // the space.
+ assert_eq!(
+ wrap_columns("xyz", 2, 10, "----> ", " !!! ", " <----"),
+ vec![
+ "----> x !!! z <----", //
+ "----> y !!! <----"
+ ]
+ );
+ }
+
+ #[test]
+ #[should_panic]
+ fn wrap_columns_panic_with_zero_columns() {
+ wrap_columns("", 0, 10, "", "", "");
+ }
}
diff --git a/src/splitting.rs b/src/splitting.rs
index fc9f9d4..e92b188 100644
--- a/src/splitting.rs
+++ b/src/splitting.rs
@@ -5,61 +5,85 @@
//! functionality. [`HyphenSplitter`] is the default implementation of
//! this trait: it will simply split words on existing hyphens.
-/// An interface for splitting words.
+/// The `WordSplitter` trait describes where words can be split.
///
-/// When the [`wrap_iter`] method will try to fit text into a line, it
-/// will eventually find a word that it too large the current text
-/// width. It will then call the currently configured `WordSplitter` to
-/// have it attempt to split the word into smaller parts. This trait
-/// describes that functionality via the [`split`] method.
+/// If the textwrap crate has been compiled with the `hyphenation`
+/// Cargo feature enabled, you will find an implementation of
+/// `WordSplitter` by the `hyphenation::Standard` struct. Use this
+/// struct for language-aware hyphenation:
///
-/// If the `textwrap` crate has been compiled with the `hyphenation`
-/// feature enabled, you will find an implementation of `WordSplitter`
-/// by the `hyphenation::Standard` struct. Use this struct for
-/// language-aware hyphenation. See the [`hyphenation` documentation]
-/// for details.
+/// ```
+/// #[cfg(feature = "hyphenation")]
+/// {
+/// use hyphenation::{Language, Load, Standard};
+/// use textwrap::{wrap, Options};
+///
+/// let text = "Oxidation is the loss of electrons.";
+/// let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+/// let options = Options::new(8).splitter(dictionary);
+/// assert_eq!(wrap(text, &options), vec!["Oxida-",
+/// "tion is",
+/// "the loss",
+/// "of elec-",
+/// "trons."]);
+/// }
+/// ```
+///
+/// Please see the documentation for the [hyphenation] crate for more
+/// details.
///
-/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
-/// [`split`]: #tymethod.split
-/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
-pub trait WordSplitter {
- /// Return all possible splits of word. Each split is a triple
- /// with a head, a hyphen, and a tail where `head + &tail == word`.
- /// The hyphen can be empty if there is already a hyphen in the
- /// head.
+/// [hyphenation]: https://docs.rs/hyphenation/
+pub trait WordSplitter: std::fmt::Debug {
+ /// Return all possible indices where `word` can be split.
///
- /// The splits should go from smallest to longest and should
- /// include no split at all. So the word "technology" could be
- /// split into
+ /// The indices returned must be in range `0..word.len()`. They
+ /// should point to the index _after_ the split point, i.e., after
+ /// `-` if splitting on hyphens. This way, `word.split_at(idx)`
+ /// will break the word into two well-formed pieces.
///
- /// ```no_run
- /// vec![("tech", "-", "nology"),
- /// ("technol", "-", "ogy"),
- /// ("technolo", "-", "gy"),
- /// ("technology", "", "")];
+ /// # Examples
+ ///
+ /// ```
+ /// use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter};
+ /// assert_eq!(NoHyphenation.split_points("cannot-be-split"), vec![]);
+ /// assert_eq!(HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
/// ```
- fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
+ fn split_points(&self, word: &str) -> Vec<usize>;
+}
+
+impl<S: WordSplitter + ?Sized> WordSplitter for Box<S> {
+    /// Forwards to the splitter stored inside the box.
+    fn split_points(&self, word: &str) -> Vec<usize> {
+        // `self` is `&Box<S>`: dereference twice to reach the `S`.
+        (**self).split_points(word)
+    }
+}
+
+impl<T: ?Sized + WordSplitter> WordSplitter for &T {
+    /// Forwards to the referenced splitter.
+    fn split_points(&self, word: &str) -> Vec<usize> {
+        // `self` is `&&T`: strip one reference and call `T` directly.
+        T::split_points(*self, word)
+    }
}
-/// Use this as a [`Wrapper.splitter`] to avoid any kind of
+/// Use this as an [`Options.splitter`] to avoid any kind of
/// hyphenation:
///
/// ```
-/// use textwrap::{Wrapper, NoHyphenation};
+/// use textwrap::{wrap, NoHyphenation, Options};
///
-/// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
-/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
+/// let options = Options::new(8).splitter(NoHyphenation);
+/// assert_eq!(wrap("foo bar-baz", &options),
+/// vec!["foo", "bar-baz"]);
/// ```
///
-/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter
-#[derive(Clone, Debug)]
+/// [`Options.splitter`]: super::Options::splitter
+#[derive(Clone, Copy, Debug)]
pub struct NoHyphenation;
/// `NoHyphenation` implements `WordSplitter` by not splitting the
/// word at all.
impl WordSplitter for NoHyphenation {
- fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
- vec![(word, "", "")]
+ fn split_points(&self, _: &str) -> Vec<usize> {
+ Vec::new()
}
}
@@ -67,74 +91,50 @@ impl WordSplitter for NoHyphenation {
/// hyphens only.
///
/// You probably don't need to use this type since it's already used
-/// by default by `Wrapper::new`.
-#[derive(Clone, Debug)]
+/// by default by [`Options::new`](super::Options::new).
+#[derive(Clone, Copy, Debug)]
pub struct HyphenSplitter;
/// `HyphenSplitter` is the default `WordSplitter` used by
-/// `Wrapper::new`. It will split words on any existing hyphens in the
-/// word.
+/// [`Options::new`](super::Options::new). It will split words on any
+/// existing hyphens in the word.
///
/// It will only use hyphens that are surrounded by alphanumeric
-/// characters, which prevents a word like "--foo-bar" from being
-/// split on the first or second hyphen.
+/// characters, which prevents a word like `"--foo-bar"` from being
+/// split into `"--"` and `"foo-bar"`.
impl WordSplitter for HyphenSplitter {
- fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
- let mut triples = Vec::new();
- // Split on hyphens, smallest split first. We only use hyphens
- // that are surrounded by alphanumeric characters. This is to
- // avoid splitting on repeated hyphens, such as those found in
- // --foo-bar.
- let mut char_indices = word.char_indices();
- // Early return if the word is empty.
- let mut prev = match char_indices.next() {
- None => return vec![(word, "", "")],
- Some((_, ch)) => ch,
- };
+ fn split_points(&self, word: &str) -> Vec<usize> {
+ let mut splits = Vec::new();
- // Find current word, or return early if the word only has a
- // single character.
- let (mut idx, mut cur) = match char_indices.next() {
- None => return vec![(word, "", "")],
- Some((idx, cur)) => (idx, cur),
- };
+ for (idx, _) in word.match_indices('-') {
+ // We only use hyphens that are surrounded by alphanumeric
+ // characters. This is to avoid splitting on repeated hyphens,
+ // such as those found in --foo-bar.
+ let prev = word[..idx].chars().next_back();
+ let next = word[idx + 1..].chars().next();
- for (i, next) in char_indices {
- if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() {
- let (head, tail) = word.split_at(idx + 1);
- triples.push((head, "", tail));
+ if prev.filter(|ch| ch.is_alphanumeric()).is_some()
+ && next.filter(|ch| ch.is_alphanumeric()).is_some()
+ {
+ splits.push(idx + 1); // +1 due to width of '-'.
}
- prev = cur;
- idx = i;
- cur = next;
}
- // Finally option is no split at all.
- triples.push((word, "", ""));
-
- triples
+ splits
}
}
/// A hyphenation dictionary can be used to do language-specific
-/// hyphenation using patterns from the hyphenation crate.
+/// hyphenation using patterns from the [hyphenation] crate.
///
-/// **Note:** Only available when the `hyphenation` feature is
+/// **Note:** Only available when the `hyphenation` Cargo feature is
/// enabled.
+///
+/// [hyphenation]: https://docs.rs/hyphenation/
#[cfg(feature = "hyphenation")]
impl WordSplitter for hyphenation::Standard {
- fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
+ fn split_points(&self, word: &str) -> Vec<usize> {
use hyphenation::Hyphenator;
- // Find splits based on language dictionary.
- let mut triples = Vec::new();
- for n in self.hyphenate(word).breaks {
- let (head, tail) = word.split_at(n);
- let hyphen = if head.ends_with('-') { "" } else { "-" };
- triples.push((head, hyphen, tail));
- }
- // Finally option is no split at all.
- triples.push((word, "", ""));
-
- triples
+ self.hyphenate(word).breaks
}
}
diff --git a/tests/indent.rs b/tests/indent.rs
new file mode 100644
index 0000000..9dd5ad2
--- /dev/null
+++ b/tests/indent.rs
@@ -0,0 +1,88 @@
+/// Test cases ported over from the Python standard library.
+use textwrap::{dedent, indent};
+
+const ROUNDTRIP_CASES: [&str; 3] = [
+ // basic test case
+ "Hi.\nThis is a test.\nTesting.",
+ // include a blank line
+ "Hi.\nThis is a test.\n\nTesting.",
+ // include leading and trailing blank lines
+ "\nHi.\nThis is a test.\nTesting.\n",
+];
+
+const WINDOWS_CASES: [&str; 2] = [
+ // use windows line endings
+ "Hi.\r\nThis is a test.\r\nTesting.",
+ // pathological case
+ "Hi.\r\nThis is a test.\n\r\nTesting.\r\n\n",
+];
+
+#[test]
+fn test_indent_nomargin_default() {
+ // indent should do nothing if 'prefix' is empty.
+ for text in ROUNDTRIP_CASES.iter() {
+ assert_eq!(&indent(text, ""), text);
+ }
+ for text in WINDOWS_CASES.iter() {
+ assert_eq!(&indent(text, ""), text);
+ }
+}
+
+#[test]
+fn test_roundtrip_spaces() {
+ // A whitespace prefix should roundtrip with dedent
+ for text in ROUNDTRIP_CASES.iter() {
+ assert_eq!(&dedent(&indent(text, " ")), text);
+ }
+}
+
+#[test]
+fn test_roundtrip_tabs() {
+ // A whitespace prefix should roundtrip with dedent
+ for text in ROUNDTRIP_CASES.iter() {
+ assert_eq!(&dedent(&indent(text, "\t\t")), text);
+ }
+}
+
+#[test]
+fn test_roundtrip_mixed() {
+ // A whitespace prefix should roundtrip with dedent
+ for text in ROUNDTRIP_CASES.iter() {
+ assert_eq!(&dedent(&indent(text, " \t \t ")), text);
+ }
+}
+
+#[test]
+fn test_indent_default() {
+ // Test default indenting of lines that are not whitespace only
+ let prefix = " ";
+ let expected = [
+ // Basic test case
+ " Hi.\n This is a test.\n Testing.",
+ // Include a blank line
+ " Hi.\n This is a test.\n\n Testing.",
+ // Include leading and trailing blank lines
+ "\n Hi.\n This is a test.\n Testing.\n",
+ ];
+ for (text, expect) in ROUNDTRIP_CASES.iter().zip(expected.iter()) {
+ assert_eq!(&indent(text, prefix), expect)
+ }
+ let expected = [
+ // Use Windows line endings
+ " Hi.\r\n This is a test.\r\n Testing.",
+ // Pathological case
+ " Hi.\r\n This is a test.\n\r\n Testing.\r\n\n",
+ ];
+ for (text, expect) in WINDOWS_CASES.iter().zip(expected.iter()) {
+ assert_eq!(&indent(text, prefix), expect)
+ }
+}
+
+#[test]
+fn indented_text_should_have_the_same_number_of_lines_as_the_original_text() {
+ let texts = ["foo\nbar", "foo\nbar\n", "foo\nbar\nbaz"];
+ for original in texts.iter() {
+ let indented = indent(original, "");
+ assert_eq!(&indented, original);
+ }
+}
diff --git a/tests/version-numbers.rs b/tests/version-numbers.rs
index 3429514..3f429b1 100644
--- a/tests/version-numbers.rs
+++ b/tests/version-numbers.rs
@@ -7,7 +7,7 @@ fn test_readme_deps() {
fn test_changelog() {
version_sync::assert_contains_regex!(
"CHANGELOG.md",
- r"^## Version {version} — .* \d\d?.., 20\d\d$"
+ r"^## Version {version} \(20\d\d-\d\d-\d\d\)"
);
}
@@ -15,3 +15,8 @@ fn test_changelog() {
fn test_html_root_url() {
version_sync::assert_html_root_url_updated!("src/lib.rs");
}
+
+#[test]
+fn test_dependency_graph() {
+ version_sync::assert_contains_regex!("src/lib.rs", "master/images/textwrap-{version}.svg");
+}