diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 04:45:34 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 04:45:34 +0000 |
commit | 8252a405d78c2348903fb6bd68cbcd9c6c635d01 (patch) | |
tree | 635ee5c32540d7f99f692a4a3b2fac295dab1115 | |
parent | 51ad9120fdced2a1683eaa3dad00b42045525bdc (diff) | |
parent | 7c15b5075412570075c4a045ba10831c299fc3ee (diff) | |
download | textwrap-8252a405d78c2348903fb6bd68cbcd9c6c635d01.tar.gz |
Snap for 10453563 from 7c15b5075412570075c4a045ba10831c299fc3ee to mainline-appsearch-release (aml_ase_341510000, aml_ase_341410000, aml_ase_341310010, aml_ase_341113000, aml_ase_340913000, android14-mainline-appsearch-release)
Change-Id: I2f37f58ddf5f3ba660d7f645d6d28651a7a22f12
-rw-r--r-- | .cargo_vcs_info.json | 2 | ||||
-rw-r--r-- | Android.bp | 6 | ||||
-rw-r--r-- | CHANGELOG.md | 28 | ||||
-rw-r--r-- | Cargo.lock.saved | 558 | ||||
-rw-r--r-- | Cargo.toml | 58 | ||||
-rw-r--r-- | METADATA | 14 | ||||
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | TEST_MAPPING | 48 | ||||
-rw-r--r-- | src/fuzzing.rs | 23 | ||||
-rw-r--r-- | src/indentation.rs | 2 | ||||
-rw-r--r-- | src/lib.rs | 511 | ||||
-rw-r--r-- | src/line_ending.rs | 88 | ||||
-rw-r--r-- | src/word_separators.rs | 79 | ||||
-rw-r--r-- | src/wrap_algorithms.rs | 30 | ||||
-rw-r--r-- | src/wrap_algorithms/optimal_fit.rs | 2 |
15 files changed, 1205 insertions, 246 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index bf3e802..b191c07 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,6 +1,6 @@ { "git": { - "sha1": "559e07a53bdf7de6bed5c48aacfc0ec8c8bb0c05" + "sha1": "3c052a06a66bdec9434f7dfdd4839c431207ddf9" }, "path_in_vcs": "" }
\ No newline at end of file @@ -23,12 +23,14 @@ rust_library { host_supported: true, crate_name: "textwrap", cargo_env_compat: true, - cargo_pkg_version: "0.15.0", + cargo_pkg_version: "0.16.0", srcs: ["src/lib.rs"], - edition: "2018", + edition: "2021", apex_available: [ "//apex_available:platform", "com.android.compos", "com.android.virt", ], + product_available: true, + vendor_available: true, } diff --git a/CHANGELOG.md b/CHANGELOG.md index 093b9dc..5d0dd61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,34 @@ This file lists the most important changes made in each release of `textwrap`. +## Version 0.16.0 (2022-10-23) + +This release marks `Options` as `non_exhaustive` and extends it to +make line endings configurable, it adds new fast paths to `fill` and +`wrap`, and it fixes crashes in `unfill` and `refill`. + +* [#480](https://github.com/mgeisler/textwrap/pull/480): Mark + `Options` as `non_exhaustive`. This will allow us to extend the + struct in the future without breaking backwards compatibility. +* [#478](https://github.com/mgeisler/textwrap/pull/478): Add fast + paths to `fill` and `wrap`. This makes the functions 10-25 times + faster when the no wrapping is needed. +* [#468](https://github.com/mgeisler/textwrap/pull/468): Fix `refill` + to add back correct line ending. +* [#467](https://github.com/mgeisler/textwrap/pull/467): Fix crashes + in `unfill` and `refill`. +* [#458](https://github.com/mgeisler/textwrap/pull/458): Test with + Rust 1.56 (first compiler release with support for Rust 2021). +* [#454](https://github.com/mgeisler/textwrap/pull/454): Make line + endings configurable. +* [#448](https://github.com/mgeisler/textwrap/pull/448): Migrate to + the Rust 2021 edition. + +## Version 0.15.1 (2022-09-15) + +This release was yanked since it accidentally broke backwards +compatibility with 0.15.0. 
+ ## Version 0.15.0 (2022-02-27) This is a major feature release with two main changes: diff --git a/Cargo.lock.saved b/Cargo.lock.saved new file mode 100644 index 0000000..414d793 --- /dev/null +++ b/Cargo.lock.saved @@ -0,0 +1,558 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" +dependencies = [ + "memchr", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "form_urlencoded" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fst" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" + +[[package]] +name = "getrandom" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + +[[package]] +name = "hyphenation" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf4dd4c44ae85155502a52c48739c8a48185d1449fff1963cffee63c28a50f0" +dependencies = [ + "bincode", + "fst", + "hyphenation_commons", + "pocket-resources", + "serde", +] + +[[package]] +name = "hyphenation_commons" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5febe7a2ade5c7d98eb8b75f946c046b335324b06a14ea0998271504134c05bf" +dependencies = [ + "fst", + "serde", +] + +[[package]] +name = "idna" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "io-lifetimes" +version = "0.7.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e481ccbe3dea62107216d0d1138bb8ad8e5e5c43009a098bd1990272c497b0" + +[[package]] +name = "libc" +version = "0.2.135" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c" + +[[package]] +name = "linux-raw-sys" +version = "0.0.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "numtoa" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" + +[[package]] +name = "once_cell" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" + +[[package]] +name = "percent-encoding" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" + +[[package]] +name = "pocket-resources" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c135f38778ad324d9e9ee68690bac2c1a51f340fdf96ca13e2ab3914eb2e51d8" + +[[package]] +name = "proc-macro2" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pulldown-cmark" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8" +dependencies 
= [ + "bitflags", + "memchr", + "unicase", +] + +[[package]] +name = "quote" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_termios" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8440d8acb4fd3d277125b4bd01a6f38aee8d814b3b5fc09b3f2b825d37d3fe8f" +dependencies = [ + "redox_syscall", +] + +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + +[[package]] +name = "rustix" +version = "0.35.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "985947f9b6423159c4726323f373be0a21bdb514c5af06a849cb3d2dce2d01e8" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "semver" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" + +[[package]] +name = "serde" +version = "1.0.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +dependencies = [ + "serde_derive", +] + +[[package]] +name = 
"serde_derive" +version = "1.0.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "smawk" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f67ad224767faa3c7d8b6d91985b78e70a1324408abcb1cfcc2be4c06bc06043" + +[[package]] +name = "syn" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "terminal_size" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8440c860cf79def6164e4a0a983bcc2305d82419177a0e0c71930d049e3ac5a1" +dependencies = [ + "rustix", + "windows-sys", +] + +[[package]] +name = "termion" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "659c1f379f3408c7e5e84c7d0da6d93404e3800b6b9d063ba24436419302ec90" +dependencies = [ + "libc", + "numtoa", + "redox_syscall", + "redox_termios", +] + +[[package]] +name = "textwrap" +version = "0.16.0" +dependencies = [ + "hyphenation", + "smawk", + "terminal_size", + "termion", + "unic-emoji-char", + "unicode-linebreak", + "unicode-width", + "version-sync", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + +[[package]] +name = "toml" +version = "0.5.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +dependencies = [ + "serde", +] + +[[package]] +name = "unic-char-property" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" +dependencies = [ + "unic-char-range", +] + +[[package]] +name = "unic-char-range" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" + +[[package]] +name = "unic-common" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" + +[[package]] +name = "unic-emoji-char" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-version" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" +dependencies = [ + "unic-common", +] + +[[package]] +name = "unicase" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +dependencies = [ + "version_check", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" + +[[package]] +name = "unicode-ident" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" + +[[package]] +name = "unicode-linebreak" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5faade31a542b8b35855fff6e8def199853b2da8da256da52f52f1316ee3137" +dependencies = [ + "hashbrown", + "regex", +] + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "url" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "version-sync" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d0801cec07737d88cb900e6419f6f68733867f90b3faaa837e84692e101bf0" +dependencies = [ + "proc-macro2", + "pulldown-cmark", + "regex", + "semver", + "syn", + "toml", + "url", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + 
"winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" @@ -10,18 +10,35 @@ # See Cargo.toml.orig for the original contents. 
[package] -edition = "2018" +edition = "2021" name = "textwrap" -version = "0.15.0" +version = "0.16.0" authors = ["Martin Geisler <martin@geisler.net>"] -exclude = [".github/", ".gitignore", "benches/", "examples/", "fuzz/", "images/"] -description = "Powerful library for word wrapping, indenting, and dedenting strings" +exclude = [ + ".github/", + ".gitignore", + "benchmarks/", + "examples/", + "fuzz/", + "images/", +] +description = "Library for word wrapping, indenting, and dedenting strings. Has optional support for Unicode and emojis as well as machine hyphenation." documentation = "https://docs.rs/textwrap/" readme = "README.md" -keywords = ["text", "formatting", "wrap", "typesetting", "hyphenation"] -categories = ["text-processing", "command-line-interface"] +keywords = [ + "text", + "formatting", + "wrap", + "typesetting", + "hyphenation", +] +categories = [ + "text-processing", + "command-line-interface", +] license = "MIT" repository = "https://github.com/mgeisler/textwrap" + [package.metadata.docs.rs] all-features = true @@ -35,15 +52,6 @@ name = "termwidth" path = "examples/termwidth.rs" required-features = ["terminal_size"] -[[bench]] -name = "linear" -path = "benches/linear.rs" -harness = false - -[[bench]] -name = "indent" -path = "benches/indent.rs" -harness = false [dependencies.hyphenation] version = "0.8.4" features = ["embed_en-us"] @@ -54,21 +62,16 @@ version = "0.3.1" optional = true [dependencies.terminal_size] -version = "0.1.17" +version = "0.2.1" optional = true [dependencies.unicode-linebreak] -version = "0.1.2" +version = "0.1.4" optional = true [dependencies.unicode-width] -version = "0.1.9" +version = "0.1.10" optional = true -[dev-dependencies.criterion] -version = "0.3.5" - -[dev-dependencies.lipsum] -version = "0.8.0" [dev-dependencies.unic-emoji-char] version = "0.9.0" @@ -77,6 +80,11 @@ version = "0.9.0" version = "0.9.4" [features] -default = ["unicode-linebreak", "unicode-width", "smawk"] +default = [ + "unicode-linebreak", + 
"unicode-width", + "smawk", +] + [target."cfg(unix)".dev-dependencies.termion] -version = "1.5.6" +version = "2.0.1" @@ -1,5 +1,9 @@ +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update rust/crates/textwrap +# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md + name: "textwrap" -description: "Powerful library for word wrapping, indenting, and dedenting strings" +description: "Library for word wrapping, indenting, and dedenting strings. Has optional support for Unicode and emojis as well as machine hyphenation." third_party { url { type: HOMEPAGE @@ -7,13 +11,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/textwrap/textwrap-0.15.0.crate" + value: "https://static.crates.io/crates/textwrap/textwrap-0.16.0.crate" } - version: "0.15.0" + version: "0.16.0" license_type: NOTICE last_upgrade_date { year: 2022 - month: 3 - day: 1 + month: 12 + day: 19 } } @@ -16,7 +16,7 @@ drawn on a [HTML5 canvas using WebAssembly][wasm-demo]. 
To use the textwrap crate, add this to your `Cargo.toml` file: ```toml [dependencies] -textwrap = "0.15" +textwrap = "0.16" ``` By default, this enables word wrapping with support for Unicode diff --git a/TEST_MAPPING b/TEST_MAPPING index 07d379e..43da34b 100644 --- a/TEST_MAPPING +++ b/TEST_MAPPING @@ -5,7 +5,7 @@ "path": "external/rust/crates/base64" }, { - "path": "external/rust/crates/libsqlite3-sys" + "path": "external/rust/crates/clap/2.33.3" }, { "path": "external/rust/crates/tinytemplate" @@ -16,51 +16,5 @@ { "path": "external/rust/crates/unicode-xid" } - ], - "presubmit": [ - { - "name": "ZipFuseTest" - }, - { - "name": "apkdmverity.test" - }, - { - "name": "authfs_device_test_src_lib" - }, - { - "name": "diced_test" - }, - { - "name": "keystore2_test" - }, - { - "name": "keystore2_vintf_test" - }, - { - "name": "legacykeystore_test" - } - ], - "presubmit-rust": [ - { - "name": "ZipFuseTest" - }, - { - "name": "apkdmverity.test" - }, - { - "name": "authfs_device_test_src_lib" - }, - { - "name": "diced_test" - }, - { - "name": "keystore2_test" - }, - { - "name": "keystore2_vintf_test" - }, - { - "name": "legacykeystore_test" - } ] } diff --git a/src/fuzzing.rs b/src/fuzzing.rs new file mode 100644 index 0000000..24d59fd --- /dev/null +++ b/src/fuzzing.rs @@ -0,0 +1,23 @@ +//! Fuzzing helpers. + +use super::Options; +use std::borrow::Cow; + +/// Exposed for fuzzing so we can check the slow path is correct. +pub fn fill_slow_path<'a>(text: &str, options: Options<'_>) -> String { + super::fill_slow_path(text, options) +} + +/// Exposed for fuzzing so we can check the slow path is correct. +pub fn wrap_single_line<'a>(line: &'a str, options: &Options<'_>, lines: &mut Vec<Cow<'a, str>>) { + super::wrap_single_line(line, options, lines); +} + +/// Exposed for fuzzing so we can check the slow path is correct. 
+pub fn wrap_single_line_slow_path<'a>( + line: &'a str, + options: &Options<'_>, + lines: &mut Vec<Cow<'a, str>>, +) { + super::wrap_single_line_slow_path(line, options, lines) +} diff --git a/src/indentation.rs b/src/indentation.rs index 5d90c06..2f3a853 100644 --- a/src/indentation.rs +++ b/src/indentation.rs @@ -134,7 +134,7 @@ pub fn dedent(s: &str) -> String { // We now go over the lines a second time to build the result. let mut result = String::new(); for line in s.lines() { - if line.starts_with(&prefix) && line.chars().any(|c| !c.is_whitespace()) { + if line.starts_with(prefix) && line.chars().any(|c| !c.is_whitespace()) { let (_, tail) = line.split_at(prefix.len()); result.push_str(tail); } @@ -112,7 +112,7 @@ //! The full dependency graph, where dashed lines indicate optional //! dependencies, is shown below: //! -//! <img src="https://raw.githubusercontent.com/mgeisler/textwrap/master/images/textwrap-0.15.0.svg"> +//! <img src="https://raw.githubusercontent.com/mgeisler/textwrap/master/images/textwrap-0.16.0.svg"> //! //! ## Default Features //! @@ -146,16 +146,20 @@ //! This feature can be disabled if you only ever intend to use //! [`wrap_algorithms::wrap_first_fit`]. //! -//! With Rust 1.59.0, the size impact of the above features on your +//! <!-- begin binary-sizes --> +//! +//! With Rust 1.64.0, the size impact of the above features on your //! binary is as follows: //! //! | Configuration | Binary Size | Delta | //! | :--- | ---: | ---: | //! | quick-and-dirty implementation | 289 KB | — KB | -//! | textwrap without default features | 301 KB | 12 KB | +//! | textwrap without default features | 305 KB | 16 KB | //! | textwrap with smawk | 317 KB | 28 KB | -//! | textwrap with unicode-width | 313 KB | 24 KB | -//! | textwrap with unicode-linebreak | 395 KB | 106 KB | +//! | textwrap with unicode-width | 309 KB | 20 KB | +//! | textwrap with unicode-linebreak | 342 KB | 53 KB | +//! +//! <!-- end binary-sizes --> //! //! 
The above sizes are the stripped sizes and the binary is compiled //! in release mode with this profile: @@ -189,7 +193,7 @@ //! [terminal_size]: https://docs.rs/terminal_size/ //! [hyphenation]: https://docs.rs/hyphenation/ -#![doc(html_root_url = "https://docs.rs/textwrap/0.15.0")] +#![doc(html_root_url = "https://docs.rs/textwrap/0.16.0")] #![forbid(unsafe_code)] // See https://github.com/mgeisler/textwrap/issues/210 #![deny(missing_docs)] #![deny(missing_debug_implementations)] @@ -214,27 +218,24 @@ pub use word_splitters::WordSplitter; pub mod wrap_algorithms; pub use wrap_algorithms::WrapAlgorithm; -pub mod core; +mod line_ending; +pub use line_ending::LineEnding; -#[cfg(feature = "unicode-linebreak")] -macro_rules! DefaultWordSeparator { - () => { - WordSeparator::UnicodeBreakProperties - }; -} +pub mod core; -#[cfg(not(feature = "unicode-linebreak"))] -macro_rules! DefaultWordSeparator { - () => { - WordSeparator::AsciiSpace - }; -} +// This module is only active when running fuzz tests. It provides +// access to private helpers. +#[cfg(fuzzing)] +pub mod fuzzing; /// Holds configuration options for wrapping and filling text. +#[non_exhaustive] #[derive(Debug, Clone)] pub struct Options<'a> { /// The width in columns at which the text will be wrapped. pub width: usize, + /// Line ending used for breaking lines. + pub line_ending: LineEnding, /// Indentation used for the first line of output. See the /// [`Options::initial_indent`] method. pub initial_indent: &'a str, @@ -262,6 +263,7 @@ impl<'a> From<&'a Options<'a>> for Options<'a> { fn from(options: &'a Options<'a>) -> Self { Self { width: options.width, + line_ending: options.line_ending, initial_indent: options.initial_indent, subsequent_indent: options.subsequent_indent, break_words: options.break_words, @@ -279,34 +281,30 @@ impl<'a> From<usize> for Options<'a> { } impl<'a> Options<'a> { - /// Creates a new [`Options`] with the specified width. 
Equivalent to + /// Creates a new [`Options`] with the specified width. + /// + /// The other fields are given default values as follows: /// /// ``` - /// # use textwrap::{Options, WordSplitter, WordSeparator, WrapAlgorithm}; + /// # use textwrap::{LineEnding, Options, WordSplitter, WordSeparator, WrapAlgorithm}; /// # let width = 80; - /// # let actual = Options::new(width); - /// # let expected = - /// Options { - /// width: width, - /// initial_indent: "", - /// subsequent_indent: "", - /// break_words: true, - /// #[cfg(feature = "unicode-linebreak")] - /// word_separator: WordSeparator::UnicodeBreakProperties, - /// #[cfg(not(feature = "unicode-linebreak"))] - /// word_separator: WordSeparator::AsciiSpace, - /// #[cfg(feature = "smawk")] - /// wrap_algorithm: WrapAlgorithm::new_optimal_fit(), - /// #[cfg(not(feature = "smawk"))] - /// wrap_algorithm: WrapAlgorithm::FirstFit, - /// word_splitter: WordSplitter::HyphenSplitter, - /// } - /// # ; - /// # assert_eq!(actual.width, expected.width); - /// # assert_eq!(actual.initial_indent, expected.initial_indent); - /// # assert_eq!(actual.subsequent_indent, expected.subsequent_indent); - /// # assert_eq!(actual.break_words, expected.break_words); - /// # assert_eq!(actual.word_splitter, expected.word_splitter); + /// let options = Options::new(width); + /// assert_eq!(options.line_ending, LineEnding::LF); + /// assert_eq!(options.initial_indent, ""); + /// assert_eq!(options.subsequent_indent, ""); + /// assert_eq!(options.break_words, true); + /// + /// #[cfg(feature = "unicode-linebreak")] + /// assert_eq!(options.word_separator, WordSeparator::UnicodeBreakProperties); + /// #[cfg(not(feature = "unicode-linebreak"))] + /// assert_eq!(options.word_separator, WordSeparator::AsciiSpace); + /// + /// #[cfg(feature = "smawk")] + /// assert_eq!(options.wrap_algorithm, WrapAlgorithm::new_optimal_fit()); + /// #[cfg(not(feature = "smawk"))] + /// assert_eq!(options.wrap_algorithm, WrapAlgorithm::FirstFit); + /// + /// 
assert_eq!(options.word_splitter, WordSplitter::HyphenSplitter); /// ``` /// /// Note that the default word separator and wrap algorithms @@ -315,10 +313,11 @@ impl<'a> Options<'a> { pub const fn new(width: usize) -> Self { Options { width, + line_ending: LineEnding::LF, initial_indent: "", subsequent_indent: "", break_words: true, - word_separator: DefaultWordSeparator!(), + word_separator: WordSeparator::new(), wrap_algorithm: WrapAlgorithm::new(), word_splitter: WordSplitter::HyphenSplitter, } @@ -345,9 +344,29 @@ impl<'a> Options<'a> { pub fn with_termwidth() -> Self { Self::new(termwidth()) } -} -impl<'a> Options<'a> { + /// Change [`self.line_ending`]. This specifies which of the + /// supported line endings should be used to break the lines of the + /// input text. + /// + /// # Examples + /// + /// ``` + /// use textwrap::{refill, LineEnding, Options}; + /// + /// let options = Options::new(15).line_ending(LineEnding::CRLF); + /// assert_eq!(refill("This is a little example.", options), + /// "This is a\r\nlittle example."); + /// ``` + /// + /// [`self.line_ending`]: #structfield.line_ending + pub fn line_ending(self, line_ending: LineEnding) -> Self { + Options { + line_ending, + ..self + } + } + /// Change [`self.initial_indent`]. The initial indentation is /// used on the very first line of output. /// @@ -413,6 +432,9 @@ impl<'a> Options<'a> { /// than `self.width` can be broken, or if they will be left /// sticking out into the right margin. /// + /// See [`Options::word_splitter`] instead if you want to control + /// hyphenation. 
+ /// /// # Examples /// /// ``` @@ -444,6 +466,7 @@ impl<'a> Options<'a> { pub fn word_separator(self, word_separator: WordSeparator) -> Options<'a> { Options { width: self.width, + line_ending: self.line_ending, initial_indent: self.initial_indent, subsequent_indent: self.subsequent_indent, break_words: self.break_words, @@ -462,6 +485,7 @@ impl<'a> Options<'a> { pub fn wrap_algorithm(self, wrap_algorithm: WrapAlgorithm) -> Options<'a> { Options { width: self.width, + line_ending: self.line_ending, initial_indent: self.initial_indent, subsequent_indent: self.subsequent_indent, break_words: self.break_words, @@ -475,20 +499,38 @@ impl<'a> Options<'a> { /// [`word_splitters::WordSplitter`] is used to fit part of a word /// into the current line when wrapping text. /// + /// See [`Options::break_words`] instead if you want to control the + /// handling of words longer than the line width. + /// /// # Examples /// /// ``` - /// use textwrap::{Options, WordSplitter}; - /// let opt = Options::new(80); - /// assert_eq!(opt.word_splitter, WordSplitter::HyphenSplitter); - /// let opt = opt.word_splitter(WordSplitter::NoHyphenation); - /// assert_eq!(opt.word_splitter, WordSplitter::NoHyphenation); + /// use textwrap::{wrap, Options, WordSplitter}; + /// + /// // The default is WordSplitter::HyphenSplitter. 
+ /// let options = Options::new(5); + /// assert_eq!(wrap("foo-bar-baz", &options), + /// vec!["foo-", "bar-", "baz"]); + /// + /// // The word is now so long that break_words kick in: + /// let options = Options::new(5) + /// .word_splitter(WordSplitter::NoHyphenation); + /// assert_eq!(wrap("foo-bar-baz", &options), + /// vec!["foo-b", "ar-ba", "z"]); + /// + /// // If you want to breaks at all, disable both: + /// let options = Options::new(5) + /// .break_words(false) + /// .word_splitter(WordSplitter::NoHyphenation); + /// assert_eq!(wrap("foo-bar-baz", &options), + /// vec!["foo-bar-baz"]); /// ``` /// /// [`self.word_splitter`]: #structfield.word_splitter pub fn word_splitter(self, word_splitter: WordSplitter) -> Options<'a> { Options { width: self.width, + line_ending: self.line_ending, initial_indent: self.initial_indent, subsequent_indent: self.subsequent_indent, break_words: self.break_words, @@ -562,13 +604,27 @@ pub fn fill<'a, Opt>(text: &str, width_or_options: Opt) -> String where Opt: Into<Options<'a>>, { + let options = width_or_options.into(); + + if text.len() < options.width && !text.contains('\n') && options.initial_indent.is_empty() { + String::from(text.trim_end_matches(' ')) + } else { + fill_slow_path(text, options) + } +} + +/// Slow path for fill. +/// +/// This is taken when `text` is longer than `options.width`. +fn fill_slow_path(text: &str, options: Options<'_>) -> String { // This will avoid reallocation in simple cases (no // indentation, no hyphenation). let mut result = String::with_capacity(text.len()); - for (i, line) in wrap(text, width_or_options).iter().enumerate() { + let line_ending_str = options.line_ending.as_str(); + for (i, line) in wrap(text, options).iter().enumerate() { if i > 0 { - result.push('\n'); + result.push_str(line_ending_str); } result.push_str(line); } @@ -594,23 +650,31 @@ where /// textwrap: a small library for wrapping text. 
/// ``` /// -/// In addition, it will recognize a common prefix among the lines. +/// In addition, it will recognize a common prefix and a common line +/// ending among the lines. +/// /// The prefix of the first line is returned in /// [`Options::initial_indent`] and the prefix (if any) of the /// other lines is returned in [`Options::subsequent_indent`]. /// +/// The line ending is returned in [`Options::line_ending`]. If the line ending +/// cannot be confidently detected (mixed or no line endings in the +/// input), [`LineEnding::LF`] will be returned. +/// /// In addition to `' '`, the prefixes can consist of characters used /// for unordered lists (`'-'`, `'+'`, and `'*'`) and block quotes /// (`'>'`) in Markdown as well as characters often used for inline /// comments (`'#'` and `'/'`). /// /// The text must come from a single wrapped paragraph. This means -/// that there can be no `"\n\n"` within the text. +/// that there can be no empty lines (`"\n\n"` or `"\r\n\r\n"`) within +/// the text. It is unspecified what happens if `unfill` is called on +/// more than one paragraph of text. 
/// /// # Examples /// /// ``` -/// use textwrap::unfill; +/// use textwrap::{LineEnding, unfill}; /// /// let (text, options) = unfill("\ /// * This is an @@ -621,13 +685,13 @@ where /// assert_eq!(text, "This is an example of a list item.\n"); /// assert_eq!(options.initial_indent, "* "); /// assert_eq!(options.subsequent_indent, " "); +/// assert_eq!(options.line_ending, LineEnding::LF); /// ``` pub fn unfill(text: &str) -> (String, Options<'_>) { - let trimmed = text.trim_end_matches('\n'); let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/']; let mut options = Options::new(0); - for (idx, line) in trimmed.split('\n').enumerate() { + for (idx, line) in text.lines().enumerate() { options.width = std::cmp::max(options.width, core::display_width(line)); let without_prefix = line.trim_start_matches(prefix_chars); let prefix = &line[..line.len() - without_prefix.len()]; @@ -650,16 +714,30 @@ pub fn unfill(text: &str) -> (String, Options<'_>) { } let mut unfilled = String::with_capacity(text.len()); - for (idx, line) in trimmed.split('\n').enumerate() { + let mut detected_line_ending = None; + + for (idx, (line, ending)) in line_ending::NonEmptyLines(text).enumerate() { if idx == 0 { unfilled.push_str(&line[options.initial_indent.len()..]); } else { unfilled.push(' '); unfilled.push_str(&line[options.subsequent_indent.len()..]); } + match (detected_line_ending, ending) { + (None, Some(_)) => detected_line_ending = ending, + (Some(LineEnding::CRLF), Some(LineEnding::LF)) => detected_line_ending = ending, + _ => (), + } + } + + // Add back a line ending if `text` ends with the one we detect. 
+ if let Some(line_ending) = detected_line_ending { + if text.ends_with(line_ending.as_str()) { + unfilled.push_str(line_ending.as_str()); + } } - unfilled.push_str(&text[trimmed.len()..]); + options.line_ending = detected_line_ending.unwrap_or(LineEnding::LF); (unfilled, options) } @@ -721,13 +799,20 @@ pub fn refill<'a, Opt>(filled_text: &str, new_width_or_options: Opt) -> String where Opt: Into<Options<'a>>, { - let trimmed = filled_text.trim_end_matches('\n'); - let (text, options) = unfill(trimmed); let mut new_options = new_width_or_options.into(); + let (text, options) = unfill(filled_text); + // The original line ending is kept by `unfill`. + let stripped = text.strip_suffix(options.line_ending.as_str()); + let new_line_ending = new_options.line_ending.as_str(); + new_options.initial_indent = options.initial_indent; new_options.subsequent_indent = options.subsequent_indent; - let mut refilled = fill(&text, new_options); - refilled.push_str(&filled_text[trimmed.len()..]); + let mut refilled = fill(stripped.unwrap_or(&text), new_options); + + // Add back right line ending if we stripped one off above. 
+ if stripped.is_some() { + refilled.push_str(new_line_ending); + } refilled } @@ -905,85 +990,110 @@ pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>> where Opt: Into<Options<'a>>, { - let options = width_or_options.into(); + let options: Options = width_or_options.into(); + let line_ending_str = options.line_ending.as_str(); + let mut lines = Vec::new(); + for line in text.split(line_ending_str) { + wrap_single_line(line, &options, &mut lines); + } + + lines +} + +fn wrap_single_line<'a>(line: &'a str, options: &Options<'_>, lines: &mut Vec<Cow<'a, str>>) { + let indent = if lines.is_empty() { + options.initial_indent + } else { + options.subsequent_indent + }; + if line.len() < options.width && indent.is_empty() { + lines.push(Cow::from(line.trim_end_matches(' '))); + } else { + wrap_single_line_slow_path(line, options, lines) + } +} + +/// Wrap a single line of text. +/// +/// This is taken when `line` is longer than `options.width`. +fn wrap_single_line_slow_path<'a>( + line: &'a str, + options: &Options<'_>, + lines: &mut Vec<Cow<'a, str>>, +) { let initial_width = options .width .saturating_sub(core::display_width(options.initial_indent)); let subsequent_width = options .width .saturating_sub(core::display_width(options.subsequent_indent)); + let line_widths = [initial_width, subsequent_width]; + + let words = options.word_separator.find_words(line); + let split_words = word_splitters::split_words(words, &options.word_splitter); + let broken_words = if options.break_words { + let mut broken_words = core::break_words(split_words, line_widths[1]); + if !options.initial_indent.is_empty() { + // Without this, the first word will always go into the + // first line. However, since we break words based on the + // _second_ line width, it can be wrong to unconditionally + // put the first word onto the first line. An empty + // zero-width word fixed this. 
+ broken_words.insert(0, core::Word::from("")); + } + broken_words + } else { + split_words.collect::<Vec<_>>() + }; - let mut lines = Vec::new(); - for line in text.split('\n') { - let words = options.word_separator.find_words(line); - let split_words = word_splitters::split_words(words, &options.word_splitter); - let broken_words = if options.break_words { - let mut broken_words = core::break_words(split_words, subsequent_width); - if !options.initial_indent.is_empty() { - // Without this, the first word will always go into - // the first line. However, since we break words based - // on the _second_ line width, it can be wrong to - // unconditionally put the first word onto the first - // line. An empty zero-width word fixed this. - broken_words.insert(0, core::Word::from("")); + let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths); + + let mut idx = 0; + for words in wrapped_words { + let last_word = match words.last() { + None => { + lines.push(Cow::from("")); + continue; } - broken_words - } else { - split_words.collect::<Vec<_>>() + Some(word) => word, }; - let line_widths = [initial_width, subsequent_width]; - let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths); - - let mut idx = 0; - for words in wrapped_words { - let last_word = match words.last() { - None => { - lines.push(Cow::from("")); - continue; - } - Some(word) => word, - }; - - // We assume here that all words are contiguous in `line`. - // That is, the sum of their lengths should add up to the - // length of `line`. - let len = words - .iter() - .map(|word| word.len() + word.whitespace.len()) - .sum::<usize>() - - last_word.whitespace.len(); - - // The result is owned if we have indentation, otherwise - // we can simply borrow an empty string. 
- let mut result = if lines.is_empty() && !options.initial_indent.is_empty() { - Cow::Owned(options.initial_indent.to_owned()) - } else if !lines.is_empty() && !options.subsequent_indent.is_empty() { - Cow::Owned(options.subsequent_indent.to_owned()) - } else { - // We can use an empty string here since string - // concatenation for `Cow` preserves a borrowed value - // when either side is empty. - Cow::from("") - }; + // We assume here that all words are contiguous in `line`. + // That is, the sum of their lengths should add up to the + // length of `line`. + let len = words + .iter() + .map(|word| word.len() + word.whitespace.len()) + .sum::<usize>() + - last_word.whitespace.len(); + + // The result is owned if we have indentation, otherwise we + // can simply borrow an empty string. + let mut result = if lines.is_empty() && !options.initial_indent.is_empty() { + Cow::Owned(options.initial_indent.to_owned()) + } else if !lines.is_empty() && !options.subsequent_indent.is_empty() { + Cow::Owned(options.subsequent_indent.to_owned()) + } else { + // We can use an empty string here since string + // concatenation for `Cow` preserves a borrowed value when + // either side is empty. + Cow::from("") + }; - result += &line[idx..idx + len]; + result += &line[idx..idx + len]; - if !last_word.penalty.is_empty() { - result.to_mut().push_str(last_word.penalty); - } + if !last_word.penalty.is_empty() { + result.to_mut().push_str(last_word.penalty); + } - lines.push(result); + lines.push(result); - // Advance by the length of `result`, plus the length of - // `last_word.whitespace` -- even if we had a penalty, we - // need to skip over the whitespace. - idx += len + last_word.whitespace.len(); - } + // Advance by the length of `result`, plus the length of + // `last_word.whitespace` -- even if we had a penalty, we need + // to skip over the whitespace. + idx += len + last_word.whitespace.len(); } - - lines } /// Wrap text into columns with a given total width. 
@@ -1056,7 +1166,7 @@ where { assert!(columns > 0); - let mut options = total_width_or_options.into(); + let mut options: Options = total_width_or_options.into(); let inner_width = options .width @@ -1103,25 +1213,23 @@ where /// text remains untouched. /// /// Since we can only replace existing whitespace in the input with -/// `'\n'`, we cannot do hyphenation nor can we split words longer -/// than the line width. We also need to use `AsciiSpace` as the word -/// separator since we need `' '` characters between words in order to -/// replace some of them with a `'\n'`. Indentation is also ruled out. -/// In other words, `fill_inplace(width)` behaves as if you had called -/// [`fill`] with these options: +/// `'\n'` (there is no space for `"\r\n"`), we cannot do hyphenation +/// nor can we split words longer than the line width. We also need to +/// use `AsciiSpace` as the word separator since we need `' '` +/// characters between words in order to replace some of them with a +/// `'\n'`. Indentation is also ruled out. 
In other words, +/// `fill_inplace(width)` behaves as if you had called [`fill`] with +/// these options: /// /// ``` -/// # use textwrap::{core, Options, WordSplitter, WordSeparator, WrapAlgorithm}; +/// # use textwrap::{core, LineEnding, Options, WordSplitter, WordSeparator, WrapAlgorithm}; /// # let width = 80; -/// Options { -/// width: width, -/// initial_indent: "", -/// subsequent_indent: "", -/// break_words: false, -/// word_separator: WordSeparator::AsciiSpace, -/// wrap_algorithm: WrapAlgorithm::FirstFit, -/// word_splitter: WordSplitter::NoHyphenation, -/// }; +/// Options::new(width) +/// .break_words(false) +/// .line_ending(LineEnding::LF) +/// .word_separator(WordSeparator::AsciiSpace) +/// .wrap_algorithm(WrapAlgorithm::FirstFit) +/// .word_splitter(WordSplitter::NoHyphenation); /// ``` /// /// The wrap algorithm is [`WrapAlgorithm::FirstFit`] since this @@ -1147,7 +1255,7 @@ where /// /// In benchmarks, `fill_inplace` is about twice as fast as [`fill`]. /// Please see the [`linear` -/// benchmark](https://github.com/mgeisler/textwrap/blob/master/benches/linear.rs) +/// benchmark](https://github.com/mgeisler/textwrap/blob/master/benchmarks/linear.rs) /// for details. 
pub fn fill_inplace(text: &mut String, width: usize) { let mut indices = Vec::new(); @@ -1308,7 +1416,7 @@ mod tests { assert_eq!( wrap( "Hello, World!", - Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties) + Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties), ), vec!["Hello, W", "orld!"] ); @@ -1349,6 +1457,21 @@ mod tests { } #[test] + fn only_initial_indent_multiple_lines() { + let options = Options::new(10).initial_indent(" "); + assert_eq!(wrap("foo\nbar\nbaz", &options), vec![" foo", "bar", "baz"]); + } + + #[test] + fn only_subsequent_indent_multiple_lines() { + let options = Options::new(10).subsequent_indent(" "); + assert_eq!( + wrap("foo\nbar\nbaz", &options), + vec!["foo", " bar", " baz"] + ); + } + + #[test] fn indent_break_words() { let options = Options::new(5).initial_indent("* ").subsequent_indent(" "); assert_eq!(wrap("foobarbaz", &options), vec!["* foo", " bar", " baz"]); @@ -1625,8 +1748,8 @@ mod tests { let green_hello = "\u{1b}[0m\u{1b}[32mHello\u{1b}[0m"; let blue_world = "\u{1b}[0m\u{1b}[34mWorld!\u{1b}[0m"; assert_eq!( - fill(&(String::from(green_hello) + " " + &blue_world), 6), - String::from(green_hello) + "\n" + &blue_world + fill(&(String::from(green_hello) + " " + blue_world), 6), + String::from(green_hello) + "\n" + blue_world ); } @@ -1713,13 +1836,54 @@ mod tests { let (text, options) = unfill("foo\nbar"); assert_eq!(text, "foo bar"); assert_eq!(options.width, 3); + assert_eq!(options.line_ending, LineEnding::LF); + } + + #[test] + fn unfill_no_new_line() { + let (text, options) = unfill("foo bar"); + assert_eq!(text, "foo bar"); + assert_eq!(options.width, 7); + assert_eq!(options.line_ending, LineEnding::LF); + } + + #[test] + fn unfill_simple_crlf() { + let (text, options) = unfill("foo\r\nbar"); + assert_eq!(text, "foo bar"); + assert_eq!(options.width, 3); + assert_eq!(options.line_ending, LineEnding::CRLF); + } + + #[test] + fn unfill_mixed_new_lines() { + let (text, options) = 
unfill("foo\r\nbar\nbaz"); + assert_eq!(text, "foo bar baz"); + assert_eq!(options.width, 3); + assert_eq!(options.line_ending, LineEnding::LF); } #[test] fn unfill_trailing_newlines() { let (text, options) = unfill("foo\nbar\n\n\n"); - assert_eq!(text, "foo bar\n\n\n"); + assert_eq!(text, "foo bar\n"); + assert_eq!(options.width, 3); + } + + #[test] + fn unfill_mixed_trailing_newlines() { + let (text, options) = unfill("foo\r\nbar\n\r\n\n"); + assert_eq!(text, "foo bar\n"); assert_eq!(options.width, 3); + assert_eq!(options.line_ending, LineEnding::LF); + } + + #[test] + fn unfill_trailing_crlf() { + let (text, options) = unfill("foo bar\r\n"); + assert_eq!(text, "foo bar\r\n"); + assert_eq!(options.width, 7); + assert_eq!(options.line_ending, LineEnding::CRLF); } #[test] @@ -1767,11 +1931,58 @@ mod tests { } #[test] + fn unfill_only_prefixes_issue_466() { + // Test that we don't crash if the first line has only prefix + // chars *and* the second line is shorter than the first line. + let (text, options) = unfill("######\nfoo"); + assert_eq!(text, " foo"); + assert_eq!(options.width, 6); + assert_eq!(options.initial_indent, "######"); + assert_eq!(options.subsequent_indent, ""); + } + + #[test] + fn unfill_trailing_newlines_issue_466() { + // Test that we don't crash on a '\r' following a string of + // '\n'. The problem was that we removed both kinds of + // characters in one code path, but not in the other. + let (text, options) = unfill("foo\n##\n\n\r"); + // The \n\n changes subsequent_indent to "". 
+ assert_eq!(text, "foo ## \r"); + assert_eq!(options.width, 3); + assert_eq!(options.initial_indent, ""); + assert_eq!(options.subsequent_indent, ""); + } + + #[test] fn unfill_whitespace() { assert_eq!(unfill("foo bar").0, "foo bar"); } #[test] + fn refill_convert_lf_to_crlf() { + let options = Options::new(5).line_ending(LineEnding::CRLF); + assert_eq!(refill("foo\nbar\n", options), "foo\r\nbar\r\n",); + } + + #[test] + fn refill_convert_crlf_to_lf() { + let options = Options::new(5).line_ending(LineEnding::LF); + assert_eq!(refill("foo\r\nbar\r\n", options), "foo\nbar\n",); + } + + #[test] + fn refill_convert_mixed_newlines() { + let options = Options::new(5).line_ending(LineEnding::CRLF); + assert_eq!(refill("foo\r\nbar\n", options), "foo\r\nbar\r\n",); + } + + #[test] + fn refill_defaults_to_lf() { + assert_eq!(refill("foo bar baz", 5), "foo\nbar\nbaz"); + } + + #[test] fn wrap_columns_empty_text() { assert_eq!(wrap_columns("", 1, 10, "| ", "", " |"), vec!["| |"]); } diff --git a/src/line_ending.rs b/src/line_ending.rs new file mode 100644 index 0000000..0514fe5 --- /dev/null +++ b/src/line_ending.rs @@ -0,0 +1,88 @@ +//! Line ending detection and conversion. + +use std::fmt::Debug; + +/// Supported line endings. Like in the Rust standard library, two line +/// endings are supported: `\r\n` and `\n` +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LineEnding { + /// _Carriage return and line feed_ – a line ending sequence + /// historically used in Windows. Corresponds to the sequence + /// of ASCII control characters `0x0D 0x0A` or `\r\n` + CRLF, + /// _Line feed_ – a line ending historically used in Unix. + /// Corresponds to the ASCII control character `0x0A` or `\n` + LF, +} + +impl LineEnding { + /// Turns this [`LineEnding`] value into its ASCII representation. 
+ #[inline] + pub const fn as_str(&self) -> &'static str { + match self { + Self::CRLF => "\r\n", + Self::LF => "\n", + } + } +} + +/// An iterator over the lines of a string, as tuples of string slice +/// and [`LineEnding`] value; it only emits non-empty lines (i.e. having +/// some content before the terminating `\r\n` or `\n`). +/// +/// This struct is used internally by the library. +#[derive(Debug, Clone, Copy)] +pub(crate) struct NonEmptyLines<'a>(pub &'a str); + +impl<'a> Iterator for NonEmptyLines<'a> { + type Item = (&'a str, Option<LineEnding>); + + fn next(&mut self) -> Option<Self::Item> { + while let Some(lf) = self.0.find('\n') { + if lf == 0 || (lf == 1 && self.0.as_bytes()[lf - 1] == b'\r') { + self.0 = &self.0[(lf + 1)..]; + continue; + } + let trimmed = match self.0.as_bytes()[lf - 1] { + b'\r' => (&self.0[..(lf - 1)], Some(LineEnding::CRLF)), + _ => (&self.0[..lf], Some(LineEnding::LF)), + }; + self.0 = &self.0[(lf + 1)..]; + return Some(trimmed); + } + if self.0.is_empty() { + None + } else { + let line = std::mem::take(&mut self.0); + Some((line, None)) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn non_empty_lines_full_case() { + assert_eq!( + NonEmptyLines("LF\nCRLF\r\n\r\n\nunterminated") + .collect::<Vec<(&str, Option<LineEnding>)>>(), + vec![ + ("LF", Some(LineEnding::LF)), + ("CRLF", Some(LineEnding::CRLF)), + ("unterminated", None), + ] + ); + } + + #[test] + fn non_empty_lines_new_lines_only() { + assert_eq!(NonEmptyLines("\r\n\n\n\r\n").next(), None); + } + + #[test] + fn non_empty_lines_no_input() { + assert_eq!(NonEmptyLines("").next(), None); + } +} diff --git a/src/word_separators.rs b/src/word_separators.rs index 25adf31..dc74e5e 100644 --- a/src/word_separators.rs +++ b/src/word_separators.rs @@ -122,6 +122,40 @@ pub enum WordSeparator { Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>), } +impl PartialEq for WordSeparator { + /// Compare two word separators. 
+ /// + /// ``` + /// use textwrap::WordSeparator; + /// + /// assert_eq!(WordSeparator::AsciiSpace, WordSeparator::AsciiSpace); + /// #[cfg(feature = "unicode-linebreak")] { + /// assert_eq!(WordSeparator::UnicodeBreakProperties, + /// WordSeparator::UnicodeBreakProperties); + /// } + /// ``` + /// + /// Note that `WordSeparator::Custom` values never compare equal: + /// + /// ``` + /// use textwrap::WordSeparator; + /// use textwrap::core::Word; + /// fn word_separator(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_> { + /// Box::new(line.split_inclusive(' ').map(Word::from)) + /// } + /// assert_ne!(WordSeparator::Custom(word_separator), + /// WordSeparator::Custom(word_separator)); + /// ``` + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (WordSeparator::AsciiSpace, WordSeparator::AsciiSpace) => true, + #[cfg(feature = "unicode-linebreak")] + (WordSeparator::UnicodeBreakProperties, WordSeparator::UnicodeBreakProperties) => true, + (_, _) => false, + } + } +} + impl std::fmt::Debug for WordSeparator { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -134,6 +168,23 @@ impl std::fmt::Debug for WordSeparator { } impl WordSeparator { + /// Create a new word separator. + /// + /// The best available algorithm is used by default, i.e., + /// [`WordSeparator::UnicodeBreakProperties`] if available, + /// otherwise [`WordSeparator::AsciiSpace`]. 
+ pub const fn new() -> Self { + #[cfg(feature = "unicode-linebreak")] + { + WordSeparator::UnicodeBreakProperties + } + + #[cfg(not(feature = "unicode-linebreak"))] + { + WordSeparator::AsciiSpace + } + } + // This function should really return impl Iterator<Item = Word>, but // this isn't possible until Rust supports higher-kinded types: // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md @@ -154,13 +205,7 @@ fn find_words_ascii_space<'a>(line: &'a str) -> Box<dyn Iterator<Item = Word<'a> let mut char_indices = line.char_indices(); Box::new(std::iter::from_fn(move || { - // for (idx, ch) in char_indices does not work, gives this - // error: - // - // > cannot move out of `char_indices`, a captured variable in - // > an `FnMut` closure - #[allow(clippy::while_let_on_iterator)] - while let Some((idx, ch)) = char_indices.next() { + for (idx, ch) in char_indices.by_ref() { if in_whitespace && ch != ' ' { let word = Word::from(&line[start..idx]); start = idx; @@ -252,8 +297,7 @@ fn find_words_unicode_break_properties<'a>( let mut start = 0; Box::new(std::iter::from_fn(move || { - #[allow(clippy::while_let_on_iterator)] - while let Some((idx, _)) = opportunities.next() { + for (idx, _) in opportunities.by_ref() { if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx) { let word = Word::from(&line[start..orig_idx]); start = orig_idx; @@ -283,8 +327,8 @@ mod tests { }; } - fn to_words<'a>(words: Vec<&'a str>) -> Vec<Word<'a>> { - words.into_iter().map(|w: &str| Word::from(&w)).collect() + fn to_words(words: Vec<&str>) -> Vec<Word<'_>> { + words.into_iter().map(Word::from).collect() } macro_rules! 
test_find_words { @@ -417,12 +461,21 @@ mod tests { #[test] fn find_words_color_inside_word() { let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz"; - assert_iter_eq!(AsciiSpace.find_words(&text), vec![Word::from(text)]); + assert_iter_eq!(AsciiSpace.find_words(text), vec![Word::from(text)]); #[cfg(feature = "unicode-linebreak")] assert_iter_eq!( - UnicodeBreakProperties.find_words(&text), + UnicodeBreakProperties.find_words(text), vec![Word::from(text)] ); } + + #[test] + fn word_separator_new() { + #[cfg(feature = "unicode-linebreak")] + assert!(matches!(WordSeparator::new(), UnicodeBreakProperties)); + + #[cfg(not(feature = "unicode-linebreak"))] + assert!(matches!(WordSeparator::new(), AsciiSpace)); + } } diff --git a/src/wrap_algorithms.rs b/src/wrap_algorithms.rs index 5ca49c3..eef9b33 100644 --- a/src/wrap_algorithms.rs +++ b/src/wrap_algorithms.rs @@ -87,6 +87,36 @@ pub enum WrapAlgorithm { Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>), } +impl PartialEq for WrapAlgorithm { + /// Compare two wrap algorithms. 
+ /// + /// ``` + /// use textwrap::WrapAlgorithm; + /// + /// assert_eq!(WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit); + /// #[cfg(feature = "smawk")] { + /// assert_eq!(WrapAlgorithm::new_optimal_fit(), WrapAlgorithm::new_optimal_fit()); + /// } + /// ``` + /// + /// Note that `WrapAlgorithm::Custom` values never compare equal: + /// + /// ``` + /// use textwrap::WrapAlgorithm; + /// + /// assert_ne!(WrapAlgorithm::Custom(|words, line_widths| vec![words]), + /// WrapAlgorithm::Custom(|words, line_widths| vec![words])); + /// ``` + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit) => true, + #[cfg(feature = "smawk")] + (WrapAlgorithm::OptimalFit(a), WrapAlgorithm::OptimalFit(b)) => a == b, + (_, _) => false, + } + } +} + impl std::fmt::Debug for WrapAlgorithm { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/src/wrap_algorithms/optimal_fit.rs b/src/wrap_algorithms/optimal_fit.rs index 0625e28..ef2f333 100644 --- a/src/wrap_algorithms/optimal_fit.rs +++ b/src/wrap_algorithms/optimal_fit.rs @@ -19,7 +19,7 @@ use crate::core::Fragment; /// /// **Note:** Only available when the `smawk` Cargo feature is /// enabled. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq)] pub struct Penalties { /// Per-line penalty. This is added for every line, which makes it /// expensive to output more lines than the minimum required. |