diff options
Diffstat (limited to 'src/wrap_algorithms/optimal_fit.rs')
-rw-r--r-- | src/wrap_algorithms/optimal_fit.rs | 337 |
1 files changed, 257 insertions, 80 deletions
diff --git a/src/wrap_algorithms/optimal_fit.rs b/src/wrap_algorithms/optimal_fit.rs index 95ecf1f..0625e28 100644 --- a/src/wrap_algorithms/optimal_fit.rs +++ b/src/wrap_algorithms/optimal_fit.rs @@ -1,23 +1,157 @@ use std::cell::RefCell; -use crate::core::{Fragment, Word}; -use crate::wrap_algorithms::WrapAlgorithm; +use crate::core::Fragment; -/// Wrap words using an advanced algorithm with look-ahead. +/// Penalties for +/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit) +/// and [`wrap_optimal_fit`]. /// -/// This wrapping algorithm considers the entire paragraph to find -/// optimal line breaks. Implemented by [`wrap_optimal_fit`], please -/// see that function for details and examples. +/// This wrapping algorithm in [`wrap_optimal_fit`] considers the +/// entire paragraph to find optimal line breaks. When wrapping text, +/// "penalties" are assigned to line breaks based on the gaps left at +/// the end of lines. The penalties are given by this struct, with +/// [`Penalties::default`] assigning penalties that work well for +/// monospace text. +/// +/// If you are wrapping proportional text, you are advised to assign +/// your own penalties according to your font size. See the individual +/// penalties below for details. /// /// **Note:** Only available when the `smawk` Cargo feature is /// enabled. -#[derive(Clone, Copy, Debug, Default)] -pub struct OptimalFit; +#[derive(Clone, Copy, Debug)] +pub struct Penalties { + /// Per-line penalty. This is added for every line, which makes it + /// expensive to output more lines than the minimum required. + pub nline_penalty: usize, + + /// Per-character cost for lines that overflow the target line width. + /// + /// With a default value of 50², every single character costs as + /// much as leaving a gap of 50 characters behind. This is because + /// we assign as cost of `gap * gap` to a short line. When + /// wrapping monospace text, we can overflow the line by 1 + /// character in extreme cases: + /// + /// ``` + /// use textwrap::core::Word; + /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; + /// + /// let short = "foo "; + /// let long = "x".repeat(50); + /// let length = (short.len() + long.len()) as f64; + /// let fragments = vec![Word::from(short), Word::from(&long)]; + /// let penalties = Penalties::new(); + /// + /// // Perfect fit, both words are on a single line with no overflow. + /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap(); + /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); + /// + /// // The words no longer fit, yet we get a single line back. While + /// // the cost of overflow (`1 * 2500`) is the same as the cost of the + /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty` + /// // which makes it cheaper to overflow than to use two lines. + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap(); + /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); + /// + /// // The cost of overflow would be 2 * 2500, whereas the cost of + /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 = + /// // 3401`. We therefore get two lines. + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap(); + /// assert_eq!(wrapped, vec![&[Word::from(short)], + /// &[Word::from(&long)]]); + /// ``` + /// + /// This only happens if the overflowing word is 50 characters + /// long _and_ if the word overflows the line by exactly one + /// character. If it overflows by more than one character, the + /// overflow penalty will quickly outgrow the cost of the gap, as + /// seen above. + pub overflow_penalty: usize, + + /// When should the a single word on the last line be considered + /// "too short"? + /// + /// If the last line of the text consist of a single word and if + /// this word is shorter than `1 / short_last_line_fraction` of + /// the line width, then the final line will be considered "short" + /// and `short_last_line_penalty` is added as an extra penalty. + /// + /// The effect of this is to avoid a final line consisting of a + /// single small word. For example, with a + /// `short_last_line_penalty` of 25 (the default), a gap of up to + /// 5 columns will be seen as more desirable than having a final + /// short line. + /// + /// ## Examples + /// + /// ``` + /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm}; + /// + /// let text = "This is a demo of the short last line penalty."; + /// + /// // The first-fit algorithm leaves a single short word on the last line: + /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)), + /// vec!["This is a demo of the short last line", + /// "penalty."]); + /// + /// #[cfg(feature = "smawk")] { + /// let mut penalties = wrap_algorithms::Penalties::new(); + /// + /// // Since "penalty." is shorter than 25% of the line width, the + /// // optimal-fit algorithm adds a penalty of 25. This is enough + /// // to move "line " down: + /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), + /// vec!["This is a demo of the short last", + /// "line penalty."]); + /// + /// // We can change the meaning of "short" lines. Here, only words + /// // shorter than 1/10th of the line width will be considered short: + /// penalties.short_last_line_fraction = 10; + /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), + /// vec!["This is a demo of the short last line", + /// "penalty."]); + /// + /// // If desired, the penalty can also be disabled: + /// penalties.short_last_line_fraction = 4; + /// penalties.short_last_line_penalty = 0; + /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), + /// vec!["This is a demo of the short last line", + /// "penalty."]); + /// } + /// ``` + pub short_last_line_fraction: usize, + + /// Penalty for a last line with a single short word. + /// + /// Set this to zero if you do not want to penalize short last lines. + pub short_last_line_penalty: usize, + + /// Penalty for lines ending with a hyphen. + pub hyphen_penalty: usize, +} -impl WrapAlgorithm for OptimalFit { - #[inline] - fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> { - wrap_optimal_fit(words, line_widths) +impl Penalties { + /// Default penalties for monospace text. + /// + /// The penalties here work well for monospace text. This is + /// because they expect the gaps at the end of lines to be roughly + /// in the range `0..100`. If the gaps are larger, the + /// `overflow_penalty` and `hyphen_penalty` become insignificant. + pub const fn new() -> Self { + Penalties { + nline_penalty: 1000, + overflow_penalty: 50 * 50, + short_last_line_fraction: 4, + short_last_line_penalty: 25, + hyphen_penalty: 25, + } + } +} + +impl Default for Penalties { + fn default() -> Self { + Self::new() } } @@ -39,7 +173,7 @@ impl LineNumbers { fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize { while self.line_numbers.borrow_mut().len() < i + 1 { let pos = self.line_numbers.borrow().len(); - let line_number = 1 + self.get(minima[pos].0, &minima); + let line_number = 1 + self.get(minima[pos].0, minima); self.line_numbers.borrow_mut().push(line_number); } @@ -47,58 +181,17 @@ impl LineNumbers { } } -/// Per-line penalty. This is added for every line, which makes it -/// expensive to output more lines than the minimum required. -const NLINE_PENALTY: i32 = 1000; +/// Overflow error during the [`wrap_optimal_fit`] computation. +#[derive(Debug, PartialEq, Eq)] +pub struct OverflowError; -/// Per-character cost for lines that overflow the target line width. -/// -/// With a value of 50², every single character costs as much as -/// leaving a gap of 50 characters behind. This is becuase we assign -/// as cost of `gap * gap` to a short line. This means that we can -/// overflow the line by 1 character in extreme cases: -/// -/// ``` -/// use textwrap::wrap_algorithms::wrap_optimal_fit; -/// use textwrap::core::Word; -/// -/// let short = "foo "; -/// let long = "x".repeat(50); -/// let fragments = vec![Word::from(short), Word::from(&long)]; -/// -/// // Perfect fit, both words are on a single line with no overflow. -/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len()]); -/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); -/// -/// // The words no longer fit, yet we get a single line back. While -/// // the cost of overflow (`1 * 2500`) is the same as the cost of the -/// // gap (`50 * 50 = 2500`), the tie is broken by `NLINE_PENALTY` -/// // which makes it cheaper to overflow than to use two lines. -/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 1]); -/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); -/// -/// // The cost of overflow would be 2 * 2500, whereas the cost of the -/// // gap is only `49 * 49 + NLINE_PENALTY = 2401 + 1000 = 3401`. We -/// // therefore get two lines. -/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 2]); -/// assert_eq!(wrapped, vec![&[Word::from(short)], -/// &[Word::from(&long)]]); -/// ``` -/// -/// This only happens if the overflowing word is 50 characters long -/// _and_ if it happens to overflow the line by exactly one character. -/// If it overflows by more than one character, the overflow penalty -/// will quickly outgrow the cost of the gap, as seen above. -const OVERFLOW_PENALTY: i32 = 50 * 50; - -/// The last line is short if it is less than 1/4 of the target width. -const SHORT_LINE_FRACTION: usize = 4; - -/// Penalize a short last line. -const SHORT_LAST_LINE_PENALTY: i32 = 25; +impl std::fmt::Display for OverflowError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "wrap_optimal_fit cost computation overflowed") + } +} -/// Penalty for lines ending with a hyphen. -const HYPHEN_PENALTY: i32 = 25; +impl std::error::Error for OverflowError {} /// Wrap abstract fragments into lines with an optimal-fit algorithm. /// @@ -173,16 +266,48 @@ const HYPHEN_PENALTY: i32 = 25; /// code by David /// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py). /// +/// # Errors +/// +/// In case of an overflow during the cost computation, an `Err` is +/// returned. Overflows happens when fragments or lines have infinite +/// widths (`f64::INFINITY`) or if the widths are so large that the +/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()` +/// (approximately 1e154): +/// +/// ``` +/// use textwrap::core::Fragment; +/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties}; +/// +/// #[derive(Debug, PartialEq)] +/// struct Word(f64); +/// +/// impl Fragment for Word { +/// fn width(&self) -> f64 { self.0 } +/// fn whitespace_width(&self) -> f64 { 1.0 } +/// fn penalty_width(&self) -> f64 { 0.0 } +/// } +/// +/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is +/// // larger than f64::MAX: +/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()), +/// Err(OverflowError)); +/// ``` +/// +/// When using fragment widths and line widths which fit inside an +/// `u64`, overflows cannot happen. This means that fragments derived +/// from a `&str` cannot cause overflows. +/// /// **Note:** Only available when the `smawk` Cargo feature is /// enabled. pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( fragments: &'a [T], - line_widths: &'b [usize], -) -> Vec<&'a [T]> { + line_widths: &'b [f64], + penalties: &'b Penalties, +) -> Result<Vec<&'a [T]>, OverflowError> { // The final line width is used for all remaining lines. - let default_line_width = line_widths.last().copied().unwrap_or(0); + let default_line_width = line_widths.last().copied().unwrap_or(0.0); let mut widths = Vec::with_capacity(fragments.len() + 1); - let mut width = 0; + let mut width = 0.0; widths.push(width); for fragment in fragments { width += fragment.width() + fragment.whitespace_width(); @@ -191,18 +316,18 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( let line_numbers = LineNumbers::new(fragments.len()); - let minima = smawk::online_column_minima(0, widths.len(), |minima, i, j| { + let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| { // Line number for fragment `i`. - let line_number = line_numbers.get(i, &minima); + let line_number = line_numbers.get(i, minima); let line_width = line_widths .get(line_number) .copied() .unwrap_or(default_line_width); - let target_width = std::cmp::max(1, line_width); + let target_width = line_width.max(1.0); // Compute the width of a line spanning fragments[i..j] in // constant time. We need to adjust widths[j] by subtracting - // the whitespace of fragment[j-i] and then add the penalty. + // the whitespace of fragment[j-1] and then add the penalty. let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width() + fragments[j - 1].penalty_width(); @@ -211,35 +336,43 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( // breaking before fragments[i]. // // First, every extra line cost NLINE_PENALTY. - let mut cost = minima[i].1 + NLINE_PENALTY; + let mut cost = minima[i].1 + penalties.nline_penalty as f64; // Next, we add a penalty depending on the line length. if line_width > target_width { // Lines that overflow get a hefty penalty. - let overflow = (line_width - target_width) as i32; - cost += overflow * OVERFLOW_PENALTY; + let overflow = line_width - target_width; + cost += overflow * penalties.overflow_penalty as f64; } else if j < fragments.len() { // Other lines (except for the last line) get a milder // penalty which depend on the size of the gap. - let gap = (target_width - line_width) as i32; + let gap = target_width - line_width; cost += gap * gap; - } else if i + 1 == j && line_width < target_width / SHORT_LINE_FRACTION { + } else if i + 1 == j + && line_width < target_width / penalties.short_last_line_fraction as f64 + { // The last line can have any size gap, but we do add a // penalty if the line is very short (typically because it // contains just a single word). - cost += SHORT_LAST_LINE_PENALTY; + cost += penalties.short_last_line_penalty as f64; } // Finally, we discourage hyphens. - if fragments[j - 1].penalty_width() > 0 { + if fragments[j - 1].penalty_width() > 0.0 { // TODO: this should use a penalty value from the fragment // instead. - cost += HYPHEN_PENALTY; + cost += penalties.hyphen_penalty as f64; } cost }); + for (_, cost) in &minima { + if cost.is_infinite() { + return Err(OverflowError); + } + } + let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima)); let mut pos = fragments.len(); loop { @@ -252,5 +385,49 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( } lines.reverse(); - lines + Ok(lines) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, PartialEq)] + struct Word(f64); + + #[rustfmt::skip] + impl Fragment for Word { + fn width(&self) -> f64 { self.0 } + fn whitespace_width(&self) -> f64 { 1.0 } + fn penalty_width(&self) -> f64 { 0.0 } + } + + #[test] + fn wrap_fragments_with_infinite_widths() { + let words = vec![Word(f64::INFINITY)]; + assert_eq!( + wrap_optimal_fit(&words, &[0.0], &Penalties::default()), + Err(OverflowError) + ); + } + + #[test] + fn wrap_fragments_with_huge_widths() { + let words = vec![Word(1e200), Word(1e250), Word(1e300)]; + assert_eq!( + wrap_optimal_fit(&words, &[1e300], &Penalties::default()), + Err(OverflowError) + ); + } + + #[test] + fn wrap_fragments_with_large_widths() { + // The gaps will be of the sizes between 1e25 and 1e75. This + // makes the `gap * gap` cost fit comfortably in a f64. + let words = vec![Word(1e25), Word(1e50), Word(1e75)]; + assert_eq!( + wrap_optimal_fit(&words, &[1e100], &Penalties::default()), + Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]]) + ); + } } |