1 files changed, 257 insertions, 80 deletions
diff --git a/src/wrap_algorithms/optimal_fit.rs b/src/wrap_algorithms/optimal_fit.rs
index 95ecf1f..0625e28 100644
--- a/src/wrap_algorithms/optimal_fit.rs
+++ b/src/wrap_algorithms/optimal_fit.rs
@@ -1,23 +1,157 @@
 use std::cell::RefCell;
 
-use crate::core::{Fragment, Word};
-use crate::wrap_algorithms::WrapAlgorithm;
+use crate::core::Fragment;
 
-/// Wrap words using an advanced algorithm with look-ahead.
+/// Penalties for
+/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
+/// and [`wrap_optimal_fit`].
 ///
-/// This wrapping algorithm considers the entire paragraph to find
-/// optimal line breaks. Implemented by [`wrap_optimal_fit`], please
-/// see that function for details and examples.
+/// This wrapping algorithm in [`wrap_optimal_fit`] considers the
+/// entire paragraph to find optimal line breaks. When wrapping text,
+/// "penalties" are assigned to line breaks based on the gaps left at
+/// the end of lines. The penalties are given by this struct, with
+/// [`Penalties::default`] assigning penalties that work well for
+/// monospace text.
+///
+/// If you are wrapping proportional text, you are advised to assign
+/// your own penalties according to your font size. See the individual
+/// penalties below for details.
 ///
 /// **Note:** Only available when the `smawk` Cargo feature is
 /// enabled.
-#[derive(Clone, Copy, Debug, Default)]
-pub struct OptimalFit;
+#[derive(Clone, Copy, Debug)]
+pub struct Penalties {
+    /// Per-line penalty. This is added for every line, which makes it
+    /// expensive to output more lines than the minimum required.
+    pub nline_penalty: usize,
+
+    /// Per-character cost for lines that overflow the target line width.
+    ///
+    /// With a default value of 50², every single character costs as
+    /// much as leaving a gap of 50 characters behind. This is because
+    /// we assign as cost of `gap * gap` to a short line. When
+    /// wrapping monospace text, we can overflow the line by 1
+    /// character in extreme cases:
+    ///
+    /// ```
+    /// use textwrap::core::Word;
+    /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
+    ///
+    /// let short = "foo ";
+    /// let long = "x".repeat(50);
+    /// let length = (short.len() + long.len()) as f64;
+    /// let fragments = vec![Word::from(short), Word::from(&long)];
+    /// let penalties = Penalties::new();
+    ///
+    /// // Perfect fit, both words are on a single line with no overflow.
+    /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
+    /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
+    ///
+    /// // The words no longer fit, yet we get a single line back. While
+    /// // the cost of overflow (`1 * 2500`) is the same as the cost of the
+    /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
+    /// // which makes it cheaper to overflow than to use two lines.
+    /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
+    /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
+    ///
+    /// // The cost of overflow would be 2 * 2500, whereas the cost of
+    /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
+    /// // 3401`. We therefore get two lines.
+    /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
+    /// assert_eq!(wrapped, vec![&[Word::from(short)],
+    ///                          &[Word::from(&long)]]);
+    /// ```
+    ///
+    /// This only happens if the overflowing word is 50 characters
+    /// long _and_ if the word overflows the line by exactly one
+    /// character. If it overflows by more than one character, the
+    /// overflow penalty will quickly outgrow the cost of the gap, as
+    /// seen above.
+    pub overflow_penalty: usize,
+
+    /// When should the a single word on the last line be considered
+    /// "too short"?
+    ///
+    /// If the last line of the text consist of a single word and if
+    /// this word is shorter than `1 / short_last_line_fraction` of
+    /// the line width, then the final line will be considered "short"
+    /// and `short_last_line_penalty` is added as an extra penalty.
+    ///
+    /// The effect of this is to avoid a final line consisting of a
+    /// single small word. For example, with a
+    /// `short_last_line_penalty` of 25 (the default), a gap of up to
+    /// 5 columns will be seen as more desirable than having a final
+    /// short line.
+    ///
+    /// ## Examples
+    ///
+    /// ```
+    /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
+    ///
+    /// let text = "This is a demo of the short last line penalty.";
+    ///
+    /// // The first-fit algorithm leaves a single short word on the last line:
+    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
+    ///            vec!["This is a demo of the short last line",
+    ///                 "penalty."]);
+    ///
+    /// #[cfg(feature = "smawk")] {
+    /// let mut penalties = wrap_algorithms::Penalties::new();
+    ///
+    /// // Since "penalty." is shorter than 25% of the line width, the
+    /// // optimal-fit algorithm adds a penalty of 25. This is enough
+    /// // to move "line " down:
+    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
+    ///            vec!["This is a demo of the short last",
+    ///                 "line penalty."]);
+    ///
+    /// // We can change the meaning of "short" lines. Here, only words
+    /// // shorter than 1/10th of the line width will be considered short:
+    /// penalties.short_last_line_fraction = 10;
+    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
+    ///            vec!["This is a demo of the short last line",
+    ///                 "penalty."]);
+    ///
+    /// // If desired, the penalty can also be disabled:
+    /// penalties.short_last_line_fraction = 4;
+    /// penalties.short_last_line_penalty = 0;
+    /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
+    ///            vec!["This is a demo of the short last line",
+    ///                 "penalty."]);
+    /// }
+    /// ```
+    pub short_last_line_fraction: usize,
+
+    /// Penalty for a last line with a single short word.
+    ///
+    /// Set this to zero if you do not want to penalize short last lines.
+    pub short_last_line_penalty: usize,
+
+    /// Penalty for lines ending with a hyphen.
+    pub hyphen_penalty: usize,
+}
 
-impl WrapAlgorithm for OptimalFit {
-    #[inline]
-    fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> {
-        wrap_optimal_fit(words, line_widths)
+impl Penalties {
+    /// Default penalties for monospace text.
+    ///
+    /// The penalties here work well for monospace text. This is
+    /// because they expect the gaps at the end of lines to be roughly
+    /// in the range `0..100`. If the gaps are larger, the
+    /// `overflow_penalty` and `hyphen_penalty` become insignificant.
+    pub const fn new() -> Self {
+        Penalties {
+            nline_penalty: 1000,
+            overflow_penalty: 50 * 50,
+            short_last_line_fraction: 4,
+            short_last_line_penalty: 25,
+            hyphen_penalty: 25,
+        }
+    }
+}
+
+impl Default for Penalties {
+    fn default() -> Self {
+        Self::new()
     }
 }
 
@@ -39,7 +173,7 @@ impl LineNumbers {
     fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
         while self.line_numbers.borrow_mut().len() < i + 1 {
             let pos = self.line_numbers.borrow().len();
-            let line_number = 1 + self.get(minima[pos].0, &minima);
+            let line_number = 1 + self.get(minima[pos].0, minima);
             self.line_numbers.borrow_mut().push(line_number);
         }
 
@@ -47,58 +181,17 @@ impl LineNumbers {
     }
 }
 
-/// Per-line penalty. This is added for every line, which makes it
-/// expensive to output more lines than the minimum required.
-const NLINE_PENALTY: i32 = 1000;
+/// Overflow error during the [`wrap_optimal_fit`] computation.
+#[derive(Debug, PartialEq, Eq)]
+pub struct OverflowError;
 
-/// Per-character cost for lines that overflow the target line width.
-///
-/// With a value of 50², every single character costs as much as
-/// leaving a gap of 50 characters behind. This is becuase we assign
-/// as cost of `gap * gap` to a short line. This means that we can
-/// overflow the line by 1 character in extreme cases:
-///
-/// ```
-/// use textwrap::wrap_algorithms::wrap_optimal_fit;
-/// use textwrap::core::Word;
-///
-/// let short = "foo ";
-/// let long = "x".repeat(50);
-/// let fragments = vec![Word::from(short), Word::from(&long)];
-///
-/// // Perfect fit, both words are on a single line with no overflow.
-/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len()]);
-/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
-///
-/// // The words no longer fit, yet we get a single line back. While
-/// // the cost of overflow (`1 * 2500`) is the same as the cost of the
-/// // gap (`50 * 50 = 2500`), the tie is broken by `NLINE_PENALTY`
-/// // which makes it cheaper to overflow than to use two lines.
-/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 1]);
-/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
-///
-/// // The cost of overflow would be 2 * 2500, whereas the cost of the
-/// // gap is only `49 * 49 + NLINE_PENALTY = 2401 + 1000 = 3401`. We
-/// // therefore get two lines.
-/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 2]);
-/// assert_eq!(wrapped, vec![&[Word::from(short)],
-///                          &[Word::from(&long)]]);
-/// ```
-///
-/// This only happens if the overflowing word is 50 characters long
-/// _and_ if it happens to overflow the line by exactly one character.
-/// If it overflows by more than one character, the overflow penalty
-/// will quickly outgrow the cost of the gap, as seen above.
-const OVERFLOW_PENALTY: i32 = 50 * 50;
-
-/// The last line is short if it is less than 1/4 of the target width.
-const SHORT_LINE_FRACTION: usize = 4;
-
-/// Penalize a short last line.
-const SHORT_LAST_LINE_PENALTY: i32 = 25;
+impl std::fmt::Display for OverflowError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "wrap_optimal_fit cost computation overflowed")
+    }
+}
 
-/// Penalty for lines ending with a hyphen.
-const HYPHEN_PENALTY: i32 = 25;
+impl std::error::Error for OverflowError {}
 
 /// Wrap abstract fragments into lines with an optimal-fit algorithm.
 ///
@@ -173,16 +266,48 @@ const HYPHEN_PENALTY: i32 = 25;
 /// code by David
 /// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
 ///
+/// # Errors
+///
+/// In case of an overflow during the cost computation, an `Err` is
+/// returned. Overflows happens when fragments or lines have infinite
+/// widths (`f64::INFINITY`) or if the widths are so large that the
+/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
+/// (approximately 1e154):
+///
+/// ```
+/// use textwrap::core::Fragment;
+/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
+///
+/// #[derive(Debug, PartialEq)]
+/// struct Word(f64);
+///
+/// impl Fragment for Word {
+///     fn width(&self) -> f64 { self.0 }
+///     fn whitespace_width(&self) -> f64 { 1.0 }
+///     fn penalty_width(&self) -> f64 { 0.0 }
+/// }
+///
+/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
+/// // larger than f64::MAX:
+/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
+///            Err(OverflowError));
+/// ```
+///
+/// When using fragment widths and line widths which fit inside an
+/// `u64`, overflows cannot happen. This means that fragments derived
+/// from a `&str` cannot cause overflows.
+///
 /// **Note:** Only available when the `smawk` Cargo feature is
 /// enabled.
 pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
     fragments: &'a [T],
-    line_widths: &'b [usize],
-) -> Vec<&'a [T]> {
+    line_widths: &'b [f64],
+    penalties: &'b Penalties,
+) -> Result<Vec<&'a [T]>, OverflowError> {
     // The final line width is used for all remaining lines.
-    let default_line_width = line_widths.last().copied().unwrap_or(0);
+    let default_line_width = line_widths.last().copied().unwrap_or(0.0);
     let mut widths = Vec::with_capacity(fragments.len() + 1);
-    let mut width = 0;
+    let mut width = 0.0;
     widths.push(width);
     for fragment in fragments {
         width += fragment.width() + fragment.whitespace_width();
@@ -191,18 +316,18 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
 
     let line_numbers = LineNumbers::new(fragments.len());
 
-    let minima = smawk::online_column_minima(0, widths.len(), |minima, i, j| {
+    let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| {
         // Line number for fragment `i`.
-        let line_number = line_numbers.get(i, &minima);
+        let line_number = line_numbers.get(i, minima);
         let line_width = line_widths
             .get(line_number)
             .copied()
             .unwrap_or(default_line_width);
-        let target_width = std::cmp::max(1, line_width);
+        let target_width = line_width.max(1.0);
 
         // Compute the width of a line spanning fragments[i..j] in
         // constant time. We need to adjust widths[j] by subtracting
-        // the whitespace of fragment[j-i] and then add the penalty.
+        // the whitespace of fragment[j-1] and then add the penalty.
         let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
             + fragments[j - 1].penalty_width();
 
@@ -211,35 +336,43 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
         // breaking before fragments[i].
         //
         // First, every extra line cost NLINE_PENALTY.
-        let mut cost = minima[i].1 + NLINE_PENALTY;
+        let mut cost = minima[i].1 + penalties.nline_penalty as f64;
 
         // Next, we add a penalty depending on the line length.
         if line_width > target_width {
             // Lines that overflow get a hefty penalty.
-            let overflow = (line_width - target_width) as i32;
-            cost += overflow * OVERFLOW_PENALTY;
+            let overflow = line_width - target_width;
+            cost += overflow * penalties.overflow_penalty as f64;
         } else if j < fragments.len() {
             // Other lines (except for the last line) get a milder
             // penalty which depend on the size of the gap.
-            let gap = (target_width - line_width) as i32;
+            let gap = target_width - line_width;
             cost += gap * gap;
-        } else if i + 1 == j && line_width < target_width / SHORT_LINE_FRACTION {
+        } else if i + 1 == j
+            && line_width < target_width / penalties.short_last_line_fraction as f64
+        {
             // The last line can have any size gap, but we do add a
             // penalty if the line is very short (typically because it
             // contains just a single word).
-            cost += SHORT_LAST_LINE_PENALTY;
+            cost += penalties.short_last_line_penalty as f64;
         }
 
         // Finally, we discourage hyphens.
-        if fragments[j - 1].penalty_width() > 0 {
+        if fragments[j - 1].penalty_width() > 0.0 {
             // TODO: this should use a penalty value from the fragment
             // instead.
-            cost += HYPHEN_PENALTY;
+            cost += penalties.hyphen_penalty as f64;
         }
 
         cost
     });
 
+    for (_, cost) in &minima {
+        if cost.is_infinite() {
+            return Err(OverflowError);
+        }
+    }
+
     let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
     let mut pos = fragments.len();
     loop {
@@ -252,5 +385,49 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
     }
 
     lines.reverse();
-    lines
+    Ok(lines)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[derive(Debug, PartialEq)]
+    struct Word(f64);
+
+    #[rustfmt::skip]
+    impl Fragment for Word {
+        fn width(&self) -> f64 { self.0 }
+        fn whitespace_width(&self) -> f64 { 1.0 }
+        fn penalty_width(&self) -> f64 { 0.0 }
+    }
+
+    #[test]
+    fn wrap_fragments_with_infinite_widths() {
+        let words = vec![Word(f64::INFINITY)];
+        assert_eq!(
+            wrap_optimal_fit(&words, &[0.0], &Penalties::default()),
+            Err(OverflowError)
+        );
+    }
+
+    #[test]
+    fn wrap_fragments_with_huge_widths() {
+        let words = vec![Word(1e200), Word(1e250), Word(1e300)];
+        assert_eq!(
+            wrap_optimal_fit(&words, &[1e300], &Penalties::default()),
+            Err(OverflowError)
+        );
+    }
+
+    #[test]
+    fn wrap_fragments_with_large_widths() {
+        // The gaps will be of the sizes between 1e25 and 1e75. This
+        // makes the `gap * gap` cost fit comfortably in a f64.
+        let words = vec![Word(1e25), Word(1e50), Word(1e75)];
+        assert_eq!(
+            wrap_optimal_fit(&words, &[1e100], &Penalties::default()),
+            Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]])
+        );
+    }
 }