From 6229992cbe96fbbf719aa4fdbede3888ffea67ba Mon Sep 17 00:00:00 2001
From: Jeff Vander Stoep <jeffv@google.com>
Date: Tue, 13 Dec 2022 14:15:06 +0100
Subject: Upgrade rayon to 1.6.1

This project was upgraded with external_updater.
Usage: tools/external_updater/updater.sh update rust/crates/rayon
For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md

Test: TreeHugger
Change-Id: I55672395408c0a770cc19a909a94f46b6fccfd38
---
 src/iter/chunks.rs           |  50 +++---
 src/iter/collect/consumer.rs | 113 +++++++-----
 src/iter/collect/mod.rs      | 159 ++++++-----------
 src/iter/collect/test.rs     |  50 ++----
 src/iter/extend.rs           | 398 ++++++++++++++++++++++++++++++++++---------
 src/iter/filter.rs           |   2 +-
 src/iter/filter_map.rs       |   2 +-
 src/iter/find.rs             |   2 +-
 src/iter/fold_chunks.rs      | 236 +++++++++++++++++++++++++
 src/iter/fold_chunks_with.rs | 231 +++++++++++++++++++++++++
 src/iter/inspect.rs          |   2 +-
 src/iter/interleave.rs       |  18 +-
 src/iter/len.rs              |   8 +-
 src/iter/map.rs              |   2 +-
 src/iter/mod.rs              | 289 +++++++++++++++++++++++++------
 src/iter/par_bridge.rs       | 159 ++++++-----------
 src/iter/plumbing/README.md  |   2 +-
 src/iter/skip.rs             |  13 +-
 src/iter/step_by.rs          |   1 -
 src/iter/test.rs             |  21 +--
 src/iter/try_fold.rs         |  68 ++++----
 src/iter/try_reduce.rs       |  54 +++---
 src/iter/try_reduce_with.rs  |  60 ++++---
 src/iter/update.rs           |   2 +-
 24 files changed, 1373 insertions(+), 569 deletions(-)
 create mode 100644 src/iter/fold_chunks.rs
 create mode 100644 src/iter/fold_chunks_with.rs

(limited to 'src/iter')
diff --git a/src/iter/chunks.rs b/src/iter/chunks.rs
index be5f84c..ec48278 100644
--- a/src/iter/chunks.rs
+++ b/src/iter/chunks.rs
@@ -90,38 +90,46 @@ where
             where
                 P: Producer<Item = T>,
             {
-                self.callback.callback(ChunkProducer {
-                    chunk_size: self.size,
-                    len: self.len,
-                    base,
-                })
+                let producer = ChunkProducer::new(self.size, self.len, base, Vec::from_iter);
+                self.callback.callback(producer)
             }
         }
     }
 }
 
-struct ChunkProducer<P>
-where
-    P: Producer,
-{
+pub(super) struct ChunkProducer<P, F> {
     chunk_size: usize,
     len: usize,
     base: P,
+    map: F,
 }
 
-impl<P> Producer for ChunkProducer<P>
+impl<P, F> ChunkProducer<P, F> {
+    pub(super) fn new(chunk_size: usize, len: usize, base: P, map: F) -> Self {
+        Self {
+            chunk_size,
+            len,
+            base,
+            map,
+        }
+    }
+}
+
+impl<P, F, T> Producer for ChunkProducer<P, F>
 where
     P: Producer,
+    F: Fn(P::IntoIter) -> T + Send + Clone,
 {
-    type Item = Vec<P::Item>;
-    type IntoIter = ChunkSeq<P>;
+    type Item = T;
+    type IntoIter = std::iter::Map<ChunkSeq<P>, F>;
 
     fn into_iter(self) -> Self::IntoIter {
-        ChunkSeq {
+        let chunks = ChunkSeq {
             chunk_size: self.chunk_size,
             len: self.len,
             inner: if self.len > 0 { Some(self.base) } else { None },
-        }
+        };
+        chunks.map(self.map)
     }
 
     fn split_at(self, index: usize) -> (Self, Self) {
@@ -132,11 +140,13 @@ where
                 chunk_size: self.chunk_size,
                 len: elem_index,
                 base: left,
+                map: self.map.clone(),
             },
             ChunkProducer {
                 chunk_size: self.chunk_size,
                 len: self.len - elem_index,
                 base: right,
+                map: self.map,
             },
         )
     }
@@ -150,7 +160,7 @@ where
     }
 }
 
-struct ChunkSeq<P> {
+pub(super) struct ChunkSeq<P> {
     chunk_size: usize,
     len: usize,
     inner: Option<P>,
@@ -160,7 +170,7 @@ impl<P> Iterator for ChunkSeq<P>
 where
     P: Producer,
 {
-    type Item = Vec<P::Item>;
+    type Item = P::IntoIter;
 
     fn next(&mut self) -> Option<Self::Item> {
         let producer = self.inner.take()?;
@@ -168,11 +178,11 @@ where
             let (left, right) = producer.split_at(self.chunk_size);
             self.inner = Some(right);
             self.len -= self.chunk_size;
-            Some(left.into_iter().collect())
+            Some(left.into_iter())
         } else {
             debug_assert!(self.len > 0);
             self.len = 0;
-            Some(producer.into_iter().collect())
+            Some(producer.into_iter())
         }
     }
 
@@ -206,11 +216,11 @@ where
             let (left, right) = producer.split_at(self.len - size);
             self.inner = Some(left);
             self.len -= size;
-            Some(right.into_iter().collect())
+            Some(right.into_iter())
         } else {
             debug_assert!(self.len > 0);
             self.len = 0;
-            Some(producer.into_iter().collect())
+            Some(producer.into_iter())
         }
     }
 }
diff --git a/src/iter/collect/consumer.rs b/src/iter/collect/consumer.rs
index 3a8eea0..acd67df 100644
--- a/src/iter/collect/consumer.rs
+++ b/src/iter/collect/consumer.rs
@@ -1,19 +1,38 @@
 use super::super::plumbing::*;
+use crate::SendPtr;
 use std::marker::PhantomData;
-use std::mem::MaybeUninit;
 use std::ptr;
 use std::slice;
 
 pub(super) struct CollectConsumer<'c, T: Send> {
-    /// A slice covering the target memory, not yet initialized!
-    target: &'c mut [MaybeUninit<T>],
+    /// See `CollectResult` for explanation of why this is not a slice
+    start: SendPtr<T>,
+    len: usize,
+    marker: PhantomData<&'c mut T>,
+}
+
+impl<T: Send> CollectConsumer<'_, T> {
+    /// Create a collector for `len` items in the unused capacity of the vector.
+    pub(super) fn appender(vec: &mut Vec<T>, len: usize) -> CollectConsumer<'_, T> {
+        let start = vec.len();
+        assert!(vec.capacity() - start >= len);
+
+        // SAFETY: We already made sure to have the additional space allocated.
+        // The pointer is derived from `Vec` directly, not through a `Deref`,
+        // so it has provenance over the whole allocation.
+        unsafe { CollectConsumer::new(vec.as_mut_ptr().add(start), len) }
+    }
 }
 
 impl<'c, T: Send + 'c> CollectConsumer<'c, T> {
     /// The target memory is considered uninitialized, and will be
     /// overwritten without reading or dropping existing values.
-    pub(super) fn new(target: &'c mut [MaybeUninit<T>]) -> Self {
-        CollectConsumer { target }
+    unsafe fn new(start: *mut T, len: usize) -> Self {
+        CollectConsumer {
+            start: SendPtr(start),
+            len,
+            marker: PhantomData,
+        }
     }
 }
 
@@ -23,10 +42,13 @@ impl<'c, T: Send + 'c> CollectConsumer<'c, T> {
 /// the elements will be dropped, unless its ownership is released before then.
 #[must_use]
 pub(super) struct CollectResult<'c, T> {
-    /// A slice covering the target memory, initialized up to our separate `len`.
-    target: &'c mut [MaybeUninit<T>],
-    /// The current initialized length in `target`
-    len: usize,
+    /// This pointer and length has the same representation as a slice,
+    /// but retains the provenance of the entire array so that we can merge
+    /// these regions together in `CollectReducer`.
+    start: SendPtr<T>,
+    total_len: usize,
+    /// The current initialized length after `start`
+    initialized_len: usize,
     /// Lifetime invariance guarantees that the data flows from consumer to result,
     /// especially for the `scope_fn` callback in `Collect::with_consumer`.
     invariant_lifetime: PhantomData<&'c mut &'c mut [T]>,
@@ -37,25 +59,26 @@ unsafe impl<'c, T> Send for CollectResult<'c, T> where T: Send {}
 impl<'c, T> CollectResult<'c, T> {
     /// The current length of the collect result
     pub(super) fn len(&self) -> usize {
-        self.len
+        self.initialized_len
     }
 
     /// Release ownership of the slice of elements, and return the length
     pub(super) fn release_ownership(mut self) -> usize {
-        let ret = self.len;
-        self.len = 0;
+        let ret = self.initialized_len;
+        self.initialized_len = 0;
         ret
     }
 }
 
 impl<'c, T> Drop for CollectResult<'c, T> {
     fn drop(&mut self) {
-        // Drop the first `self.len` elements, which have been recorded
+        // Drop the first `self.initialized_len` elements, which have been recorded
         // to be initialized by the folder.
         unsafe {
-            // TODO: use `MaybeUninit::slice_as_mut_ptr`
-            let start = self.target.as_mut_ptr() as *mut T;
-            ptr::drop_in_place(slice::from_raw_parts_mut(start, self.len));
+            ptr::drop_in_place(slice::from_raw_parts_mut(
+                self.start.0,
+                self.initialized_len,
+            ));
         }
     }
 }
@@ -66,24 +89,27 @@ impl<'c, T: Send + 'c> Consumer<T> for CollectConsumer<'c, T> {
     type Result = CollectResult<'c, T>;
 
     fn split_at(self, index: usize) -> (Self, Self, CollectReducer) {
-        let CollectConsumer { target } = self;
-
-        // Produce new consumers. Normal slicing ensures that the
-        // memory range given to each consumer is disjoint.
-        let (left, right) = target.split_at_mut(index);
-        (
-            CollectConsumer::new(left),
-            CollectConsumer::new(right),
-            CollectReducer,
-        )
+        let CollectConsumer { start, len, .. } = self;
+
+        // Produce new consumers.
+        // SAFETY: This assert checks that `index` is a valid offset for `start`
+        unsafe {
+            assert!(index <= len);
+            (
+                CollectConsumer::new(start.0, index),
+                CollectConsumer::new(start.0.add(index), len - index),
+                CollectReducer,
+            )
+        }
     }
 
     fn into_folder(self) -> Self::Folder {
         // Create a result/folder that consumes values and writes them
-        // into target. The initial result has length 0.
+        // into the region after start. The initial result has length 0.
         CollectResult {
-            target: self.target,
-            len: 0,
+            start: self.start,
+            total_len: self.len,
+            initialized_len: 0,
             invariant_lifetime: PhantomData,
         }
     }
@@ -97,15 +123,17 @@ impl<'c, T: Send + 'c> Folder<T> for CollectResult<'c, T> {
     type Result = Self;
 
     fn consume(mut self, item: T) -> Self {
-        let dest = self
-            .target
-            .get_mut(self.len)
-            .expect("too many values pushed to consumer");
+        assert!(
+            self.initialized_len < self.total_len,
+            "too many values pushed to consumer"
+        );
 
-        // Write item and increase the initialized length
+        // SAFETY: The assert above is a bounds check for this write, and we
+        // avoid assignment here so we do not drop an uninitialized T.
         unsafe {
-            dest.as_mut_ptr().write(item);
-            self.len += 1;
+            // Write item and increase the initialized length
+            self.start.0.add(self.initialized_len).write(item);
+            self.initialized_len += 1;
         }
 
         self
@@ -146,14 +174,13 @@ impl<'c, T> Reducer<CollectResult<'c, T>> for CollectReducer {
         // Merge if the CollectResults are adjacent and in left to right order
         // else: drop the right piece now and total length will end up short in the end,
         // when the correctness of the collected result is asserted.
-        let left_end = left.target[left.len..].as_ptr();
-        if left_end == right.target.as_ptr() {
-            let len = left.len + right.release_ownership();
-            unsafe {
-                left.target = slice::from_raw_parts_mut(left.target.as_mut_ptr(), len);
+        unsafe {
+            let left_end = left.start.0.add(left.initialized_len);
+            if left_end == right.start.0 {
+                left.total_len += right.total_len;
+                left.initialized_len += right.release_ownership();
             }
-            left.len = len;
+            left
         }
-        left
     }
 }
diff --git a/src/iter/collect/mod.rs b/src/iter/collect/mod.rs
index 7cbf215..4044a68 100644
--- a/src/iter/collect/mod.rs
+++ b/src/iter/collect/mod.rs
@@ -1,6 +1,4 @@
-use super::{IndexedParallelIterator, IntoParallelIterator, ParallelExtend, ParallelIterator};
-use std::mem::MaybeUninit;
-use std::slice;
+use super::{IndexedParallelIterator, ParallelIterator};
 
 mod consumer;
 use self::consumer::CollectConsumer;
@@ -19,7 +17,7 @@ where
 {
     v.truncate(0); // clear any old data
     let len = pi.len();
-    Collect::new(v, len).with_consumer(|consumer| pi.drive(consumer));
+    collect_with_consumer(v, len, |consumer| pi.drive(consumer));
 }
 
 /// Collects the results of the iterator into the specified vector.
@@ -33,12 +31,12 @@ where
 /// *any* `ParallelIterator` here, and `CollectConsumer` has to also implement
 /// `UnindexedConsumer`.  That implementation panics `unreachable!` in case
 /// there's a bug where we actually do try to use this unindexed.
-fn special_extend<I, T>(pi: I, len: usize, v: &mut Vec<T>)
+pub(super) fn special_extend<I, T>(pi: I, len: usize, v: &mut Vec<T>)
 where
     I: ParallelIterator<Item = T>,
     T: Send,
 {
-    Collect::new(v, len).with_consumer(|consumer| pi.drive_unindexed(consumer));
+    collect_with_consumer(v, len, |consumer| pi.drive_unindexed(consumer));
 }
 
 /// Unzips the results of the exact iterator into the specified vectors.
@@ -55,9 +53,9 @@ where
     right.truncate(0);
 
     let len = pi.len();
-    Collect::new(right, len).with_consumer(|right_consumer| {
+    collect_with_consumer(right, len, |right_consumer| {
         let mut right_result = None;
-        Collect::new(left, len).with_consumer(|left_consumer| {
+        collect_with_consumer(left, len, |left_consumer| {
             let (left_r, right_r) = unzip_indexed(pi, left_consumer, right_consumer);
             right_result = Some(right_r);
             left_r
@@ -66,108 +64,53 @@ where
     });
 }
 
-/// Manage the collection vector.
-struct Collect<'c, T: Send> {
-    vec: &'c mut Vec<T>,
-    len: usize,
-}
-
-impl<'c, T: Send + 'c> Collect<'c, T> {
-    fn new(vec: &'c mut Vec<T>, len: usize) -> Self {
-        Collect { vec, len }
-    }
-
-    /// Create a consumer on the slice of memory we are collecting into.
-    ///
-    /// The consumer needs to be used inside the scope function, and the
-    /// complete collect result passed back.
-    ///
-    /// This method will verify the collect result, and panic if the slice
-    /// was not fully written into. Otherwise, in the successful case,
-    /// the vector is complete with the collected result.
-    fn with_consumer<F>(mut self, scope_fn: F)
-    where
-        F: FnOnce(CollectConsumer<'_, T>) -> CollectResult<'_, T>,
-    {
-        let slice = Self::reserve_get_tail_slice(&mut self.vec, self.len);
-        let result = scope_fn(CollectConsumer::new(slice));
-
-        // The CollectResult represents a contiguous part of the
-        // slice, that has been written to.
-        // On unwind here, the CollectResult will be dropped.
-        // If some producers on the way did not produce enough elements,
-        // partial CollectResults may have been dropped without
-        // being reduced to the final result, and we will see
-        // that as the length coming up short.
-        //
-        // Here, we assert that `slice` is fully initialized. This is
-        // checked by the following assert, which verifies if a
-        // complete CollectResult was produced; if the length is
-        // correct, it is necessarily covering the target slice.
-        // Since we know that the consumer cannot have escaped from
-        // `drive` (by parametricity, essentially), we know that any
-        // stores that will happen, have happened. Unless some code is buggy,
-        // that means we should have seen `len` total writes.
-        let actual_writes = result.len();
-        assert!(
-            actual_writes == self.len,
-            "expected {} total writes, but got {}",
-            self.len,
-            actual_writes
-        );
-
-        // Release the result's mutable borrow and "proxy ownership"
-        // of the elements, before the vector takes it over.
-        result.release_ownership();
-
-        let new_len = self.vec.len() + self.len;
-
-        unsafe {
-            self.vec.set_len(new_len);
-        }
-    }
-
-    /// Reserve space for `len` more elements in the vector,
-    /// and return a slice to the uninitialized tail of the vector
-    fn reserve_get_tail_slice(vec: &mut Vec<T>, len: usize) -> &mut [MaybeUninit<T>] {
-        // Reserve the new space.
-        vec.reserve(len);
-
-        // TODO: use `Vec::spare_capacity_mut` instead
-        // SAFETY: `MaybeUninit<T>` is guaranteed to have the same layout
-        // as `T`, and we already made sure to have the additional space.
-        let start = vec.len();
-        let tail_ptr = vec[start..].as_mut_ptr() as *mut MaybeUninit<T>;
-        unsafe { slice::from_raw_parts_mut(tail_ptr, len) }
-    }
-}
-
-/// Extends a vector with items from a parallel iterator.
-impl<T> ParallelExtend<T> for Vec<T>
+/// Create a consumer on the slice of memory we are collecting into.
+///
+/// The consumer needs to be used inside the scope function, and the
+/// complete collect result passed back.
+///
+/// This method will verify the collect result, and panic if the slice
+/// was not fully written into. Otherwise, in the successful case,
+/// the vector is complete with the collected result.
+fn collect_with_consumer<T, F>(vec: &mut Vec<T>, len: usize, scope_fn: F)
 where
     T: Send,
+    F: FnOnce(CollectConsumer<'_, T>) -> CollectResult<'_, T>,
 {
-    fn par_extend<I>(&mut self, par_iter: I)
-    where
-        I: IntoParallelIterator<Item = T>,
-    {
-        // See the vec_collect benchmarks in rayon-demo for different strategies.
-        let par_iter = par_iter.into_par_iter();
-        match par_iter.opt_len() {
-            Some(len) => {
-                // When Rust gets specialization, we can get here for indexed iterators
-                // without relying on `opt_len`.  Until then, `special_extend()` fakes
-                // an unindexed mode on the promise that `opt_len()` is accurate.
-                special_extend(par_iter, len, self);
-            }
-            None => {
-                // This works like `extend`, but `Vec::append` is more efficient.
-                let list = super::extend::collect(par_iter);
-                self.reserve(super::extend::len(&list));
-                for mut vec in list {
-                    self.append(&mut vec);
-                }
-            }
-        }
+    // Reserve space for `len` more elements in the vector,
+    vec.reserve(len);
+
+    // Create the consumer and run the callback for collection.
+    let result = scope_fn(CollectConsumer::appender(vec, len));
+
+    // The `CollectResult` represents a contiguous part of the slice, that has
+    // been written to. On unwind here, the `CollectResult` will be dropped. If
+    // some producers on the way did not produce enough elements, partial
+    // `CollectResult`s may have been dropped without being reduced to the final
+    // result, and we will see that as the length coming up short.
+    //
+    // Here, we assert that added length is fully initialized. This is checked
+    // by the following assert, which verifies if a complete `CollectResult`
+    // was produced; if the length is correct, it is necessarily covering the
+    // target slice. Since we know that the consumer cannot have escaped from
+    // `drive` (by parametricity, essentially), we know that any stores that
+    // will happen, have happened. Unless some code is buggy, that means we
+    // should have seen `len` total writes.
+    let actual_writes = result.len();
+    assert!(
+        actual_writes == len,
+        "expected {} total writes, but got {}",
+        len,
+        actual_writes
+    );
+
+    // Release the result's mutable borrow and "proxy ownership"
+    // of the elements, before the vector takes it over.
+    result.release_ownership();
+
+    let new_len = vec.len() + len;
+
+    unsafe {
+        vec.set_len(new_len);
     }
 }
diff --git a/src/iter/collect/test.rs b/src/iter/collect/test.rs
index ddf7757..b5f676f 100644
--- a/src/iter/collect/test.rs
+++ b/src/iter/collect/test.rs
@@ -5,7 +5,7 @@
 // try to drive the "collect consumer" incorrectly. These should
 // result in panics.
 
-use super::Collect;
+use super::collect_with_consumer;
 use crate::iter::plumbing::*;
 use rayon_core::join;
 
@@ -20,7 +20,7 @@ use std::thread::Result as ThreadResult;
 #[should_panic(expected = "too many values")]
 fn produce_too_many_items() {
     let mut v = vec![];
-    Collect::new(&mut v, 2).with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 2, |consumer| {
         let mut folder = consumer.into_folder();
         folder = folder.consume(22);
         folder = folder.consume(23);
@@ -35,8 +35,7 @@ fn produce_too_many_items() {
 #[should_panic(expected = "expected 5 total writes, but got 2")]
 fn produce_fewer_items() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 5);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 5, |consumer| {
         let mut folder = consumer.into_folder();
         folder = folder.consume(22);
         folder = folder.consume(23);
@@ -49,8 +48,7 @@ fn produce_fewer_items() {
 #[should_panic(expected = "expected 4 total writes, but got 2")]
 fn left_produces_items_with_no_complete() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let mut left_folder = left_consumer.into_folder();
         let mut right_folder = right_consumer.into_folder();
@@ -66,8 +64,7 @@ fn left_produces_items_with_no_complete() {
 #[should_panic(expected = "expected 4 total writes, but got 2")]
 fn right_produces_items_with_no_complete() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let mut left_folder = left_consumer.into_folder();
         let mut right_folder = right_consumer.into_folder();
@@ -83,8 +80,7 @@ fn produces_items_with_no_complete() {
     let counter = DropCounter::default();
     let mut v = vec![];
     let panic_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
-        let collect = Collect::new(&mut v, 2);
-        collect.with_consumer(|consumer| {
+        collect_with_consumer(&mut v, 2, |consumer| {
             let mut folder = consumer.into_folder();
             folder = folder.consume(counter.element());
             folder = folder.consume(counter.element());
@@ -102,8 +98,7 @@ fn produces_items_with_no_complete() {
 #[should_panic(expected = "too many values")]
 fn left_produces_too_many_items() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let mut left_folder = left_consumer.into_folder();
         let mut right_folder = right_consumer.into_folder();
@@ -120,8 +115,7 @@ fn left_produces_too_many_items() {
 #[should_panic(expected = "too many values")]
 fn right_produces_too_many_items() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let mut left_folder = left_consumer.into_folder();
         let mut right_folder = right_consumer.into_folder();
@@ -138,8 +132,7 @@ fn right_produces_too_many_items() {
 #[should_panic(expected = "expected 4 total writes, but got 1")]
 fn left_produces_fewer_items() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let reducer = consumer.to_reducer();
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let mut left_folder = left_consumer.into_folder();
@@ -158,8 +151,7 @@ fn left_produces_fewer_items() {
 #[should_panic(expected = "expected 4 total writes, but got 2")]
 fn only_left_result() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let mut left_folder = left_consumer.into_folder();
         let mut right_folder = right_consumer.into_folder();
@@ -177,8 +169,7 @@ fn only_left_result() {
 #[should_panic(expected = "expected 4 total writes, but got 2")]
 fn only_right_result() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let mut left_folder = left_consumer.into_folder();
         let mut right_folder = right_consumer.into_folder();
@@ -195,8 +186,7 @@ fn only_right_result() {
 #[should_panic(expected = "expected 4 total writes, but got 2")]
 fn reducer_does_not_preserve_order() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let reducer = consumer.to_reducer();
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let mut left_folder = left_consumer.into_folder();
@@ -215,8 +205,7 @@ fn reducer_does_not_preserve_order() {
 #[should_panic(expected = "expected 4 total writes, but got 3")]
 fn right_produces_fewer_items() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let reducer = consumer.to_reducer();
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let mut left_folder = left_consumer.into_folder();
@@ -230,13 +219,12 @@ fn right_produces_fewer_items() {
 }
 
 // The left consumer panics and the right stops short, like `panic_fuse()`.
-// We should get the left panic without finishing `Collect::with_consumer`.
+// We should get the left panic without finishing `collect_with_consumer`.
 #[test]
 #[should_panic(expected = "left consumer panic")]
 fn left_panics() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let reducer = consumer.to_reducer();
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let (left_result, right_result) = join(
@@ -257,13 +245,12 @@ fn left_panics() {
 }
 
 // The right consumer panics and the left stops short, like `panic_fuse()`.
-// We should get the right panic without finishing `Collect::with_consumer`.
+// We should get the right panic without finishing `collect_with_consumer`.
 #[test]
 #[should_panic(expected = "right consumer panic")]
 fn right_panics() {
     let mut v = vec![];
-    let collect = Collect::new(&mut v, 4);
-    collect.with_consumer(|consumer| {
+    collect_with_consumer(&mut v, 4, |consumer| {
         let reducer = consumer.to_reducer();
         let (left_consumer, right_consumer, _) = consumer.split_at(2);
         let (left_result, right_result) = join(
@@ -290,8 +277,7 @@ fn left_produces_fewer_items_drops() {
     let counter = DropCounter::default();
     let mut v = vec![];
     let panic_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
-        let collect = Collect::new(&mut v, 4);
-        collect.with_consumer(|consumer| {
+        collect_with_consumer(&mut v, 4, |consumer| {
             let reducer = consumer.to_reducer();
             let (left_consumer, right_consumer, _) = consumer.split_at(2);
             let mut left_folder = left_consumer.into_folder();
diff --git a/src/iter/extend.rs b/src/iter/extend.rs
index fb89249..1769d47 100644
--- a/src/iter/extend.rs
+++ b/src/iter/extend.rs
@@ -1,4 +1,5 @@
 use super::noop::NoopConsumer;
+use super::plumbing::{Consumer, Folder, Reducer, UnindexedConsumer};
 use super::{IntoParallelIterator, ParallelExtend, ParallelIterator};
 
 use std::borrow::Cow;
@@ -9,55 +10,91 @@ use std::hash::{BuildHasher, Hash};
 
 /// Performs a generic `par_extend` by collecting to a `LinkedList<Vec<_>>` in
 /// parallel, then extending the collection sequentially.
-fn extend<C, I, F>(collection: &mut C, par_iter: I, reserve: F)
-where
-    I: IntoParallelIterator,
-    F: FnOnce(&mut C, &LinkedList<Vec<I::Item>>),
-    C: Extend<I::Item>,
-{
-    let list = collect(par_iter);
-    reserve(collection, &list);
-    for vec in list {
-        collection.extend(vec);
-    }
+macro_rules! extend {
+    ($self:ident, $par_iter:ident, $extend:ident) => {
+        $extend(
+            $self,
+            $par_iter.into_par_iter().drive_unindexed(ListVecConsumer),
+        );
+    };
 }
 
-pub(super) fn collect<I>(par_iter: I) -> LinkedList<Vec<I::Item>>
-where
-    I: IntoParallelIterator,
-{
-    par_iter
-        .into_par_iter()
-        .fold(Vec::new, vec_push)
-        .map(as_list)
-        .reduce(LinkedList::new, list_append)
+/// Computes the total length of a `LinkedList<Vec<_>>`.
+fn len<T>(list: &LinkedList<Vec<T>>) -> usize {
+    list.iter().map(Vec::len).sum()
 }
 
-fn vec_push<T>(mut vec: Vec<T>, elem: T) -> Vec<T> {
-    vec.push(elem);
-    vec
-}
+struct ListVecConsumer;
 
-fn as_list<T>(item: T) -> LinkedList<T> {
-    let mut list = LinkedList::new();
-    list.push_back(item);
-    list
+struct ListVecFolder<T> {
+    vec: Vec<T>,
 }
 
-fn list_append<T>(mut list1: LinkedList<T>, mut list2: LinkedList<T>) -> LinkedList<T> {
-    list1.append(&mut list2);
-    list1
+impl<T: Send> Consumer<T> for ListVecConsumer {
+    type Folder = ListVecFolder<T>;
+    type Reducer = ListReducer;
+    type Result = LinkedList<Vec<T>>;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) {
+        (Self, Self, ListReducer)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        ListVecFolder { vec: Vec::new() }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
 }
 
-/// Computes the total length of a `LinkedList<Vec<_>>`.
-pub(super) fn len<T>(list: &LinkedList<Vec<T>>) -> usize {
-    list.iter().map(Vec::len).sum()
+impl<T: Send> UnindexedConsumer<T> for ListVecConsumer {
+    fn split_off_left(&self) -> Self {
+        Self
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        ListReducer
+    }
 }
 
-fn no_reserve<C, T>(_: &mut C, _: &LinkedList<Vec<T>>) {}
+impl<T> Folder<T> for ListVecFolder<T> {
+    type Result = LinkedList<Vec<T>>;
+
+    fn consume(mut self, item: T) -> Self {
+        self.vec.push(item);
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        self.vec.extend(iter);
+        self
+    }
 
-fn heap_reserve<T: Ord, U>(heap: &mut BinaryHeap<T>, list: &LinkedList<Vec<U>>) {
-    heap.reserve(len(list));
+    fn complete(self) -> Self::Result {
+        let mut list = LinkedList::new();
+        if !self.vec.is_empty() {
+            list.push_back(self.vec);
+        }
+        list
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+fn heap_extend<T, Item>(heap: &mut BinaryHeap<T>, list: LinkedList<Vec<Item>>)
+where
+    BinaryHeap<T>: Extend<Item>,
+{
+    heap.reserve(len(&list));
+    for vec in list {
+        heap.extend(vec);
+    }
 }
 
 /// Extends a binary heap with items from a parallel iterator.
@@ -69,7 +106,7 @@ where
     where
         I: IntoParallelIterator<Item = T>,
     {
-        extend(self, par_iter, heap_reserve);
+        extend!(self, par_iter, heap_extend);
     }
 }
 
@@ -82,7 +119,16 @@ where
     where
         I: IntoParallelIterator<Item = &'a T>,
     {
-        extend(self, par_iter, heap_reserve);
+        extend!(self, par_iter, heap_extend);
+    }
+}
+
+fn btree_map_extend<K, V, Item>(map: &mut BTreeMap<K, V>, list: LinkedList<Vec<Item>>)
+where
+    BTreeMap<K, V>: Extend<Item>,
+{
+    for vec in list {
+        map.extend(vec);
     }
 }
 
@@ -96,7 +142,7 @@ where
     where
         I: IntoParallelIterator<Item = (K, V)>,
     {
-        extend(self, par_iter, no_reserve);
+        extend!(self, par_iter, btree_map_extend);
     }
 }
 
@@ -110,7 +156,16 @@ where
     where
         I: IntoParallelIterator<Item = (&'a K, &'a V)>,
     {
-        extend(self, par_iter, no_reserve);
+        extend!(self, par_iter, btree_map_extend);
+    }
+}
+
+fn btree_set_extend<T, Item>(set: &mut BTreeSet<T>, list: LinkedList<Vec<Item>>)
+where
+    BTreeSet<T>: Extend<Item>,
+{
+    for vec in list {
+        set.extend(vec);
     }
 }
 
@@ -123,7 +178,7 @@ where
     where
         I: IntoParallelIterator<Item = T>,
     {
-        extend(self, par_iter, no_reserve);
+        extend!(self, par_iter, btree_set_extend);
     }
 }
 
@@ -136,16 +191,20 @@ where
     where
         I: IntoParallelIterator<Item = &'a T>,
     {
-        extend(self, par_iter, no_reserve);
+        extend!(self, par_iter, btree_set_extend);
     }
 }
 
-fn map_reserve<K, V, S, U>(map: &mut HashMap<K, V, S>, list: &LinkedList<Vec<U>>)
+fn hash_map_extend<K, V, S, Item>(map: &mut HashMap<K, V, S>, list: LinkedList<Vec<Item>>)
 where
+    HashMap<K, V, S>: Extend<Item>,
     K: Eq + Hash,
     S: BuildHasher,
 {
-    map.reserve(len(list));
+    map.reserve(len(&list));
+    for vec in list {
+        map.extend(vec);
+    }
 }
 
 /// Extends a hash map with items from a parallel iterator.
@@ -160,7 +219,7 @@ where
         I: IntoParallelIterator<Item = (K, V)>,
     {
         // See the map_collect benchmarks in rayon-demo for different strategies.
-        extend(self, par_iter, map_reserve);
+        extend!(self, par_iter, hash_map_extend);
     }
 }
 
@@ -175,16 +234,20 @@ where
     where
         I: IntoParallelIterator<Item = (&'a K, &'a V)>,
     {
-        extend(self, par_iter, map_reserve);
+        extend!(self, par_iter, hash_map_extend);
     }
 }
 
-fn set_reserve<T, S, U>(set: &mut HashSet<T, S>, list: &LinkedList<Vec<U>>)
+fn hash_set_extend<T, S, Item>(set: &mut HashSet<T, S>, list: LinkedList<Vec<Item>>)
 where
+    HashSet<T, S>: Extend<Item>,
     T: Eq + Hash,
     S: BuildHasher,
 {
-    set.reserve(len(list));
+    set.reserve(len(&list));
+    for vec in list {
+        set.extend(vec);
+    }
 }
 
 /// Extends a hash set with items from a parallel iterator.
@@ -197,7 +260,7 @@ where
     where
         I: IntoParallelIterator<Item = T>,
     {
-        extend(self, par_iter, set_reserve);
+        extend!(self, par_iter, hash_set_extend);
     }
 }
 
@@ -211,15 +274,10 @@ where
     where
         I: IntoParallelIterator<Item = &'a T>,
     {
-        extend(self, par_iter, set_reserve);
+        extend!(self, par_iter, hash_set_extend);
     }
 }
 
-fn list_push_back<T>(mut list: LinkedList<T>, elem: T) -> LinkedList<T> {
-    list.push_back(elem);
-    list
-}
-
 /// Extends a linked list with items from a parallel iterator.
 impl<T> ParallelExtend<T> for LinkedList<T>
 where
@@ -229,10 +287,7 @@ where
     where
         I: IntoParallelIterator<Item = T>,
     {
-        let mut list = par_iter
-            .into_par_iter()
-            .fold(LinkedList::new, list_push_back)
-            .reduce(LinkedList::new, list_append);
+        let mut list = par_iter.into_par_iter().drive_unindexed(ListConsumer);
         self.append(&mut list);
     }
 }
@@ -246,13 +301,83 @@ where
     where
         I: IntoParallelIterator<Item = &'a T>,
     {
-        self.par_extend(par_iter.into_par_iter().cloned())
+        self.par_extend(par_iter.into_par_iter().copied())
+    }
+}
+
+struct ListConsumer;
+
+struct ListFolder<T> {
+    list: LinkedList<T>,
+}
+
+struct ListReducer;
+
+impl<T: Send> Consumer<T> for ListConsumer {
+    type Folder = ListFolder<T>;
+    type Reducer = ListReducer;
+    type Result = LinkedList<T>;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) {
+        (Self, Self, ListReducer)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        ListFolder {
+            list: LinkedList::new(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<T: Send> UnindexedConsumer<T> for ListConsumer {
+    fn split_off_left(&self) -> Self {
+        Self
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        ListReducer
+    }
+}
+
+impl<T> Folder<T> for ListFolder<T> {
+    type Result = LinkedList<T>;
+
+    fn consume(mut self, item: T) -> Self {
+        self.list.push_back(item);
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        self.list.extend(iter);
+        self
+    }
+
+    fn complete(self) -> Self::Result {
+        self.list
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<T> Reducer<LinkedList<T>> for ListReducer {
+    fn reduce(self, mut left: LinkedList<T>, mut right: LinkedList<T>) -> LinkedList<T> {
+        left.append(&mut right);
+        left
     }
 }
 
-fn string_push(mut string: String, ch: char) -> String {
-    string.push(ch);
-    string
+fn flat_string_extend(string: &mut String, list: LinkedList<String>) {
+    string.reserve(list.iter().map(String::len).sum());
+    string.extend(list);
 }
 
 /// Extends a string with characters from a parallel iterator.
@@ -263,14 +388,8 @@ impl ParallelExtend<char> for String {
     {
         // This is like `extend`, but `Vec<char>` is less efficient to deal
         // with than `String`, so instead collect to `LinkedList<String>`.
-        let list: LinkedList<_> = par_iter
-            .into_par_iter()
-            .fold(String::new, string_push)
-            .map(as_list)
-            .reduce(LinkedList::new, list_append);
-
-        self.reserve(list.iter().map(String::len).sum());
-        self.extend(list)
+        let list = par_iter.into_par_iter().drive_unindexed(ListStringConsumer);
+        flat_string_extend(self, list);
     }
 }
 
@@ -280,13 +399,85 @@ impl<'a> ParallelExtend<&'a char> for String {
     where
         I: IntoParallelIterator<Item = &'a char>,
     {
-        self.par_extend(par_iter.into_par_iter().cloned())
+        self.par_extend(par_iter.into_par_iter().copied())
     }
 }
 
-fn string_reserve<T: AsRef<str>>(string: &mut String, list: &LinkedList<Vec<T>>) {
-    let len = list.iter().flatten().map(T::as_ref).map(str::len).sum();
+struct ListStringConsumer;
+
+struct ListStringFolder {
+    string: String,
+}
+
+impl Consumer<char> for ListStringConsumer {
+    type Folder = ListStringFolder;
+    type Reducer = ListReducer;
+    type Result = LinkedList<String>;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) {
+        (Self, Self, ListReducer)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        ListStringFolder {
+            string: String::new(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl UnindexedConsumer<char> for ListStringConsumer {
+    fn split_off_left(&self) -> Self {
+        Self
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        ListReducer
+    }
+}
+
+impl Folder<char> for ListStringFolder {
+    type Result = LinkedList<String>;
+
+    fn consume(mut self, item: char) -> Self {
+        self.string.push(item);
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = char>,
+    {
+        self.string.extend(iter);
+        self
+    }
+
+    fn complete(self) -> Self::Result {
+        let mut list = LinkedList::new();
+        if !self.string.is_empty() {
+            list.push_back(self.string);
+        }
+        list
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+fn string_extend<Item>(string: &mut String, list: LinkedList<Vec<Item>>)
+where
+    String: Extend<Item>,
+    Item: AsRef<str>,
+{
+    let len = list.iter().flatten().map(Item::as_ref).map(str::len).sum();
     string.reserve(len);
+    for vec in list {
+        string.extend(vec);
+    }
 }
 
 /// Extends a string with string slices from a parallel iterator.
@@ -295,7 +486,7 @@ impl<'a> ParallelExtend<&'a str> for String {
     where
         I: IntoParallelIterator<Item = &'a str>,
     {
-        extend(self, par_iter, string_reserve);
+        extend!(self, par_iter, string_extend);
     }
 }
 
@@ -305,7 +496,7 @@ impl ParallelExtend<String> for String {
     where
         I: IntoParallelIterator<Item = String>,
     {
-        extend(self, par_iter, string_reserve);
+        extend!(self, par_iter, string_extend);
     }
 }
 
@@ -315,12 +506,18 @@ impl<'a> ParallelExtend<Cow<'a, str>> for String {
     where
         I: IntoParallelIterator<Item = Cow<'a, str>>,
     {
-        extend(self, par_iter, string_reserve);
+        extend!(self, par_iter, string_extend);
     }
 }
 
-fn deque_reserve<T, U>(deque: &mut VecDeque<T>, list: &LinkedList<Vec<U>>) {
-    deque.reserve(len(list));
+fn deque_extend<T, Item>(deque: &mut VecDeque<T>, list: LinkedList<Vec<Item>>)
+where
+    VecDeque<T>: Extend<Item>,
+{
+    deque.reserve(len(&list));
+    for vec in list {
+        deque.extend(vec);
+    }
 }
 
 /// Extends a deque with items from a parallel iterator.
@@ -332,7 +529,7 @@ where
     where
         I: IntoParallelIterator<Item = T>,
     {
-        extend(self, par_iter, deque_reserve);
+        extend!(self, par_iter, deque_extend);
     }
 }
 
@@ -345,12 +542,43 @@ where
     where
         I: IntoParallelIterator<Item = &'a T>,
     {
-        extend(self, par_iter, deque_reserve);
+        extend!(self, par_iter, deque_extend);
     }
 }
 
-// See the `collect` module for the `Vec<T>` implementation.
-// impl<T> ParallelExtend<T> for Vec<T>
+fn vec_append<T>(vec: &mut Vec<T>, list: LinkedList<Vec<T>>) {
+    vec.reserve(len(&list));
+    for mut other in list {
+        vec.append(&mut other);
+    }
+}
+
+/// Extends a vector with items from a parallel iterator.
+impl<T> ParallelExtend<T> for Vec<T>
+where
+    T: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        // See the vec_collect benchmarks in rayon-demo for different strategies.
+        let par_iter = par_iter.into_par_iter();
+        match par_iter.opt_len() {
+            Some(len) => {
+                // When Rust gets specialization, we can get here for indexed iterators
+                // without relying on `opt_len`.  Until then, `special_extend()` fakes
+                // an unindexed mode on the promise that `opt_len()` is accurate.
+                super::collect::special_extend(par_iter, len, self);
+            }
+            None => {
+                // This works like `extend`, but `Vec::append` is more efficient.
+                let list = par_iter.drive_unindexed(ListVecConsumer);
+                vec_append(self, list);
+            }
+        }
+    }
+}
 
 /// Extends a vector with copied items from a parallel iterator.
 impl<'a, T> ParallelExtend<&'a T> for Vec<T>
@@ -361,7 +589,7 @@ where
     where
         I: IntoParallelIterator<Item = &'a T>,
     {
-        self.par_extend(par_iter.into_par_iter().cloned())
+        self.par_extend(par_iter.into_par_iter().copied())
     }
 }
 
diff --git a/src/iter/filter.rs b/src/iter/filter.rs
index 38627f7..e1b74ba 100644
--- a/src/iter/filter.rs
+++ b/src/iter/filter.rs
@@ -97,7 +97,7 @@ where
     P: Fn(&T) -> bool + Sync,
 {
     fn split_off_left(&self) -> Self {
-        FilterConsumer::new(self.base.split_off_left(), &self.filter_op)
+        FilterConsumer::new(self.base.split_off_left(), self.filter_op)
     }
 
     fn to_reducer(&self) -> Self::Reducer {
diff --git a/src/iter/filter_map.rs b/src/iter/filter_map.rs
index f19c385..db1c7e3 100644
--- a/src/iter/filter_map.rs
+++ b/src/iter/filter_map.rs
@@ -98,7 +98,7 @@ where
     P: Fn(T) -> Option<U> + Sync + 'p,
 {
     fn split_off_left(&self) -> Self {
-        FilterMapConsumer::new(self.base.split_off_left(), &self.filter_op)
+        FilterMapConsumer::new(self.base.split_off_left(), self.filter_op)
     }
 
     fn to_reducer(&self) -> Self::Reducer {
diff --git a/src/iter/find.rs b/src/iter/find.rs
index 971db2b..b16ee84 100644
--- a/src/iter/find.rs
+++ b/src/iter/find.rs
@@ -94,7 +94,7 @@ where
         self.item = iter
             .into_iter()
             // stop iterating if another thread has found something
-            .take_while(not_full(&self.found))
+            .take_while(not_full(self.found))
             .find(self.find_op);
         if self.item.is_some() {
             self.found.store(true, Ordering::Relaxed)
diff --git a/src/iter/fold_chunks.rs b/src/iter/fold_chunks.rs
new file mode 100644
index 0000000..185fb1a
--- /dev/null
+++ b/src/iter/fold_chunks.rs
@@ -0,0 +1,236 @@
+use std::fmt::{self, Debug};
+
+use super::chunks::ChunkProducer;
+use super::plumbing::*;
+use super::*;
+use crate::math::div_round_up;
+
+/// `FoldChunks` is an iterator that groups elements of an underlying iterator and applies a
+/// function over them, producing a single value for each group.
+///
+/// This struct is created by the [`fold_chunks()`] method on [`IndexedParallelIterator`]
+///
+/// [`fold_chunks()`]: trait.IndexedParallelIterator.html#method.fold_chunks
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FoldChunks<I, ID, F>
+where
+    I: IndexedParallelIterator,
+{
+    base: I,
+    chunk_size: usize,
+    fold_op: F,
+    identity: ID,
+}
+
+impl<I: IndexedParallelIterator + Debug, ID, F> Debug for FoldChunks<I, ID, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Fold")
+            .field("base", &self.base)
+            .field("chunk_size", &self.chunk_size)
+            .finish()
+    }
+}
+
+impl<I, ID, U, F> FoldChunks<I, ID, F>
+where
+    I: IndexedParallelIterator,
+    ID: Fn() -> U + Send + Sync,
+    F: Fn(U, I::Item) -> U + Send + Sync,
+    U: Send,
+{
+    /// Creates a new `FoldChunks` iterator
+    pub(super) fn new(base: I, chunk_size: usize, identity: ID, fold_op: F) -> Self {
+        FoldChunks {
+            base,
+            chunk_size,
+            identity,
+            fold_op,
+        }
+    }
+}
+
+impl<I, ID, U, F> ParallelIterator for FoldChunks<I, ID, F>
+where
+    I: IndexedParallelIterator,
+    ID: Fn() -> U + Send + Sync,
+    F: Fn(U, I::Item) -> U + Send + Sync,
+    U: Send,
+{
+    type Item = U;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<U>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I, ID, U, F> IndexedParallelIterator for FoldChunks<I, ID, F>
+where
+    I: IndexedParallelIterator,
+    ID: Fn() -> U + Send + Sync,
+    F: Fn(U, I::Item) -> U + Send + Sync,
+    U: Send,
+{
+    fn len(&self) -> usize {
+        div_round_up(self.base.len(), self.chunk_size)
+    }
+
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let len = self.base.len();
+        return self.base.with_producer(Callback {
+            chunk_size: self.chunk_size,
+            len,
+            identity: self.identity,
+            fold_op: self.fold_op,
+            callback,
+        });
+
+        struct Callback<CB, ID, F> {
+            chunk_size: usize,
+            len: usize,
+            identity: ID,
+            fold_op: F,
+            callback: CB,
+        }
+
+        impl<T, CB, ID, U, F> ProducerCallback<T> for Callback<CB, ID, F>
+        where
+            CB: ProducerCallback<U>,
+            ID: Fn() -> U + Send + Sync,
+            F: Fn(U, T) -> U + Send + Sync,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let identity = &self.identity;
+                let fold_op = &self.fold_op;
+                let fold_iter = move |iter: P::IntoIter| iter.fold(identity(), fold_op);
+                let producer = ChunkProducer::new(self.chunk_size, self.len, base, fold_iter);
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::ops::Add;
+
+    #[test]
+    fn check_fold_chunks() {
+        let words = "bishbashbosh!"
+            .chars()
+            .collect::<Vec<_>>()
+            .into_par_iter()
+            .fold_chunks(4, String::new, |mut s, c| {
+                s.push(c);
+                s
+            })
+            .collect::<Vec<_>>();
+
+        assert_eq!(words, vec!["bish", "bash", "bosh", "!"]);
+    }
+
+    // 'closure' values for tests below
+    fn id() -> i32 {
+        0
+    }
+    fn sum<T, U>(x: T, y: U) -> T
+    where
+        T: Add<U, Output = T>,
+    {
+        x + y
+    }
+
+    #[test]
+    #[should_panic(expected = "chunk_size must not be zero")]
+    fn check_fold_chunks_zero_size() {
+        let _: Vec<i32> = vec![1, 2, 3]
+            .into_par_iter()
+            .fold_chunks(0, id, sum)
+            .collect();
+    }
+
+    #[test]
+    fn check_fold_chunks_even_size() {
+        assert_eq!(
+            vec![1 + 2 + 3, 4 + 5 + 6, 7 + 8 + 9],
+            (1..10)
+                .into_par_iter()
+                .fold_chunks(3, id, sum)
+                .collect::<Vec<i32>>()
+        );
+    }
+
+    #[test]
+    fn check_fold_chunks_empty() {
+        let v: Vec<i32> = vec![];
+        let expected: Vec<i32> = vec![];
+        assert_eq!(
+            expected,
+            v.into_par_iter()
+                .fold_chunks(2, id, sum)
+                .collect::<Vec<i32>>()
+        );
+    }
+
+    #[test]
+    fn check_fold_chunks_len() {
+        assert_eq!(4, (0..8).into_par_iter().fold_chunks(2, id, sum).len());
+        assert_eq!(3, (0..9).into_par_iter().fold_chunks(3, id, sum).len());
+        assert_eq!(3, (0..8).into_par_iter().fold_chunks(3, id, sum).len());
+        assert_eq!(1, (&[1]).par_iter().fold_chunks(3, id, sum).len());
+        assert_eq!(0, (0..0).into_par_iter().fold_chunks(3, id, sum).len());
+    }
+
+    #[test]
+    fn check_fold_chunks_uneven() {
+        let cases: Vec<(Vec<u32>, usize, Vec<u32>)> = vec![
+            ((0..5).collect(), 3, vec![0 + 1 + 2, 3 + 4]),
+            (vec![1], 5, vec![1]),
+            ((0..4).collect(), 3, vec![0 + 1 + 2, 3]),
+        ];
+
+        for (i, (v, n, expected)) in cases.into_iter().enumerate() {
+            let mut res: Vec<u32> = vec![];
+            v.par_iter()
+                .fold_chunks(n, || 0, sum)
+                .collect_into_vec(&mut res);
+            assert_eq!(expected, res, "Case {} failed", i);
+
+            res.truncate(0);
+            v.into_par_iter()
+                .fold_chunks(n, || 0, sum)
+                .rev()
+                .collect_into_vec(&mut res);
+            assert_eq!(
+                expected.into_iter().rev().collect::<Vec<u32>>(),
+                res,
+                "Case {} reversed failed",
+                i
+            );
+        }
+    }
+}
diff --git a/src/iter/fold_chunks_with.rs b/src/iter/fold_chunks_with.rs
new file mode 100644
index 0000000..af120ae
--- /dev/null
+++ b/src/iter/fold_chunks_with.rs
@@ -0,0 +1,231 @@
+use std::fmt::{self, Debug};
+
+use super::chunks::ChunkProducer;
+use super::plumbing::*;
+use super::*;
+use crate::math::div_round_up;
+
+/// `FoldChunksWith` is an iterator that groups elements of an underlying iterator and applies a
+/// function over them, producing a single value for each group.
+///
+/// This struct is created by the [`fold_chunks_with()`] method on [`IndexedParallelIterator`]
+///
+/// [`fold_chunks_with()`]: trait.IndexedParallelIterator.html#method.fold_chunks
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FoldChunksWith<I, U, F>
+where
+    I: IndexedParallelIterator,
+{
+    base: I,
+    chunk_size: usize,
+    item: U,
+    fold_op: F,
+}
+
+impl<I: IndexedParallelIterator + Debug, U: Debug, F> Debug for FoldChunksWith<I, U, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Fold")
+            .field("base", &self.base)
+            .field("chunk_size", &self.chunk_size)
+            .field("item", &self.item)
+            .finish()
+    }
+}
+
+impl<I, U, F> FoldChunksWith<I, U, F>
+where
+    I: IndexedParallelIterator,
+    U: Send + Clone,
+    F: Fn(U, I::Item) -> U + Send + Sync,
+{
+    /// Creates a new `FoldChunksWith` iterator
+    pub(super) fn new(base: I, chunk_size: usize, item: U, fold_op: F) -> Self {
+        FoldChunksWith {
+            base,
+            chunk_size,
+            item,
+            fold_op,
+        }
+    }
+}
+
+impl<I, U, F> ParallelIterator for FoldChunksWith<I, U, F>
+where
+    I: IndexedParallelIterator,
+    U: Send + Clone,
+    F: Fn(U, I::Item) -> U + Send + Sync,
+{
+    type Item = U;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<U>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I, U, F> IndexedParallelIterator for FoldChunksWith<I, U, F>
+where
+    I: IndexedParallelIterator,
+    U: Send + Clone,
+    F: Fn(U, I::Item) -> U + Send + Sync,
+{
+    fn len(&self) -> usize {
+        div_round_up(self.base.len(), self.chunk_size)
+    }
+
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let len = self.base.len();
+        return self.base.with_producer(Callback {
+            chunk_size: self.chunk_size,
+            len,
+            item: self.item,
+            fold_op: self.fold_op,
+            callback,
+        });
+
+        struct Callback<CB, T, F> {
+            chunk_size: usize,
+            len: usize,
+            item: T,
+            fold_op: F,
+            callback: CB,
+        }
+
+        impl<T, U, F, CB> ProducerCallback<T> for Callback<CB, U, F>
+        where
+            CB: ProducerCallback<U>,
+            U: Send + Clone,
+            F: Fn(U, T) -> U + Send + Sync,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let item = self.item;
+                let fold_op = &self.fold_op;
+                let fold_iter = move |iter: P::IntoIter| iter.fold(item.clone(), fold_op);
+                let producer = ChunkProducer::new(self.chunk_size, self.len, base, fold_iter);
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::ops::Add;
+
+    #[test]
+    fn check_fold_chunks_with() {
+        let words = "bishbashbosh!"
+            .chars()
+            .collect::<Vec<_>>()
+            .into_par_iter()
+            .fold_chunks_with(4, String::new(), |mut s, c| {
+                s.push(c);
+                s
+            })
+            .collect::<Vec<_>>();
+
+        assert_eq!(words, vec!["bish", "bash", "bosh", "!"]);
+    }
+
+    // 'closure' value for tests below
+    fn sum<T, U>(x: T, y: U) -> T
+    where
+        T: Add<U, Output = T>,
+    {
+        x + y
+    }
+
+    #[test]
+    #[should_panic(expected = "chunk_size must not be zero")]
+    fn check_fold_chunks_zero_size() {
+        let _: Vec<i32> = vec![1, 2, 3]
+            .into_par_iter()
+            .fold_chunks_with(0, 0, sum)
+            .collect();
+    }
+
+    #[test]
+    fn check_fold_chunks_even_size() {
+        assert_eq!(
+            vec![1 + 2 + 3, 4 + 5 + 6, 7 + 8 + 9],
+            (1..10)
+                .into_par_iter()
+                .fold_chunks_with(3, 0, sum)
+                .collect::<Vec<i32>>()
+        );
+    }
+
+    #[test]
+    fn check_fold_chunks_with_empty() {
+        let v: Vec<i32> = vec![];
+        let expected: Vec<i32> = vec![];
+        assert_eq!(
+            expected,
+            v.into_par_iter()
+                .fold_chunks_with(2, 0, sum)
+                .collect::<Vec<i32>>()
+        );
+    }
+
+    #[test]
+    fn check_fold_chunks_len() {
+        assert_eq!(4, (0..8).into_par_iter().fold_chunks_with(2, 0, sum).len());
+        assert_eq!(3, (0..9).into_par_iter().fold_chunks_with(3, 0, sum).len());
+        assert_eq!(3, (0..8).into_par_iter().fold_chunks_with(3, 0, sum).len());
+        assert_eq!(1, (&[1]).par_iter().fold_chunks_with(3, 0, sum).len());
+        assert_eq!(0, (0..0).into_par_iter().fold_chunks_with(3, 0, sum).len());
+    }
+
+    #[test]
+    fn check_fold_chunks_uneven() {
+        let cases: Vec<(Vec<u32>, usize, Vec<u32>)> = vec![
+            ((0..5).collect(), 3, vec![0 + 1 + 2, 3 + 4]),
+            (vec![1], 5, vec![1]),
+            ((0..4).collect(), 3, vec![0 + 1 + 2, 3]),
+        ];
+
+        for (i, (v, n, expected)) in cases.into_iter().enumerate() {
+            let mut res: Vec<u32> = vec![];
+            v.par_iter()
+                .fold_chunks_with(n, 0, sum)
+                .collect_into_vec(&mut res);
+            assert_eq!(expected, res, "Case {} failed", i);
+
+            res.truncate(0);
+            v.into_par_iter()
+                .fold_chunks_with(n, 0, sum)
+                .rev()
+                .collect_into_vec(&mut res);
+            assert_eq!(
+                expected.into_iter().rev().collect::<Vec<u32>>(),
+                res,
+                "Case {} reversed failed",
+                i
+            );
+        }
+    }
+}
diff --git a/src/iter/inspect.rs b/src/iter/inspect.rs
index 9b1cd09..c50ca02 100644
--- a/src/iter/inspect.rs
+++ b/src/iter/inspect.rs
@@ -209,7 +209,7 @@ where
     F: Fn(&T) + Sync,
 {
     fn split_off_left(&self) -> Self {
-        InspectConsumer::new(self.base.split_off_left(), &self.inspect_op)
+        InspectConsumer::new(self.base.split_off_left(), self.inspect_op)
     }
 
     fn to_reducer(&self) -> Self::Reducer {
diff --git a/src/iter/interleave.rs b/src/iter/interleave.rs
index b5d43d5..3cacc49 100644
--- a/src/iter/interleave.rs
+++ b/src/iter/interleave.rs
@@ -310,16 +310,16 @@ where
 {
     #[inline]
     fn next_back(&mut self) -> Option<I::Item> {
-        if self.i.len() == self.j.len() {
-            if self.i_next {
-                self.i.next_back()
-            } else {
-                self.j.next_back()
+        match self.i.len().cmp(&self.j.len()) {
+            Ordering::Less => self.j.next_back(),
+            Ordering::Equal => {
+                if self.i_next {
+                    self.i.next_back()
+                } else {
+                    self.j.next_back()
+                }
             }
-        } else if self.i.len() < self.j.len() {
-            self.j.next_back()
-        } else {
-            self.i.next_back()
+            Ordering::Greater => self.i.next_back(),
         }
     }
 }
diff --git a/src/iter/len.rs b/src/iter/len.rs
index e65b3c0..8ec7f33 100644
--- a/src/iter/len.rs
+++ b/src/iter/len.rs
@@ -3,9 +3,9 @@ use super::*;
 use std::cmp;
 
 /// `MinLen` is an iterator that imposes a minimum length on iterator splits.
-/// This struct is created by the [`min_len()`] method on [`IndexedParallelIterator`]
+/// This struct is created by the [`with_min_len()`] method on [`IndexedParallelIterator`]
 ///
-/// [`min_len()`]: trait.IndexedParallelIterator.html#method.min_len
+/// [`with_min_len()`]: trait.IndexedParallelIterator.html#method.with_min_len
 /// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
 #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
 #[derive(Debug, Clone)]
@@ -137,9 +137,9 @@ where
 }
 
 /// `MaxLen` is an iterator that imposes a maximum length on iterator splits.
-/// This struct is created by the [`max_len()`] method on [`IndexedParallelIterator`]
+/// This struct is created by the [`with_max_len()`] method on [`IndexedParallelIterator`]
 ///
-/// [`max_len()`]: trait.IndexedParallelIterator.html#method.max_len
+/// [`with_max_len()`]: trait.IndexedParallelIterator.html#method.with_max_len
 /// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
 #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
 #[derive(Debug, Clone)]
diff --git a/src/iter/map.rs b/src/iter/map.rs
index f2a35ff..da14d40 100644
--- a/src/iter/map.rs
+++ b/src/iter/map.rs
@@ -213,7 +213,7 @@ where
     R: Send,
 {
     fn split_off_left(&self) -> Self {
-        MapConsumer::new(self.base.split_off_left(), &self.map_op)
+        MapConsumer::new(self.base.split_off_left(), self.map_op)
     }
 
     fn to_reducer(&self) -> Self::Reducer {
diff --git a/src/iter/mod.rs b/src/iter/mod.rs
index edff1a6..98c9326 100644
--- a/src/iter/mod.rs
+++ b/src/iter/mod.rs
@@ -119,6 +119,8 @@ mod flat_map_iter;
 mod flatten;
 mod flatten_iter;
 mod fold;
+mod fold_chunks;
+mod fold_chunks_with;
 mod for_each;
 mod from_par_iter;
 mod inspect;
@@ -140,6 +142,7 @@ mod repeat;
 mod rev;
 mod skip;
 mod splitter;
+mod step_by;
 mod sum;
 mod take;
 mod try_fold;
@@ -165,6 +168,8 @@ pub use self::{
     flatten::Flatten,
     flatten_iter::FlattenIter,
     fold::{Fold, FoldWith},
+    fold_chunks::FoldChunks,
+    fold_chunks_with::FoldChunksWith,
     inspect::Inspect,
     interleave::Interleave,
     interleave_shortest::InterleaveShortest,
@@ -181,6 +186,7 @@ pub use self::{
     rev::Rev,
     skip::Skip,
     splitter::{split, Split},
+    step_by::StepBy,
     take::Take,
     try_fold::{TryFold, TryFoldWith},
     update::Update,
@@ -189,10 +195,6 @@ pub use self::{
     zip_eq::ZipEq,
 };
 
-mod step_by;
-#[cfg(step_by)]
-pub use self::step_by::StepBy;
-
 /// `IntoParallelIterator` implements the conversion to a [`ParallelIterator`].
 ///
 /// By implementing `IntoParallelIterator` for a type, you define how it will
@@ -457,10 +459,10 @@ pub trait ParallelIterator: Sized + Send {
     fn try_for_each<OP, R>(self, op: OP) -> R
     where
         OP: Fn(Self::Item) -> R + Sync + Send,
-        R: Try<Ok = ()> + Send,
+        R: Try<Output = ()> + Send,
     {
-        fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
-            R::from_ok(())
+        fn ok<R: Try<Output = ()>>(_: (), _: ()) -> R {
+            R::from_output(())
         }
 
         self.map(op).try_reduce(<()>::default, ok)
@@ -497,10 +499,10 @@ pub trait ParallelIterator: Sized + Send {
     where
         OP: Fn(&mut T, Self::Item) -> R + Sync + Send,
         T: Send + Clone,
-        R: Try<Ok = ()> + Send,
+        R: Try<Output = ()> + Send,
     {
-        fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
-            R::from_ok(())
+        fn ok<R: Try<Output = ()>>(_: (), _: ()) -> R {
+            R::from_output(())
         }
 
         self.map_with(init, op).try_reduce(<()>::default, ok)
@@ -539,10 +541,10 @@ pub trait ParallelIterator: Sized + Send {
     where
         OP: Fn(&mut T, Self::Item) -> R + Sync + Send,
         INIT: Fn() -> T + Sync + Send,
-        R: Try<Ok = ()> + Send,
+        R: Try<Output = ()> + Send,
     {
-        fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
-            R::from_ok(())
+        fn ok<R: Try<Output = ()>>(_: (), _: ()) -> R {
+            R::from_output(())
         }
 
         self.map_init(init, op).try_reduce(<()>::default, ok)
@@ -921,7 +923,7 @@ pub trait ParallelIterator: Sized + Send {
 
     /// An adaptor that flattens serial-iterable `Item`s into one large iterator.
     ///
-    /// See also [`flatten`](#method.flatten) and the analagous comparison of
+    /// See also [`flatten`](#method.flatten) and the analogous comparison of
     /// [`flat_map_iter` versus `flat_map`](#flat_map_iter-versus-flat_map).
     ///
     /// # Examples
@@ -1065,7 +1067,7 @@ pub trait ParallelIterator: Sized + Send {
     where
         OP: Fn(T, T) -> Self::Item + Sync + Send,
         ID: Fn() -> T + Sync + Send,
-        Self::Item: Try<Ok = T>,
+        Self::Item: Try<Output = T>,
     {
         try_reduce::try_reduce(self, identity, op)
     }
@@ -1108,7 +1110,7 @@ pub trait ParallelIterator: Sized + Send {
     fn try_reduce_with<T, OP>(self, op: OP) -> Option<Self::Item>
     where
         OP: Fn(T, T) -> Self::Item + Sync + Send,
-        Self::Item: Try<Ok = T>,
+        Self::Item: Try<Output = T>,
     {
         try_reduce_with::try_reduce_with(self, op)
     }
@@ -1124,7 +1126,7 @@ pub trait ParallelIterator: Sized + Send {
     /// multiple sums. The number of results is nondeterministic, as
     /// is the point where the breaks occur.
     ///
-    /// So if did the same parallel fold (`fold(0, |a,b| a+b)`) on
+    /// So if we did the same parallel fold (`fold(0, |a,b| a+b)`) on
     /// our example list, we might wind up with a sequence of two numbers,
     /// like so:
     ///
@@ -1311,7 +1313,7 @@ pub trait ParallelIterator: Sized + Send {
     where
         F: Fn(T, Self::Item) -> R + Sync + Send,
         ID: Fn() -> T + Sync + Send,
-        R: Try<Ok = T> + Send,
+        R: Try<Output = T> + Send,
     {
         TryFold::new(self, identity, fold_op)
     }
@@ -1337,7 +1339,7 @@ pub trait ParallelIterator: Sized + Send {
     fn try_fold_with<F, T, R>(self, init: T, fold_op: F) -> TryFoldWith<Self, R, F>
     where
         F: Fn(T, Self::Item) -> R + Sync + Send,
-        R: Try<Ok = T> + Send,
+        R: Try<Output = T> + Send,
         T: Clone + Send,
     {
         TryFoldWith::new(self, init, fold_op)
@@ -2098,7 +2100,7 @@ pub trait ParallelIterator: Sized + Send {
     /// Note: unlike the standard `Iterator::partition`, this allows distinct
     /// collection types for the left and right items.  This is more flexible,
     /// but may require new type annotations when converting sequential code
-    /// that used type inferrence assuming the two were the same.
+    /// that used type inference assuming the two were the same.
     ///
     /// # Examples
     ///
@@ -2241,6 +2243,8 @@ impl<T: ParallelIterator> IntoParallelIterator for T {
 /// those points.
 ///
 /// **Note:** Not implemented for `u64`, `i64`, `u128`, or `i128` ranges
+// Waiting for `ExactSizeIterator::is_empty` to be stabilized. See rust-lang/rust#35428
+#[allow(clippy::len_without_is_empty)]
 pub trait IndexedParallelIterator: ParallelIterator {
     /// Collects the results of the iterator into the specified
     /// vector. The vector is always truncated before execution
@@ -2339,13 +2343,18 @@ pub trait IndexedParallelIterator: ParallelIterator {
     /// // we should never get here
     /// assert_eq!(1, zipped.len());
     /// ```
+    #[track_caller]
     fn zip_eq<Z>(self, zip_op: Z) -> ZipEq<Self, Z::Iter>
     where
         Z: IntoParallelIterator,
         Z::Iter: IndexedParallelIterator,
     {
         let zip_op_iter = zip_op.into_par_iter();
-        assert_eq!(self.len(), zip_op_iter.len());
+        assert_eq!(
+            self.len(),
+            zip_op_iter.len(),
+            "iterators must have the same length"
+        );
         ZipEq::new(self, zip_op_iter)
     }
 
@@ -2415,6 +2424,89 @@ pub trait IndexedParallelIterator: ParallelIterator {
         Chunks::new(self, chunk_size)
     }
 
+    /// Splits an iterator into fixed-size chunks, performing a sequential [`fold()`] on
+    /// each chunk.
+    ///
+    /// Returns an iterator that produces a folded result for each chunk of items
+    /// produced by this iterator.
+    ///
+    /// This works essentially like:
+    ///
+    /// ```text
+    /// iter.chunks(chunk_size)
+    ///     .map(|chunk|
+    ///         chunk.into_iter()
+    ///             .fold(identity, fold_op)
+    ///     )
+    /// ```
+    ///
+    /// except there is no per-chunk allocation overhead.
+    ///
+    /// [`fold()`]: std::iter::Iterator#method.fold
+    ///
+    /// **Panics** if `chunk_size` is 0.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let nums = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    /// let chunk_sums = nums.into_par_iter().fold_chunks(2, || 0, |a, n| a + n).collect::<Vec<_>>();
+    /// assert_eq!(chunk_sums, vec![3, 7, 11, 15, 19]);
+    /// ```
+    #[track_caller]
+    fn fold_chunks<T, ID, F>(
+        self,
+        chunk_size: usize,
+        identity: ID,
+        fold_op: F,
+    ) -> FoldChunks<Self, ID, F>
+    where
+        ID: Fn() -> T + Send + Sync,
+        F: Fn(T, Self::Item) -> T + Send + Sync,
+        T: Send,
+    {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        FoldChunks::new(self, chunk_size, identity, fold_op)
+    }
+
+    /// Splits an iterator into fixed-size chunks, performing a sequential [`fold()`] on
+    /// each chunk.
+    ///
+    /// Returns an iterator that produces a folded result for each chunk of items
+    /// produced by this iterator.
+    ///
+    /// This works essentially like `fold_chunks(chunk_size, || init.clone(), fold_op)`,
+    /// except it doesn't require the `init` type to be `Sync`, nor any other form of
+    /// added synchronization.
+    ///
+    /// [`fold()`]: std::iter::Iterator#method.fold
+    ///
+    /// **Panics** if `chunk_size` is 0.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let nums = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    /// let chunk_sums = nums.into_par_iter().fold_chunks_with(2, 0, |a, n| a + n).collect::<Vec<_>>();
+    /// assert_eq!(chunk_sums, vec![3, 7, 11, 15, 19]);
+    /// ```
+    #[track_caller]
+    fn fold_chunks_with<T, F>(
+        self,
+        chunk_size: usize,
+        init: T,
+        fold_op: F,
+    ) -> FoldChunksWith<Self, T, F>
+    where
+        T: Send + Clone,
+        F: Fn(T, Self::Item) -> T + Send + Sync,
+    {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        FoldChunksWith::new(self, chunk_size, init, fold_op)
+    }
+
     /// Lexicographically compares the elements of this `ParallelIterator` with those of
     /// another.
     ///
@@ -2601,11 +2693,6 @@ pub trait IndexedParallelIterator: ParallelIterator {
     ///
     /// assert_eq!(result, [3, 6, 9])
     /// ```
-    ///
-    /// # Compatibility
-    ///
-    /// This method is only available on Rust 1.38 or greater.
-    #[cfg(step_by)]
     fn step_by(self, step: usize) -> StepBy<Self> {
         StepBy::new(self, step)
     }
@@ -2808,7 +2895,7 @@ pub trait IndexedParallelIterator: ParallelIterator {
     }
 
     /// Sets the minimum length of iterators desired to process in each
-    /// thread.  Rayon will not split any smaller than this length, but
+    /// rayon job.  Rayon will not split any smaller than this length, but
     /// of course an iterator could already be smaller to begin with.
     ///
     /// Producers like `zip` and `interleave` will use greater of the two
@@ -2834,7 +2921,7 @@ pub trait IndexedParallelIterator: ParallelIterator {
     }
 
     /// Sets the maximum length of iterators desired to process in each
-    /// thread.  Rayon will try to split at least below this length,
+    /// rayon job.  Rayon will try to split at least below this length,
     /// unless that would put it below the length from `with_min_len()`.
     /// For example, given min=10 and max=15, a length of 16 will not be
     /// split any further.
@@ -3145,50 +3232,154 @@ pub trait ParallelDrainRange<Idx = usize> {
 /// We hide the `Try` trait in a private module, as it's only meant to be a
 /// stable clone of the standard library's `Try` trait, as yet unstable.
 mod private {
+    use std::convert::Infallible;
+    use std::ops::ControlFlow::{self, Break, Continue};
+    use std::task::Poll;
+
     /// Clone of `std::ops::Try`.
     ///
     /// Implementing this trait is not permitted outside of `rayon`.
     pub trait Try {
         private_decl! {}
 
-        type Ok;
-        type Error;
-        fn into_result(self) -> Result<Self::Ok, Self::Error>;
-        fn from_ok(v: Self::Ok) -> Self;
-        fn from_error(v: Self::Error) -> Self;
+        type Output;
+        type Residual;
+
+        fn from_output(output: Self::Output) -> Self;
+
+        fn from_residual(residual: Self::Residual) -> Self;
+
+        fn branch(self) -> ControlFlow<Self::Residual, Self::Output>;
+    }
+
+    impl<B, C> Try for ControlFlow<B, C> {
+        private_impl! {}
+
+        type Output = C;
+        type Residual = ControlFlow<B, Infallible>;
+
+        fn from_output(output: Self::Output) -> Self {
+            Continue(output)
+        }
+
+        fn from_residual(residual: Self::Residual) -> Self {
+            match residual {
+                Break(b) => Break(b),
+                Continue(_) => unreachable!(),
+            }
+        }
+
+        fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
+            match self {
+                Continue(c) => Continue(c),
+                Break(b) => Break(Break(b)),
+            }
+        }
     }
 
     impl<T> Try for Option<T> {
         private_impl! {}
 
-        type Ok = T;
-        type Error = ();
+        type Output = T;
+        type Residual = Option<Infallible>;
 
-        fn into_result(self) -> Result<T, ()> {
-            self.ok_or(())
+        fn from_output(output: Self::Output) -> Self {
+            Some(output)
         }
-        fn from_ok(v: T) -> Self {
-            Some(v)
+
+        fn from_residual(residual: Self::Residual) -> Self {
+            match residual {
+                None => None,
+                Some(_) => unreachable!(),
+            }
         }
-        fn from_error(_: ()) -> Self {
-            None
+
+        fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
+            match self {
+                Some(c) => Continue(c),
+                None => Break(None),
+            }
         }
     }
 
     impl<T, E> Try for Result<T, E> {
         private_impl! {}
 
-        type Ok = T;
-        type Error = E;
+        type Output = T;
+        type Residual = Result<Infallible, E>;
 
-        fn into_result(self) -> Result<T, E> {
-            self
+        fn from_output(output: Self::Output) -> Self {
+            Ok(output)
         }
-        fn from_ok(v: T) -> Self {
-            Ok(v)
+
+        fn from_residual(residual: Self::Residual) -> Self {
+            match residual {
+                Err(e) => Err(e),
+                Ok(_) => unreachable!(),
+            }
         }
-        fn from_error(v: E) -> Self {
-            Err(v)
+
+        fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
+            match self {
+                Ok(c) => Continue(c),
+                Err(e) => Break(Err(e)),
+            }
+        }
+    }
+
+    impl<T, E> Try for Poll<Result<T, E>> {
+        private_impl! {}
+
+        type Output = Poll<T>;
+        type Residual = Result<Infallible, E>;
+
+        fn from_output(output: Self::Output) -> Self {
+            output.map(Ok)
+        }
+
+        fn from_residual(residual: Self::Residual) -> Self {
+            match residual {
+                Err(e) => Poll::Ready(Err(e)),
+                Ok(_) => unreachable!(),
+            }
+        }
+
+        fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
+            match self {
+                Poll::Pending => Continue(Poll::Pending),
+                Poll::Ready(Ok(c)) => Continue(Poll::Ready(c)),
+                Poll::Ready(Err(e)) => Break(Err(e)),
+            }
+        }
+    }
+
+    impl<T, E> Try for Poll<Option<Result<T, E>>> {
+        private_impl! {}
+
+        type Output = Poll<Option<T>>;
+        type Residual = Result<Infallible, E>;
+
+        fn from_output(output: Self::Output) -> Self {
+            match output {
+                Poll::Ready(o) => Poll::Ready(o.map(Ok)),
+                Poll::Pending => Poll::Pending,
+            }
+        }
+
+        fn from_residual(residual: Self::Residual) -> Self {
+            match residual {
+                Err(e) => Poll::Ready(Some(Err(e))),
+                Ok(_) => unreachable!(),
+            }
+        }
+
+        fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
+            match self {
+                Poll::Pending => Continue(Poll::Pending),
+                Poll::Ready(None) => Continue(Poll::Ready(None)),
+                Poll::Ready(Some(Ok(c))) => Continue(Poll::Ready(Some(c))),
+                Poll::Ready(Some(Err(e))) => Break(Err(e)),
+            }
         }
     }
 }
diff --git a/src/iter/par_bridge.rs b/src/iter/par_bridge.rs
index 339ac1a..8398274 100644
--- a/src/iter/par_bridge.rs
+++ b/src/iter/par_bridge.rs
@@ -1,12 +1,9 @@
-use crossbeam_deque::{Steal, Stealer, Worker};
-
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
-use std::sync::{Mutex, TryLockError};
-use std::thread::yield_now;
+use std::sync::Mutex;
 
-use crate::current_num_threads;
 use crate::iter::plumbing::{bridge_unindexed, Folder, UnindexedConsumer, UnindexedProducer};
 use crate::iter::ParallelIterator;
+use crate::{current_num_threads, current_thread_index};
 
 /// Conversion trait to convert an `Iterator` to a `ParallelIterator`.
 ///
@@ -78,71 +75,46 @@ where
     where
         C: UnindexedConsumer<Self::Item>,
     {
-        let split_count = AtomicUsize::new(current_num_threads());
-        let worker = Worker::new_fifo();
-        let stealer = worker.stealer();
-        let done = AtomicBool::new(false);
-        let iter = Mutex::new((self.iter, worker));
+        let num_threads = current_num_threads();
+        let threads_started: Vec<_> = (0..num_threads).map(|_| AtomicBool::new(false)).collect();
 
         bridge_unindexed(
-            IterParallelProducer {
-                split_count: &split_count,
-                done: &done,
-                iter: &iter,
-                items: stealer,
+            &IterParallelProducer {
+                split_count: AtomicUsize::new(num_threads),
+                iter: Mutex::new(self.iter.fuse()),
+                threads_started: &threads_started,
             },
             consumer,
         )
     }
 }
 
-struct IterParallelProducer<'a, Iter: Iterator> {
-    split_count: &'a AtomicUsize,
-    done: &'a AtomicBool,
-    iter: &'a Mutex<(Iter, Worker<Iter::Item>)>,
-    items: Stealer<Iter::Item>,
-}
-
-// manual clone because T doesn't need to be Clone, but the derive assumes it should be
-impl<'a, Iter: Iterator + 'a> Clone for IterParallelProducer<'a, Iter> {
-    fn clone(&self) -> Self {
-        IterParallelProducer {
-            split_count: self.split_count,
-            done: self.done,
-            iter: self.iter,
-            items: self.items.clone(),
-        }
-    }
+struct IterParallelProducer<'a, Iter> {
+    split_count: AtomicUsize,
+    iter: Mutex<std::iter::Fuse<Iter>>,
+    threads_started: &'a [AtomicBool],
 }
 
-impl<'a, Iter: Iterator + Send + 'a> UnindexedProducer for IterParallelProducer<'a, Iter>
-where
-    Iter::Item: Send,
-{
+impl<Iter: Iterator + Send> UnindexedProducer for &IterParallelProducer<'_, Iter> {
     type Item = Iter::Item;
 
     fn split(self) -> (Self, Option<Self>) {
         let mut count = self.split_count.load(Ordering::SeqCst);
 
         loop {
-            // Check if the iterator is exhausted *and* we've consumed every item from it.
-            let done = self.done.load(Ordering::SeqCst) && self.items.is_empty();
-
-            match count.checked_sub(1) {
-                Some(new_count) if !done => {
-                    match self.split_count.compare_exchange_weak(
-                        count,
-                        new_count,
-                        Ordering::SeqCst,
-                        Ordering::SeqCst,
-                    ) {
-                        Ok(_) => return (self.clone(), Some(self)),
-                        Err(last_count) => count = last_count,
-                    }
-                }
-                _ => {
-                    return (self, None);
+            // Check if the iterator is exhausted
+            if let Some(new_count) = count.checked_sub(1) {
+                match self.split_count.compare_exchange_weak(
+                    count,
+                    new_count,
+                    Ordering::SeqCst,
+                    Ordering::SeqCst,
+                ) {
+                    Ok(_) => return (self, Some(self)),
+                    Err(last_count) => count = last_count,
                 }
+            } else {
+                return (self, None);
             }
         }
     }
@@ -151,66 +123,39 @@ where
     where
         F: Folder<Self::Item>,
     {
+        // Guard against work-stealing-induced recursion, in case `Iter::next()`
+        // calls rayon internally, so we don't deadlock our mutex. We might also
+        // be recursing via `folder` methods, which doesn't present a mutex hazard,
+        // but it's lower overhead for us to just check this once, rather than
+        // updating additional shared state on every mutex lock/unlock.
+        // (If this isn't a rayon thread, then there's no work-stealing anyway...)
+        if let Some(i) = current_thread_index() {
+            // Note: If the number of threads in the pool ever grows dynamically, then
+            // we'll end up sharing flags and may falsely detect recursion -- that's
+            // still fine for overall correctness, just not optimal for parallelism.
+            let thread_started = &self.threads_started[i % self.threads_started.len()];
+            if thread_started.swap(true, Ordering::Relaxed) {
+                // We can't make progress with a nested mutex, so just return and let
+                // the outermost loop continue with the rest of the iterator items.
+                return folder;
+            }
+        }
+
         loop {
-            match self.items.steal() {
-                Steal::Success(it) => {
+            if let Ok(mut iter) = self.iter.lock() {
+                if let Some(it) = iter.next() {
+                    drop(iter);
                     folder = folder.consume(it);
                     if folder.full() {
                         return folder;
                     }
+                } else {
+                    return folder;
                 }
-                Steal::Empty => {
-                    // Don't storm the mutex if we're already done.
-                    if self.done.load(Ordering::SeqCst) {
-                        // Someone might have pushed more between our `steal()` and `done.load()`
-                        if self.items.is_empty() {
-                            // The iterator is out of items, no use in continuing
-                            return folder;
-                        }
-                    } else {
-                        // our cache is out of items, time to load more from the iterator
-                        match self.iter.try_lock() {
-                            Ok(mut guard) => {
-                                // Check `done` again in case we raced with the previous lock
-                                // holder on its way out.
-                                if self.done.load(Ordering::SeqCst) {
-                                    if self.items.is_empty() {
-                                        return folder;
-                                    }
-                                    continue;
-                                }
-
-                                let count = current_num_threads();
-                                let count = (count * count) * 2;
-
-                                let (ref mut iter, ref worker) = *guard;
-
-                                // while worker.len() < count {
-                                // FIXME the new deque doesn't let us count items.  We can just
-                                // push a number of items, but that doesn't consider active
-                                // stealers elsewhere.
-                                for _ in 0..count {
-                                    if let Some(it) = iter.next() {
-                                        worker.push(it);
-                                    } else {
-                                        self.done.store(true, Ordering::SeqCst);
-                                        break;
-                                    }
-                                }
-                            }
-                            Err(TryLockError::WouldBlock) => {
-                                // someone else has the mutex, just sit tight until it's ready
-                                yield_now(); //TODO: use a thread-pool-aware yield? (#548)
-                            }
-                            Err(TryLockError::Poisoned(_)) => {
-                                // any panics from other threads will have been caught by the pool,
-                                // and will be re-thrown when joined - just exit
-                                return folder;
-                            }
-                        }
-                    }
-                }
-                Steal::Retry => (),
+            } else {
+                // any panics from other threads will have been caught by the pool,
+                // and will be re-thrown when joined - just exit
+                return folder;
             }
         }
     }
diff --git a/src/iter/plumbing/README.md b/src/iter/plumbing/README.md
index cd94eae..dbee36b 100644
--- a/src/iter/plumbing/README.md
+++ b/src/iter/plumbing/README.md
@@ -124,7 +124,7 @@ implement `IndexedParallelIterator`.
 
 The `bridge` function will then connect the consumer, which is
 handling the `flat_map` and `for_each`, with the producer, which is
-handling the `zip` and its preecessors. It will split down until the
+handling the `zip` and its predecessors. It will split down until the
 chunks seem reasonably small, then pull items from the producer and
 feed them to the consumer.
 
diff --git a/src/iter/skip.rs b/src/iter/skip.rs
index 9983f16..2d0f947 100644
--- a/src/iter/skip.rs
+++ b/src/iter/skip.rs
@@ -79,9 +79,16 @@ where
             where
                 P: Producer<Item = T>,
             {
-                let (before_skip, after_skip) = base.split_at(self.n);
-                bridge_producer_consumer(self.n, before_skip, NoopConsumer);
-                self.callback.callback(after_skip)
+                crate::in_place_scope(|scope| {
+                    let Self { callback, n } = self;
+                    let (before_skip, after_skip) = base.split_at(n);
+
+                    // Run the skipped part separately for side effects.
+                    // We'll still get any panics propagated back by the scope.
+                    scope.spawn(move |_| bridge_producer_consumer(n, before_skip, NoopConsumer));
+
+                    callback.callback(after_skip)
+                })
             }
         }
     }
diff --git a/src/iter/step_by.rs b/src/iter/step_by.rs
index 2002f42..94b8334 100644
--- a/src/iter/step_by.rs
+++ b/src/iter/step_by.rs
@@ -1,4 +1,3 @@
-#![cfg(step_by)]
 use std::cmp::min;
 
 use super::plumbing::*;
diff --git a/src/iter/test.rs b/src/iter/test.rs
index bc5106b..94323d7 100644
--- a/src/iter/test.rs
+++ b/src/iter/test.rs
@@ -117,13 +117,10 @@ fn fold_map_reduce() {
     let r1 = (0_i32..32)
         .into_par_iter()
         .with_max_len(1)
-        .fold(
-            || vec![],
-            |mut v, e| {
-                v.push(e);
-                v
-            },
-        )
+        .fold(Vec::new, |mut v, e| {
+            v.push(e);
+            v
+        })
         .map(|v| vec![v])
         .reduce_with(|mut v_a, v_b| {
             v_a.extend(v_b);
@@ -394,7 +391,7 @@ fn check_slice_mut_indexed() {
 #[test]
 fn check_vec_indexed() {
     let a = vec![1, 2, 3];
-    is_indexed(a.clone().into_par_iter());
+    is_indexed(a.into_par_iter());
 }
 
 #[test]
@@ -1371,10 +1368,10 @@ fn check_find_is_present() {
     let counter = AtomicUsize::new(0);
     let value: Option<i32> = (0_i32..2048).into_par_iter().find_any(|&p| {
         counter.fetch_add(1, Ordering::SeqCst);
-        p >= 1024 && p < 1096
+        (1024..1096).contains(&p)
     });
     let q = value.unwrap();
-    assert!(q >= 1024 && q < 1096);
+    assert!((1024..1096).contains(&q));
     assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one
 }
 
@@ -1892,7 +1889,7 @@ fn check_either() {
 
     // try an indexed iterator
     let left: E = Either::Left(v.clone().into_par_iter());
-    assert!(left.enumerate().eq(v.clone().into_par_iter().enumerate()));
+    assert!(left.enumerate().eq(v.into_par_iter().enumerate()));
 }
 
 #[test]
@@ -2063,7 +2060,7 @@ fn check_chunks_len() {
     assert_eq!(4, (0..8).into_par_iter().chunks(2).len());
     assert_eq!(3, (0..9).into_par_iter().chunks(3).len());
     assert_eq!(3, (0..8).into_par_iter().chunks(3).len());
-    assert_eq!(1, (&[1]).par_iter().chunks(3).len());
+    assert_eq!(1, [1].par_iter().chunks(3).len());
     assert_eq!(0, (0..0).into_par_iter().chunks(3).len());
 }
 
diff --git a/src/iter/try_fold.rs b/src/iter/try_fold.rs
index c1a57a4..6d1048d 100644
--- a/src/iter/try_fold.rs
+++ b/src/iter/try_fold.rs
@@ -1,15 +1,16 @@
 use super::plumbing::*;
-use super::*;
+use super::ParallelIterator;
+use super::Try;
 
-use super::private::Try;
 use std::fmt::{self, Debug};
 use std::marker::PhantomData;
+use std::ops::ControlFlow::{self, Break, Continue};
 
 impl<U, I, ID, F> TryFold<I, U, ID, F>
 where
     I: ParallelIterator,
-    F: Fn(U::Ok, I::Item) -> U + Sync + Send,
-    ID: Fn() -> U::Ok + Sync + Send,
+    F: Fn(U::Output, I::Item) -> U + Sync + Send,
+    ID: Fn() -> U::Output + Sync + Send,
     U: Try + Send,
 {
     pub(super) fn new(base: I, identity: ID, fold_op: F) -> Self {
@@ -45,8 +46,8 @@ impl<U, I: ParallelIterator + Debug, ID, F> Debug for TryFold<I, U, ID, F> {
 impl<U, I, ID, F> ParallelIterator for TryFold<I, U, ID, F>
 where
     I: ParallelIterator,
-    F: Fn(U::Ok, I::Item) -> U + Sync + Send,
-    ID: Fn() -> U::Ok + Sync + Send,
+    F: Fn(U::Output, I::Item) -> U + Sync + Send,
+    ID: Fn() -> U::Output + Sync + Send,
     U: Try + Send,
 {
     type Item = U;
@@ -75,8 +76,8 @@ struct TryFoldConsumer<'c, U, C, ID, F> {
 impl<'r, U, T, C, ID, F> Consumer<T> for TryFoldConsumer<'r, U, C, ID, F>
 where
     C: Consumer<U>,
-    F: Fn(U::Ok, T) -> U + Sync,
-    ID: Fn() -> U::Ok + Sync,
+    F: Fn(U::Output, T) -> U + Sync,
+    ID: Fn() -> U::Output + Sync,
     U: Try + Send,
 {
     type Folder = TryFoldFolder<'r, C::Folder, U, F>;
@@ -98,7 +99,7 @@ where
     fn into_folder(self) -> Self::Folder {
         TryFoldFolder {
             base: self.base.into_folder(),
-            result: Ok((self.identity)()),
+            control: Continue((self.identity)()),
             fold_op: self.fold_op,
         }
     }
@@ -111,8 +112,8 @@ where
 impl<'r, U, T, C, ID, F> UnindexedConsumer<T> for TryFoldConsumer<'r, U, C, ID, F>
 where
     C: UnindexedConsumer<U>,
-    F: Fn(U::Ok, T) -> U + Sync,
-    ID: Fn() -> U::Ok + Sync,
+    F: Fn(U::Output, T) -> U + Sync,
+    ID: Fn() -> U::Output + Sync,
     U: Try + Send,
 {
     fn split_off_left(&self) -> Self {
@@ -130,35 +131,38 @@ where
 struct TryFoldFolder<'r, C, U: Try, F> {
     base: C,
     fold_op: &'r F,
-    result: Result<U::Ok, U::Error>,
+    control: ControlFlow<U::Residual, U::Output>,
 }
 
 impl<'r, C, U, F, T> Folder<T> for TryFoldFolder<'r, C, U, F>
 where
     C: Folder<U>,
-    F: Fn(U::Ok, T) -> U + Sync,
+    F: Fn(U::Output, T) -> U + Sync,
     U: Try,
 {
     type Result = C::Result;
 
     fn consume(mut self, item: T) -> Self {
         let fold_op = self.fold_op;
-        if let Ok(acc) = self.result {
-            self.result = fold_op(acc, item).into_result();
+        if let Continue(acc) = self.control {
+            self.control = fold_op(acc, item).branch();
         }
         self
     }
 
     fn complete(self) -> C::Result {
-        let item = match self.result {
-            Ok(ok) => U::from_ok(ok),
-            Err(error) => U::from_error(error),
+        let item = match self.control {
+            Continue(c) => U::from_output(c),
+            Break(r) => U::from_residual(r),
         };
         self.base.consume(item).complete()
     }
 
     fn full(&self) -> bool {
-        self.result.is_err() || self.base.full()
+        match self.control {
+            Break(_) => true,
+            _ => self.base.full(),
+        }
     }
 }
 
@@ -167,11 +171,11 @@ where
 impl<U, I, F> TryFoldWith<I, U, F>
 where
     I: ParallelIterator,
-    F: Fn(U::Ok, I::Item) -> U + Sync,
+    F: Fn(U::Output, I::Item) -> U + Sync,
     U: Try + Send,
-    U::Ok: Clone + Send,
+    U::Output: Clone + Send,
 {
-    pub(super) fn new(base: I, item: U::Ok, fold_op: F) -> Self {
+    pub(super) fn new(base: I, item: U::Output, fold_op: F) -> Self {
         TryFoldWith {
             base,
             item,
@@ -189,13 +193,13 @@ where
 #[derive(Clone)]
 pub struct TryFoldWith<I, U: Try, F> {
     base: I,
-    item: U::Ok,
+    item: U::Output,
     fold_op: F,
 }
 
 impl<I: ParallelIterator + Debug, U: Try, F> Debug for TryFoldWith<I, U, F>
 where
-    U::Ok: Debug,
+    U::Output: Debug,
 {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("TryFoldWith")
@@ -208,9 +212,9 @@ where
 impl<U, I, F> ParallelIterator for TryFoldWith<I, U, F>
 where
     I: ParallelIterator,
-    F: Fn(U::Ok, I::Item) -> U + Sync + Send,
+    F: Fn(U::Output, I::Item) -> U + Sync + Send,
     U: Try + Send,
-    U::Ok: Clone + Send,
+    U::Output: Clone + Send,
 {
     type Item = U;
 
@@ -229,16 +233,16 @@ where
 
 struct TryFoldWithConsumer<'c, C, U: Try, F> {
     base: C,
-    item: U::Ok,
+    item: U::Output,
     fold_op: &'c F,
 }
 
 impl<'r, U, T, C, F> Consumer<T> for TryFoldWithConsumer<'r, C, U, F>
 where
     C: Consumer<U>,
-    F: Fn(U::Ok, T) -> U + Sync,
+    F: Fn(U::Output, T) -> U + Sync,
     U: Try + Send,
-    U::Ok: Clone + Send,
+    U::Output: Clone + Send,
 {
     type Folder = TryFoldFolder<'r, C::Folder, U, F>;
     type Reducer = C::Reducer;
@@ -263,7 +267,7 @@ where
     fn into_folder(self) -> Self::Folder {
         TryFoldFolder {
             base: self.base.into_folder(),
-            result: Ok(self.item),
+            control: Continue(self.item),
             fold_op: self.fold_op,
         }
     }
@@ -276,9 +280,9 @@ where
 impl<'r, U, T, C, F> UnindexedConsumer<T> for TryFoldWithConsumer<'r, C, U, F>
 where
     C: UnindexedConsumer<U>,
-    F: Fn(U::Ok, T) -> U + Sync,
+    F: Fn(U::Output, T) -> U + Sync,
     U: Try + Send,
-    U::Ok: Clone + Send,
+    U::Output: Clone + Send,
 {
     fn split_off_left(&self) -> Self {
         TryFoldWithConsumer {
diff --git a/src/iter/try_reduce.rs b/src/iter/try_reduce.rs
index 76b3850..35a724c 100644
--- a/src/iter/try_reduce.rs
+++ b/src/iter/try_reduce.rs
@@ -1,14 +1,15 @@
 use super::plumbing::*;
 use super::ParallelIterator;
+use super::Try;
 
-use super::private::Try;
+use std::ops::ControlFlow::{self, Break, Continue};
 use std::sync::atomic::{AtomicBool, Ordering};
 
 pub(super) fn try_reduce<PI, R, ID, T>(pi: PI, identity: ID, reduce_op: R) -> T
 where
     PI: ParallelIterator<Item = T>,
-    R: Fn(T::Ok, T::Ok) -> T + Sync,
-    ID: Fn() -> T::Ok + Sync,
+    R: Fn(T::Output, T::Output) -> T + Sync,
+    ID: Fn() -> T::Output + Sync,
     T: Try + Send,
 {
     let full = AtomicBool::new(false);
@@ -36,8 +37,8 @@ impl<'r, R, ID> Clone for TryReduceConsumer<'r, R, ID> {
 
 impl<'r, R, ID, T> Consumer<T> for TryReduceConsumer<'r, R, ID>
 where
-    R: Fn(T::Ok, T::Ok) -> T + Sync,
-    ID: Fn() -> T::Ok + Sync,
+    R: Fn(T::Output, T::Output) -> T + Sync,
+    ID: Fn() -> T::Output + Sync,
     T: Try + Send,
 {
     type Folder = TryReduceFolder<'r, R, T>;
@@ -51,7 +52,7 @@ where
     fn into_folder(self) -> Self::Folder {
         TryReduceFolder {
             reduce_op: self.reduce_op,
-            result: Ok((self.identity)()),
+            control: Continue((self.identity)()),
             full: self.full,
         }
     }
@@ -63,8 +64,8 @@ where
 
 impl<'r, R, ID, T> UnindexedConsumer<T> for TryReduceConsumer<'r, R, ID>
 where
-    R: Fn(T::Ok, T::Ok) -> T + Sync,
-    ID: Fn() -> T::Ok + Sync,
+    R: Fn(T::Output, T::Output) -> T + Sync,
+    ID: Fn() -> T::Output + Sync,
     T: Try + Send,
 {
     fn split_off_left(&self) -> Self {
@@ -78,52 +79,53 @@ where
 
 impl<'r, R, ID, T> Reducer<T> for TryReduceConsumer<'r, R, ID>
 where
-    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    R: Fn(T::Output, T::Output) -> T + Sync,
     T: Try,
 {
     fn reduce(self, left: T, right: T) -> T {
-        match (left.into_result(), right.into_result()) {
-            (Ok(left), Ok(right)) => (self.reduce_op)(left, right),
-            (Err(e), _) | (_, Err(e)) => T::from_error(e),
+        match (left.branch(), right.branch()) {
+            (Continue(left), Continue(right)) => (self.reduce_op)(left, right),
+            (Break(r), _) | (_, Break(r)) => T::from_residual(r),
         }
     }
 }
 
 struct TryReduceFolder<'r, R, T: Try> {
     reduce_op: &'r R,
-    result: Result<T::Ok, T::Error>,
+    control: ControlFlow<T::Residual, T::Output>,
     full: &'r AtomicBool,
 }
 
 impl<'r, R, T> Folder<T> for TryReduceFolder<'r, R, T>
 where
-    R: Fn(T::Ok, T::Ok) -> T,
+    R: Fn(T::Output, T::Output) -> T,
     T: Try,
 {
     type Result = T;
 
     fn consume(mut self, item: T) -> Self {
         let reduce_op = self.reduce_op;
-        if let Ok(left) = self.result {
-            self.result = match item.into_result() {
-                Ok(right) => reduce_op(left, right).into_result(),
-                Err(error) => Err(error),
-            };
-        }
-        if self.result.is_err() {
-            self.full.store(true, Ordering::Relaxed)
+        self.control = match (self.control, item.branch()) {
+            (Continue(left), Continue(right)) => reduce_op(left, right).branch(),
+            (control @ Break(_), _) | (_, control @ Break(_)) => control,
+        };
+        if let Break(_) = self.control {
+            self.full.store(true, Ordering::Relaxed);
         }
         self
     }
 
     fn complete(self) -> T {
-        match self.result {
-            Ok(ok) => T::from_ok(ok),
-            Err(error) => T::from_error(error),
+        match self.control {
+            Continue(c) => T::from_output(c),
+            Break(r) => T::from_residual(r),
         }
     }
 
     fn full(&self) -> bool {
-        self.full.load(Ordering::Relaxed)
+        match self.control {
+            Break(_) => true,
+            _ => self.full.load(Ordering::Relaxed),
+        }
     }
 }
diff --git a/src/iter/try_reduce_with.rs b/src/iter/try_reduce_with.rs
index 6be3100..cd7c83e 100644
--- a/src/iter/try_reduce_with.rs
+++ b/src/iter/try_reduce_with.rs
@@ -1,13 +1,14 @@
 use super::plumbing::*;
 use super::ParallelIterator;
+use super::Try;
 
-use super::private::Try;
+use std::ops::ControlFlow::{self, Break, Continue};
 use std::sync::atomic::{AtomicBool, Ordering};
 
 pub(super) fn try_reduce_with<PI, R, T>(pi: PI, reduce_op: R) -> Option<T>
 where
     PI: ParallelIterator<Item = T>,
-    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    R: Fn(T::Output, T::Output) -> T + Sync,
     T: Try + Send,
 {
     let full = AtomicBool::new(false);
@@ -33,7 +34,7 @@ impl<'r, R> Clone for TryReduceWithConsumer<'r, R> {
 
 impl<'r, R, T> Consumer<T> for TryReduceWithConsumer<'r, R>
 where
-    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    R: Fn(T::Output, T::Output) -> T + Sync,
     T: Try + Send,
 {
     type Folder = TryReduceWithFolder<'r, R, T>;
@@ -47,7 +48,7 @@ where
     fn into_folder(self) -> Self::Folder {
         TryReduceWithFolder {
             reduce_op: self.reduce_op,
-            opt_result: None,
+            opt_control: None,
             full: self.full,
         }
     }
@@ -59,7 +60,7 @@ where
 
 impl<'r, R, T> UnindexedConsumer<T> for TryReduceWithConsumer<'r, R>
 where
-    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    R: Fn(T::Output, T::Output) -> T + Sync,
     T: Try + Send,
 {
     fn split_off_left(&self) -> Self {
@@ -73,62 +74,59 @@ where
 
 impl<'r, R, T> Reducer<Option<T>> for TryReduceWithConsumer<'r, R>
 where
-    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    R: Fn(T::Output, T::Output) -> T + Sync,
     T: Try,
 {
     fn reduce(self, left: Option<T>, right: Option<T>) -> Option<T> {
         let reduce_op = self.reduce_op;
         match (left, right) {
-            (None, x) | (x, None) => x,
-            (Some(a), Some(b)) => match (a.into_result(), b.into_result()) {
-                (Ok(a), Ok(b)) => Some(reduce_op(a, b)),
-                (Err(e), _) | (_, Err(e)) => Some(T::from_error(e)),
+            (Some(left), Some(right)) => match (left.branch(), right.branch()) {
+                (Continue(left), Continue(right)) => Some(reduce_op(left, right)),
+                (Break(r), _) | (_, Break(r)) => Some(T::from_residual(r)),
             },
+            (None, x) | (x, None) => x,
         }
     }
 }
 
 struct TryReduceWithFolder<'r, R, T: Try> {
     reduce_op: &'r R,
-    opt_result: Option<Result<T::Ok, T::Error>>,
+    opt_control: Option<ControlFlow<T::Residual, T::Output>>,
     full: &'r AtomicBool,
 }
 
 impl<'r, R, T> Folder<T> for TryReduceWithFolder<'r, R, T>
 where
-    R: Fn(T::Ok, T::Ok) -> T,
+    R: Fn(T::Output, T::Output) -> T,
     T: Try,
 {
     type Result = Option<T>;
 
-    fn consume(self, item: T) -> Self {
+    fn consume(mut self, item: T) -> Self {
         let reduce_op = self.reduce_op;
-        let result = match self.opt_result {
-            None => item.into_result(),
-            Some(Ok(a)) => match item.into_result() {
-                Ok(b) => reduce_op(a, b).into_result(),
-                Err(e) => Err(e),
-            },
-            Some(Err(e)) => Err(e),
+        let control = match (self.opt_control, item.branch()) {
+            (Some(Continue(left)), Continue(right)) => reduce_op(left, right).branch(),
+            (Some(control @ Break(_)), _) | (_, control) => control,
         };
-        if result.is_err() {
+        if let Break(_) = control {
             self.full.store(true, Ordering::Relaxed)
         }
-        TryReduceWithFolder {
-            opt_result: Some(result),
-            ..self
-        }
+        self.opt_control = Some(control);
+        self
     }
 
     fn complete(self) -> Option<T> {
-        let result = self.opt_result?;
-        Some(match result {
-            Ok(ok) => T::from_ok(ok),
-            Err(error) => T::from_error(error),
-        })
+        match self.opt_control {
+            Some(Continue(c)) => Some(T::from_output(c)),
+            Some(Break(r)) => Some(T::from_residual(r)),
+            None => None,
+        }
     }
 
     fn full(&self) -> bool {
-        self.full.load(Ordering::Relaxed)
+        match self.opt_control {
+            Some(Break(_)) => true,
+            _ => self.full.load(Ordering::Relaxed),
+        }
     }
 }
diff --git a/src/iter/update.rs b/src/iter/update.rs
index 373a4d7..c693ac8 100644
--- a/src/iter/update.rs
+++ b/src/iter/update.rs
@@ -210,7 +210,7 @@ where
     F: Fn(&mut T) + Send + Sync,
 {
     fn split_off_left(&self) -> Self {
-        UpdateConsumer::new(self.base.split_off_left(), &self.update_op)
+        UpdateConsumer::new(self.base.split_off_left(), self.update_op)
     }
 
     fn to_reducer(&self) -> Self::Reducer {
-- 
cgit v1.2.3